/* * Copyright ©2021 Chris Thachuk. All rights reserved. Permission is * hereby granted to students registered for University of Washington * CSE 333 for use solely during Autumn Quarter 2021 for purposes of * the course. No other use, copying, distribution, or modification * is permitted without prior written consent. Copyrights for * third-party components of this work must be honored. Instructors * interested in reusing these course materials should contact the * author. */ #ifndef DNAUTIL_H_ #define DNAUTIL_H_ #include #include #include namespace dnautil { // The four DNA bases represented by unsigned chars (1 byte). enum Base: unsigned char { A = 0, C = 1, G = 2, T = 3, }; // Our short, DNA oligos (synthetic strands) are always length 64. const int kOligoLength = 64; // The type representing a short DNA oligo. // In this example, we only care about the sequence of bases, // but this could be enhanced as a class that includes // relevant methods. struct Oligo { Base bases[kOligoLength]; }; // To keep things simple, a 'pool' of short oligos is stored // in a vector. using OligoPool = std::vector; // Returns `size` number of randomly generated oligos forming a pool. OligoPool RandomOligoPool(int size); // A Functor to calculate the Hamming distance of two oligos. class HammingDistance { public: int operator () (const Oligo &o1, const Oligo &o2) const { int distance = 0; for(int i = 0; i < kOligoLength; ++i) { if( o1.bases[i] != o2.bases[i] ) { distance += 1; } } return distance; } }; // A Functor to calculate the Hamming distance of two oligos using AVX512 instructions. class HammingDistanceAVX512 { public: int operator () (const Oligo &o1, const Oligo &o2) const { const __m512i v1 = _mm512_loadu_si512(reinterpret_cast(o1.bases)); const __m512i v2 = _mm512_loadu_si512(reinterpret_cast(o2.bases)); __mmask64 diff_mask = _mm512_cmp_epi8_mask(v1, v2, _MM_CMPINT_NE); int distance = _mm_popcnt_u64(diff_mask); return distance; } }; // A Functor to calculate the Hamming distance of two oligos using AVX2 instructions. class HammingDistanceAVX2 { public: int operator () (const Oligo &o1, const Oligo &o2) const { const __m256i l1 = _mm256_loadu_si256((const __m256i*)o1.bases); const __m256i l2 = _mm256_loadu_si256((const __m256i*)o2.bases); const __m256i h1 = _mm256_loadu_si256((const __m256i*)&o1.bases[32]); const __m256i h2 = _mm256_loadu_si256((const __m256i*)&o2.bases[32]); auto eq_lo = _mm256_cmpeq_epi8(l1, l2); int mask_lo = _mm256_movemask_epi8(eq_lo); int similarity_lo = _mm_popcnt_u32(mask_lo); auto eq_hi = _mm256_cmpeq_epi8(h1, h2); int mask_hi = _mm256_movemask_epi8(eq_hi); int similarity_hi = _mm_popcnt_u32(mask_hi); return kOligoLength - similarity_lo - similarity_hi; } }; template int MinPairwiseDistance(const OligoPool &pool) { Distance distance; const int kPoolSize = pool.size(); int min_distance = kOligoLength; for(int i = 0; i < kPoolSize - 1; ++i) { auto o1 = pool[i]; for(int j = i + 1; j < kPoolSize; ++j) { auto o2 = pool[j]; int d= distance(o1, o2); min_distance = std::min(d, min_distance); } } return min_distance; } } // namespace dnautil #endif // DNAUTIL_H_