Commit cf6fdf16 authored by Dr. Carsten Kemena
Browse files

changing suffix from bitfield to uint64 and bool

parent 34205e6d
Pipeline #43888 passed with stage
in 1 minute and 13 seconds
BioSeqDataLib @ f3129db5
Subproject commit a87a65922f43e84e9cea2d39616ff030aa1dd7bc
Subproject commit f3129db5400ab7ca3e70e2c4bb8f4c9b0b8b89ea
......@@ -21,33 +21,35 @@
namespace BSDL = BioSeqDataLib;
struct SuffixType {
uint64_t suffix : 60;
uint64_t position : 1;
typedef uint64_t SuffixType;
typedef uint32_t PrefixType;
const short suffixLength = 12;
const short alphabetBitSize = 5;
constexpr SuffixType suffixShift = suffixLength * alphabetBitSize;
SuffixType (): suffix(0), position(0)
struct CodedSuffix {
SuffixType suffix;
bool position;
CodedSuffix (): suffix(0), position(false)
{}
SuffixType (uint64_t s, uint64_t p): suffix(s), position(p)
CodedSuffix (SuffixType s, bool p): suffix(s), position(p)
{}
} __attribute__((packed));
bool operator<(const SuffixType &l, const SuffixType &r) {
return l.suffix < r.suffix;
}
//typedef uint64_t SuffixType;
typedef uint32_t PrefixType;
struct SuffixAcc
{
SuffixType suffix;
CodedSuffix suffix;
unsigned short accession;
SuffixAcc(const SuffixType &s, unsigned short a) : suffix(s), accession(a)
SuffixAcc(const CodedSuffix &s, unsigned short a) : suffix(s), accession(a)
{}
SuffixAcc() : suffix(0, 0), accession(0)
......@@ -55,11 +57,14 @@ struct SuffixAcc
} __attribute__((packed));
/*bool operator<(const SuffixAcc &l, const SuffixAcc &r) {
return l.accession < r.accession;
}*/
/**
 * \brief Orders two coded suffixes by their encoded suffix value.
 *
 * Only the suffix member takes part in the comparison; the position flag is
 * ignored, so two objects that differ only in position compare as equivalent
 * (e.g. when used as keys of a std::map).
 * Declared inline so that this definition can be included from several
 * translation units without violating the one-definition rule.
 * \param l The left-hand operand.
 * \param r The right-hand operand.
 * \return true if the suffix of l is smaller than the suffix of r.
 */
inline bool operator<(const CodedSuffix &l, const CodedSuffix &r)
{
	return l.suffix < r.suffix;
}
bool operator<(const SuffixAcc &l, SuffixType r) {
bool operator<(const SuffixAcc &l, CodedSuffix r) {
return l.suffix.suffix < r.suffix;
}
......@@ -122,7 +127,7 @@ static const std::unordered_map<char, PrefixType> alphabet2bit8 =
*/
template<typename S>
bool
getCompletePrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &suffix)
getCompletePrefixSuffix(const S &seq, size_t i, PrefixType &prefix, CodedSuffix &suffix)
{
// get first prefix
prefix = 0;
......@@ -165,14 +170,14 @@ getCompletePrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &
*/
template<typename S>
bool
getNextPrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &suffix)
getNextPrefixSuffix(const S &seq, size_t i, PrefixType &prefix, CodedSuffix &suffix)
{
// check if new character is ok
auto itSuffix = alphabet2bit.find(seq[i+17]);
if (itSuffix == alphabet2bit.end())
return false;
static const PrefixType clearPrefix = ~(PrefixType(3) << 30);
//static const SuffixType clearSuffix = ~(SuffixType(15) << 60);
static const SuffixType clearSuffix = ~(SuffixType(15) << suffixShift);
// update prefix
auto itPrefix = alphabet2bit.find(seq[i+5]);
......@@ -182,7 +187,7 @@ getNextPrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &suff
// update suffix
suffix.suffix <<= 5;
//suffix.suffix &= clearSuffix;
suffix.suffix &= clearSuffix;
suffix.suffix |= itSuffix->second;
return true;
}
......
......@@ -11,7 +11,7 @@ namespace fs = boost::filesystem;
* @param outFile The file to write the database into
*/
void
write2file(const std::unordered_map<PrefixType, std::map<SuffixType, unsigned short > > &database, const std::string &outFile)
write2file(const std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > &database, const std::string &outFile)
{
std::ofstream fout(outFile, std::ios::out | std::ios::binary);
size_t val= database.size();
......@@ -70,7 +70,7 @@ main(int argc, char const *argv[])
omp_set_num_threads(nThreads);
std::unordered_map<PrefixType, std::map<SuffixType, unsigned short > > database;
std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > database;
turnFile2db(inFile, database, false);
write2file(database, outFile.string() + "_fwd.db");
......
......@@ -28,6 +28,7 @@ namespace BSDL = BioSeqDataLib;
namespace ct = boost::container;
/**
* \brief Turns a word arrangement into a domain annotation
* \param wordList The list of accession assignments.
......@@ -122,7 +123,7 @@ assignWords(const fs::path &inFile, BSDL::SequenceSet<BSDL::Sequence<> > &seqSet
assignments.resize(nSeqs);
const size_t wordSize = 18;
PrefixType prefix;
SuffixType suffix;
CodedSuffix suffix;
size_t l = 0;
int multiplyer = reverse ? 1 : -1;
......
......@@ -20,7 +20,7 @@ BOOST_AUTO_TEST_CASE(test)
BioSeqDataLib::Sequence<> seq(name, seqq, "", "");
PrefixType prefix = 0;
SuffixType suffix(0,0);
CodedSuffix suffix(0,0);
//auto a =
auto success = getCompletePrefixSuffix(seq, 0, prefix, suffix);
......
......@@ -15,13 +15,13 @@ BOOST_AUTO_TEST_SUITE(MakeDB_Test)
BOOST_AUTO_TEST_CASE(mergeDB)
{
std::unordered_map<PrefixType, std::map<SuffixType, unsigned short > > db1, db2;
db1.emplace(4, std::map<SuffixType, unsigned short >());
std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > db1, db2;
db1.emplace(4, std::map<CodedSuffix, unsigned short >());
db1[4].emplace(std::piecewise_construct, std::forward_as_tuple(5,1), std::forward_as_tuple(8));
db1[4].emplace(std::piecewise_construct, std::forward_as_tuple(8,1), std::forward_as_tuple(8));
db1[9].emplace(std::piecewise_construct, std::forward_as_tuple(99,0), std::forward_as_tuple(7));
db2.emplace(4, std::map<SuffixType, unsigned short >());
db2.emplace(4, std::map<CodedSuffix, unsigned short >());
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(4,1), std::forward_as_tuple(9));
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(7,0), std::forward_as_tuple(9));
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(8,1), std::forward_as_tuple(9));
......@@ -59,12 +59,12 @@ BOOST_AUTO_TEST_CASE(mergeDB)
BOOST_AUTO_TEST_CASE(mergeSuffixe)
{
std::unordered_map<PrefixType, std::map<SuffixType, unsigned short > > db1, db2;
db1.emplace(4, std::map<SuffixType, unsigned short >());
std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > db1, db2;
db1.emplace(4, std::map<CodedSuffix, unsigned short >());
db1[4].emplace(std::piecewise_construct, std::forward_as_tuple(5,1), std::forward_as_tuple(8));
db1[4].emplace(std::piecewise_construct, std::forward_as_tuple(8,1), std::forward_as_tuple(8));
db2.emplace(4, std::map<SuffixType, unsigned short >());
db2.emplace(4, std::map<CodedSuffix, unsigned short >());
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(4,1), std::forward_as_tuple(9));
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(7,0), std::forward_as_tuple(9));
db2[4].emplace(std::piecewise_construct, std::forward_as_tuple(8,1), std::forward_as_tuple(9));
......@@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(mergeSuffixe)
BOOST_AUTO_TEST_CASE(cleanDB)
{
std::map<SuffixType, unsigned short > db;
std::map<CodedSuffix, unsigned short > db;
db.emplace(std::piecewise_construct, std::forward_as_tuple(2,1), std::forward_as_tuple(4));
db.emplace(std::piecewise_construct, std::forward_as_tuple(5,1), std::forward_as_tuple(8));
db.emplace(std::piecewise_construct, std::forward_as_tuple(8,1), std::forward_as_tuple(8));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment