Commit 68f30a6a authored by Dr. Carsten Kemena's avatar Dr. Carsten Kemena
Browse files

writing tests

parent 27b95930
Pipeline #43876 failed with stage
in 59 seconds
......@@ -21,7 +21,25 @@
namespace BSDL = BioSeqDataLib;
typedef uint64_t SuffixType;
/**
 * Encoded suffix bundled with a one-bit position flag.
 *
 * Both members are bit-fields of a uint64_t: `suffix` is 60 bits wide and
 * `position` is a single bit. Values that do not fit into a field are
 * reduced to the field width when stored.
 */
struct SuffixType {
    uint64_t suffix : 60;
    uint64_t position : 1;

    /// Creates an all-zero value (suffix 0, position flag 0).
    SuffixType() : SuffixType(0, 0)
    {}

    /// Stores \p s in the suffix field and \p p in the position flag.
    SuffixType(uint64_t s, uint64_t p) : suffix(s), position(p)
    {}
} __attribute__((packed));
/**
 * Orders two SuffixType values by their `suffix` field only; the `position`
 * flag takes no part in the comparison.
 */
bool operator<(const SuffixType &l, const SuffixType &r) {
    const uint64_t lhs = l.suffix;
    const uint64_t rhs = r.suffix;
    return lhs < rhs;
}
//typedef uint64_t SuffixType;
typedef uint32_t PrefixType;
struct SuffixAcc
......@@ -32,7 +50,7 @@ struct SuffixAcc
SuffixAcc(const SuffixType &s, unsigned short a) : suffix(s), accession(a)
{}
SuffixAcc() : suffix(0), accession(0)
SuffixAcc() : suffix(0, 0), accession(0)
{}
} __attribute__((packed));
......@@ -42,9 +60,10 @@ bool operator<(const SuffixAcc &l, const SuffixAcc &r) {
}
bool operator<(const SuffixAcc &l, SuffixType r) {
return l.suffix < r;
return l.suffix.suffix < r.suffix;
}
/**
* Conversion of an amino acid into a binary representation. The binary representation has been chosen
* to reflect the evolutionary alphabet described in UproC:
......@@ -119,13 +138,14 @@ getCompletePrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &
}
// get first suffix
suffix = 0;
suffix.suffix=0;
for (size_t j=i+6; j<i+18; ++j)
{
suffix <<= 5;
suffix.suffix <<= 5;
auto it = alphabet2bit.find(seq[j]);
if (it != itEnd)
suffix |= it->second;
suffix.suffix |= it->second;
else
return false;
}
......@@ -152,7 +172,7 @@ getNextPrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &suff
if (itSuffix == alphabet2bit.end())
return false;
static const PrefixType clearPrefix = ~(PrefixType(3) << 30);
static const SuffixType clearSuffix = ~(SuffixType(15) << 60);
//static const SuffixType clearSuffix = ~(SuffixType(15) << 60);
// update prefix
auto itPrefix = alphabet2bit.find(seq[i+5]);
......@@ -161,9 +181,9 @@ getNextPrefixSuffix(const S &seq, size_t i, PrefixType &prefix, SuffixType &suff
prefix |= itPrefix->second;
// update suffix
suffix <<= 5;
suffix &= clearSuffix;
suffix |= itSuffix->second;
suffix.suffix <<= 5;
//suffix.suffix &= clearSuffix;
suffix.suffix |= itSuffix->second;
return true;
}
......
......@@ -53,20 +53,23 @@ template<typename D>
void
merge2dbs(D &database1, D &database2)
{
auto itEnd = database1.end();
typename D::mapped_type::iterator itSuffix;
for (auto it = database1.begin(); it != itEnd; ++it)
auto itPrefixEnd1 = database1.end();
typename D::mapped_type::iterator itSuffix2;
for (auto itPrefix1 = database1.begin(); itPrefix1 != itPrefixEnd1; ++itPrefix1)
{
auto itPrefix = database2.find(it->first);
if (itPrefix == database2.end())
itPrefix = database2.emplace(it->first, typename D::mapped_type()).first;
auto it2End = it->second.end();
for (auto it2 = it->second.begin(); it2 != it2End; ++it2)
// check if prefix exists in db
auto itPrefix2 = database2.find(itPrefix1->first);
if (itPrefix2 == database2.end())
itPrefix2 = database2.emplace(itPrefix1->first, typename D::mapped_type()).first;
// copy suffixes over to database
auto itSuffix1End = itPrefix1->second.end();
for (auto itSuffix1 = itPrefix1->second.begin(); itSuffix1 != itSuffix1End; ++itSuffix1)
{
if ((itSuffix=itPrefix->second.find(it2->second)) == itPrefix->second.end())
itPrefix->second.emplace(std::move(it2->first), it2->second);
if ((itSuffix2=itPrefix2->second.find(itSuffix1->first)) == itPrefix2->second.end())
itPrefix2->second.emplace(std::move(itSuffix1->first), itSuffix1->second);
else
itSuffix->second = 0;
itSuffix2->second = 0;
}
}
}
......@@ -138,9 +141,9 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
if (reverse)
std::reverse(seq.begin(), seq.end());
bool last = false;
int k = 0;
int limit = seq.size() - windowSize;
while (k <= limit)
size_t k = 0;
size_t limit = (seq.size() >= windowSize) ? seq.size() - windowSize + 1: 0 ;
while (k < limit)
{
if (last)
{
......@@ -158,6 +161,7 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
if (last)
{
// add to list if possible
suffix.position = (k < seq.size()/2) ? 0 : 1;
auto it = dbTmp.find(prefix);
if (it != dbTmp.end())
{
......
......@@ -155,7 +155,7 @@ assignWords(const fs::path &inFile, BSDL::SequenceSet<BSDL::Sequence<> > &seqSet
auto it2 = lower_bound(it->second.begin(), it->second.end(), suffix);
if (it2 != it->second.end())
{
if (it2->suffix == suffix)
if (it2->suffix.suffix == suffix.suffix)
{
auto tmp = it2->accession;
assignment.emplace(l-(multiplyer*j), tmp);
......@@ -228,7 +228,7 @@ main(int argc, char const *argv[])
BSDL::SequenceSet<BSDL::Sequence<> > newSeqSet;
for (auto &seq : seqSet)
{
auto pair = BSDL::longestOrf(seq, {"ATG"}, {"TAA", "TAG", "TGA"}, 1);
auto pair = BSDL::longestOrf(seq, {"ATG"}, {"TAA", "TAG", "TGA"}, 0);
if (pair.first != pair.second)
{
if (pair.second < 0)
......
......@@ -20,26 +20,26 @@ BOOST_AUTO_TEST_CASE(test)
BioSeqDataLib::Sequence<> seq(name, seqq, "", "");
PrefixType prefix = 0;
SuffixType suffix = 0;
SuffixType suffix(0,0);
//auto a =
auto success = getCompletePrefixSuffix(seq, 0, prefix, suffix);
BOOST_CHECK_EQUAL(success, true);
std::bitset<32> prefix_target(std::string("000000000100010000110010000101"));
std::bitset<64> suffix_target(std::string("001100011101000010010101001011011000110101110011111000010001"));
std::bitset<60> suffix_target(std::string("001100011101000010010101001011011000110101110011111000010001"));
BOOST_CHECK_EQUAL(prefix, prefix_target.to_ulong());
BOOST_CHECK_EQUAL(suffix, suffix_target.to_ulong());
BOOST_CHECK_EQUAL(suffix.suffix, suffix_target.to_ulong());
success = getNextPrefixSuffix(seq, 1, prefix, suffix);
BOOST_CHECK_EQUAL(success, true);
std::bitset<32> prefix_target2(std::string("000010001000011001000010100110"));
std::bitset<64> suffix_target2(std::string("001110100001001010100101101100011010111001111100001000110010"));
std::cout << std::bitset< 64 >( suffix ).to_string() << "\n";
std::bitset<60> suffix_target2(std::string("001110100001001010100101101100011010111001111100001000110010"));
//std::cout << std::bitset< 60 >( suffix.suffix ).to_string() << "\n";
BOOST_CHECK_EQUAL(prefix, prefix_target2.to_ulong());
BOOST_CHECK_EQUAL(suffix, suffix_target2.to_ulong());
BOOST_CHECK_EQUAL(suffix.suffix, suffix_target2.to_ulong());
success = getNextPrefixSuffix(seq, 2, prefix, suffix);
BOOST_CHECK_EQUAL(success, true);
......@@ -78,4 +78,4 @@ BOOST_AUTO_TEST_SUITE_END()
#endif /* DOMAINTEST_HPP_ */
#endif /* ENCODE_TEST_HPP_ */
......@@ -6,3 +6,4 @@
#include "encode_Test.hpp"
#include "makeDB_Test.hpp"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment