Commit 34205e6d authored by Carsten Kemena
Browse files

code cleaning

parent 6b23b70c
Pipeline #43873 passed with stage
in 1 minute and 13 seconds
BioSeqDataLib @ a87a6592
Subproject commit f3129db5400ab7ca3e70e2c4bb8f4c9b0b8b89ea
Subproject commit a87a65922f43e84e9cea2d39616ff030aa1dd7bc
......@@ -126,7 +126,50 @@ cleanSuffixe(D &db)
db.clear();
}
/**
 * \brief Splits a sequence into prefix/suffix words and stores them in a database.
 *
 * If \p reverse is set, the sequence is reversed in place first. The function
 * then walks over every start index at which a window of \p windowSize still
 * fits into the sequence. When the previous step produced no word, the word is
 * built with getCompletePrefixSuffix; otherwise getNextPrefixSuffix is used.
 * When getNextPrefixSuffix fails, the index jumps ahead by \p windowSize.
 *
 * For each word produced, the position flag of the suffix is set depending on
 * whether the (already advanced) index lies in the first half of the sequence;
 * the flag is flipped for reversed sequences. The pair (suffix, numeric value
 * of the sequence name) is stored below the prefix unless that suffix is
 * already present for the prefix.
 *
 * \param seq The sequence to split; modified in place when \p reverse is true.
 * \param db The database (prefix -> suffix -> id) receiving the words.
 * \param windowSize The window length used to compute the number of start indices.
 * \param reverse Whether to reverse the sequence before splitting.
 */
template<typename S, typename D>
void splitSequence(S &seq, D &db, unsigned int windowSize, bool reverse)
{
    typename D::key_type prefix;
    typename D::mapped_type::key_type suffix;

    if (reverse)
        std::reverse(seq.begin(), seq.end());

    // number of start indices at which a full window still fits
    size_t nStarts = 0;
    if (seq.size() >= windowSize)
        nStarts = seq.size() - windowSize + 1;

    bool haveWord = false;
    size_t pos = 0;
    while (pos < nStarts)
    {
        if (!haveWord)
        {
            // no word from the previous step: build one from scratch
            haveWord = getCompletePrefixSuffix(seq, pos, prefix, suffix);
            ++pos;
        }
        else if (getNextPrefixSuffix(seq, pos, prefix, suffix))
        {
            ++pos;
        }
        else
        {
            // the follow-up word could not be produced: skip a whole window
            haveWord = false;
            pos += windowSize;
        }

        if (!haveWord)
            continue;

        // first half -> 0 and second half -> 1; swapped for reversed sequences
        const bool inFirstHalf = pos < seq.size()/2;
        suffix.position = (inFirstHalf == reverse) ? 1 : 0;

        // add to list if possible: keep an already stored suffix untouched
        auto &bucket = db[prefix];
        if (bucket.find(suffix) == bucket.end())
            bucket.emplace(suffix, stoi(seq.name()));
    }
}
/**
* \brief Turns the Pfam-A.fasta file into a database
......@@ -176,8 +219,6 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
#pragma omp single
threadDBs.resize(omp_get_num_threads());
std::string pre, suf;
typename D::key_type prefix;
typename D::mapped_type::key_type suffix;
D dbTmp;
D &threadDB = threadDBs[omp_get_thread_num()];
......@@ -186,49 +227,10 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
{
for (size_t j=families[i].first; j<families[i].second; ++j)
{
// split sequence into suffixes and store in database
// split sequence into words and store in database
auto &seq = seqSet[j];
if (seq.size() >= windowSize)
{
if (reverse)
std::reverse(seq.begin(), seq.end());
bool last = false;
size_t k = 0;
size_t limit = (seq.size() >= windowSize) ? seq.size() - windowSize + 1: 0 ;
while (k < limit)
{
if (last)
{
last = getNextPrefixSuffix(seq, k, prefix, suffix);
if (last)
++k;
else
k += windowSize;
}
else
{
last = getCompletePrefixSuffix(seq, k, prefix, suffix);
++k;
}
if (last)
{
// add to list if possible
if (reverse)
suffix.position = (k < seq.size()/2) ? 1 : 0;
else
suffix.position = (k < seq.size()/2) ? 0 : 1;
auto it = dbTmp.find(prefix);
if (it != dbTmp.end())
{
auto it2 = it->second.find(suffix);
if (it2 == it->second.end())
it->second.emplace(suffix, stoi(seq.name()));
}
else
dbTmp[prefix].emplace(suffix, stoi(seq.name()));
}
}
}
splitSequence(seq, dbTmp, windowSize, reverse);
}
merge2dbs(dbTmp, threadDB);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment