Commit a7264b31 authored by Dr. Carsten Kemena
Browse files

added real clan/type/name information

parent cf6fdf16
Pipeline #43892 passed with stage
in 1 minute and 7 seconds
......@@ -90,6 +90,7 @@ target_link_libraries(${radiantDB_exe}
SET(radiant_src ./src/radiant.cpp ${BSDL_src} ${BSDL_PATH}/external_interfaces/domainProgs.cpp ${BSDL_PATH}utility/stringHelpers.cpp
${BSDL_PATH}domain/Domain.cpp ${BSDL_PATH}domain/DomainExt.cpp ${BSDL_PATH}domain/PfamDomain.cpp ${BSDL_PATH}domain/DomainArrangement.cpp
${BSDL_PATH}utility/utility.cpp
)
SET(radiant_exe radiant)
ADD_EXECUTABLE(${radiant_exe} ${radiant_src})
......
BioSeqDataLib @ e1c709fd
Subproject commit f3129db5400ab7ca3e70e2c4bb8f4c9b0b8b89ea
Subproject commit e1c709fd376ce96f32fd1dc806c363a5ddb3adae
......@@ -23,7 +23,7 @@ write2file(const std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned s
fout.write((char*)&val, sizeof(size_t));
for (auto it2=it->second.begin(); it2!=it->second.end(); ++it2)
{
fout.write((char*)&(it2->first), sizeof(SuffixType));
fout.write((char*)&(it2->first), sizeof(CodedSuffix));
fout.write((char*)&(it2->second), sizeof(unsigned short));
}
}
......
......@@ -157,7 +157,7 @@ void splitSequence(S &seq, D &db, unsigned int windowSize, bool reverse)
if (reverse)
suffix.position = (k < seq.size()/2) ? 1 : 0;
else
suffix.position = (k < seq.size()/2) ? 0 : 1;
suffix.position = (k <= seq.size()/2) ? 0 : 1;
auto it = db.find(prefix);
if (it != db.end())
{
......@@ -171,6 +171,7 @@ void splitSequence(S &seq, D &db, unsigned int windowSize, bool reverse)
}
}
/**
* \brief Turns the Pfam-A.fasta file into a database
* @param infile The fasta file
......
......@@ -18,6 +18,7 @@
#include "../libs/BioSeqDataLib/src/sequence/SeqFunctions.hpp"
#include "../libs/BioSeqDataLib/src/sequence/SequenceSet.hpp"
#include "../libs/BioSeqDataLib/src/DomainModule.hpp"
#include "../libs/BioSeqDataLib/src/utility/utility.hpp"
#include "common.hpp"
......@@ -272,11 +273,23 @@ main(int argc, char const *argv[])
}
std::map<std::string, BioSeqDataLib::DomainInfo> infoSet;
fs::path home = BioSeqDataLib::getEnv("DOMAINWORLD_DATA");
if (home.empty())
{
home = BioSeqDataLib::getEnv("HOME");
home /= ".domainWorld";
}
home /= "external/pfam_info_31.txt";
readDomainInfo(home, infoSet);
AlgorithmPack::Output out(outFile);
out << "# pfam_scan.pl, run at Fri Apr 7 13:47:34 2017\n";
for (size_t i = 0; i< seqSet.size(); ++i)
{
auto &assignment = assignments[i];
//cout << seqSet[i].name() << endl;
// if (seqSet[i].name() == "ENSP00000362111.4")
/*{
......@@ -288,8 +301,11 @@ main(int argc, char const *argv[])
if (da.size() == 0)
da = words2arrangement(assignment, 5, 7);
for (auto domain : da)
out << seqSet[i].name() << " " << domain.start() << " " << domain.end() << " " << domain.start() << " " << domain.end() << " " << domain.accession() << " XXXXX Family 2 230 231 180.3 4.4e-53 1 CL0347\n";
{
auto domInfo = infoSet.find(domain.accession());
out << seqSet[i].name() << " " << domain.start() << " " << domain.end() << " " << domain.start() << " " << domain.end() \
<< " " << domain.accession() << " " << domInfo->second.name << " " << domInfo->second.type << " 2 230 231 180.3 4.4e-53 1 " << domInfo->second.clan << "\n";
}
}
return 0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment