RadiantDB.cpp 3.63 KB
Newer Older
Dr. Carsten Kemena's avatar
Dr. Carsten Kemena committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
#include "RadiantDB.hpp"
#include <bitset> 

using namespace std;
namespace fs = boost::filesystem;
namespace BSDL = BioSeqDataLib;

/**
 * \brief Loads a binary database file into database_, replacing its previous content.
 *
 * The file is consumed as a raw dump of the in-memory types:
 *   - size_t: number of prefix entries
 *   - for every entry:
 *       - PrefixType: the prefix
 *       - size_t: number of SuffixAcc records stored for this prefix
 *       - that many SuffixAcc records, read as one contiguous block
 *
 * \param path Path to the database file.
 * \throws std::runtime_error if the file cannot be opened or ends before all
 *         announced data could be read. database_ is left empty in that case.
 */
void
RadiantDB::read(const fs::path &path)
{
	database_.clear();
	ifstream fin(path.string(), ios::in | ios::binary);
	if (!fin.is_open())
		throw std::runtime_error("Error! Could not open database file: " + path.string() + "!");

	const std::string truncatedMsg = "Error! Unexpected end of database file: " + path.string() + "!";

	size_t dbLength = 0;
	if (!fin.read(reinterpret_cast<char*>(&dbLength), sizeof(size_t)))
		throw std::runtime_error(truncatedMsg);

	database_.reserve(dbLength);
	for (size_t i=0; i<dbLength; ++i)
	{
		// read the prefix and the number of records stored for it
		PrefixType prefix;
		size_t numSuffixes = 0;
		if (!fin.read(reinterpret_cast<char*>(&prefix), sizeof(PrefixType)) ||
			!fin.read(reinterpret_cast<char*>(&numSuffixes), sizeof(size_t)))
		{
			// do not keep a half-loaded database around
			database_.clear();
			throw std::runtime_error(truncatedMsg);
		}
		auto it = database_.emplace(std::move(prefix), vector<SuffixAcc>()).first;

		// read suffixes and accessions from file in a single block
		it->second.resize(numSuffixes);
		if (numSuffixes == 0)
			continue; // nothing to read; also avoids indexing into an empty vector

		const auto numBytes = static_cast<std::streamsize>(numSuffixes*sizeof(SuffixAcc));
		if (!fin.read(reinterpret_cast<char*>(it->second.data()), numBytes))
		{
			database_.clear();
			throw std::runtime_error(truncatedMsg);
		}
	}
	// fin is closed by its destructor
}

/*
void
RadiantDB::build(const fs::path &inPath, bool forward, const fs::path &outPath)
{

	std::ofstream fout(outPath, std::ios::out | std::ios::binary);
	size_t val= database.size();
	fout.write((char*)&val, sizeof(size_t));
	for (auto it=database.begin(); it!=database.end(); ++it)
	{
		size_t val= it->second.size();
		fout.write((char*)&it->first, sizeof(PrefixType));
		fout.write((char*)&val, sizeof(size_t));
		for (auto it2=it->second.begin(); it2!=it->second.end(); ++it2)
		{
			fout.write((char*)&(it2->first), sizeof(CodedSuffix));
			fout.write((char*)&(it2->second), sizeof(unsigned short));
		}
	}
 	fout.close();
}*/

/**
 * \brief Counts the 3-bit groups in which two values differ.
 *
 * Only the ten groups covering bits 0..29 are inspected; a group counts once,
 * no matter how many of its three bits differ.
 * Presumably every group encodes one character of the suffix - confirm against
 * the encoding used when the database is built.
 *
 * \param val1 First value.
 * \param val2 Second value.
 * \return Number of differing groups (0..10).
 */
short
RadiantDB::char_diff_(SuffixType val1, SuffixType val2)
{
	const short numGroups = 10;
	const short bitsPerGroup = 3;
	const SuffixType differing = val1^val2;

	short mismatches = 0;
	for (short group=0; group<numGroups; ++group)
	{
		// mask selecting all bits of the current group at once
		const int groupMask = ((1<<bitsPerGroup)-1) << (group*bitsPerGroup);
		if (differing & groupMask)
			++mismatches;
	}
	return mismatches;
}

/**
 * \brief Looks up the accession stored for a prefix/suffix pair.
 *
 * The records of the prefix are searched with lower_bound, so they are assumed
 * to be sorted consistently with the comparison used there.
 * Resolution order:
 *   1. The first record not ordered before \p suffix has the same suffix value:
 *      its accession is returned.
 *   2. Otherwise, that record and its predecessor carry the same accession:
 *      that accession is returned.
 *   3. Otherwise, the neighbour with the smaller char_diff_ to the query is
 *      returned if it differs in at most two groups; on equal distance the
 *      predecessor is chosen.
 *
 * \param prefix   Prefix to look up.
 * \param suffix   Suffix to look up.
 * \param position Set in case 1 (from the matching record) and in case 2 (from
 *                 the predecessor). It is NOT modified in case 3 or when 0 is returned.
 *                 NOTE(review): leaving it untouched in case 3 may be unintended - confirm with callers.
 * \return The accession found, or 0 if the prefix is unknown, no record is
 *         ordered at/after the query, the first such record has no predecessor,
 *         or neither neighbour is close enough.
 */
short
RadiantDB::getDomID(const PrefixType &prefix, const CodedSuffix &suffix, bool &position) const
{
	const auto entry = database_.find(prefix);
	if (entry == database_.end())
		return 0;

	const auto &records = entry->second;
	auto upper = lower_bound(records.begin(), records.end(), suffix);
	if (upper == records.end())
		return 0;

	// case 1: exact suffix match
	if (upper->suffix.suffix == suffix.suffix)
	{
		position = upper->suffix.position;
		return upper->accession;
	}

	// without a predecessor there is nothing to compare against
	if (upper == records.begin())
		return 0;

	const auto upperAccession = upper->accession;
	const auto upperSuffix = upper->suffix.suffix;
	auto lower = upper;
	--lower;

	// case 2: query lies between two records of the same accession
	if (lower->accession == upperAccession)
	{
		position = lower->suffix.position;
		return upperAccession;
	}

	// case 3: accept the closer neighbour if it is similar enough
	const short maxMismatches = 2;
	const auto upperDiff = char_diff_(upperSuffix, suffix.suffix);
	const auto lowerDiff = char_diff_(lower->suffix.suffix, suffix.suffix);
	if ((upperDiff < lowerDiff) && (upperDiff <= maxMismatches))
		return upperAccession;
	if (lowerDiff <= maxMismatches)
		return lower->accession;

	return 0;
}
/*
void
RadiantDB::write_(const fs::path &path, const std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > &database) const
{
	std::ofstream fout(path.string(), std::ios::out | std::ios::binary);
	std::ofstream fout_index(path.string()+".index", std::ios::out | std::ios::binary);
	size_t val= database.size();
	fout.write((char*)&val, sizeof(size_t));
	for (auto it=database.begin(); it!=database.end(); ++it)
	{
		size_t val= it->second.size();
		fout_index.write((char*)&it->first, sizeof(PrefixType));
		auto x = fout.tellp();
		fout_index.write((char*)&x, sizeof(std::streampos));
		fout.write((char*)&val, sizeof(size_t));

		for (auto it2=it->second.begin(); it2!=it->second.end(); ++it2)
		{
			fout.write((char*)&(it2->first), sizeof(CodedSuffix));
			fout.write((char*)&(it2->second), sizeof(unsigned short));
		}
	}
 	fout.close();
	fout_index.close();
}*/