Skip to content
Snippets Groups Projects
Commit ea2640bd authored by Dr. Elias Dohmen
Browse files

Merge branch 'ed_develop' into 'master'

DOGMA v3.8

See merge request !4
parents b66fa5a6 c715ce04
No related branches found
Tags v3.8
1 merge request: !4 DOGMA v3.8
DOGMA 3.8
=========
- update to Pfam Database version 36 (default for DOGMA from this version on)
DOGMA 3.7
=========
- update to Pfam Database version 35 (default for DOGMA from this version on)
- runs with new RADIANT version 1.0
- updated error handling for initial RADIANT annotation of fasta files if RADIANT is not in the PATH variable
......
......@@ -16,7 +16,7 @@ Current dependencies are (but you will need just one of them):
- RADIANT (https://domainworld.uni-muenster.de/programs/radiant/)
OR
- pfam_scan.pl (ftp://ftp.ebi.ac.uk/pub/databases/Pfam/Tools/)
- pfam_scan.pl (http://ftp.ebi.ac.uk/pub/databases/Pfam/Tools/)
Furthermore you will need a core set to run DOGMA.
......
UserManual.pdf 100644 → 100755
No preview for this file type
#!/usr/bin/env python
# DOGMA version 3.7
# DOGMA version 3.8
# DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains.
# To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved
# domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species.
# Copyright (C) 2015-2023 Elias Dohmen
# <e.dohmen@wwu.de> based on code by Lukas Kremer.
# <e.dohmen@uni-muenster.de> based on code by Lukas Kremer.
# DOGMA is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
......@@ -48,7 +48,7 @@ conversion_dictionary = None
def main():
dogma_version = '3.7'
dogma_version = '3.8'
try:
# top level argument parsing
......@@ -94,8 +94,8 @@ def main():
parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None,
help="Summary will be saved in a file with the given name (and path), "
"instead of printed in the console.")
parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="35",
help="The version number of the pfam database that should be used (Default is 35).")
parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="36",
help="The version number of the pfam database that should be used (Default is 36).")
parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)")
......@@ -126,9 +126,9 @@ def main():
parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int,
help="Specifies up to which size subsets of CDAs should be considered "
"(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).")
parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="35",
parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="36",
help="The version number of the pfam database that should be used "
"(Default is 35).")
"(Default is 36).")
parser_transcriptome.add_argument("-d", "--database", action="store", default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)")
......@@ -267,7 +267,7 @@ class ConversionDictionary(dict):
def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
hq_transcriptomes=None, mode='transcriptome', pfam='35', initial=None, version='3.7',
hq_transcriptomes=None, mode='transcriptome', pfam='36', initial=None, version='3.8',
annotype='pfsc', coverage=0.5):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
......@@ -345,7 +345,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
def score_single_proteome(annotation_file, outfile=None, cutoff=2,
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='35', initial=None, version='3.7',
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='36', initial=None, version='3.8',
annotype='pfsc', coverage=0.5):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
......@@ -776,7 +776,7 @@ class QualityChecker:
if count_in_candidate < count_in_HQ:
self.doms_missing_number += count_in_HQ - count_in_candidate
self.out.append((count_in_candidate, self.cda_count_dict[dom], dom))
self.out.sort(key=lambda l: l[2])
self.out.sort(key=lambda ele: ele[2])
if self.number_of_CDA == 0:
self.percentage = "NA"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment