Merge branch 'ed_develop' into 'master'

DOGMA v3.8 See merge request !4

Merge branch 'ed_develop' into 'master'
DOGMA v3.8 See merge request !4
ea2640bd · Dr. Elias Dohmen · b66fa5a6 · c715ce04 · ea2640bd · ea2640bd
Commit ea2640bd authored 1 year ago by Dr. Elias Dohmen
--- a/CHANGELOG
+++ b/CHANGELOG
+DOGMA 3.8
+=========
+
+- update to Pfam Database version 36 (default for DOGMA from this version on)
+
 DOGMA 3.7
 =========
+
 - update to Pfam Database version 35 (default for DOGMA from this version on)
 - runs with new RADIANT version 1.0 
 - updated error handling for initial RADIANT annotation of fasta files if RADIANT is not in the PATH variable

--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Current dependencies are (but you will need just one of them):

 - RADIANT (https://domainworld.uni-muenster.de/programs/radiant/)
 OR
- pfam_scan.pl (ftp://ftp.ebi.ac.uk/pub/databases/Pfam/Tools/)
+- pfam_scan.pl (http://ftp.ebi.ac.uk/pub/databases/Pfam/Tools/)


 Furthermore, you will need a core set to run DOGMA.

--- a/UserManual.pdf
+++ b/UserManual.pdf
--- a/dogma.py
+++ b/dogma.py
 #!/usr/bin/env python

-# DOGMA version 3.7
+# DOGMA version 3.8

 # DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains.
 # To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved
 # domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species.

 # Copyright (C) 2015-2023 Elias Dohmen
-# <e.dohmen@wwu.de> based on code by Lukas Kremer.
+# <e.dohmen@uni-muenster.de> based on code by Lukas Kremer.

 # DOGMA is free software: you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -48,7 +48,7 @@ conversion_dictionary = None


 def main():
-    dogma_version = '3.7'
+    dogma_version = '3.8'

    try:
        # top level argument parsing
@@ -94,8 +94,8 @@ def main():
        parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None,
                                     help="Summary will be saved in a file with the given name (and path), "
                                          "instead of printed in the console.")
-        parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="35",
-                                     help="The version number of the pfam database that should be used (Default is 35).")
+        parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="36",
+                                     help="The version number of the pfam database that should be used (Default is 36).")
        parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None,
                                     help="If the RADIANT database is not located in the RADIANT directory, please specify"
                                          " path and name of the database. (Just necessary for -i option)")
@@ -126,9 +126,9 @@ def main():
        parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int,
                                          help="Specifies up to which size subsets of CDAs should be considered "
                                               "(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).")
-        parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="35",
+        parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="36",
                                          help="The version number of the pfam database that should be used "
-                                               "(Default is 35).")
+                                               "(Default is 36).")
        parser_transcriptome.add_argument("-d", "--database", action="store", default=None,
                                          help="If the RADIANT database is not located in the RADIANT directory, please specify"
                                               " path and name of the database. (Just necessary for -i option)")
@@ -267,7 +267,7 @@ class ConversionDictionary(dict):


 def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
-                               hq_transcriptomes=None, mode='transcriptome', pfam='35', initial=None, version='3.7',
+                               hq_transcriptomes=None, mode='transcriptome', pfam='36', initial=None, version='3.8',
                               annotype='pfsc', coverage=0.5):
    """
     combines the functions and classes to score a sample proteome in terms of its domain completeness.
@@ -345,7 +345,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,


 def score_single_proteome(annotation_file, outfile=None, cutoff=2,
-                          max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='35', initial=None, version='3.7',
+                          max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='36', initial=None, version='3.8',
                          annotype='pfsc', coverage=0.5):
    """
     combines the functions and classes to score a sample proteome in terms of its domain completeness.
@@ -776,7 +776,7 @@ class QualityChecker:
            if count_in_candidate < count_in_HQ:
                self.doms_missing_number += count_in_HQ - count_in_candidate
                self.out.append((count_in_candidate, self.cda_count_dict[dom], dom))
-            self.out.sort(key=lambda l: l[2])
+            self.out.sort(key=lambda ele: ele[2])

        if self.number_of_CDA == 0:
            self.percentage = "NA"