Merge branch 'ed_develop' into 'master'

DOGMA v3.7 See merge request !3

Merge branch 'ed_develop' into 'master'
DOGMA v3.7 See merge request !3
b66fa5a6 · Carsten Kemena-Rinke · 82c91d84 · 2211e50b · b66fa5a6 · b66fa5a6
Commit b66fa5a6 authored 2 years ago by Carsten Kemena-Rinke
--- a/CHANGELOG
+++ b/CHANGELOG
+DOGMA 3.7
+=========
+- update to Pfam Database version 35 (default for DOGMA from this version on)
+- runs with new RADIANT version 1.0 
+- updated error handling for initial RADIANT annotation of fasta files if RADIANT is not in the PATH variable
+
 DOGMA 3.6
 =========


--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Requirements
 ------------

 We try to keep the dependencies as few as possible.
-Current dependencies are (but you will just need one of them):
+Current dependencies are (but you will need just one of them):

 - RADIANT (https://domainworld.uni-muenster.de/programs/radiant/)
 OR

--- a/UserManual.pdf
+++ b/UserManual.pdf
--- a/dogma.py
+++ b/dogma.py
 #!/usr/bin/env python

-# DOGMA version 3.6
+# DOGMA version 3.7

 # DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains.
 # To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved
 # domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species.

-# Copyright (C) 2015-2021 Elias Dohmen
+# Copyright (C) 2015-2023 Elias Dohmen
 # <e.dohmen@wwu.de> based on code by Lukas Kremer.

 # DOGMA is free software: you can redistribute it and/or modify it
@@ -48,7 +48,7 @@ conversion_dictionary = None


 def main():
-    dogma_version = '3.6'
+    dogma_version = '3.7'

    try:
        # top level argument parsing
@@ -94,15 +94,14 @@ def main():
        parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None,
                                     help="Summary will be saved in a file with the given name (and path), "
                                          "instead of printed in the console.")
-        parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="34",
-                                     help="The version number of the pfam database that should be used (Default is 34).")
+        parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="35",
+                                     help="The version number of the pfam database that should be used (Default is 35).")
        parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None,
                                     help="If the RADIANT database is not located in the RADIANT directory, please specify"
                                          " path and name of the database. (Just necessary for -i option)")
        parser_proteome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float,
                                     help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 "
-                                          "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. "
-                                          "The partial domain analysis is just available with PfamScan annotations.")
+                                          "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.")

        # transcriptome mode argument parsing
        parser_transcriptome = subparsers.add_parser("transcriptome", help="Analyse transcriptome data.")
@@ -127,16 +126,15 @@ def main():
        parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int,
                                          help="Specifies up to which size subsets of CDAs should be considered "
                                               "(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).")
-        parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="34",
+        parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="35",
                                          help="The version number of the pfam database that should be used "
-                                               "(Default is 34).")
+                                               "(Default is 35).")
        parser_transcriptome.add_argument("-d", "--database", action="store", default=None,
                                          help="If the RADIANT database is not located in the RADIANT directory, please specify"
                                               " path and name of the database. (Just necessary for -i option)")
        parser_transcriptome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float,
                                     help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 "
-                                          "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. "
-                                          "The partial domain analysis is just available with PfamScan annotations.")
+                                          "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.")

        args = parser.parse_args()

@@ -159,8 +157,8 @@ def main():
                    call(['radiant', '-i', args.initial_radiant_run, '-d', database, '-o',
                          args.initial_radiant_run + '.rad'])
                    args.annotation_file = args.initial_radiant_run + '.rad'
-                except AttributeError:
-                    raise AttributeError("No valid installation of RADIANT found.")
+                except TypeError:
+                    raise RuntimeError("No valid installation of RADIANT found.")

            if args.annotation_file is None:
                parser.error('No input file specified. Please use -a or -i option to specify the proteome input file.')
@@ -269,7 +267,7 @@ class ConversionDictionary(dict):


 def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
-                               hq_transcriptomes=None, mode='transcriptome', pfam='34', initial=None, version='3.6',
+                               hq_transcriptomes=None, mode='transcriptome', pfam='35', initial=None, version='3.7',
                               annotype='pfsc', coverage=0.5):
    """
     combines the functions and classes to score a sample transcriptome in terms of its domain completeness.
@@ -347,7 +345,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,


 def score_single_proteome(annotation_file, outfile=None, cutoff=2,
-                          max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='34', initial=None, version='3.6',
+                          max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='35', initial=None, version='3.7',
                          annotype='pfsc', coverage=0.5):
    """
     combines the functions and classes to score a sample proteome in terms of its domain completeness.
@@ -477,6 +475,7 @@ class DomainCounter(dict):
                                domain_start_pos = int(line[1])
                                domain_name = re.match('([^.]+)', line[3]).group(1)
                                arrangement_dict[seq_id].append((domain_start_pos, domain_name))
+
                    else:
                        raise NotImplementedError(
                            "File format couldn't be detected. Please make sure you use RADIANT or "
@@ -510,6 +509,7 @@ class DomainCounter(dict):
                                domain_start_pos = int(line[1])
                                domain_name = re.match('([^.]+)', line[3]).group(1)
                                arrangement_dict[seq_id].append((domain_start_pos, domain_name))
+
                    else:
                        raise NotImplementedError(
                            "File format couldn't be detected. Please make sure you use RADIANT or "