diff --git a/CHANGELOG b/CHANGELOG index 4fb7395b09ecef845a62a860f0856803aac52020..36794ba6de53340b46cf27078b63befa463e221f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +DOGMA 3.7 +========= +- update to Pfam Database version 35 (default for DOGMA from this version on) +- runs with new RADIANT version 1.0 +- updated error handling for initial RADIANT annotation of fasta files if RADIANT is not in the PATH variable + DOGMA 3.6 ========= diff --git a/README.md b/README.md index 71b47bce9fcd62c5c6015ca01311443fba12e707..4a861b650024133b7a0b2dc89f9d4e271f5afd9f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Requirements ------------ We try to keep the dependencies as little as possible. -Current dependencies are (but you will just need one of them): +Current dependencies are (but you will need just one of them): - RADIANT (https://domainworld.uni-muenster.de/programs/radiant/) OR diff --git a/UserManual.pdf b/UserManual.pdf index f2bd809240f99d7dee6c9baa1acbf8540bbaed24..34b82ae26159fcf201d9ce9989769aefb153bf23 100644 Binary files a/UserManual.pdf and b/UserManual.pdf differ diff --git a/dogma.py b/dogma.py index fa01976b61feacea7f190c92f6a6bc4d3f4a7693..d34fcd41aff9bd58b08e822d3598f08c287d802f 100755 --- a/dogma.py +++ b/dogma.py @@ -1,12 +1,12 @@ #!/usr/bin/env python -# DOGMA version 3.6 +# DOGMA version 3.7 # DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains. # To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved # domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species. -# Copyright (C) 2015-2021 Elias Dohmen +# Copyright (C) 2015-2023 Elias Dohmen # <e.dohmen@wwu.de> based on code by Lukas Kremer. # DOGMA is free software: you can redistribute it and/or modify it @@ -48,7 +48,7 @@ conversion_dictionary = None def main(): - dogma_version = '3.6' + dogma_version = '3.7' try: # top level argument parsing @@ -94,15 +94,14 @@ def main(): parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None, help="Summary will be saved in a file with the given name (and path), " "instead of printed in the console.") - parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="34", - help="The version number of the pfam database that should be used (Default is 34).") + parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="35", + help="The version number of the pfam database that should be used (Default is 35).") parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None, help="If the RADIANT database is not located in the RADIANT directory, please specify" " path and name of the database. (Just necessary for -i option)") parser_proteome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float, help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 " - "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. " - "The partial domain analysis is just available with PfamScan annotations.") + "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.") # transcriptome mode argument parsing parser_transcriptome = subparsers.add_parser("transcriptome", help="Analyse transcriptome data.") @@ -127,16 +126,15 @@ def main(): parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int, help="Specifies up to which size subsets of CDAs should be considered " "(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).") - parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="34", + parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="35", help="The version number of the pfam database that should be used " - "(Default is 34).") + "(Default is 35).") parser_transcriptome.add_argument("-d", "--database", action="store", default=None, help="If the RADIANT database is not located in the RADIANT directory, please specify" " path and name of the database. (Just necessary for -i option)") parser_transcriptome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float, help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 " - "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. " - "The partial domain analysis is just available with PfamScan annotations.") + "This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.") args = parser.parse_args() @@ -159,8 +157,8 @@ def main(): call(['radiant', '-i', args.initial_radiant_run, '-d', database, '-o', args.initial_radiant_run + '.rad']) args.annotation_file = args.initial_radiant_run + '.rad' - except AttributeError: - raise AttributeError("No valid installation of RADIANT found.") + except TypeError: + raise RuntimeError("No valid installation of RADIANT found.") if args.annotation_file is None: parser.error('No input file specified. Please use -a or -i option to specify the proteome input file.') @@ -269,7 +267,7 @@ class ConversionDictionary(dict): def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3, - hq_transcriptomes=None, mode='transcriptome', pfam='34', initial=None, version='3.6', + hq_transcriptomes=None, mode='transcriptome', pfam='35', initial=None, version='3.7', annotype='pfsc', coverage=0.5): """ combines the functions and classes to score a sample proteome in terms of it's domain completeness. @@ -347,7 +345,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3, def score_single_proteome(annotation_file, outfile=None, cutoff=2, - max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='34', initial=None, version='3.6', + max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='35', initial=None, version='3.7', annotype='pfsc', coverage=0.5): """ combines the functions and classes to score a sample proteome in terms of it's domain completeness. @@ -477,6 +475,7 @@ class DomainCounter(dict): domain_start_pos = int(line[1]) domain_name = re.match('([^.]+)', line[3]).group(1) arrangement_dict[seq_id].append((domain_start_pos, domain_name)) + else: raise NotImplementedError( "File format couldn't be detected. Please make sure you use RADIANT or " @@ -510,6 +509,7 @@ class DomainCounter(dict): domain_start_pos = int(line[1]) domain_name = re.match('([^.]+)', line[3]).group(1) arrangement_dict[seq_id].append((domain_start_pos, domain_name)) + else: raise NotImplementedError( "File format couldn't be detected. Please make sure you use RADIANT or "