Skip to content
Snippets Groups Projects
Commit b66fa5a6 authored by Carsten Kemena-Rinke's avatar Carsten Kemena-Rinke
Browse files

Merge branch 'ed_develop' into 'master'

DOGMA v3.7

See merge request !3
parents 82c91d84 2211e50b
No related branches found
Tags v3.7
1 merge request!3DOGMA v3.7
DOGMA 3.7
=========
- update to Pfam Database version 35 (default for DOGMA from this version on)
- runs with new RADIANT version 1.0
- improved error handling for the initial RADIANT annotation of FASTA files when RADIANT is not found in the PATH variable
DOGMA 3.6
=========
......
......@@ -12,7 +12,7 @@ Requirements
------------
We try to keep the number of dependencies as small as possible.
Current dependencies are (but you will just need one of them):
Current dependencies are (but you will need just one of them):
- RADIANT (https://domainworld.uni-muenster.de/programs/radiant/)
OR
......
No preview for this file type
#!/usr/bin/env python
# DOGMA version 3.6
# DOGMA version 3.7
# DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains.
# To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved
# domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species.
# Copyright (C) 2015-2021 Elias Dohmen
# Copyright (C) 2015-2023 Elias Dohmen
# <e.dohmen@wwu.de> based on code by Lukas Kremer.
# DOGMA is free software: you can redistribute it and/or modify it
......@@ -48,7 +48,7 @@ conversion_dictionary = None
def main():
dogma_version = '3.6'
dogma_version = '3.7'
try:
# top level argument parsing
......@@ -94,15 +94,14 @@ def main():
parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None,
help="Summary will be saved in a file with the given name (and path), "
"instead of printed in the console.")
parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="34",
help="The version number of the pfam database that should be used (Default is 34).")
parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="35",
help="The version number of the pfam database that should be used (Default is 35).")
parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)")
parser_proteome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float,
help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 "
"This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. "
"The partial domain analysis is just available with PfamScan annotations.")
"This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.")
# transcriptome mode argument parsing
parser_transcriptome = subparsers.add_parser("transcriptome", help="Analyse transcriptome data.")
......@@ -127,16 +126,15 @@ def main():
parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int,
help="Specifies up to which size subsets of CDAs should be considered "
"(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).")
parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="34",
parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="35",
help="The version number of the pfam database that should be used "
"(Default is 34).")
"(Default is 35).")
parser_transcriptome.add_argument("-d", "--database", action="store", default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)")
parser_transcriptome.add_argument("-cov", "--coverage", action="store", default=0.5, type=float,
help="Specifies how much of a domain has to be annotated to count as a partial domain. Default=0.5 "
"This would mean if less than 50%% of the domain is annotated, it is considered a partial domain. "
"The partial domain analysis is just available with PfamScan annotations.")
"This would mean if less than 50%% of the domain is annotated, it is considered a partial domain.")
args = parser.parse_args()
......@@ -159,8 +157,8 @@ def main():
call(['radiant', '-i', args.initial_radiant_run, '-d', database, '-o',
args.initial_radiant_run + '.rad'])
args.annotation_file = args.initial_radiant_run + '.rad'
except AttributeError:
raise AttributeError("No valid installation of RADIANT found.")
except TypeError:
raise RuntimeError("No valid installation of RADIANT found.")
if args.annotation_file is None:
parser.error('No input file specified. Please use -a or -i option to specify the proteome input file.')
......@@ -269,7 +267,7 @@ class ConversionDictionary(dict):
def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
hq_transcriptomes=None, mode='transcriptome', pfam='34', initial=None, version='3.6',
hq_transcriptomes=None, mode='transcriptome', pfam='35', initial=None, version='3.7',
annotype='pfsc', coverage=0.5):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
......@@ -347,7 +345,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
def score_single_proteome(annotation_file, outfile=None, cutoff=2,
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='34', initial=None, version='3.6',
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='35', initial=None, version='3.7',
annotype='pfsc', coverage=0.5):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
......@@ -477,6 +475,7 @@ class DomainCounter(dict):
domain_start_pos = int(line[1])
domain_name = re.match('([^.]+)', line[3]).group(1)
arrangement_dict[seq_id].append((domain_start_pos, domain_name))
else:
raise NotImplementedError(
"File format couldn't be detected. Please make sure you use RADIANT or "
......@@ -510,6 +509,7 @@ class DomainCounter(dict):
domain_start_pos = int(line[1])
domain_name = re.match('([^.]+)', line[3]).group(1)
arrangement_dict[seq_id].append((domain_start_pos, domain_name))
else:
raise NotImplementedError(
"File format couldn't be detected. Please make sure you use RADIANT or "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment