Skip to content
Snippets Groups Projects
Commit 82c91d84 authored by Dr. Elias Dohmen's avatar Dr. Elias Dohmen
Browse files

Merge branch 'ed_develop' into 'master'

DOGMA v3.6

See merge request !2
parents 3d67e1e6 4f63c2c0
No related branches found
No related tags found
1 merge request!2DOGMA v3.6
DOGMA 3.6
=========
- update to Pfam Database version 34 (default for DOGMA from this version on)
DOGMA 3.5 DOGMA 3.5
========= =========
...@@ -5,8 +11,6 @@ DOGMA 3.5 ...@@ -5,8 +11,6 @@ DOGMA 3.5
- adjustment of help texts - adjustment of help texts
DOGMA 3.4 DOGMA 3.4
========= =========
......
...@@ -25,9 +25,6 @@ We provide several precomputed core sets for different clades here: ...@@ -25,9 +25,6 @@ We provide several precomputed core sets for different clades here:
https://domainworld.uni-muenster.de/programs/dogma/ https://domainworld.uni-muenster.de/programs/dogma/
UProC is no longer supported from DOGMA version 3.0 onwards; however, the old coresets and databases can still be found on our website: https://domainworld.uni-muenster.de/data/uprocdb/
Usage Usage
----- -----
......
No preview for this file type
#!/usr/bin/env python #!/usr/bin/env python
# DOGMA version 3.5 # DOGMA version 3.6
# DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains. # DOGMA is a python script that can assess proteome or transcriptome quality based on conserved protein domains.
# To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved # To score the domain completeness of a proteome or transcriptome, DOGMA searches its domain annotation for conserved
# domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species. # domains. By default, the conserved domains comprise domains that are shared by six eukaryotic model species.
# Copyright (C) 2015-2020 Elias Dohmen # Copyright (C) 2015-2021 Elias Dohmen
# <e.dohmen@wwu.de> based on code by Lukas Kremer. # <e.dohmen@wwu.de> based on code by Lukas Kremer.
# DOGMA is free software: you can redistribute it and/or modify it # DOGMA is free software: you can redistribute it and/or modify it
...@@ -48,7 +48,7 @@ conversion_dictionary = None ...@@ -48,7 +48,7 @@ conversion_dictionary = None
def main(): def main():
dogma_version = '3.5' dogma_version = '3.6'
try: try:
# top level argument parsing # top level argument parsing
...@@ -94,8 +94,8 @@ def main(): ...@@ -94,8 +94,8 @@ def main():
parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None, parser_proteome.add_argument("-o", "--outfile", action="store", type=str, default=None,
help="Summary will be saved in a file with the given name (and path), " help="Summary will be saved in a file with the given name (and path), "
"instead of printed in the console.") "instead of printed in the console.")
parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="33.1", parser_proteome.add_argument("-m", "--pfam", action="store", type=str, default="34",
help="The version number of the pfam database that should be used (Default is 33.1).") help="The version number of the pfam database that should be used (Default is 34).")
parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None, parser_proteome.add_argument("-d", "--database", action="store", type=str, default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify" help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)") " path and name of the database. (Just necessary for -i option)")
...@@ -127,9 +127,9 @@ def main(): ...@@ -127,9 +127,9 @@ def main():
parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int, parser_transcriptome.add_argument("-s", "--cda_size", action="store", default=3, type=int,
help="Specifies up to which size subsets of CDAs should be considered " help="Specifies up to which size subsets of CDAs should be considered "
"(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).") "(default=3; A-B-C-D --> A-B-C, A-B-D, B-C-D etc.).")
parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="33.1", parser_transcriptome.add_argument("-m", "--pfam", action="store", type=str, default="34",
help="The version number of the pfam database that should be used " help="The version number of the pfam database that should be used "
"(Default is 33.1).") "(Default is 34).")
parser_transcriptome.add_argument("-d", "--database", action="store", default=None, parser_transcriptome.add_argument("-d", "--database", action="store", default=None,
help="If the RADIANT database is not located in the RADIANT directory, please specify" help="If the RADIANT database is not located in the RADIANT directory, please specify"
" path and name of the database. (Just necessary for -i option)") " path and name of the database. (Just necessary for -i option)")
...@@ -269,7 +269,7 @@ class ConversionDictionary(dict): ...@@ -269,7 +269,7 @@ class ConversionDictionary(dict):
def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3, def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
hq_transcriptomes=None, mode='transcriptome', pfam='33.1', initial=None, version='3.5', hq_transcriptomes=None, mode='transcriptome', pfam='34', initial=None, version='3.6',
annotype='pfsc', coverage=0.5): annotype='pfsc', coverage=0.5):
""" """
combines the functions and classes to score a sample proteome in terms of it's domain completeness. combines the functions and classes to score a sample proteome in terms of it's domain completeness.
...@@ -347,7 +347,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3, ...@@ -347,7 +347,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
def score_single_proteome(annotation_file, outfile=None, cutoff=2, def score_single_proteome(annotation_file, outfile=None, cutoff=2,
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='33.1', initial=None, version='3.5', max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='34', initial=None, version='3.6',
annotype='pfsc', coverage=0.5): annotype='pfsc', coverage=0.5):
""" """
combines the functions and classes to score a sample proteome in terms of it's domain completeness. combines the functions and classes to score a sample proteome in terms of it's domain completeness.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment