diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..df4c8f50 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +source=OrthoEvol \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 7db8c2b3..0a674999 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,9 @@ notifications: install: - "pip install --upgrade pip setuptools wheel" - "pip install --only-binary=numpy,scipy numpy scipy" - - "pip install matplotlib ipython jupyter sympy nose" + - "pip install matplotlib ipython jupyter sympy pytest" - "pip install -r requirements.txt" -# command to run nosetests -script: nosetests tests/ --verbosity=3 \ No newline at end of file + - "pip install ." +# command to run unittests via pytest +script: + - pytest tests/ diff --git a/OrthoEvol/Cookies/cookie_jar.py b/OrthoEvol/Cookies/cookie_jar.py index 06b67a7a..7171e1db 100644 --- a/OrthoEvol/Cookies/cookie_jar.py +++ b/OrthoEvol/Cookies/cookie_jar.py @@ -127,7 +127,7 @@ def _check_ingredients(self, cookie, path, no_input, extra_context): self.cookielog.info('%s was created. ✔' % str(path)) def bake_the_repo(self, cookie_jar=None): - self.cookielog.warn('Creating directories from the Repository Cookie template.') + self.cookielog.warning('Creating directories from the Repository Cookie template.') """ This function creates a new repository. If a repository name is given to the class, then it is given a name. If not, cookiecutter @@ -163,7 +163,7 @@ def bake_the_user(self, cookie_jar=None): :param cookie_jar: (Default value = None) """ - self.cookielog.warn('Creating directories from the User Cookie template.') + self.cookielog.warning('Creating directories from the User Cookie template.') if cookie_jar: self.cookie_jar = cookie_jar @@ -181,7 +181,7 @@ def bake_the_project(self, cookie_jar=None): :return: A new project inside the user's project directory. """ - self.cookielog.warn('Creating directories from the Project Cookie template.') + self.cookielog.warning('Creating directories from the Project Cookie template.') if cookie_jar: self.cookie_jar = cookie_jar # Add the project @@ -198,7 +198,7 @@ def bake_the_project(self, cookie_jar=None): if self.exists(str(self.cookie_jar / Path(self.project))): self.cookielog.info('Project exists. ✔') else: - self.cookielog.warn('A project linked to a user/repository is being created.') + self.cookielog.warning('A project linked to a user/repository is being created.') cookiecutter(str(self.Recipes.project_cookie), extra_context=e_c, no_input=no_input, output_dir=str(self.cookie_jar)) # Logging @@ -212,7 +212,7 @@ def bake_the_project(self, cookie_jar=None): if self.exists(str(self.cookie_jar / Path(self.project))): self.cookielog.info('Project exists. 
✔') else: - self.cookielog.warn('A basic standalone project is being created.') + self.cookielog.warning('A basic standalone project is being created.') cookiecutter(str(self.Recipes.basic_project_cookie), extra_context=e_c, no_input=no_input, output_dir=str(self.cookie_jar)) self.cookielog.info( @@ -271,7 +271,7 @@ def bake_the_website(self, host, port, website_path, cookie_jar=None): :param cookie_jar: (Default value = None) """ - self.cookielog.warn('Creating directories from the Website Cookie template.') + self.cookielog.warning('Creating directories from the Website Cookie template.') if cookie_jar: self.cookie_jar = cookie_jar # TODO-ROB: Add heavy logging here @@ -290,8 +290,8 @@ def bake_the_website(self, host, port, website_path, cookie_jar=None): run_script(script_path=str(script_path), cwd=str(website_path)) self.cookielog.info( 'Directories have been created for the Flask Web Server, %s. ✔' % self.website) - self.cookielog.warn('The %s Flask Server should now be running on http://%s:%s' % - (self.website, host, port)) + self.cookielog.warning('The %s Flask Server should now be running on http://%s:%s' % + (self.website, host, port)) def bake_the_research(self, research_type, research, cookie_jar=None): """Create a directory for a new research project. @@ -301,7 +301,7 @@ def bake_the_research(self, research_type, research, cookie_jar=None): :param cookie_jar: (Default value = None) """ - self.cookielog.warn('Creating directories from the Research Cookie template.') + self.cookielog.warning('Creating directories from the Research Cookie template.') if cookie_jar: self.cookie_jar = cookie_jar @@ -320,7 +320,7 @@ def bake_the_app(self, app, cookie_jar=None): :param cookie_jar: (Default value = None) """ - self.cookielog.warn('Creating directories from the App Cookie template.') + self.cookielog.warning('Creating directories from the App Cookie template.') if cookie_jar: self.cookie_jar = cookie_jar e_c = {"app_name": app} diff --git a/OrthoEvol/Orthologs/Blast/blast.py b/OrthoEvol/Orthologs/Blast/blast.py index 719d1c27..7b3aa314 100644 --- a/OrthoEvol/Orthologs/Blast/blast.py +++ b/OrthoEvol/Orthologs/Blast/blast.py @@ -40,7 +40,7 @@ def __init__(self, project, method, acc_file, copy_from_package, :param kwargs:""" super().__init__(project=project, method=method, acc_file=acc_file, - copy_from_package=copy_from_package, + copy_from_package=copy_from_package, ref_species=ref_species, template=template, save_data=save_data, verbose=verbose, **kwargs) @@ -246,7 +246,7 @@ def configure(self, query_accessions, query_organism, auto_start=False): self.blastn_log.debug('Blast configuration has begun.') self.blastn_log.debug('Configuring the accession file.') - + if self.ref_species: query_organism = self.ref_species @@ -468,7 +468,7 @@ class OrthoBlastN(BaseBlastN): def __init__(self, project="orthology-gpcr", project_path=os.getcwd(), method=1, template=None, save_data=True, acc_file="gpcr.csv", - copy_from_package=True, **kwargs): + copy_from_package=True, auto_start=False, **kwargs): """This class inherits from the BaseBlastN class. 
This class utilizes it's parent classes to search a standalone @@ -494,6 +494,7 @@ def __init__(self, project="orthology-gpcr", project_path=os.getcwd(), self.proj_mana = None self.acc_file = acc_file self.copy_from_package = copy_from_package + self.auto_start = auto_start # Initialize class super().__init__(project=project, method=method, template=template, @@ -503,11 +504,14 @@ def __init__(self, project="orthology-gpcr", project_path=os.getcwd(), taxon_file=self.taxon_file, post_blast=self.__post_blast, project_path=self.project_path, - proj_mana=self.proj_mana, **kwargs) + proj_mana=self.proj_mana, + auto_start=self.auto_start, + **kwargs) def run(self): """Run the blast using a default configuration.""" - self.configure(self.blast_human, self.ref_species, auto_start=True) + self.configure(self.blast_human, self.ref_species, + auto_start=self.auto_start) class BlastFailure(BaseException): diff --git a/OrthoEvol/Orthologs/GenBank/genbank.py b/OrthoEvol/Orthologs/GenBank/genbank.py index ad2ade14..1d57edb0 100644 --- a/OrthoEvol/Orthologs/GenBank/genbank.py +++ b/OrthoEvol/Orthologs/GenBank/genbank.py @@ -47,8 +47,9 @@ def __init__(self, project, project_path=None, solo=False, multi=True, self.genbanklog = LogIt().default(logname="GenBank", logfile=None) # Configuration of class attributes - add_self = self.genbank_utils.attribute_config(self, composer=blast, checker=OrthoBlastN, checker2=BaseComparativeGenetics, - project=project, project_path=project_path) + add_self = self.genbank_utils.attribute_config(self, composer=blast, checker=OrthoBlastN, + checker2=BaseComparativeGenetics, + project=project, project_path=project_path) for var, attr in add_self.__dict__.items(): setattr(self, var, attr) @@ -64,7 +65,8 @@ def __init__(self, project, project_path=None, solo=False, multi=True, self.db_files_list.append(str(FILE)) @staticmethod - def name_fasta_file(self, path, gene, org, feat_type, feat_type_rank, extension, mode): + def name_fasta_file(self, path, gene, org, feat_type, + feat_type_rank, extension, mode): """ Provide a uniquely named FASTA file: * Coding sequence: @@ -189,7 +191,7 @@ def get_gbk_file(self, accession, gene, organism, server_flag=None): db = server[SUB_DB_NAME] try: record = db.lookup(accession=accession) - gbk_file = '%s_%s.gbk' % (gene , organism) + gbk_file = '%s_%s.gbk' % (gene, organism) gbk_file_path = gene_path / Path(gbk_file) with open(gbk_file_path, 'w') as GB_file: GB_file.write(record.format('genbank')) @@ -200,12 +202,16 @@ def get_gbk_file(self, accession, gene, organism, server_flag=None): server_flag = True break except IndexError: - self.genbanklog.critical('Index Error in %s. Moving to the next database...' % SUB_DB_NAME) + self.genbanklog.critical( + 'Index Error in %s. Moving to the next database...' % + SUB_DB_NAME) continue # If the file has not been created after searching, then raise an error if server_flag is not True: - self.genbanklog.critical("The GenBank file was not created for %s (%s, %s)." % (accession, gene, organism)) + self.genbanklog.critical( + "The GenBank file was not created for %s (%s, %s)." % + (accession, gene, organism)) raise FileNotFoundError def gbk_quality_control(self, gbk_file, gene, organism): @@ -234,13 +240,16 @@ def gbk_quality_control(self, gbk_file, gene, organism): gbk_organism = gbk_organism[0] gbk_organism = gbk_organism.replace(" ", "_") else: - self.genbanklog.critical("Two organisms exist in the GenBank file. Is this normal?") + self.genbanklog.critical( + "Two organisms exist in the GenBank file. 
Is this normal?") raise BrokenPipeError # Check to make sure the organism in the GenBank file matches the # organism from the accession file if gbk_organism == organism: - self.genbanklog.info("The GenBank organism, %s, has been verified for %s." % (organism, gene)) + self.genbanklog.info( + "The GenBank organism, %s, has been verified for %s." % + (organism, gene)) else: organism_flag = True @@ -260,7 +269,9 @@ def gbk_quality_control(self, gbk_file, gene, organism): for gbk_gene in gbk_genes: if gbk_gene == gene: gene_flag = False - self.genbanklog.info("The GenBank gene, %s, has been verified for %s." % (gene, organism)) + self.genbanklog.info( + "The GenBank gene, %s, has been verified for %s." % + (gene, organism)) break else: gene_flag = True @@ -301,14 +312,18 @@ def gbk_upload(self): db_file_path = self.target_gbk_db_path / Path(db_name) # Create the db file if it exists if os.path.isfile(str(db_file_path)) is False: - self.genbanklog.warn('Copying Template BioSQL Database... This may take a few minutes...') + self.genbanklog.warn( + 'Copying Template BioSQL Database... This may take a few minutes...') shutil.copy2('Template_BioSQL_DB.db', str(db_file_path)) - # If it already exists then the database is bad, or needs to be update. Delete it. + # If it already exists then the database is bad, or needs to be update. + # Delete it. else: - # TODO-ROB: This part is broken until the template db creation and management is added + # TODO-ROB: This part is broken until the template db creation and + # management is added os.remove(str(db_file_path)) - self.genbanklog.warn('Copying Template BioSQL Database... This may take a few minutes...') + self.genbanklog.warn( + 'Copying Template BioSQL Database... This may take a few minutes...') shutil.copy2('Template_BioSQL_DB.db', str(db_file_path)) server = BioSeqDatabase.open_database(driver='sqlite3', db=str(db_file_path)) @@ -317,7 +332,8 @@ def gbk_upload(self): for GENE in os.listdir(str(gene_path)): sub_db_name = GENE genbank_path = gene_path / Path(GENE) / Path('GENBANK') - # Parse the GenBank file names for each gene in order to upload them to a custom BioSQL database + # Parse the GenBank file names for each gene in order to upload them to a + # custom BioSQL database for FILE in os.listdir(str(genbank_path)): # Try to load the database. try: @@ -327,11 +343,18 @@ def gbk_upload(self): count = db.load(SeqIO.parse(FILE, 'genbank')) server.commit() self.genbanklog.info('Server Commited %s' % sub_db_name) - self.genbanklog.info('%s database loaded with %s.' % (db.dbid, FILE)) - self.genbanklog.info("That file contains %s genbank records." % str(count)) + self.genbanklog.info( + '%s database loaded with %s.' % + (db.dbid, FILE)) + self.genbanklog.info( + "That file contains %s genbank records." % + str(count)) t_count = t_count + count - self.genbanklog.info('The total number of files loaded so far is %i.' % t_count) - # If the database cannot be loaded then rollback the server and raise an error. + self.genbanklog.info( + 'The total number of files loaded so far is %i.' % + t_count) + # If the database cannot be loaded then rollback the server and raise + # an error. except BaseException: server.rollback() # Try to delete the sub database and commit @@ -365,18 +388,21 @@ def get_fasta_files(self, acc_dict, db=True): try: for db_name in server.keys(): db = server[db_name] - # For each GenBank record in the database write a set of FASTA files. + # For each GenBank record in the database write a set of FASTA + # files. 
for item in db.keys(): record = db.lookup(item) self.write_fasta_files(record, acc_dict) - self.genbanklog.info("FASTA files for %s created from BioSQL database." % item) - except: + self.genbanklog.info( + "FASTA files for %s created from BioSQL database." % item) + except BaseException: raise() # Get FASTA files from the GenBank files. # TODO-ROB change this. Broken by new directory structure # TODO-ROB directory looks like /raw_data/Gene_1/GENBANK/*.gbk elif db is False: - # Parse the directory that contain the GenBank records for the project of interest. + # Parse the directory that contain the GenBank records for the project of + # interest. for _, _, gbk_files in os.walk(str(self.target_gbk_files_path)): # For each genbank record write a set of FASTA files. for gbk_file in gbk_files: @@ -436,13 +462,16 @@ def write_fasta_files(self, record, acc_dict): 'feat_type_rank': str(feat_type_rank), 'path': str(self.raw_data / Path(gene) / Path('GENBANK')) } - # Set up minimalistic FASTA headers and sequence entries for Nucleic Acid and Amino Acid sequences. + # Set up minimalistic FASTA headers and sequence entries for Nucleic Acid + # and Amino Acid sequences. na_entry = ">{min_org}\n{na_seq}\n".format(**fmt) aa_entry = ">{min_org}\n{aa_seq}\n".format(**fmt) # For full FASTA headers/sequences set min_fasta to False if self.min_fasta is False: - na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description}\n{na_seq}\n".format(**fmt) - aa_entry = ">gi|{aa_gi}|reg|{aa_acc_n}| {aa_description} {org}\n{aa_seq}\n".format(**fmt) + na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description}\n{na_seq}\n".format( + **fmt) + aa_entry = ">gi|{aa_gi}|reg|{aa_acc_n}| {aa_description} {org}\n{aa_seq}\n".format( + **fmt) # ######### End ######### # # ############ Write desired FASTA files ############ # @@ -472,28 +501,33 @@ def solo_fasta(self, na_entry, aa_entry, fmt): if feat_type == "CDS": # Create a .ffn file (FASTA for Coding Nucleic Acids) extension = '.ffn' - file = self.name_fasta_file(path, gene, org, feat_type, feat_type_rank, extension, mode) + file = self.name_fasta_file( + path, gene, org, feat_type, feat_type_rank, extension, mode) file.write(na_entry) file.close() # Create a .faa file (FASTA for Amino Acids) extension = '.faa' - file = self.name_fasta_file(path, gene, org, 'Protein', feat_type_rank, extension, mode) + file = self.name_fasta_file( + path, gene, org, 'Protein', feat_type_rank, extension, mode) file.write(aa_entry) file.close() elif feat_type == "misc_feature": # Create a custom entry for miscellaneous features. 
- na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description} Feature: {na_misc_feat}\n{na_seq}\n".format(**fmt) + na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description} Feature: {na_misc_feat}\n{na_seq}\n".format( + **fmt) # Creates .fna files (generic FASTA file for Nucleic Acids) extension = '.fna' - file = self.name_fasta_file(path, gene, org, feat_type, feat_type_rank, extension, mode) + file = self.name_fasta_file( + path, gene, org, feat_type, feat_type_rank, extension, mode) file.write(na_entry) file.close() elif feat_type != "variation": # Creates .fasta files (generic FASTA file) extension = '.fasta' - file = self.name_fasta_file(path, gene, org, 'Other', feat_type_rank, extension, mode) + file = self.name_fasta_file( + path, gene, org, 'Other', feat_type_rank, extension, mode) file.write(na_entry) file.close() @@ -531,7 +565,8 @@ def multi_fasta(self, na_entry, aa_entry, fmt): file.write(aa_entry) file.close() elif feat_type == "misc_feature": - na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description} Feature: {na_misc_feat}\n{na_seq}\n".format(**fmt) + na_entry = ">gi|{na_gi}|ref|{na_acc_n}| {na_description} Feature: {na_misc_feat}\n{na_seq}\n".format( + **fmt) # Creates .fna files (generic FASTA file for Nucleic Acids) extension = '.fna' file = self.name_fasta_file(path, gene, org, feat_type, diff --git a/OrthoEvol/Tools/ftp/ncbiftp.py b/OrthoEvol/Tools/ftp/ncbiftp.py index 7c4041b3..c1d81e1e 100644 --- a/OrthoEvol/Tools/ftp/ncbiftp.py +++ b/OrthoEvol/Tools/ftp/ncbiftp.py @@ -93,7 +93,9 @@ def walk(self, path): try: self.ftp.cwd(path) except error_perm as ep: - self.ncbiftp_log.info("Current path: %s" % self.ftp.pwd() + ep.__str__() + path) + self.ncbiftp_log.info( + "Current path: %s" % + self.ftp.pwd() + ep.__str__() + path) return [], [] else: self.ftp.retrlines('LIST', lambda x: file_list.append(x.split())) @@ -126,7 +128,8 @@ def _download_pool(self, files): download_time_secs = time() with ThreadPool(self.cpus) as download_pool: with tqdm(total=len(files)) as pbar: - for i, _ in tqdm(enumerate(download_pool.imap(self.download_file, files))): + for i, _ in tqdm( + enumerate(download_pool.imap(self.download_file, files))): pbar.update() minutes = round(((time() - download_time_secs) / 60), 2) self.ncbiftp_log.info("Took %s minutes to download the files." % @@ -144,7 +147,9 @@ def _download_windowmasker(self, windowmaskerfile): if not os.path.exists(windowmaskerfile): try: with open(windowmaskerfile, 'wb') as localfile: - self.ftp.retrbinary('RETR %s/wmasker.%s' % (taxid, wm_ext), localfile.write) + self.ftp.retrbinary( + 'RETR %s/wmasker.%s' % + (taxid, wm_ext), localfile.write) self.ncbiftp_log.info('%s was downloaded.' % str(windowmaskerfile)) except all_errors: os.remove(windowmaskerfile) @@ -253,7 +258,8 @@ def getwindowmaskerfiles(self, taxonomy_ids, download_path): download_time_secs = time() with ThreadPool(1) as download_pool: with tqdm(total=len(self.files2download)) as pbar: - for i, _ in tqdm(enumerate(download_pool.imap(self._download_windowmasker, windowmaskerfiles))): + for i, _ in tqdm(enumerate(download_pool.imap( + self._download_windowmasker, windowmaskerfiles))): pbar.update() minutes = round(((time() - download_time_secs) / 60), 2) self.ncbiftp_log.info("Took %s minutes to download the files." 
% diff --git a/OrthoEvol/Tools/mygene/mygene.py b/OrthoEvol/Tools/mygene/mygene.py index 5342db27..d5507fb5 100644 --- a/OrthoEvol/Tools/mygene/mygene.py +++ b/OrthoEvol/Tools/mygene/mygene.py @@ -39,7 +39,7 @@ def _import_accfile(self): accfile = pd.read_csv(self.infile) # Ensure infile has 'Homo_sapiens' column. if 'Homo_sapiens' not in accfile.columns: - raise KeyError('"Homo_sapiens" column does noFt exist.') + raise KeyError('"Homo_sapiens" column does not exist.') else: acclist = list([accession.upper() for accession in accfile.Homo_sapiens]) return acclist diff --git a/OrthoEvol/Tools/parallel/README.md b/OrthoEvol/Tools/parallel/README.md index b04a615b..fbb10d3a 100644 --- a/OrthoEvol/Tools/parallel/README.md +++ b/OrthoEvol/Tools/parallel/README.md @@ -22,9 +22,9 @@ def printwords(word): print(word) -words = ['bae', 'luh', 'cuh'] +words = ['python', 'rust', 'javascript'] if __name__ == '__main__': mp = Multiprocess() - mp.map2function(printwords, words) + mp.map_to_function(printwords, words) ``` diff --git a/OrthoEvol/Tools/parallel/multiprocess.py b/OrthoEvol/Tools/parallel/multiprocess.py index 3201a5a7..d84bfec7 100644 --- a/OrthoEvol/Tools/parallel/multiprocess.py +++ b/OrthoEvol/Tools/parallel/multiprocess.py @@ -34,7 +34,7 @@ def _logger(): logger = logzero.logger return logger - def map2function(self, function, iterable): + def map_to_function(self, function, iterable): """Start a pool to run your function with a list. :param function: Input a python function. diff --git a/OrthoEvol/Tools/pbs/qstat.py b/OrthoEvol/Tools/pbs/qstat.py index 238706ff..61ddac7b 100644 --- a/OrthoEvol/Tools/pbs/qstat.py +++ b/OrthoEvol/Tools/pbs/qstat.py @@ -1,22 +1,24 @@ import asyncio -import os import csv -import yaml -import sys import json +import os import subprocess as sp +import sys +from collections import OrderedDict +from datetime import datetime +from pathlib import Path +from time import sleep + import pandas as pd -import plotly.graph_objs as go import plotly +import plotly.graph_objs as go +import yaml from dateutil import parser -from datetime import datetime -from time import sleep from pkg_resources import resource_filename -from collections import OrderedDict -from pathlib import Path -from OrthoEvol.utilities import FullUtilities + from OrthoEvol.Manager.config import yml from OrthoEvol.Tools.logit import LogIt +from OrthoEvol.utilities import FullUtilities class TargetJobKeyError(KeyError): @@ -29,19 +31,19 @@ class BaseQstat(object): # Static qstat Keywords __misc_kw = ["Checkpoint", "Error_Path", "exec_host", "exec_vnode", "Hold_Types", "Join_Path", - "Keep_Files", "Mail_Points", "Output_Path", "Rerunable", "Resource_List.mpiprocs", - "Resource_List.ncpus", "Resource_List.nodect", "Resource_List.nodes", - "Resource_List.place", "Resource_List.select", "jobdir", "Variable_List", "umask", - "project", "Submit_arguments"] + "Keep_Files", "Mail_Points", "Output_Path", "Rerunable", "Resource_List.mpiprocs", + "Resource_List.ncpus", "Resource_List.nodect", "Resource_List.nodes", + "Resource_List.place", "Resource_List.select", "jobdir", "Variable_List", "umask", + "project", "Submit_arguments"] __job_limits_kw = ["ctime", "etime", "qtime", "stime", "mtime", "Resource_List.walltime", "Resource_List.cput", - "Resource_List.mem"] + "Resource_List.mem"] __job_time_kw = ["ctime", "etime", "qtime", "stime", "mtime"] __job_info_kw = ["Job_Id", "Job_Name", "Job_Owner", "queue", "server", "session_id"] __static_kw = __job_info_kw + __job_limits_kw + __misc_kw # Dynamic qstat Keywords 
__misc_data_kw = ["job_state", "Priority", "substate", "comment", "run_count"] __job_data_kw = ["resources_used.cpupercent", "resources_used.cput", "resources_used.mem", - "resources_used.vmem", "resources_used.walltime", "resources_used.ncpus"] + "resources_used.vmem", "resources_used.walltime", "resources_used.ncpus"] __dynamic_kw = __job_data_kw + __misc_data_kw # All Keywords __keywords = __static_kw + __dynamic_kw @@ -173,7 +175,8 @@ def run_qstat(self, csv_flag=True, sqlite_flag=False, ordered=False, capture_jso capture_json=True) self.qstat_dict = self.qstat_data['Jobs'] else: - self.qstat_data = self.qstat_output(cmd=self.cmd, log_file=str(self.qstat_log_file), print_flag=False) + self.qstat_data = self.qstat_output( + cmd=self.cmd, log_file=str(self.qstat_log_file), print_flag=False) # Convert raw data to nested dictionary self.qstat_dict = self.to_dict(qstat_data=self.qstat_data, ordered=ordered) # Isolate data for target PBS job @@ -181,10 +184,12 @@ def run_qstat(self, csv_flag=True, sqlite_flag=False, ordered=False, capture_jso # Isolate static data for target PBS job self.static_dict = self.static_data(qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) # Create a pandas dataframe for target PBS job, formatted for creating a CSV file. - self.job_dataframe = self.to_dataframe(qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) + self.job_dataframe = self.to_dataframe( + qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) if csv_flag: self.to_csv(file=self.data_file, qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) - self.static_data_to_yaml(file=self.info_file, qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) + self.static_data_to_yaml( + file=self.info_file, qstat_dict=self.qstat_dict, target_job=self.pbs_job_id) if sqlite_flag: self.to_sqlite() @@ -541,7 +546,8 @@ def static_data(self, qstat_dict, target_job): for keyword in qstat_dict[target_job].keys(): if keyword in self.__static_kw: if keyword in self.__job_time_kw: - data_dict[target_job][keyword] = str(parser.parse(qstat_dict[target_job][keyword])) + data_dict[target_job][keyword] = str( + parser.parse(qstat_dict[target_job][keyword])) else: data_dict[target_job][keyword] = qstat_dict[target_job][keyword] return data_dict @@ -710,7 +716,8 @@ def _watch(self, count=None, first_time=None, max_count=None): first_time = first_time try: self.run_qstat(csv_flag=True, sqlite_flag=False) - self.qstat_log.info("Added data-point %s from qstat for %s." % (self.watch_count, self.pbs_job_id)) + self.qstat_log.info("Added data-point %s from qstat for %s." % + (self.watch_count, self.pbs_job_id)) if not first_time: if self.watch_count == max_count: raise TargetJobKeyError @@ -855,7 +862,8 @@ def watch(self, jobs, infile=None, outfile=None, cmd=None, wait_time=120): :param wait_time: The amount of time to wait in between each point of data being collected. :type wait_time: int. 
""" - self.job_dict = self.get_qstat_dict(jobs, infile=infile, outfile=outfile, cmd=cmd, wait_time=wait_time) + self.job_dict = self.get_qstat_dict( + jobs, infile=infile, outfile=outfile, cmd=cmd, wait_time=wait_time) self.job_list = self.multi_watch(job_dict=self.job_dict) def get_qstat_dict(self, jobs, infile=None, outfile=None, cmd=None, wait_time=120): @@ -885,7 +893,8 @@ def get_qstat_dict(self, jobs, infile=None, outfile=None, cmd=None, wait_time=12 for job in jobs: # Get qstat parameters for each target job home = str(self.config_home / job) - _qstat = Qstat(job_id=job, home=home, infile=infile, outfile=outfile, cmd=cmd, wait_time=wait_time) + _qstat = Qstat(job_id=job, home=home, infile=infile, + outfile=outfile, cmd=cmd, wait_time=wait_time) # Create a dictionary value for each job job_dict[job] = _qstat return job_dict @@ -908,7 +917,8 @@ def multi_watch(self, job_dict): for _qstat in job_dict.values(): # Append task list for asnychronous programming - tasks.append(asyncio.ensure_future(self._async_watch(qstat=_qstat, count=_qstat.watch_count))) + tasks.append(asyncio.ensure_future( + self._async_watch(qstat=_qstat, count=_qstat.watch_count))) # Run task list and then close job_list = ioloop.run_until_complete(asyncio.wait(tasks)) ioloop.close() @@ -942,7 +952,8 @@ async def _async_watch(self, qstat, first_time=None, count=None): try: qstat.run_qstat(csv_flag=True, sqlite_flag=False) - qstat.qstat_log.info("Added data-point %s from qstat for %s." % (qstat.watch_count, qstat.pbs_job_id)) + qstat.qstat_log.info("Added data-point %s from qstat for %s." % + (qstat.watch_count, qstat.pbs_job_id)) if not first_time: await asyncio.sleep(qstat.wait_time) temp_qstat = self._async_watch(qstat=qstat, first_time=False) @@ -954,4 +965,3 @@ async def _async_watch(self, qstat, first_time=None, count=None): temp_qstat = qstat qstat = temp_qstat return qstat - diff --git a/OrthoEvol/Tools/pbs/qsub.py b/OrthoEvol/Tools/pbs/qsub.py index 1b605b46..7eaf4433 100644 --- a/OrthoEvol/Tools/pbs/qsub.py +++ b/OrthoEvol/Tools/pbs/qsub.py @@ -1,16 +1,18 @@ import getpass -import string -import random import os +import random import shutil +import string import subprocess as sp -from pathlib import Path from collections import OrderedDict -from pkg_resources import resource_filename from datetime import datetime as d +from pathlib import Path from time import sleep -from OrthoEvol.Tools.logit import LogIt + +from pkg_resources import resource_filename + from OrthoEvol.Manager.config import templates +from OrthoEvol.Tools.logit import LogIt from OrthoEvol.utilities import FullUtilities @@ -385,14 +387,17 @@ def set_up_pbs_script(self, pbs_template_string=None, pbs_template_file=None, pb """ if pbs_template_file == self.pbs_script: - raise FileExistsError("The script provided, %s, already exists. Do not overwrite." % self.pbs_script) + raise FileExistsError( + "The script provided, %s, already exists. Do not overwrite." 
% self.pbs_script) # Configure the PBS Code if pbs_attributes is not None: if pbs_template_file is not None: - pbs_code = self.format_template_string(template=pbs_template_file, attributes=pbs_attributes) + pbs_code = self.format_template_string( + template=pbs_template_file, attributes=pbs_attributes) elif pbs_template_string is not None: - pbs_code = self.format_template_string(code=pbs_template_string, attributes=pbs_attributes) + pbs_code = self.format_template_string( + code=pbs_template_string, attributes=pbs_attributes) else: raise ValueError("Please supply the pbs_template_file or pbs_template_string to generate the proper" "pbs script.") @@ -440,7 +445,8 @@ def submit_python_job(self, cmd=None, py_template_string=None, py_template_file= python_attributes=python_attributes) self.qsub_log.info("The Python script has been formatted.") elif not self.python_script.exists(): - self.copy_supplied_script(supplied_script=self.supplied_python_script, new_script=self.python_script) + self.copy_supplied_script( + supplied_script=self.supplied_python_script, new_script=self.python_script) # Set up the custom python command if custom_python_cmd is not None: @@ -453,7 +459,8 @@ def submit_python_job(self, cmd=None, py_template_string=None, py_template_file= pbs_attributes=pbs_attributes) self.qsub_log.info("The PBS script has been set up.") if not self.pbs_script.exists(): - self.copy_supplied_script(supplied_script=self.supplied_pbs_script, new_script=self.pbs_script) + self.copy_supplied_script( + supplied_script=self.supplied_pbs_script, new_script=self.pbs_script) # Submit job self.submit_pbs_script(cmd=cmd) diff --git a/OrthoEvol/utilities.py b/OrthoEvol/utilities.py index 760b39c4..094fd030 100644 --- a/OrthoEvol/utilities.py +++ b/OrthoEvol/utilities.py @@ -103,7 +103,8 @@ def gene_list_config(self, file, data_path, gene_list, taxon_dict, logger): count += 1 ending = row # The last row gene = ending[1] # The last row's gene - org = header[len(row) - 1] # The last column(organism) accessed in the last row + # The last column(organism) accessed in the last row + org = header[len(row) - 1] taxid = taxon_dict[org] # The taxon id of the organism # Start logging @@ -119,7 +120,9 @@ def gene_list_config(self, file, data_path, gene_list, taxon_dict, logger): count = count - 2 # End logging # The continued gene list starts with the previous gene. 
- continued_gene_list = list(x for i, x in enumerate(gene_list, 1) if i > count) + continued_gene_list = list( + x for i, x in enumerate( + gene_list, 1) if i > count) return continued_gene_list # If the file doesn't exist return nothing else: @@ -138,7 +141,8 @@ def my_gene_info(self, acc_dataframe, blast_query='Homo_sapiens'): """ mygene = import_module('mygene') - blastutils_log.info("Getting Pre-BLAST information about the target genes using MyGene...") + blastutils_log.info( + "Getting Pre-BLAST information about the target genes using MyGene...") # Initialize variables and import my-gene search command urls = [] df = acc_dataframe @@ -150,7 +154,8 @@ def my_gene_info(self, acc_dataframe, blast_query='Homo_sapiens'): human = list(x.upper() for x in blast_query_list) mygene_query = mg.querymany(human, scopes='refseq', fields='symbol,name,entrezgene,summary', - species='human', returnall=True, as_dataframe=True, + species='human', returnall=True, + as_dataframe=True, size=1, verbose=True) # TODO-ROB: Logging here # Turn my-gene queries into a data frame and then reset the index @@ -171,11 +176,12 @@ def my_gene_info(self, acc_dataframe, blast_query='Homo_sapiens'): ncbi = pd.DataFrame(urls, columns=['NCBI Link'], dtype=str) # Merge, sort, and return the my-gene data frame - hot_data = pd.concat([pd.Series(df.Tier, dtype=str), df.Gene, mg_df, ncbi], axis=1) - hot_data.rename(columns={'Gene': 'Gene Symbol'}, inplace=True) - hot_data = hot_data.sort_values(['Tier'], ascending=True) + query_data = pd.concat([pd.Series(df.Tier, dtype=str), + df.Gene, mg_df, ncbi], axis=1) + query_data.rename(columns={'Gene': 'Gene Symbol'}, inplace=True) + query_data = query_data.sort_values(['Tier'], ascending=True) - return hot_data + return query_data def get_dup_acc(self, acc_dict, gene_list, org_list): """Get duplicated accession numbers during post-blast analysis. @@ -225,7 +231,8 @@ def get_dup_acc(self, acc_dict, gene_list, org_list): # Categorize the different types of duplication # Duplicates that persist across an organisms if orgs.count(o) == len(go_list): - blastutils_log.warning("A duplicate accession number(%s) persists ONLY across %s for %s." % (accession, o, genes)) + blastutils_log.warning( + "A duplicate accession number(%s) persists ONLY across %s for %s." % (accession, o, genes)) duplicated_dict['organisms'][o][accession] = genes del duplicated_dict['genes'][g] break @@ -233,13 +240,16 @@ def get_dup_acc(self, acc_dict, gene_list, org_list): elif orgs.count(o) != 1: alt_genes = list( gene for gene, org in go_list if org == o) - blastutils_log.warn("A duplicate accession number(%s) persists across %s for %s." % (accession, o, alt_genes)) - blastutils_log.warn("%s is also duplicated elsewhere." % accession) + blastutils_log.warning( + "A duplicate accession number(%s) persists across %s for %s." % (accession, o, alt_genes)) + blastutils_log.warning( + "%s is also duplicated elsewhere." % accession) duplicated_dict['organisms'][o][accession] = alt_genes # Duplicates that persist across a gene if genes.count(g) == len(go_list): - blastutils_log.critical("A duplicate accession number(%s) persists across %s for %s." % (accession, g, orgs)) + blastutils_log.critical( + "A duplicate accession number(%s) persists across %s for %s." 
% (accession, g, orgs)) duplicated_dict['genes'][g][accession] = orgs del duplicated_dict['organisms'][o] break @@ -247,11 +257,14 @@ def get_dup_acc(self, acc_dict, gene_list, org_list): elif genes.count(g) != 1: alt_orgs = list( org for gene, org in go_list if gene == g) - blastutils_log.critical("A duplicate accession number(%s) persists across %s for %s." % (accession, g, alt_orgs)) - blastutils_log.critical("%s is also duplicated elsewhere." % accession) + blastutils_log.critical( + "A duplicate accession number(%s) persists across %s for %s." % (accession, g, alt_orgs)) + blastutils_log.critical( + "%s is also duplicated elsewhere." % accession) duplicated_dict['genes'][g][accession] = alt_orgs - # This is the "somewhere else" if the duplication is random or not categorized + # This is the "somewhere else" if the duplication + # is random or not categorized # The duplication is random if genes.count(g) == 1 and orgs.count(o) == 1: del duplicated_dict['organisms'][o] @@ -268,7 +281,8 @@ def get_dup_acc(self, acc_dict, gene_list, org_list): del duplicated_dict['genes'][g] if accession not in duplicated_dict['other']: duplicated_dict['other'][accession] = [] - blastutils_log.critical("%s is duplicated, but cannot be categorized as random." % accession) + blastutils_log.critical( + "%s is duplicated, but cannot be categorized as random." % accession) duplicated_dict['other'][accession].append(go) # Duplicate Organism count dictionary dup_org = pd.DataFrame.from_dict(duplicated_dict['organisms']) @@ -315,11 +329,12 @@ def get_miss_acc(self, acc_dataframe): if miss != 0: missing_dict['organisms'][organism] = {} # Missing Gene dict {'HTR1A': True} - missing_genes = miss_gene_df.ix[:, organism].to_dict() + missing_genes = miss_gene_df.loc[:, organism].to_dict() # Do a list comprehension to get a list of genes missing_dict['organisms'][organism]['missing genes'] = list(key for key, value in missing_genes.items() if value) # Value is True for miss accns - blastutils_log.critical("%s is missing %s." % (organism, str(missing_dict['organisms'][organism]['missing genes']))) + blastutils_log.critical("%s is missing %s." % (organism, str( + missing_dict['organisms'][organism]['missing genes']))) # Number of missing accessions per organism missing_dict['organisms'][organism]['count'] = miss total_miss += miss @@ -330,13 +345,14 @@ def get_miss_acc(self, acc_dataframe): total_miss = 0 for gene, miss in gene_dict.items(): if miss != 0: - missing_orgs = miss_gene_df.T.ix[:, gene].to_dict() + missing_orgs = miss_gene_df.T.loc[:, gene].to_dict() missing_dict['genes'][gene] = {} # Do a list comprehension to get a list of organisms missing_dict['genes'][gene]['missing organisms'] = list(key for key, value in missing_orgs.items() if value # Value is True for missing accessions if key != 'Tier') # Don't include 'Tier' - blastutils_log.critical("%s is missing %s." % (gene, str(missing_dict['genes'][gene]['missing organisms']))) + blastutils_log.critical("%s is missing %s." % ( + gene, str(missing_dict['genes'][gene]['missing organisms']))) # Number of missing accessions per gene missing_dict['genes'][gene]['count'] = miss total_miss += miss @@ -405,7 +421,8 @@ def __init__(self): def multi_fasta_manipulator(self, target_file, man_file, output_file, manipulation='remove'): - # Inspired by the BioPython Tutorial and Cookbook ("20.1.1 Filtering a sequence file") + # Inspired by the BioPython Tutorial and Cookbook ("20.1.1 Filtering a + # sequence file") """Manipulate reference sequences in multifasta files. 
The original purpose was to filter files created by the GUIDANCE2 @@ -481,8 +498,12 @@ def multi_fasta_remove(self, target_file, man_file, output_file): elif isinstance(man_file, list): ids = man_file - new_records = (record for record in SeqIO.parse(target_file, 'fasta') if record.id not in ids) - old_records = (record for record in SeqIO.parse(target_file, 'fasta') if record.id in ids) + new_records = (record for record in SeqIO.parse( + target_file, 'fasta') if record.id not in ids) + old_records = ( + record for record in SeqIO.parse( + target_file, + 'fasta') if record.id in ids) print('Sequences have been filtered.') SeqIO.write(new_records, str(output_file), 'fasta') @@ -503,10 +524,12 @@ def muli_fasta_add(self, target_file, man_file, output_file): # TODO-ROB: Check for duplicates. # Concatenate the multifasta files together by chaining the SeqIO.parse generators # Allows one to overwrite a file by using temporary files for storage - # adding generators - https://stackoverflow.com/questions/3211041/how-to-join-two-generators-in-python + # adding generators - + # https://stackoverflow.com/questions/3211041/how-to-join-two-generators-in-python if os.path.isfile(man_file): with TemporaryFile('r+', dir=str(Path(target_file).parent)) as tmp_file: - new_records = itertools.chain(SeqIO.parse(target_file, 'fasta', ), SeqIO.parse(man_file, 'fasta')) + new_records = itertools.chain(SeqIO.parse( + target_file, 'fasta', ), SeqIO.parse(man_file, 'fasta')) count = SeqIO.write(new_records, tmp_file, 'fasta') tmp_file.seek(0) print('temp file count: ' + str(count)) @@ -564,7 +587,8 @@ def __init__(self): BlastUtils.__init__(self) GenbankUtils.__init__(self) - def attribute_config(self, cls, composer, checker, project=None, project_path=None, checker2=None): + def attribute_config(self, cls, composer, checker, project=None, + project_path=None, checker2=None): """Set or configure attributes. Attribute Configuration takes an instance of a class and sets various @@ -596,23 +620,28 @@ def attribute_config(self, cls, composer, checker, project=None, project_path=No if issubclass(type(composer), checker) or check2: for key, value in composer.__dict__.items(): setattr(cls, key, value) - ac_log.info("The attribute configuration was accomplished by composing %s with %s." % (cls.__class__.__name__, composer.__class__.__name__)) + ac_log.info("The attribute configuration was accomplished by composing %s with %s." % ( + cls.__class__.__name__, composer.__class__.__name__)) # Attribute configuration using a dictionary. elif isinstance(composer, dict): for key, value in composer.items(): setattr(cls, key, value) - ac_log.info("The attribute configuration of %s was accomplished by using a dictionary." % cls.__class__.__name__) + ac_log.info( + "The attribute configuration of %s was accomplished by using a dictionary." % cls.__class__.__name__) # Attribute configuration without composer elif composer is None: if not (project or project_path): - raise BrokenPipeError("Without the Project Management class, a project name and project path must be included.") + raise BrokenPipeError( + "Without the Project Management class, a project name and project path must be included.") cls = self.standalone_config(cls, project, project_path) - ac_log.info("The attribute configuration of %s was accomplished by using a standalone project." % cls.__class__.__name__) + ac_log.info( + "The attribute configuration of %s was accomplished by using a standalone project." 
% cls.__class__.__name__) # Make sure self.project and self.project_path have values if not (cls.project or cls.project_path): - raise BrokenPipeError("The project name and project path attributes have not been set.") + raise BrokenPipeError( + "The project name and project path attributes have not been set.") return cls @@ -652,7 +681,8 @@ def standalone_config(self, cls, project, project_path, custom=None): cls.itis_db_repo = cls.user_db / Path('ITIS') cls.ncbi_db_repo = cls.user_db / Path('NCBI') cls.blast_db = cls.ncbi_db_repo / Path('blast') / Path('db') - cls.windowmaker_files = cls.ncbi_db_repo / Path('blast') / Path('windowmaker_files') + cls.windowmaker_files = cls.ncbi_db_repo / \ + Path('blast') / Path('windowmaker_files') cls.ncbi_taxonomy = cls.ncbi_db_repo / Path('pub') / Path('taxonomy') cls.NCBI_refseq_release = cls.ncbi_db_repo / Path('refseq') / Path('release') @@ -790,17 +820,21 @@ def archive(self, database_path, archive_path, option, delete_flag=False): archive_filename = shutil.make_archive(base_name=str(output_pathname), format="xztar", root_dir=root_dir, base_dir=base_dir, logger=archive_log) archive_size = self.get_size(archive_filename) - archive_log.warning("A %s archive file was created at %s." % (archive_filename, archive_size)) + archive_log.warning("A %s archive file was created at %s." % + (archive_filename, archive_size)) # TODO-ROB: Logging. And log to a README.md file. # Delete the files if desired. if delete_flag: - archive_log.critical("The original data will be deleted recursively at %s." % data_path) + archive_log.critical( + "The original data will be deleted recursively at %s." % data_path) from OrthoEvol import OrthoEvolWarning - OrthoEvolWarning("You're about to delete your database (%s). Are you sure??" % data_path) + OrthoEvolWarning( + "You're about to delete your database (%s). Are you sure??" % data_path) shutil.rmtree(path=data_path) archive_list.append(str(archive_filename)) else: - archive_log.critical("The original data will be moved recursively from %s to %s." % (data_path, output_pathname)) + archive_log.critical("The original data will be moved recursively from %s to %s." % ( + data_path, output_pathname)) output_pathname.mkdir() shutil.move(src=str(data_path), dst=str(output_pathname)) shutil.move(src=str(archive_filename), dst=str(output_pathname)) @@ -822,19 +856,20 @@ def get_size(self, start_path, units="KB"): total_size = 0 if os.path.isfile(start_path): size = os.path.getsize(start_path) - size = str(size/self.bytesize_options[units]) + (" %s" % units) + size = str(size / self.bytesize_options[units]) + (" %s" % units) return size for dirpath, _, filenames in os.walk(start_path): for f in filenames: fp = os.path.join(dirpath, f) total_size += os.path.getsize(fp) - total_size = str(total_size/self.bytesize_options[units]) + (" %s" % units) + total_size = str(total_size / self.bytesize_options[units]) + (" %s" % units) return total_size class PackageVersion(object): """Get the version of an installed python package.""" + def __init__(self, packagename): self.packagename = packagename self._getversion() @@ -847,6 +882,7 @@ def _getversion(self): class FunctionRepeater(object): """Repeats a function every interval. 
Ref: https://tinyurl.com/yckgv8m2"""
+
     def __init__(self, interval, function, *args, **kwargs):
         self._timer = None
         self.function = function
@@ -880,7 +916,8 @@ def __init__(self):
         ManagerUtils.__init__(self)
         OrthologUtils.__init__(self)

-    def system_cmd(self, cmd, timeout=None, print_flag=True, write_flag=False, file_name=None, **kwargs):
+    def system_cmd(self, cmd, timeout=None, print_flag=True,
+                   write_flag=False, file_name=None, **kwargs):
         """
         A function for making system calls, while preforming proper exception handling.
diff --git a/README.rst b/README.rst
index fa989eae..f55cd77e 100644
--- a/README.rst
+++ b/README.rst
@@ -13,6 +13,9 @@
 .. image:: https://readthedocs.org/projects/orthoevolution/badge/?version=latest
     :target: http://orthoevolution.readthedocs.io/en/latest/?badge=latest

+.. image:: https://codecov.io/gh/codecov/OrthoEvolution/branch/master/graph/badge.svg
+  :target: https://codecov.io/gh/codecov/OrthoEvolution
+
 OrthoEvolution
 ====================
@@ -136,9 +139,9 @@ Creating projects and databases dynamically
 Tests
 ----------------
-To run tests, type ``nosetests tests/`` in the OrthoEvolution directory.
+To run tests, type ``pytest tests`` in the OrthoEvolution directory.

-First, install the ``nose` package using pip.
+First, install the ``pytest`` package using pip.

 Contributors
 ----------------
diff --git a/tests/README.md b/tests/README.md
index 0cb97c1c..372e32c7 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -5,9 +5,9 @@ This is the test suite.
 Usage
 --------
-Ensure that nose is installed.
+Ensure that pytest is installed.

-`pip install nose`
+`pip install pytest`

 Run our tests from the top directory, `OrthoEvolution`, by typing:
-`nosetests Tests/`
\ No newline at end of file
+`pytest tests`
\ No newline at end of file
diff --git a/tests/mp_test/child.py b/tests/mp_test/child.py
deleted file mode 100644
index f9b9c95a..00000000
--- a/tests/mp_test/child.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import platform
-import time
-from mpi4py import MPI
-
-# Get child process information
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-size = comm.Get_size()
-machine = platform.node()
-x = 0
-while x < 10:
-    print("In process %s x is %s" % (rank, x))
-    time.sleep(3)
-    x += 1
diff --git a/tests/mp_test/mp.py b/tests/mp_test/mp.py
deleted file mode 100644
index c4a90d85..00000000
--- a/tests/mp_test/mp.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import subprocess
-import re
-print("Begin subprocess.run call")
-qsub = subprocess.check_output(['qsub mp.sh'], universal_newlines=True)
-job_id = re.findall(r'\d+', qsub)[0]
-
diff --git a/tests/mp_test/mp.sh b/tests/mp_test/mp.sh
deleted file mode 100644
index 4e8ad8c2..00000000
--- a/tests/mp_test/mp.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-# This is a simple bash script that gets GI lists for the local blast.
- -#PBS -S /bin/bash -#PBS -m bea -#PBS -l select=10:ncpus=1:mem=1gb -l place=free -#PBS -l cput=24:00:00 -#PBS -l walltime=32:00:00 -#PBS -N mptest -#PBS -o getgilists.o${PBS_JOBID} -#PBS -e getgilists.e${PBS_JOBID} -#PBS -j oe - -cd ${PBS_O_WORKDIR} - -mpiexec python child.py - - - diff --git a/tests/test_orthologs.py b/tests/test_orthologs.py index df94be77..a285122a 100644 --- a/tests/test_orthologs.py +++ b/tests/test_orthologs.py @@ -2,7 +2,7 @@ import unittest from shutil import rmtree -from OrthoEvol.Orthologs.Blast import BaseBlastN +from OrthoEvol.Orthologs.Blast import BaseBlastN, OrthoBlastN class TestOrthologs(unittest.TestCase): @@ -33,6 +33,16 @@ def test_baseblastn(self): self.assertTrue(gpcr_blastn.copy_from_package) self.delete_project(project_path=self.project_path) + def test_orthoblastn(self): + """Test the OrthoBlastN class.""" + with self.assertRaises(EnvironmentError): + ortho_blastn = OrthoBlastN(project="orthology-project", + method=1, save_data=True, + acc_file="gpcr.csv", + copy_from_package=True) + self.assertEqual(ortho_blastn.ref_species, 'Homo_sapiens') + self.assertTrue(ortho_blastn.copy_from_package) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_tools.py b/tests/test_tools.py index 7804d1b0..310d2a68 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -32,6 +32,7 @@ def test_multiprocess(self): """Test the Multiprocess class.""" mp = Multiprocess() self.assertIsNotNone(mp.cpus) + self.assertEqual(mp.num_procs, mp.cpus - 1) def test_ncbiftpclient(self): """Test the NcbiFTPClient class."""
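
# A minimal usage sketch, not part of the patch above, for the new
# ``auto_start`` flag that the blast.py changes add to OrthoBlastN.
# Assumes OrthoEvol is installed with a working NCBI BLAST+ setup and that
# the default project files (gpcr.csv) ship with the package, as the diff's
# signature defaults suggest.
from OrthoEvol.Orthologs.Blast import OrthoBlastN

# run() now forwards self.auto_start to configure() instead of hard-coding
# auto_start=True, so the caller decides whether BLASTing begins immediately.
blastn = OrthoBlastN(project="orthology-gpcr", method=1, save_data=True,
                     acc_file="gpcr.csv", copy_from_package=True,
                     auto_start=True)
blastn.run()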