diff --git a/bin/litscan-create-xml-metadata.py b/bin/litscan-create-xml-metadata.py
index 95e31285..04cf6755 100755
--- a/bin/litscan-create-xml-metadata.py
+++ b/bin/litscan-create-xml-metadata.py
@@ -79,7 +79,8 @@ def main(conn_string, filename, output):
             database = line[1]
 
             # get hit_count
-            cursor.execute("SELECT hit_count FROM litscan_job WHERE job_id='{0}'".format(job_id.lower()))
+            cursor.execute(
+                "SELECT hit_count FROM litscan_job WHERE job_id='{0}'".format(job_id.lower()))
             result = cursor.fetchone()
             hit_count = str(result[0]) if result else ""
diff --git a/bin/litscan-get-statistics.py b/bin/litscan-get-statistics.py
index 048cfc67..000a214d 100755
--- a/bin/litscan-get-statistics.py
+++ b/bin/litscan-get-statistics.py
@@ -47,7 +47,8 @@ def main(database, output):
     results['ids_in_use'] = cursor.fetchone()[0]
 
     # number of urs in the current version
-    cursor.execute(""" SELECT COUNT(DISTINCT job_id) FROM litscan_database WHERE job_id like 'urs%' """)
+    cursor.execute(
+        """ SELECT COUNT(DISTINCT job_id) FROM litscan_database WHERE job_id like 'urs%' """)
     results['urs'] = cursor.fetchone()[0]
 
     # number of expert dbs
diff --git a/bin/litscan-retracted-articles.py b/bin/litscan-retracted-articles.py
index 8c3636d3..6e397f3d 100755
--- a/bin/litscan-retracted-articles.py
+++ b/bin/litscan-retracted-articles.py
@@ -77,7 +77,7 @@ def main(database, webhook):
             message = f'{len(retracted_articles)} {"articles have" if len(retracted_articles) > 1 else "article has"} ' \
                       f'been retracted: {", ".join(retracted_articles)}'
             requests.post(webhook, json.dumps({"text": message}))
-    except (Exception, psycopg2.DatabaseError) as error:
+    except (ValueError, psycopg2.DatabaseError) as error:
        requests.post(webhook, json.dumps({"text": error}))
    finally:
        if conn is not None:
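A note on the litscan-retracted-articles.py hunk above: narrowing the handler to (ValueError, psycopg2.DatabaseError) means any unexpected failure now propagates instead of being posted to the webhook and swallowed. The pattern in isolation looks roughly like this (a minimal sketch; the function name and the elided query are illustrative, not the script's actual code):

    import json

    import psycopg2
    import requests

    def notify_failures(conn_string, webhook):
        conn = None
        try:
            conn = psycopg2.connect(conn_string)
            # ... look up retracted articles and post the summary message ...
        except (ValueError, psycopg2.DatabaseError) as error:
            # Catch only the failures this block is expected to produce.
            # str(error) matters: exception objects are not JSON-serializable.
            requests.post(webhook, json.dumps({"text": str(error)}))
        finally:
            if conn is not None:
                conn.close()

The hunk itself still posts the bare error object; since json.dumps raises TypeError on an exception instance, wrapping it as str(error) may be worth folding into the same change.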
" + + "Empty data supplied for now" + + ", but you should check the legitimacy of this result.\n") message += "For reference, the other parameters to the parser were:\n" message += f"division: {division}\n" message += f"embl_file: {embl_file.name}\n" diff --git a/rnacentral_pipeline/databases/europepmc/stream.py b/rnacentral_pipeline/databases/europepmc/stream.py index 148f6687..1a8c07de 100644 --- a/rnacentral_pipeline/databases/europepmc/stream.py +++ b/rnacentral_pipeline/databases/europepmc/stream.py @@ -40,7 +40,7 @@ def fallback(data): try: ref = fetch.lookup(id_ref) yield id_ref, ref, rows - except Exception: + except (fetch.UnknownReference, fetch.TooManyPublications): pass diff --git a/rnacentral_pipeline/databases/evlncrnas/lookup.py b/rnacentral_pipeline/databases/evlncrnas/lookup.py index a98927ce..c065b969 100644 --- a/rnacentral_pipeline/databases/evlncrnas/lookup.py +++ b/rnacentral_pipeline/databases/evlncrnas/lookup.py @@ -105,7 +105,8 @@ def mapping(db_url, data): def as_mapping(db_url, data): - # data = data.explode('Aliases').drop_duplicates(subset='Aliases').rename(columns={'Aliases':'external_id'})#.set_index('external_id') + # data = data.explode('Aliases').drop_duplicates(subset='Aliases').rename( + # columns={'Aliases':'external_id'})#.set_index('external_id') print(len(data)) data = data.drop(data[data["Name"] == " "].index) print(data) diff --git a/rnacentral_pipeline/databases/evlncrnas/parser.py b/rnacentral_pipeline/databases/evlncrnas/parser.py index 8f06b32d..05d5782e 100644 --- a/rnacentral_pipeline/databases/evlncrnas/parser.py +++ b/rnacentral_pipeline/databases/evlncrnas/parser.py @@ -270,7 +270,8 @@ def parse(db_dir: Path, db_dumps: tuple[Path], db_url: str) -> None: ) # ## Match with RNAcentral based on the gene name - ## This is optionally chunked to save memory - split the lookup file and provide a list on the commandline + ## This is optionally chunked to save memory - + ## split the lookup file and provide a list on the commandline matched_frame = pd.concat( [get_db_matches(no_accession_frame, dump_chunk) for dump_chunk in db_dumps] ) diff --git a/rnacentral_pipeline/databases/hgnc/helpers.py b/rnacentral_pipeline/databases/hgnc/helpers.py index d777e3c6..4248f8e3 100644 --- a/rnacentral_pipeline/databases/hgnc/helpers.py +++ b/rnacentral_pipeline/databases/hgnc/helpers.py @@ -124,7 +124,7 @@ def ensembl_sequence(context: Context, ensembl_id: str) -> ty.Optional[str]: response = requests.get(url) try: response.raise_for_status() - except Exception: + except requests.exceptions.HTTPError: return None return response.text diff --git a/rnacentral_pipeline/databases/mgnify/prepare.py b/rnacentral_pipeline/databases/mgnify/prepare.py index a09d2091..4cb5b2b8 100644 --- a/rnacentral_pipeline/databases/mgnify/prepare.py +++ b/rnacentral_pipeline/databases/mgnify/prepare.py @@ -37,7 +37,8 @@ def prepare_mgnify_data(data, conn_str): ## Define fallback taxids of the general metagenome of the environment ## These are used if we can't do any better fallback = { - "zebrafish fecal genome catalogue": 1331678, # zebrafish metagenome - more accurate then generic fish fecal? + "zebrafish fecal genome catalogue": 1331678, # zebrafish metagenome - + # more accurate then generic fish fecal? 
"human gut genome catalogue": 408170, # human gut metagenome "human oral genome catalogue": 447426, # human oral metagenome "marine genome catalogue": 2994539, # human skin metagenome diff --git a/rnacentral_pipeline/databases/plncdb/parser.py b/rnacentral_pipeline/databases/plncdb/parser.py index 7be36f96..819f513c 100644 --- a/rnacentral_pipeline/databases/plncdb/parser.py +++ b/rnacentral_pipeline/databases/plncdb/parser.py @@ -96,7 +96,8 @@ def parse(data:pathlib.Path) -> ty.Iterable[Entry]: species_info["taxid"] = species_info["Species"].apply(phy.taxid) - total_entries = len(gff_db.execute("select DISTINCT(id) from features where featuretype = 'transcript' ").fetchall()) + total_entries = len(gff_db.execute( + "select DISTINCT(id) from features where featuretype = 'transcript' ").fetchall()) entries = [] for gene_id_q in tqdm(gff_db.execute("select id from features"), total=total_entries): primary_id = gene_id_q["id"] diff --git a/rnacentral_pipeline/databases/psi_mi/tab.py b/rnacentral_pipeline/databases/psi_mi/tab.py index 38c5c6a2..d6003448 100644 --- a/rnacentral_pipeline/databases/psi_mi/tab.py +++ b/rnacentral_pipeline/databases/psi_mi/tab.py @@ -120,7 +120,6 @@ def as_pubs(value): refs.append(pubs.reference(ident.value)) except data.UnknownPublicationType: LOGGER.warn("Could not handle publication %s", ident) - pass return refs diff --git a/rnacentral_pipeline/databases/tmrna/helpers.py b/rnacentral_pipeline/databases/tmrna/helpers.py index b0ca3997..3df14e6f 100644 --- a/rnacentral_pipeline/databases/tmrna/helpers.py +++ b/rnacentral_pipeline/databases/tmrna/helpers.py @@ -15,7 +15,8 @@ async def fetch_records(session, accessions: ty.List[str]): try: accession_str = ",".join(accessions) async with session.get( - f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id={accession_str}&rettype=gb&retmode=text" + ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?" + + f"db=nuccore&id={accession_str}&rettype=gb&retmode=text") ) as response: records_text = await response.text() handle = io.StringIO(records_text) diff --git a/rnacentral_pipeline/rnacentral/genome_mapping/blat.py b/rnacentral_pipeline/rnacentral/genome_mapping/blat.py index aa519d62..e5fd0fd9 100644 --- a/rnacentral_pipeline/rnacentral/genome_mapping/blat.py +++ b/rnacentral_pipeline/rnacentral/genome_mapping/blat.py @@ -40,7 +40,8 @@ "qBaseInsert", # Number of bases inserted in query "tNumInsert", # Number of inserts in target "tBaseInsert", # Number of bases inserted in target - "strand", # "+" or "-" for query strand. For translated alignments, second "+"or "-" is for target genomic strand. + "strand", # "+" or "-" for query strand. For translated alignments, + # second "+"or "-" is for target genomic strand. "qName", # Query sequence name "qSize", # Query sequence size. "qStart", # Alignment start position in query @@ -50,7 +51,10 @@ "tStart", # Alignment start position in target "tEnd", # Alignment end position in target "blockCount", # Number of blocks in the alignment (a block contains no gaps) - "blockSizes", # Comma-separated list of sizes of each block. If the query is a protein and the target the genome, blockSizes are in amino acids. See below for more information on protein query PSLs. + "blockSizes", # Comma-separated list of sizes of each block. + # If the query is a protein and the target the genome, + # blockSizes are in amino acids. + # See below for more information on protein query PSLs. 
"qStarts", # Comma-separated list of starting positions of each block in query "tStarts", # Comma-separated list of starting positions of each block in target ] diff --git a/rnacentral_pipeline/rnacentral/genome_mapping/igv.py b/rnacentral_pipeline/rnacentral/genome_mapping/igv.py index 4a867590..66f3669a 100644 --- a/rnacentral_pipeline/rnacentral/genome_mapping/igv.py +++ b/rnacentral_pipeline/rnacentral/genome_mapping/igv.py @@ -38,7 +38,7 @@ def ftp(host): try: conn.quit() - except Exception as err: + except ftplib.all_errors as err: LOGGER.info("Failed to close FTP connection") LOGGER.exception(err) diff --git a/rnacentral_pipeline/rnacentral/precompute/utils.py b/rnacentral_pipeline/rnacentral/precompute/utils.py index 1b631d35..8382b1cb 100644 --- a/rnacentral_pipeline/rnacentral/precompute/utils.py +++ b/rnacentral_pipeline/rnacentral/precompute/utils.py @@ -76,7 +76,8 @@ def entropy(data): structures) the name will be very long because it contains the sequence itself. For example: - RNA (5'-R(*GP*UP*GP*GP*UP*CP*UP*GP*AP*UP*GP*AP*GP*GP*CP*C)-3') from synthetic construct (PDB 3D0M, chain X) + RNA (5'-R(*GP*UP*GP*GP*UP*CP*UP*GP*AP*UP*GP*AP*GP*GP*CP*C)-3') + from synthetic construct (PDB 3D0M, chain X) This is not a useful name, but it is very long. Thus we do not want it. What we are generally after is something with the most information (to a diff --git a/rnacentral_pipeline/rnacentral/r2dt/parser.py b/rnacentral_pipeline/rnacentral/r2dt/parser.py index bec61a02..5b440ca9 100644 --- a/rnacentral_pipeline/rnacentral/r2dt/parser.py +++ b/rnacentral_pipeline/rnacentral/r2dt/parser.py @@ -94,7 +94,7 @@ def parse( try: info.validate() - except Exception as e: + except AssertionError as e: if allow_missing: LOGGER.warn("Did not find all required files for %s", urs) LOGGER.exception(e) diff --git a/rnacentral_pipeline/rnacentral/search_export/compare.py b/rnacentral_pipeline/rnacentral/search_export/compare.py index 0a76cedc..38b92587 100644 --- a/rnacentral_pipeline/rnacentral/search_export/compare.py +++ b/rnacentral_pipeline/rnacentral/search_export/compare.py @@ -75,7 +75,8 @@ def compare(output, results1, results2, facet): def write(output: ty.IO): """ """ - index1 = "http://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral?query={query}&format=json&facetfields={facet}&facetcount=30" + index1 = ("http://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral" + + "?query={query}&format=json&facetfields={facet}&facetcount=30") index2 = index1.replace("http://www.", "http://wwwdev.") queries = ["RNA", 'TAXONOMY:"9606"'] + EXPERT_DATABASES facets = ["rna_type", "has_genomic_coordinates"] diff --git a/rnacentral_pipeline/utils.py b/rnacentral_pipeline/utils.py index 7e1c299a..a98a20f5 100644 --- a/rnacentral_pipeline/utils.py +++ b/rnacentral_pipeline/utils.py @@ -37,7 +37,8 @@ def __init__(self, co): self.done = False self.result = None self.lock = threading.RLock() - ## This needs to be a re-rntrant lock so it is only release by the coroutine that acquired it + ## This needs to be a re-rntrant lock so it is only + ## release by the coroutine that acquired it def __await__(self): with self.lock: