Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pylint alerts corrections as part of an intervention experiment 1853 #208

Merged
merged 19 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
35d5194
bin\litscan-create-xml-metadata.py line-too-long
evidencebp Nov 18, 2024
cab32fa
rnacentral_pipeline\rnacentral\genome_mapping\blat.py line-too-long
evidencebp Nov 18, 2024
8689b59
rnacentral_pipeline\utils.py line-too-long
evidencebp Nov 18, 2024
51a100d
rnacentral_pipeline\databases\evlncrnas\lookup.py line-too-long
evidencebp Nov 18, 2024
8a05dd2
rnacentral_pipeline\databases\evlncrnas\parser.py line-too-long
evidencebp Nov 18, 2024
4edfd12
rnacentral_pipeline\databases\mgnify\prepare.py line-too-long
evidencebp Nov 18, 2024
4f15d94
rnacentral_pipeline\databases\plncdb\parser.py line-too-long
evidencebp Nov 18, 2024
ac41092
bin\litscan-get-statistics.py line-too-long
evidencebp Nov 18, 2024
cbaeb05
rnacentral_pipeline\cli\ensembl.py line-too-long
evidencebp Nov 18, 2024
6913c5f
rnacentral_pipeline\databases\tmrna\helpers.py line-too-long
evidencebp Nov 18, 2024
73a5ed6
Leftover from rnacentral_pipeline\cli\ensembl.py
evidencebp Nov 18, 2024
16dc5b5
rnacentral_pipeline\rnacentral\precompute\utils.py line-too-long
evidencebp Nov 18, 2024
b4b385e
rnacentral_pipeline\rnacentral\search_export\compare.py line-too-long
evidencebp Nov 18, 2024
4644e8c
rnacentral_pipeline\databases\psi_mi\tab.py unnecessary-pass
evidencebp Nov 18, 2024
b31a011
rnacentral_pipeline\databases\hgnc\helpers.py broad-exception-caught
evidencebp Nov 18, 2024
5d12811
rnacentral_pipeline\rnacentral\r2dt\parser.py broad-exception-caught
evidencebp Nov 18, 2024
d458b84
rnacentral_pipeline\rnacentral\genome_mapping\igv.py broad-exception-…
evidencebp Nov 18, 2024
ba4a8bf
rnacentral_pipeline\databases\europepmc\stream.py broad-exception-caught
evidencebp Nov 20, 2024
00612f7
bin\litscan-retracted-articles.py broad-exception-caught
evidencebp Nov 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bin/litscan-create-xml-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def main(conn_string, filename, output):
database = line[1]

# get hit_count
cursor.execute("SELECT hit_count FROM litscan_job WHERE job_id='{0}'".format(job_id.lower()))
cursor.execute(
"SELECT hit_count FROM litscan_job WHERE job_id='{0}'".format(job_id.lower()))
result = cursor.fetchone()
hit_count = str(result[0]) if result else ""

Expand Down
3 changes: 2 additions & 1 deletion bin/litscan-get-statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def main(database, output):
results['ids_in_use'] = cursor.fetchone()[0]

# number of urs in the current version
cursor.execute(""" SELECT COUNT(DISTINCT job_id) FROM litscan_database WHERE job_id like 'urs%' """)
cursor.execute(
""" SELECT COUNT(DISTINCT job_id) FROM litscan_database WHERE job_id like 'urs%' """)
results['urs'] = cursor.fetchone()[0]

# number of expert dbs
Expand Down
2 changes: 1 addition & 1 deletion bin/litscan-retracted-articles.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def main(database, webhook):
message = f'{len(retracted_articles)} {"articles have" if len(retracted_articles) > 1 else "article has"} ' \
f'been retracted: {", ".join(retracted_articles)}'
requests.post(webhook, json.dumps({"text": message}))
except (Exception, psycopg2.DatabaseError) as error:
except (ValueError, psycopg2.DatabaseError) as error:
requests.post(webhook, json.dumps({"text": error}))
finally:
if conn is not None:
Expand Down
4 changes: 3 additions & 1 deletion rnacentral_pipeline/cli/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def parse_data(division, embl_file, gff_file, output, family_file=None):
writer.write(entries)
except ValueError:
print("Empty entries, implies no ncRNAs. You should check that")
message = f"No ncRNA entries found for {embl_file.name}, or {gff_file.name}. Empty data supplied for now, but you should check the legitimacy of this result.\n"
message = (f"No ncRNA entries found for {embl_file.name}, or {gff_file.name}. "
+ "Empty data supplied for now"
+ ", but you should check the legitimacy of this result.\n")
message += "For reference, the other parameters to the parser were:\n"
message += f"division: {division}\n"
message += f"embl_file: {embl_file.name}\n"
Expand Down
2 changes: 1 addition & 1 deletion rnacentral_pipeline/databases/europepmc/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def fallback(data):
try:
ref = fetch.lookup(id_ref)
yield id_ref, ref, rows
except Exception:
except (fetch.UnknownReference, fetch.TooManyPublications):
pass


Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/databases/evlncrnas/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ def mapping(db_url, data):


def as_mapping(db_url, data):
# data = data.explode('Aliases').drop_duplicates(subset='Aliases').rename(columns={'Aliases':'external_id'})#.set_index('external_id')
# data = data.explode('Aliases').drop_duplicates(subset='Aliases').rename(
# columns={'Aliases':'external_id'})#.set_index('external_id')
print(len(data))
data = data.drop(data[data["Name"] == " "].index)
print(data)
Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/databases/evlncrnas/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,8 @@ def parse(db_dir: Path, db_dumps: tuple[Path], db_url: str) -> None:
) #

## Match with RNAcentral based on the gene name
## This is optionally chunked to save memory - split the lookup file and provide a list on the commandline
## This is optionally chunked to save memory -
## split the lookup file and provide a list on the commandline
matched_frame = pd.concat(
[get_db_matches(no_accession_frame, dump_chunk) for dump_chunk in db_dumps]
)
Expand Down
2 changes: 1 addition & 1 deletion rnacentral_pipeline/databases/hgnc/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def ensembl_sequence(context: Context, ensembl_id: str) -> ty.Optional[str]:
response = requests.get(url)
try:
response.raise_for_status()
except Exception:
except requests.exceptions.HTTPError:
return None
return response.text

Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/databases/mgnify/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def prepare_mgnify_data(data, conn_str):
## Define fallback taxids of the general metagenome of the environment
## These are used if we can't do any better
fallback = {
"zebrafish fecal genome catalogue": 1331678, # zebrafish metagenome - more accurate then generic fish fecal?
"zebrafish fecal genome catalogue": 1331678, # zebrafish metagenome -
# more accurate then generic fish fecal?
"human gut genome catalogue": 408170, # human gut metagenome
"human oral genome catalogue": 447426, # human oral metagenome
"marine genome catalogue": 2994539, # human skin metagenome
Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/databases/plncdb/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def parse(data:pathlib.Path) -> ty.Iterable[Entry]:
species_info["taxid"] = species_info["Species"].apply(phy.taxid)


total_entries = len(gff_db.execute("select DISTINCT(id) from features where featuretype = 'transcript' ").fetchall())
total_entries = len(gff_db.execute(
"select DISTINCT(id) from features where featuretype = 'transcript' ").fetchall())
entries = []
for gene_id_q in tqdm(gff_db.execute("select id from features"), total=total_entries):
primary_id = gene_id_q["id"]
Expand Down
1 change: 0 additions & 1 deletion rnacentral_pipeline/databases/psi_mi/tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def as_pubs(value):
refs.append(pubs.reference(ident.value))
except data.UnknownPublicationType:
LOGGER.warn("Could not handle publication %s", ident)
pass
return refs


Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/databases/tmrna/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ async def fetch_records(session, accessions: ty.List[str]):
try:
accession_str = ",".join(accessions)
async with session.get(
f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id={accession_str}&rettype=gb&retmode=text"
("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
+ f"db=nuccore&id={accession_str}&rettype=gb&retmode=text")
) as response:
records_text = await response.text()
handle = io.StringIO(records_text)
Expand Down
8 changes: 6 additions & 2 deletions rnacentral_pipeline/rnacentral/genome_mapping/blat.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
"qBaseInsert", # Number of bases inserted in query
"tNumInsert", # Number of inserts in target
"tBaseInsert", # Number of bases inserted in target
"strand", # "+" or "-" for query strand. For translated alignments, second "+"or "-" is for target genomic strand.
"strand", # "+" or "-" for query strand. For translated alignments,
# second "+"or "-" is for target genomic strand.
"qName", # Query sequence name
"qSize", # Query sequence size.
"qStart", # Alignment start position in query
Expand All @@ -50,7 +51,10 @@
"tStart", # Alignment start position in target
"tEnd", # Alignment end position in target
"blockCount", # Number of blocks in the alignment (a block contains no gaps)
"blockSizes", # Comma-separated list of sizes of each block. If the query is a protein and the target the genome, blockSizes are in amino acids. See below for more information on protein query PSLs.
"blockSizes", # Comma-separated list of sizes of each block.
# If the query is a protein and the target the genome,
# blockSizes are in amino acids.
# See below for more information on protein query PSLs.
"qStarts", # Comma-separated list of starting positions of each block in query
"tStarts", # Comma-separated list of starting positions of each block in target
]
Expand Down
2 changes: 1 addition & 1 deletion rnacentral_pipeline/rnacentral/genome_mapping/igv.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def ftp(host):

try:
conn.quit()
except Exception as err:
except ftplib.all_errors as err:
LOGGER.info("Failed to close FTP connection")
LOGGER.exception(err)

Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/rnacentral/precompute/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def entropy(data):
structures) the name will be very long because it contains the sequence
itself. For example:

RNA (5'-R(*GP*UP*GP*GP*UP*CP*UP*GP*AP*UP*GP*AP*GP*GP*CP*C)-3') from synthetic construct (PDB 3D0M, chain X)
RNA (5'-R(*GP*UP*GP*GP*UP*CP*UP*GP*AP*UP*GP*AP*GP*GP*CP*C)-3')
from synthetic construct (PDB 3D0M, chain X)

This is not a useful name, but it is very long. Thus we do not want it.
What we are generally after is something with the most information (to a
Expand Down
2 changes: 1 addition & 1 deletion rnacentral_pipeline/rnacentral/r2dt/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def parse(

try:
info.validate()
except Exception as e:
except AssertionError as e:
if allow_missing:
LOGGER.warn("Did not find all required files for %s", urs)
LOGGER.exception(e)
Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/rnacentral/search_export/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def compare(output, results1, results2, facet):

def write(output: ty.IO):
""" """
index1 = "http://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral?query={query}&format=json&facetfields={facet}&facetcount=30"
index1 = ("http://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral"
+ "?query={query}&format=json&facetfields={facet}&facetcount=30")
index2 = index1.replace("http://www.", "http://wwwdev.")
queries = ["RNA", 'TAXONOMY:"9606"'] + EXPERT_DATABASES
facets = ["rna_type", "has_genomic_coordinates"]
Expand Down
3 changes: 2 additions & 1 deletion rnacentral_pipeline/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def __init__(self, co):
self.done = False
self.result = None
self.lock = threading.RLock()
## This needs to be a re-rntrant lock so it is only release by the coroutine that acquired it
## This needs to be a re-entrant lock so it is only
## released by the coroutine that acquired it

def __await__(self):
with self.lock:
Expand Down