Skip to content

Commit 55d2bfd

Browse files
committed
enum_annotator working again
there's some nested loop(s) in there
1 parent 32585e8 commit 55d2bfd

File tree

3 files changed

+38
-33
lines changed

3 files changed

+38
-33
lines changed

linkml_model_enrichment/annotators/enum_annotator.py

+34-33
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,10 @@
3030
# # add caching of already searched terms
3131
# # add default values for functions
3232

33-
34-
# # PARANOID SETUP
35-
# # delete venv
36-
# # purge pip cache
37-
# # create new venv and enter it
38-
# # install wheel
39-
# # install packages from requirements file
40-
33+
# TODO: the option below silences pandas'
34+
# SettingWithCopyWarning ("A value is trying to be set on a copy of a slice from a DataFrame"),
35+
# but the assignments that trigger it should really be fixed instead (e.g. by taking an explicit .copy()).
36+
pd.options.mode.chained_assignment = None # default='warn'
4137

4238
logger = logging.getLogger(__name__)
4339
click_log.basic_config(logger)
@@ -106,7 +102,7 @@ def ols_term_search(term, chars_to_whiteout, ontology_param, qf_param, rowcount_
106102
'rows=' + str(rowcount_param) + '&' + \
107103
qf_param
108104

109-
# logger.debug(request_string)
105+
logger.debug(request_string)
110106

111107
# this gets matching terms but doesn't show why they matched
112108
response_param = session_param.get(request_string)
@@ -218,25 +214,18 @@ def get_ols_term_annotations(iri_param, ontology_param, session_param, ols_terms
218214
help="""how much of a cosine distance will you tolerate
219215
when comparing an enum name to a term lable or synonym?""",
220216
default=0.05, show_default=True)
221-
# looks like click already provides -v, --verbosity LVL
222-
@click.option('--log_level',
223-
help="at what level do you want to see logging messages?",
224-
default='INFO', type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']), show_default=True)
225217
@click.option('--query_field_string',
226218
help="""do you want to define a custom list of fields to search in?
227219
The default settings work well in most cases.""",
228220
default='', show_default=True)
229221
@click.option('--test_sample_size',
230222
help="""if greater than 0, the enum name list will be samples at this size before mapping.""",
231223
default=0, show_default=True)
232-
def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_chars, ontology_string,
233-
ols_search_base_url, ols_terms_based_url, desired_row_count, shingle_size, max_cosine,
234-
overwrite_meaning, log_level, query_field_string, test_sample_size):
224+
def enum_annotator(modelfile, all_mappings_fn, requested_enum_name, whiteout_chars, ontology_string,
225+
ols_search_base_url, ols_terms_based_url, desired_row_count, shingle_size, max_cosine,
226+
overwrite_meaning, query_field_string, test_sample_size):
235227
# show entire width of data frames
236228
pd.set_option('display.expand_frame_repr', False)
237-
# DEBUG, INFO, WARNING, ERROR, or CRITICAL
238-
# allow log messages to be sent to a file, too?
239-
logger.setLevel(log_level)
240229

241230
# GLOBALS within this method
242231
blank_row = {'title': '', 'id': '', 'iri': '', 'is_defining_ontology': '',
@@ -255,13 +244,13 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
255244

256245
requested_pvs_names = get_pv_names(requested_pvs_obj)
257246
requested_pvs_names.sort()
258-
# logger.debug(requested_pvs_names)
247+
logger.debug(requested_pvs_names)
259248

260249
ontologies_phrased = make_ontolgy_phrase(ontology_string)
261-
# logger.debug(ontologies_phrased)
250+
logger.debug(ontologies_phrased)
262251

263252
qf_phrased = make_qf_phrase(query_field_string)
264-
# logger.debug(qf_phrased)
253+
logger.debug(qf_phrased)
265254

266255
cosine_obj = make_cosine_obj(shingle_size)
267256
# logger.debug(cosine_obj)
@@ -293,13 +282,15 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
293282
# # could look at growth in enum_name_mappings
294283

295284
enum_name_mapping_frame = enum_name_mappings.get()
285+
# logger.debug(enum_name_mapping_frame)
296286

297287
term_and_source = enum_name_mapping_frame.loc[enum_name_mapping_frame['raw_query'].eq(pv_name)]
298288

299289
term_and_source = term_and_source[["iri", "ontology_name"]]
300290
term_and_source.drop_duplicates(inplace=True)
301291
term_and_source = term_and_source.loc[~term_and_source['iri'].eq("")]
302292
term_and_source.sort_values(["iri", "ontology_name"], inplace=True)
293+
logger.debug(term_and_source)
303294

304295
term_and_source = term_and_source.to_dict(orient="records")
305296

@@ -314,8 +305,11 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
314305
suffixes=('_term', '_ano'))
315306

316307
for_str_dist = raw_through_annotations[["tidied_query", "name"]]
308+
# 20211215 0912
309+
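# SettingWithCopyWarning: "A value is trying to be set on a copy of a slice from a DataFrame." -- for_str_dist is a column slice of raw_through_annotations and new columns are assigned to it below.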
317310
for_str_dist["tidied_query_lc"] = for_str_dist["tidied_query"].str.lower()
318311
for_str_dist["name_lc"] = for_str_dist["name"].str.lower()
312+
logger.debug(for_str_dist)
319313

320314
# favoring simplicity over efficiency
321315
# ie may be string-comparing some duplicates
@@ -344,17 +338,18 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
344338
for_str_dist = pd.DataFrame(new_pair_list)
345339
for_str_dist.drop(labels=["tidied_query_lc", "name_lc"], axis=1, inplace=True)
346340

347-
# logger.debug(for_str_dist)
348-
341+
# was debug
342+
logger.debug(for_str_dist)
349343
raw_through_dist = raw_through_annotations.merge(for_str_dist, how="left", on=["tidied_query", "name"])
350-
351344
all_mappings_frame = []
352345

353346
new_enum = linkml_runtime.linkml_model.EnumDefinition(name=requested_enum_name)
354347
# logger.info(new_enum)
355348

349+
# looping inside the same loop ?!
356350
for i in requested_pvs_names:
357-
# logger.info(i)
351+
# todo unnest loop?
352+
logger.debug(i)
358353
ce = requested_pvs_obj[i]
359354
cr = raw_through_dist.loc[raw_through_dist["raw_query"].eq(i)]
360355
all_mappings_frame.append(cr)
@@ -365,6 +360,7 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
365360
with_min_row_count = len(with_min.index)
366361
if with_min_row_count > 0:
367362
with_min = with_min.drop(labels=['xrefs'], axis=1)
363+
with_min['description'] = str(with_min['description'])
368364
with_min.drop_duplicates(inplace=True)
369365
deduped_row_count = len(with_min.index)
370366
# # I'm surprised that there aren't any 2+ equally good mappings here
@@ -388,24 +384,29 @@ def make_iot_yaml(modelfile, all_mappings_fn, requested_enum_name, whiteout_char
388384
new_enum.permissible_values[i] = ce
389385

390386
all_mappings_frame = pd.concat(all_mappings_frame)
391-
all_mappings_frame.drop(labels="xrefs", axis=1, inplace=True)
387+
388+
all_mappings_frame.to_csv("all_mappings_frame.tsv", sep="\t", index=False)
389+
390+
all_mappings_frame['description'] = str(all_mappings_frame['description'])
391+
all_mappings_frame['xrefs'] = str(all_mappings_frame['xrefs'])
392+
392393
all_mappings_frame.drop_duplicates(inplace=True)
394+
393395
all_mappings_frame.to_csv(all_mappings_fn, sep="\t", index=False)
394396

395397
gs_tq_vc = all_mappings_frame["raw_query"].value_counts()
398+
logger.info("Number of hits for each term, regardless of quality. Doesn't show terms with 0 hits.")
396399
logger.info(gs_tq_vc)
397400

398401
current_schema.enums[requested_enum_name] = new_enum
399402

400403
string_dumped_schema = yaml_dumper.dumps(current_schema)
401404

402-
logger.debug(string_dumped_schema)
403-
405+
# TODO: send the dumped schema to STDOUT, or write it to an output file?
404406
print(string_dumped_schema)
405-
406-
# with open(xxx, 'w') as file:
407-
# documents = yaml.safe_dump(string_dumped_schema, file)
407+
# # with open(xxx, 'w') as file:
408+
# # documents = yaml.safe_dump(string_dumped_schema, file)
408409

409410

410411
if __name__ == '__main__':
411-
make_iot_yaml()
412+
enum_annotator()

pyproject.toml

+4
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@ PyYAML = "^5.3.1"
3333
[build-system]
3434
requires = ["poetry-core>=1.0.0"]
3535
build-backend = "poetry.core.masonry.api"
36+
37+
[tool.poetry.scripts]
38+
#demos = "linkml_round_trips.demos:hello"
39+
enum_annotator = "linkml_model_enrichment.annotators.enum_annotator:enum_annotator"

target/placeholder

Whitespace-only changes.

0 commit comments

Comments
 (0)