From a3100fa834b1ee8368d960886f0001ccf443236a Mon Sep 17 00:00:00 2001 From: Dana Alsharif Date: Thu, 27 Jun 2024 23:43:23 +0200 Subject: [PATCH] util: update_fixtures_cross_sections to avoid empty values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only add cross_section field when total_value is not zero. Fix code formatting to conform to `black` coding style. Enrich authors list and fix empty links there. Co-authored-by: Tibor Šimko --- README.rst | 6 +- utils/update_fixtures_cross_sections.py | 309 +++++++++++++----------- 2 files changed, 169 insertions(+), 146 deletions(-) diff --git a/README.rst b/README.rst index b0ccdd9ca..d289e9a38 100644 --- a/README.rst +++ b/README.rst @@ -83,9 +83,10 @@ The list of contributors in alphabetical order: - `Anna Trzcinska `_ - `Artemis Lavasa `_ -- `Audrius Mecionis <>`_ +- `Audrius Mecionis `_ - `Heitor de Bittencourt `_ -- `Jan Okraska <>`_ +- `Dana Alsharif `_ +- `Jan Okraska `_ - `Julie Hogan `_ - `Joud Masoud `_ - `Kati Lassila-Perini `_ @@ -94,4 +95,5 @@ The list of contributors in alphabetical order: - `Nancy Hamdan `_ - `Osama Sh. 
Almomani `_ - `Tibor Šimko `_ +- `Xiaohe Shen `_ - `Zach Marshall `_ diff --git a/utils/update_fixtures_cross_sections.py b/utils/update_fixtures_cross_sections.py index 158883806..ea6e09572 100755 --- a/utils/update_fixtures_cross_sections.py +++ b/utils/update_fixtures_cross_sections.py @@ -45,7 +45,7 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 sub_categories = os.listdir(cross_sections_path) # amend target records of all target datasets - if "MC2015/StandardModelPhysics" in cross_sections_path: # for 2015 SM only + if "MC2015/StandardModelPhysics" in cross_sections_path: # for 2015 SM only for categ in sub_categories: for json_file_name in os.listdir(f"{cross_sections_path}/{categ}"): total_cross_section_files += 1 @@ -55,7 +55,7 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 json_record = json.loads(json_file_content) dataset = json_record[0]["metadata"]["Dataset"] - + new_file_name = f"{dataset.replace('/', '$')}.json" if new_file_name[0] != "$": new_file_name = "$" + new_file_name @@ -81,7 +81,9 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 # amend target records of all target datasets for target_dataset_path in target_datasets_paths: # read target records - target_dataset_basename = os.path.basename(target_dataset_path)[: -len(".json")] + target_dataset_basename = os.path.basename(target_dataset_path)[ + : -len(".json") + ] target_dataset_file = open(target_dataset_path, "r") target_dataset_content = target_dataset_file.read() target_dataset_file.close() @@ -93,9 +95,7 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 for record in target_records: # find the record's corresponding cross-section values json file cross_sections_file_name = record["title"].replace("/", "$") - find_cross_sections_cmd = ( - f"find {cross_sections_path} -name '{cross_sections_file_name}.json'" - ) + find_cross_sections_cmd = f"find {cross_sections_path} -name 
'{cross_sections_file_name}.json'" cross_sections_file = subprocess.getoutput(find_cross_sections_cmd) if not cross_sections_file: @@ -108,108 +108,111 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 cross_sections_json_record = json.loads(cross_sections_json_content) cross_sections_json_data = cross_sections_json_record[1] - record["cross_section"] = {} - # check the presence of certain attributes to identify the format the file is in - # see: https://github.com/Ari-mu-l/OpenData/tree/main - # Format 1 - if ( - "totX_beforeMat" in cross_sections_json_data - and "matchingEff" in cross_sections_json_data - ): - total_format1 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - record["cross_section"]["matching_efficiency"] = ( - cross_sections_json_data["matchingEff"] - ) - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff_weights" - ] - record["cross_section"]["neg_weight_fraction"] = ( - cross_sections_json_data["negWeightFrac"] - ) - # Format 2 - elif "totX_beforeMat" in cross_sections_json_data: - total_format2 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - record["cross_section"]["matching_efficiency"] = "" - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff_weights" - ] - record["cross_section"]["neg_weight_fraction"] = "" - # Format 3 - elif ( - "totX_beforeFilter" in cross_sections_json_data - and "negWeightFrac" in cross_sections_json_data - ): - total_format3 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - 
record["cross_section"]["matching_efficiency"] = "" - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff_weights" - ] - record["cross_section"]["neg_weight_fraction"] = ( - cross_sections_json_data["negWeightFrac"] - ) - # Format 6 (unlisted format, but it is there in some json files) - elif "filterEff(weights)" in cross_sections_json_data: - total_format6 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - record["cross_section"]["matching_efficiency"] = "" - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff(weights)" - ] - record["cross_section"]["neg_weight_fraction"] = "" - # Format 4 - elif "totX_beforeFilter" in cross_sections_json_data: - total_format4 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - record["cross_section"]["matching_efficiency"] = "" - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff_weights" - ] - record["cross_section"]["neg_weight_fraction"] = "" - # Format 5 - else: - total_format5 += 1 - record["cross_section"]["total_value"] = cross_sections_json_data[ - "totX_final" - ] - record["cross_section"]["total_value_uncertainty"] = ( - cross_sections_json_data["totX_final_err"] - ) - record["cross_section"]["matching_efficiency"] = "" - record["cross_section"]["filter_efficiency"] = cross_sections_json_data[ - "filterEff_weights" - ] - record["cross_section"]["neg_weight_fraction"] = "" + if cross_sections_json_data["totX_final"] != "0.000e+00": + + record["cross_section"] = {} + # check the presence of certain attributes to identify the format the file is in + # see: https://github.com/Ari-mu-l/OpenData/tree/main + # Format 1 + if ( + 
"totX_beforeMat" in cross_sections_json_data + and "matchingEff" in cross_sections_json_data + ): + total_format1 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = ( + cross_sections_json_data["matchingEff"] + ) + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff_weights"] + ) + record["cross_section"]["neg_weight_fraction"] = ( + cross_sections_json_data["negWeightFrac"] + ) + # Format 2 + elif "totX_beforeMat" in cross_sections_json_data: + total_format2 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = "" + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff_weights"] + ) + record["cross_section"]["neg_weight_fraction"] = "" + # Format 3 + elif ( + "totX_beforeFilter" in cross_sections_json_data + and "negWeightFrac" in cross_sections_json_data + ): + total_format3 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = "" + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff_weights"] + ) + record["cross_section"]["neg_weight_fraction"] = ( + cross_sections_json_data["negWeightFrac"] + ) + # Format 6 (unlisted format, but it is there in some json files) + elif "filterEff(weights)" in cross_sections_json_data: + total_format6 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + 
record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = "" + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff(weights)"] + ) + record["cross_section"]["neg_weight_fraction"] = "" + # Format 4 + elif "totX_beforeFilter" in cross_sections_json_data: + total_format4 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = "" + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff_weights"] + ) + record["cross_section"]["neg_weight_fraction"] = "" + # Format 5 + else: + total_format5 += 1 + record["cross_section"]["total_value"] = ( + cross_sections_json_data["totX_final"] + ) + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["totX_final_err"] + ) + record["cross_section"]["matching_efficiency"] = "" + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filterEff_weights"] + ) + record["cross_section"]["neg_weight_fraction"] = "" + + total_datasets_amended += 1 new_target_records.append(record) - total_datasets_amended += 1 # save the amended dataset new_dataset_json = json.dumps( @@ -241,7 +244,7 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 print(f"Total number of datasets amended using Format 5: {total_format5}") print(f"Total number of datasets amended using Format 6: {total_format6}") - else: # For 2015 Higgs and all 2016 + else: # For 2015 Higgs and all 2016 for categ in sub_categories: for json_file_name in os.listdir(f"{cross_sections_path}/{categ}"): total_cross_section_files += 1 @@ -261,18 +264,20 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 ) # find paths to all datasets that 
need to be updated - find_datasets_cmd = ( - f'find {input_path} -type f -name "cms-simulated-datasets-2015*.json"' # TODO: change to 2016 if updating 2016 - ) - if ('2015' in find_datasets_cmd) and ('2016' in cross_sections_path): - sys.exit("Error: Using 2016 xsec to update 2015 json files. Update cms-simulated-datasets-2015*.json to cms-simulated-datasets-2016*.json in L265.") + find_datasets_cmd = f'find {input_path} -type f -name "cms-simulated-datasets-2015*.json"' # TODO: change to 2016 if updating 2016 + if ("2015" in find_datasets_cmd) and ("2016" in cross_sections_path): + sys.exit( + "Error: Using 2016 xsec to update 2015 json files. Update cms-simulated-datasets-2015*.json to cms-simulated-datasets-2016*.json in L265." + ) target_datasets_paths = subprocess.getoutput(find_datasets_cmd).split("\n") total_datasets_amended = 0 - + for target_dataset_path in target_datasets_paths: # read target records - target_dataset_basename = os.path.basename(target_dataset_path)[: -len(".json")] + target_dataset_basename = os.path.basename(target_dataset_path)[ + : -len(".json") + ] target_dataset_file = open(target_dataset_path, "r") target_dataset_content = target_dataset_file.read() target_dataset_file.close() @@ -284,9 +289,7 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 for record in target_records: # find the record's corresponding cross-section values json file cross_sections_file_name = record["title"].replace("/", "$") - find_cross_sections_cmd = ( - f"find {cross_sections_path} -name '{cross_sections_file_name}.json'" - ) + find_cross_sections_cmd = f"find {cross_sections_path} -name '{cross_sections_file_name}.json'" cross_sections_file = subprocess.getoutput(find_cross_sections_cmd) if not cross_sections_file: @@ -299,35 +302,52 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 cross_sections_json_record = json.loads(cross_sections_json_content) cross_sections_json_data = 
cross_sections_json_record[1] - record["cross_section"] = {} - - # The following lists alll the columns availalbe in the xsec json files - # UNCOMMENT AND ADD CONDITION IF NEEDED - if cross_sections_json_data["total_value"]!="-9": record["cross_section"]["total_value"] = cross_sections_json_data["total_value"] - if cross_sections_json_data["total_value_uncertainty"]!="-9": record["cross_section"]["total_value_uncertainty"] = cross_sections_json_data["total_value_uncertainty"] - if cross_sections_json_data["matching_efficiency"]!="-9": record["cross_section"]["matching_efficiency"] = cross_sections_json_data["matching_efficiency"] - if cross_sections_json_data["filter_efficiency"]!="-9": record["cross_section"]["filter_efficiency"] = cross_sections_json_data["filter_efficiency"] - if cross_sections_json_data["neg_weight_fraction"]!="-9": record["cross_section"]["neg_weight_fraction"] = cross_sections_json_data["neg_weight_fraction"] - #if cross_sections_json_data["xsec_before_matching"]!="-9": record["cross_section"]["xsec_before_matching"] = cross_sections_json_data["xsec_before_matching"] - # record["cross_section"]["xsec_before_matching_uncertainty"] = cross_sections_json_data["xsec_before_matching_uncertainty"] - # record["cross_section"]["xsec_after_matching"] = cross_sections_json_data[""] - # record["cross_section"]["xsec_after_matching_uncertainty"] = cross_sections_json_data[""] - # record["cross_section"]["xsec_before_filter"] = cross_sections_json_data[""] - # record["cross_section"]["xsec_before_filter_uncertainty"] = cross_sections_json_data[""] - #record["cross_section"]["matching_efficiency_uncertainty"] = cross_sections_json_data["matching_efficiency_uncertainty"] - #record["cross_section"]["HepMC_filter_efficiency"] = cross_sections_json_data["HepMC_filter_efficiency"] - #record["cross_section"]["HepMC_filter_efficiency_uncertainty"] = cross_sections_json_data["HepMC_filter_efficiency_uncertainty"] - 
#record["cross_section"]["HepMC_filter_efficiency_evt"] = cross_sections_json_data["HepMC_filter_efficiency_evt"] - #record["cross_section"]["HepMC_filter_efficiency_evt_uncertainty"] = cross_sections_json_data["HepMC_filter_efficiency_evt_uncertainty"] - #record["cross_section"]["filter_efficiency_uncertainty"] = cross_sections_json_data["filter_efficiency_uncertainty"] - #record["cross_section"]["filter_efficiency_evt"] = cross_sections_json_data["filter_efficiency_evt"] - #record["cross_section"]["filter_efficiency_evt_uncertainty"] = cross_sections_json_data["filter_efficiency_evt_uncertainty"] - #record["cross_section"]["neg_weight_fraction_uncertainty"] = cross_sections_json_data["neg_weight_fraction_uncertainty"] - #record["cross_section"]["equivalent_lumi"] = cross_sections_json_data["equivalent_lumi"] - #record["cross_section"]["equivalent_lumi_uncertainty"] = cross_sections_json_data["equivalent_lumi_uncertainty"] + if cross_sections_json_data["total_value"] != "0.000e+00": + record["cross_section"] = {} + + # The following lists all the columns available in the xsec json files + # UNCOMMENT AND ADD CONDITION IF NEEDED + if cross_sections_json_data["total_value"] != "-9": + record["cross_section"]["total_value"] = ( + cross_sections_json_data["total_value"] + ) + if cross_sections_json_data["total_value_uncertainty"] != "-9": + record["cross_section"]["total_value_uncertainty"] = ( + cross_sections_json_data["total_value_uncertainty"] + ) + if cross_sections_json_data["matching_efficiency"] != "-9": + record["cross_section"]["matching_efficiency"] = ( + cross_sections_json_data["matching_efficiency"] + ) + if cross_sections_json_data["filter_efficiency"] != "-9": + record["cross_section"]["filter_efficiency"] = ( + cross_sections_json_data["filter_efficiency"] + ) + if cross_sections_json_data["neg_weight_fraction"] != "-9": + record["cross_section"]["neg_weight_fraction"] = ( + cross_sections_json_data["neg_weight_fraction"] + ) + # if
cross_sections_json_data["xsec_before_matching"]!="-9": record["cross_section"]["xsec_before_matching"] = cross_sections_json_data["xsec_before_matching"] + # record["cross_section"]["xsec_before_matching_uncertainty"] = cross_sections_json_data["xsec_before_matching_uncertainty"] + # record["cross_section"]["xsec_after_matching"] = cross_sections_json_data[""] + # record["cross_section"]["xsec_after_matching_uncertainty"] = cross_sections_json_data[""] + # record["cross_section"]["xsec_before_filter"] = cross_sections_json_data[""] + # record["cross_section"]["xsec_before_filter_uncertainty"] = cross_sections_json_data[""] + # record["cross_section"]["matching_efficiency_uncertainty"] = cross_sections_json_data["matching_efficiency_uncertainty"] + # record["cross_section"]["HepMC_filter_efficiency"] = cross_sections_json_data["HepMC_filter_efficiency"] + # record["cross_section"]["HepMC_filter_efficiency_uncertainty"] = cross_sections_json_data["HepMC_filter_efficiency_uncertainty"] + # record["cross_section"]["HepMC_filter_efficiency_evt"] = cross_sections_json_data["HepMC_filter_efficiency_evt"] + # record["cross_section"]["HepMC_filter_efficiency_evt_uncertainty"] = cross_sections_json_data["HepMC_filter_efficiency_evt_uncertainty"] + # record["cross_section"]["filter_efficiency_uncertainty"] = cross_sections_json_data["filter_efficiency_uncertainty"] + # record["cross_section"]["filter_efficiency_evt"] = cross_sections_json_data["filter_efficiency_evt"] + # record["cross_section"]["filter_efficiency_evt_uncertainty"] = cross_sections_json_data["filter_efficiency_evt_uncertainty"] + # record["cross_section"]["neg_weight_fraction_uncertainty"] = cross_sections_json_data["neg_weight_fraction_uncertainty"] + # record["cross_section"]["equivalent_lumi"] = cross_sections_json_data["equivalent_lumi"] + # record["cross_section"]["equivalent_lumi_uncertainty"] = cross_sections_json_data["equivalent_lumi_uncertainty"] + + total_datasets_amended += 1 
new_target_records.append(record) - total_datasets_amended += 1 # save the amended dataset new_dataset_json = json.dumps( @@ -353,5 +373,6 @@ def main(cross_sections_path, input_path, output_path): # noqa: D301,D412 f"Total number of cross-section values json files: {total_cross_section_files}, Total number of amended datasets: {total_datasets_amended}" ) + if __name__ == "__main__": main()