|
| 1 | +""" |
| 2 | +Note: this module will be removed after the next tag, don't use anything from here |
| 3 | +""" |
| 4 | + |
| 5 | +import dataclasses |
| 6 | +import logging |
| 7 | +from operator import attrgetter |
| 8 | + |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | +from nnpdf_data.coredata import CommonData |
| 12 | + |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# This call sits at module level, so the warning is logged when the module is
# imported, before any of the functions below is called.
log.warning(
    "You are loading deprecated functionality that use the old commondata parser. This is no longer supported and will be removed in the near future"
)
| 18 | + |
| 19 | + |
| 20 | +### Old commondata: |
| 21 | +### All code below this line is deprecated and will be removed |
def load_commondata_old(commondatafile, systypefile, setname):
    """Build a legacy ``CommonData`` from an old-format commondata/systype pair.

    Parameters
    ----------
    commondatafile : file or path to file
        Old-format commondata file. Its first line is skipped when reading
        the table. NOTE(review): the same argument is also handed to
        ``peek_commondata_metadata``, which calls ``open()`` on it, so in
        practice a path seems to be required -- confirm.
    systypefile : file or path to file
        Systype file, forwarded to ``parse_systypes``.
    setname : name of the dataset
        Stored in the returned object and quoted in the error message.

    Returns
    -------
    commondata : CommonData
        An object containing the data and information from the commondata
        and systype files, flagged with ``legacy=True``.

    Raises
    ------
    ValueError
        If the number of systematics or of data points derived from the
        table disagrees with the values returned by
        ``peek_commondata_metadata``.
    """
    # TODO: replace commondata files with bad formatting
    table = pd.read_csv(commondatafile, sep=r"\s+", skiprows=1, header=None)

    fixed_columns = ["entry", "process", "kin1", "kin2", "kin3", "data", "stat"]
    # Whatever is left after the fixed columns comes in (ADD, MULT) pairs,
    # one pair per systematic.
    nsys = (table.shape[1] - len(fixed_columns)) // 2
    table.columns = fixed_columns + ["ADD", "MULT"] * nsys
    table = table.set_index("entry")

    ndata = len(table)
    # Process label of the row whose "entry" label is 1.
    commondataproc = table["process"][1]

    # Cross-check the parsed table against the metadata read from the file
    metadata = peek_commondata_metadata(commondatafile)
    if (nsys, ndata) != (metadata.nsys, metadata.ndata):
        raise ValueError(f"Commondata table information does not match metadata for {setname}")

    systypes = parse_systypes(systypefile)

    return CommonData(
        setname=setname,
        ndata=ndata,
        commondataproc=commondataproc,
        nkin=3,
        nsys=nsys,
        commondata_table=table,
        systype_table=systypes,
        legacy=True,
    )
| 68 | + |
| 69 | + |
def parse_systypes(systypefile):
    """Read a systype file into a pandas dataframe indexed by ``sys_index``.

    The first line of the file is skipped and the remaining
    whitespace-separated fields are read as the columns ``sys_index``,
    ``treatment`` and ``name``. Any column containing missing values is
    dropped. If pandas reports the file as empty, an empty table with the
    same columns is returned instead.
    """
    columns = ["sys_index", "treatment", "name"]
    try:
        parsed = pd.read_csv(systypefile, sep=r"\s+", names=columns, skiprows=1, header=None)
    except pd.errors.EmptyDataError:
        # Some datasets e.g. CMSWCHARMRAT have no systematics
        return pd.DataFrame(columns=columns).set_index("sys_index")

    return parsed.dropna(axis="columns").set_index("sys_index")
| 85 | + |
| 86 | + |
@dataclasses.dataclass(frozen=True)
class CommonDataMetadata:
    """Contains metadata information about the data being read.

    Instances are immutable (frozen dataclass). Within this module they are
    created by ``peek_commondata_metadata`` from the first two lines of an
    old-format commondata file.
    """

    # First field of the first line of the commondata file.
    name: str
    # Second field of the first line, converted to int (number of systematics).
    nsys: int
    # Third field of the first line, converted to int (number of data points).
    ndata: int
    # Key returned by ``get_kinlabel_key`` for the process label found in the
    # second field of the second line.
    process_type: str
| 95 | + |
| 96 | + |
def peek_commondata_metadata(commondatafilename):
    """Read the header of an old commondata file into a ``CommonDataMetadata``.

    Only the first two lines are read: the first must split into exactly
    three fields (name, nsys, ndata) and the second must have at least two
    fields, the second of which is the process label. Any failure while
    reading or splitting these lines is logged and re-raised unchanged.
    """
    with open(commondatafilename) as stream:
        try:
            header_fields = stream.readline().split()
            name, nsys_str, ndata_str = header_fields
            process_type_str = stream.readline().split()[1]
        except Exception:
            log.error(f"Error processing {commondatafilename}")
            raise

    # Numeric conversion and the key lookup happen outside the try block, so
    # their errors propagate without the log message above.
    return CommonDataMetadata(
        name=name,
        nsys=int(nsys_str),
        ndata=int(ndata_str),
        process_type=get_kinlabel_key(process_type_str),
    )
| 112 | + |
| 113 | + |
def get_plot_kinlabels(commondata):
    """Look up the LaTeX kinematic labels for a given Commondata.

    The object's ``process_type`` is used as the key into ``KINLABEL_LATEX``;
    when the key is absent the process type itself is returned.
    """
    # TODO: the keys in KINLABEL_LATEX need to be updated for the new commondata
    process_type = commondata.process_type
    return KINLABEL_LATEX.get(process_type, process_type)
| 120 | + |
| 121 | + |
def get_kinlabel_key(process_label):
    """
    Return the key of ``KINLABEL_LATEX`` that best matches a process label.

    Since there is no 1:1 correspondence between latex keys and the old libNNPDF names
    we match the longest key such that the proc label starts with it.

    Parameters
    ----------
    process_label : str
        Process label to match. ``EWK_RAP_ASY`` is looked up as ``EWK_RAP``.

    Returns
    -------
    str
        The longest key of ``KINLABEL_LATEX`` that is a prefix of the label.

    Raises
    ------
    ValueError
        If no key of ``KINLABEL_LATEX`` is a prefix of the label. The message
        quotes the label exactly as it was passed in.
    """
    # NOTE(review): KINLABEL_LATEX is neither defined nor imported in the
    # visible part of this module -- confirm it is in scope at runtime.
    # TODO this function is disappearing in this PR
    lookup_label = "EWK_RAP" if process_label == "EWK_RAP_ASY" else process_label

    # Two prefixes of the same string with equal length are the same string,
    # so the longest matching key is unique and no full sort is needed.
    best_key = max(
        (key for key in KINLABEL_LATEX if lookup_label.startswith(key)), key=len, default=None
    )
    if best_key is None:
        raise ValueError(
            "Could not find a set of kinematic "
            f"variables matching the process {process_label}. Check the "
            "labels defined in commondata.cc."
        )
    return best_key
0 commit comments