@pf.register_dataframe_method
def infer_cercarbono_project_type(df: pd.DataFrame) -> pd.DataFrame:
    """Infer project types for Cercarbono projects based on protocol.

    The first entry of each row's 'protocol' list is looked up in a fixed
    protocol-code -> project-type table. Rows whose 'protocol' value is not a
    non-empty list, or whose first protocol is not in the table, are labelled
    'unknown'. 'project_type_source' is set to 'carbonplan' for every row.

    The input dataframe is modified in place and also returned.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono project data with 'protocol' column.

    Returns
    -------
    pd.DataFrame
        Dataframe with inferred 'project_type' and 'project_type_source' columns.
    """
    # Mapping from Cercarbono protocol codes to project types
    ccb_protocol_to_type = {
        'ccb-redd': 'redd+',
        'ccb-reforest': 'afforestation/reforestation',
        'ccb-renewables': 're bundled',
    }

    def _type_for(protocols) -> str:
        # Only the first protocol in the list decides the project type
        if isinstance(protocols, list) and protocols:
            return ccb_protocol_to_type.get(protocols[0], 'unknown')
        return 'unknown'

    df['project_type'] = 'unknown'
    df['project_type_source'] = 'carbonplan'

    # Element-wise map instead of iterrows() + df.at[label]: label-based writes
    # hit every row sharing an index label, which mislabels rows when the index
    # is not unique.
    if 'protocol' in df.columns and not df.empty:
        df['project_type'] = df['protocol'].map(_type_for)

    return df


@pf.register_dataframe_method
def add_cercarbono_project_url(df: pd.DataFrame) -> pd.DataFrame:
    """Add project URL column for Cercarbono projects.

    The URL is built as ``https://www.ecoregistry.io/projects/<project_id>``
    from the current value of the 'project_id' column, so this must run before
    'project_id' is rewritten with the registry prefix.

    The input dataframe is modified in place and also returned.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono project data with a 'project_id' column.

    Returns
    -------
    pd.DataFrame
        Dataframe with added 'project_url' column.
    """
    base_url = 'https://www.ecoregistry.io/projects'
    df['project_url'] = df['project_id'].apply(lambda x: f'{base_url}/{x}')
    return df


@pf.register_dataframe_method
def add_cercarbono_project_id(df: pd.DataFrame, prefix: str = 'CCB') -> pd.DataFrame:
    """Rewrite 'project_id' as the registry prefix plus the trailing id segment.

    The existing 'project_id' value is converted to a string, split on '-',
    and only the last segment is kept before the prefix is prepended.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono data with a 'project_id' column.
    prefix : str, optional
        Prefix to add to project IDs, by default "CCB"

    Returns
    -------
    pd.DataFrame
        Copy of the dataframe with the rewritten 'project_id' column.
    """
    df = df.copy()
    df['project_id'] = prefix + df['project_id'].astype(str).str.split('-').str[-1]
    return df


@pf.register_dataframe_method
def process_cercarbono_credits(
    df: pd.DataFrame,
    *,
    download_type: str,
    registry_name: str = 'cercarbono',
    prefix: str = 'CCB',
    harmonize_beneficiary_info: bool = False,
) -> pd.DataFrame:
    """Process Cercarbono transactions dataframe to conform to offsets-db schema.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono credit transactions data.
    download_type : str
        Type of downloaded data, either 'issuances' or 'retirements'. Any value
        other than 'issuances' is handled as retirements.
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "cercarbono"
    prefix : str, optional
        Prefix to add to project IDs, by default "CCB"
    harmonize_beneficiary_info : bool, optional
        Whether to pipe the result through ``harmonize_beneficiary_data``,
        by default False

    Returns
    -------
    pd.DataFrame
        Processed dataframe conforming to offsets-db schema.
    """
    # Work on a copy so the caller's dataframe is not modified, matching the
    # other processing functions.
    df = df.copy()

    if download_type == 'issuances':
        # TODO: @badgley, please confirm this is the correct way to extract vintage year for issuances
        # Take the last ' / '-separated part and read its first four characters as the year
        df['vintage'] = df['vintage_of_credits'].str.split(' / ').str[-1].str[:4].astype(int)
        df['transaction_type'] = 'issuance'
        # The project number is the second '_'-separated token of the serial
        df['project_id'] = prefix + df['serial'].str.split('_').str[1]

    else:
        df['project_id'] = prefix + df['project_id'].astype(str)
        df['transaction_type'] = 'retirement'

    column_mapping = load_column_mapping(
        registry_name=registry_name, download_type=download_type, mapping_path=CREDIT_SCHEMA_UPATH
    )

    # Invert to raw-name -> schema-name; unmapped (null) fields are dropped so
    # they do not collapse into a single meaningless None key.
    columns = {v: k for k, v in column_mapping.items() if v is not None}

    data = (
        df.rename(columns=columns)
        .set_registry(registry_name=registry_name)
        .convert_to_datetime(columns=['transaction_date'], format='ISO8601')
        .add_missing_columns(schema=credit_without_id_schema)
        .validate(schema=credit_without_id_schema)
    )

    if harmonize_beneficiary_info:
        data = data.pipe(
            harmonize_beneficiary_data, registry_name=registry_name, download_type=download_type
        )
    return data


@pf.register_dataframe_method
def process_cercarbono_projects(
    df: pd.DataFrame,
    *,
    credits: pd.DataFrame,
    registry_name: str = 'cercarbono',
) -> pd.DataFrame:
    """Process Cercarbono projects dataframe to conform to offsets-db schema.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono project data.
    credits : pd.DataFrame
        Dataframe containing credit transactions data, used for the
        issued/retired totals and the first issuance/retirement dates.
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "cercarbono"

    Returns
    -------
    pd.DataFrame
        Processed dataframe conforming to offsets-db schema.
    """

    registry_project_column_mapping = load_registry_project_column_mapping(
        registry_name=registry_name, file_path=PROJECT_SCHEMA_UPATH
    )
    # Invert to raw-name -> schema-name, skipping fields with no raw column (null)
    inverted_column_mapping = {
        value: key for key, value in registry_project_column_mapping.items() if value is not None
    }
    type_category_mapping = load_type_category_mapping()
    inverted_protocol_mapping = load_inverted_protocol_mapping()
    df = df.copy()
    # Extract country from locations by taking the first entry.
    # NOTE(review): this raises if a project has an empty or missing
    # 'locations' value -- confirm the registry always supplies at least one.
    df['country'] = df['locations'].map(lambda x: x[0]['country'])

    data = (
        df.rename(columns=inverted_column_mapping)
        .set_registry(registry_name=registry_name)
        .add_cercarbono_project_url()  # this must be called before adding project id because the url function uses the original project_id value
        .add_cercarbono_project_id()
        .harmonize_country_names()
        .harmonize_status_codes()
        .map_protocol(inverted_protocol_mapping=inverted_protocol_mapping)
        .infer_cercarbono_project_type()  # Use Cercarbono-specific inference
        .override_project_types(
            override_data_path=BERKELEY_PROJECT_TYPE_UPATH, source_str='berkeley'
        )
        .add_category(
            type_category_mapping=type_category_mapping
        )  # must come after types; type -> category
        .map_project_type_to_display_name(type_category_mapping=type_category_mapping)
        .add_is_compliance_flag()
        .add_retired_and_issued_totals(credits=credits)
        .add_first_issuance_and_retirement_dates(credits=credits)
        .add_missing_columns(schema=project_schema)
        .convert_to_datetime(columns=['listed_at', 'first_issuance_at', 'first_retirement_at'])
        .validate(schema=project_schema)
    )

    return data
def load_column_mapping(
    *, registry_name: str, download_type: str, mapping_path: upath.UPath | str
) -> dict:
    """Load the raw-column mapping for one registry and download type.

    Parameters
    ----------
    registry_name : str
        Top-level key of the mapping file (e.g. a registry name).
    download_type : str
        Key nested under the registry entry (e.g. 'issuances', 'retirements').
    mapping_path : upath.UPath | str
        Location of the JSON mapping file. It is opened with the built-in
        ``open``, so it must be a path the local filesystem can resolve.

    Returns
    -------
    dict
        The object stored under ``[registry_name][download_type]``.

    Raises
    ------
    KeyError
        If the registry or download type is not present in the mapping file.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding
    with open(mapping_path, encoding='utf-8') as f:
        registry_credit_column_mapping = json.load(f)
    return registry_credit_column_mapping[registry_name][download_type]
"acr", + "sub-category": "transportation", "url": "https://americancarbonregistry.org/carbon-accounting/standards-methodologies/emission-reductions-through-truck-stop-electrification" }, "acr-ifm-nonfed": { @@ -645,9 +645,9 @@ }, "am0031": { "category": "energy-efficiency", - "sub-category": "transportation", "known-strings": ["AM0031"], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "am0034": { "category": "ghg-management", @@ -753,9 +753,9 @@ }, "am0090": { "category": "energy-efficiency", - "sub-category": "transportation", "known-strings": ["AM0090"], "program": "cdm", + "sub-category": "transportation", "url": "https://cdm.unfccc.int/methodologies/documentation/meth_booklet.pdf#AM0090" }, "am0107": { @@ -1031,9 +1031,9 @@ }, "ams-i-k": { "category": "renewable-energy", - "sub-category": "household", "known-strings": ["AMS-I.K. Solar cookers for households"], - "program": "cdm" + "program": "cdm", + "sub-category": "household" }, "ams-i-l": { "category": "renewable-energy", @@ -1162,7 +1162,6 @@ }, "ams-iii-aq": { "category": "fuel-switching", - "sub-category": "transportation", "known-strings": [ "AMS-III.H.; AMS-III.AQ", "AMS-I.D.; AMS-III.D.; AMS-III.H.; AMS-III.O.; AMS-III.AQ", @@ -1171,7 +1170,8 @@ "AMS-III.D.; AMS-III.AO.; AMS-III.AQ", "AMS-III.D - Version 21.0, AMS-III.AQ. - Version 2.0" ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-ar": { "category": "fuel-switching", @@ -1205,13 +1205,13 @@ }, "ams-iii-av": { "category": "energy-efficiency", - "sub-category": "drinking-water", "known-strings": [ "VMR0006; AMS-III.AV.", "AMS-III.AV.", "AMS-III.AV. 
Low greenhouse gas emitting water purification systems" ], - "program": "cdm" + "program": "cdm", + "sub-category": "drinking-water" }, "ams-iii-b": { "category": "fuel-switching", @@ -1237,12 +1237,12 @@ }, "ams-iii-bc": { "category": "energy-efficiency", - "sub-category": "transportation", "known-strings": [ "AMS-III.BC.", "AMS-III.BC.: Emissions Reductions through Improved Efficiency of Vehicle Fleet" ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-bd": { "category": "energy-efficiency", @@ -1252,35 +1252,36 @@ }, "ams-iii-bg": { "category": "energy-efficiency", - "sub-category": "biomass", "known-strings": [ "VMR0006; AMS-III.BG", "AMS-III.BG Emission reduction through sustainable charcoal production and consumption" ], - "program": "cdm" + "program": "cdm", + "sub-category": "biomass" }, "ams-iii-bm": { "category": "energy-efficiency", - "sub-category": "transportation", "known-strings": [ "AMS-III.BM", "AMS-III.BM: Lightweight two and three wheeled personal transportation version 1.0" ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-c": { "category": "energy-efficiency", - "sub-category": "transportation", "known-strings": [ "AMS-III.C. Emission reductions by electric and hybrid vehicles", "AMS-I.F.; AMS-III.C.", "AMS-III.C.; VM0038", "AMS-III.C." ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-d": { "category": "ghg-management", + "is_methane": true, "known-strings": [ "AMS-I.D.; AMS-III.D.; AMS-III.H.; AMS-III.O.; AMS-III.AQ", "AMS-III.D. 
Methane recovery in animal manure management systems", @@ -1307,7 +1308,6 @@ "AMS-III.F.: Avoidance of methane emissions through composting - Ver12.0; AMS-III.D: Methane recovery in animal manure management systems Ver 21.0" ], "program": "cdm", - "is_methane": true, "sub-category": "livestock-methane", "url": "https://cdm.unfccc.int/methodologies/documentation/meth_booklet.pdf#AMS_III_D" }, @@ -1346,6 +1346,7 @@ }, "ams-iii-g": { "category": "ghg-management", + "is_methane": true, "known-strings": [ "AMS-III.G. Landfill methane recovery", "AMS-I.C.; AMS-III.G.", @@ -1366,7 +1367,6 @@ "AMS-III.G: “Landfill methane recovery” - Version 10.0; AMS-I.D: “Grid connected renewable electricity generation” - Version 18.0" ], "program": "cdm", - "is_methane": true, "sub-category": "landfill-methane" }, "ams-iii-h": { @@ -1458,21 +1458,21 @@ }, "ams-iii-s": { "category": "fuel-switching", - "sub-category": "transportation", "known-strings": [ "AMS-I.F.; AMS-III.S.", "AMS-III.S.", "AMS-III.S. Introduction of low-emission vehicles/technologies to commercial vehicle fleets" ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-u": { "category": "fuel-switching", - "sub-category": "transportation", "known-strings": [ "AMS-III.U.: Cable Cars for Mass Rapid Transit System (MRTS)" ], - "program": "cdm" + "program": "cdm", + "sub-category": "transportation" }, "ams-iii-y": { "category": "ghg-management", @@ -1779,6 +1779,29 @@ "sub-category": "waste-management", "url": "https://www.climateactionreserve.org/how/protocols/waste/organic-waste-digestion/" }, + "ccb-redd": { + "category": "forest", + "known-strings": [ + "CCB - M/LU-REDD+: Methodology for the Implementation of REDD+ Projects Consistent with National Reference Level" + ], + "program": "ccb", + "url": "https://www.cercarbono.com/wp-content/uploads/2023/01/REDD-Methodology-V-1.3.1.pdf" + }, + "ccb-reforest": { + "category": "forest", + "known-strings": [ + "CCB - M/UT/F-A01: Methodology To 
Implement GHG Removal Projects Through Reforestation, Forest Restoration and the Establishment of Woody Crop" + ], + "program": "ccb", + "url": "https://www.cercarbono.com/wp-content/uploads/2021/11/Forest-and-Woody-Crops-Methodology-V.1.1.pdf" + }, + "ccb-renewables": { + "category": "renewable-energy", + "known-strings": [ + "CCB - M/E-ER01: Methodology for the realization of GHG emissions reduction projects by the use of renewable energy" + ], + "program": "ccb" + }, "climate-leaders-landfill": { "category": "ghg-management", "is_methane": true, @@ -1804,37 +1827,18 @@ "program": "acr", "sub-category": "industrial-gases" }, - "gs-shipping": { - "category": "energy-efficiency", - "sub-category": "transportation", - "known-strings": [ - "GS Retrofit Energy Efficiency Measures in Shipping v2", - "GS Retrofit Energy Efficiency Measures in Shipping v1" - ], - "url": "https://globalgoals.goldstandard.org/standards/422_V2.0_EE_Shipping_Retrofit-Energy-Efficiency-Measures-in-Shipping.pdf" - }, "gs-alt-coal": { "category": "energy-efficiency", - "sub-category": "cooking", "known-strings": ["GS Alternative Ignition Coal Fires v1."], + "sub-category": "cooking", "url": "https://www.goldstandard.org/sites/default/files/documents/alternative_ignition.pdf" }, - "gs-reforest": { - "category": "forest", - "known-strings": [ - "Afforestation/Reforestation GHG Emissions Reduction & Sequestration Methodology" - ], - "url": "https://globalgoals.goldstandard.org/standards/403_V2.0_LUF_AR-Methodology-GHGs-emission-reduction-and-Sequestration-Methodology.pdf" - }, - "gs-tpddtec": { + "gs-drinking-water": { "category": "energy-efficiency", "known-strings": [ - "GS TPDDTEC v 1.", - "GS TPDDTEC v 2.", - "GS TPDDTEC v 3.", - "GS TPDDTEC v3.1", - "GS TPDDTEC V4.0: REDUCED EMISSIONS FROM COOKING AND HEATING – TECHNOLOGIES AND PRACTICES TO DISPLACE DECENTRALIZED THERMAL ENERGY CONSUMPTION" - ] + "GS Methodology for emission reductions from safe drinking water supply" + ], + "sub-category": 
"drinking-water" }, "gs-kitchen-cookstoves": { "category": "energy-efficiency", @@ -1844,18 +1848,27 @@ "GS Methodology for Improved Cook stoves and Kitchen Regimes v2." ] }, - "gs-drinking-water": { + "gs-metered-cookstoves": { "category": "energy-efficiency", - "sub-category": "drinking-water", "known-strings": [ - "GS Methodology for emission reductions from safe drinking water supply" + "Methodology for Metered & Measured Energy Cooking Devices" ] }, - "gs-metered-cookstoves": { + "gs-reforest": { + "category": "forest", + "known-strings": [ + "Afforestation/Reforestation GHG Emissions Reduction & Sequestration Methodology" + ], + "url": "https://globalgoals.goldstandard.org/standards/403_V2.0_LUF_AR-Methodology-GHGs-emission-reduction-and-Sequestration-Methodology.pdf" + }, + "gs-shipping": { "category": "energy-efficiency", "known-strings": [ - "Methodology for Metered & Measured Energy Cooking Devices" - ] + "GS Retrofit Energy Efficiency Measures in Shipping v2", + "GS Retrofit Energy Efficiency Measures in Shipping v1" + ], + "sub-category": "transportation", + "url": "https://globalgoals.goldstandard.org/standards/422_V2.0_EE_Shipping_Retrofit-Energy-Efficiency-Measures-in-Shipping.pdf" }, "gs-simplified-cookstoves": { "category": "energy-efficiency", @@ -1865,6 +1878,73 @@ "The Gold Standard Simplified Methodology for Clean and Efficient Cookstoves, Version 3.0" ] }, + "gs-tpddtec": { + "category": "energy-efficiency", + "known-strings": [ + "GS TPDDTEC v 1.", + "GS TPDDTEC v 2.", + "GS TPDDTEC v 3.", + "GS TPDDTEC v3.1", + "GS TPDDTEC V4.0: REDUCED EMISSIONS FROM COOKING AND HEATING – TECHNOLOGIES AND PRACTICES TO DISPLACE DECENTRALIZED THERMAL ENERGY CONSUMPTION" + ] + }, + "iso-bio-burial": { + "category": "biomass-cdr", + "known-strings": ["subsurface-biomass"], + "program": "iso" + }, + "iso-bio-ccs": { + "category": "biomass-cdr", + "known-strings": ["biogenic-capture-and-storage"], + "program": "iso" + }, + "iso-bio-geo": { + "category": 
"biomass-cdr", + "known-strings": ["biomass-geological-storage"], + "program": "iso", + "url": "https://isometric.com/pathways/biomass-geological-storage" + }, + "iso-bio-oil": { + "category": "biomass-cdr", + "known-strings": ["bio-oil-geological-storage"], + "program": "iso" + }, + "iso-biochar": { + "category": "biomass-cdr", + "known-strings": ["biochar"], + "program": "iso", + "url": "https://isometric.com/pathways/biochar" + }, + "iso-dac": { + "category": "air-capture", + "known-strings": ["direct-air-capture"], + "program": "iso" + }, + "iso-erw": { + "category": "alkalinity-cdr", + "known-strings": ["enhanced-weathering-agriculture"], + "program": "iso" + }, + "iso-oae": { + "category": "alkalinity-cdr", + "known-strings": ["ocean-alkalinity-enhancement"], + "program": "iso" + }, + "iso-refor": { + "category": "forest", + "known-strings": ["reforestation"], + "program": "iso" + }, + "iso-river": { + "category": "alkalinity-cdr", + "known-strings": ["river-alkalinity-enhancement"], + "program": "iso" + }, + "iso-wastewater": { + "category": "alkalinity-cdr", + "known-strings": ["wastewater-alkalinity-enhancement"], + "program": "iso" + }, "unknown": { "category": "unknown", "known-strings": [ @@ -1931,9 +2011,9 @@ }, "vm0008": { "category": "energy-efficiency", - "sub-category": "weatherization", "known-strings": ["VM0008"], - "program": "vcs" + "program": "vcs", + "sub-category": "weatherization" }, "vm0009": { "category": "forest", @@ -2004,9 +2084,9 @@ }, "vm0019": { "category": "fuel-switching", - "sub-category": "transportation", "known-strings": ["VM0019"], - "program": "vcs" + "program": "vcs", + "sub-category": "transportation" }, "vm0021": { "known-strings": ["AMS-III.F.; VM0021", "VM0021"], @@ -2066,9 +2146,9 @@ }, "vm0038": { "category": "fuel-switching", - "sub-category": "transportation", "known-strings": ["AMS-III.C.; VM0038", "VM0038"], "program": "vcs", + "sub-category": "transportation", "url": 
"https://verra.org/methodologies/vm0038-methodology-for-electric-vehicle-charging-systems-v1-0/" }, "vm0039": { @@ -2167,80 +2247,5 @@ ], "program": "vcs", "url": "https://verra.org/methodologies/vmr0006-methodology-for-installation-of-high-efficiency-firewood-cookstoves/" - }, - "ccb-redd": { - "category": "forest", - "known-strings": [ - "CCB - M/LU-REDD+: Methodology for the Implementation of REDD+ Projects Consistent with National Reference Level" - ], - "program": "ccb", - "url": "https://www.cercarbono.com/wp-content/uploads/2023/01/REDD-Methodology-V-1.3.1.pdf" - }, - "ccb-reforest": { - "category": "forest", - "known-strings": [ - "CCB - M/UT/F-A01: Methodology To Implement GHG Removal Projects Through Reforestation, Forest Restoration and the Establishment of Woody Crop" - ], - "program": "ccb", - "url": "https://www.cercarbono.com/wp-content/uploads/2021/11/Forest-and-Woody-Crops-Methodology-V.1.1.pdf" - }, - "ccb-renewables": { - "category": "renewable-energy", - "known-strings": [ - "CCB - M/E-ER01: Methodology for the realization of GHG emissions reduction projects by the use of renewable energy" - ], - "program": "ccb" - }, - "iso-biochar": { - "category": "biomass-cdr", - "known-strings": ["biochar"], - "program": "iso", - "url": "https://isometric.com/pathways/biochar" - }, - "iso-erw": { - "category": "alkalinity-cdr", - "known-strings": ["enhanced-weathering-agriculture"], - "program": "iso" - }, - "iso-dac": { - "category": "air-capture", - "known-strings": ["direct-air-capture"], - "program": "iso" - }, - "iso-bio-oil": { - "category": "biomass-cdr", - "known-strings": ["bio-oil-geological-storage"], - "program": "iso" - }, - "iso-bio-geo": { - "category": "biomass-cdr", - "known-strings": ["biomass-geological-storage"], - "program": "iso", - "url": "https://isometric.com/pathways/biomass-geological-storage" - }, - "iso-wastewater": { - "category": "alkalinity-cdr", - "known-strings": ["wastewater-alkalinity-enhancement"], - "program": "iso" - 
}, - "iso-river": { - "category": "alkalinity-cdr", - "known-strings": ["river-alkalinity-enhancement"], - "program": "iso" - }, - "iso-bio-burial": { - "category": "biomass-cdr", - "known-strings": ["subsurface-biomass"], - "program": "iso" - }, - "iso-bio-ccs": { - "category": "biomass-cdr", - "known-strings": ["biogenic-capture-and-storage"], - "program": "iso" - }, - "iso-refor": { - "category": "forest", - "known-strings": ["reforestation"], - "program": "iso" } } diff --git a/offsets_db_data/configs/credits-raw-columns-mapping.json b/offsets_db_data/configs/credits-raw-columns-mapping.json index d5a7d6c..7e90cf1 100644 --- a/offsets_db_data/configs/credits-raw-columns-mapping.json +++ b/offsets_db_data/configs/credits-raw-columns-mapping.json @@ -63,6 +63,28 @@ "vintage": "Vintage" } }, + "cercarbono": { + "issuances": { + "project_id": null, + "quantity": "issued_quantity", + "retirement_account": null, + "retirement_beneficiary": null, + "retirement_note": null, + "retirement_reason": null, + "transaction_date": "issuance_date", + "vintage": null + }, + "retirements": { + "project_id": "project_id", + "quantity": "quantity", + "retirement_account": null, + "retirement_beneficiary": "end_user", + "retirement_note": null, + "retirement_reason": null, + "transaction_date": "date", + "vintage": "vintage" + } + }, "climate-action-reserve": { "cancellations": { "project_id": "Project ID", @@ -117,6 +139,28 @@ "vintage": "Vintage" } }, + "isometric": { + "issuances": { + "project_id": "project_id", + "quantity": "credit_batch_size_total.credits", + "retirement_account": null, + "retirement_beneficiary": null, + "retirement_note": null, + "retirement_reason": null, + "transaction_date": "issued_at", + "vintage": null + }, + "retirements": { + "project_id": "project_id", + "quantity": "size.credits", + "retirement_account": "owner.name", + "retirement_beneficiary": "beneficiary.name", + "retirement_note": null, + "retirement_reason": null, + "transaction_date": 
"retired_at", + "vintage": "sequestered_on" + } + }, "verra": { "transactions": { "project_id": null, @@ -126,7 +170,7 @@ "retirement_note": "Retirement Details", "retirement_reason": "Retirement Reason", "transaction_date": null, - "vintage": null + "vintage": "null" } } } diff --git a/offsets_db_data/configs/projects-raw-columns-mapping.json b/offsets_db_data/configs/projects-raw-columns-mapping.json index 2d1cdac..833e6d4 100644 --- a/offsets_db_data/configs/projects-raw-columns-mapping.json +++ b/offsets_db_data/configs/projects-raw-columns-mapping.json @@ -2,50 +2,64 @@ "country": { "american-carbon-registry": "Project Site Country", "art-trees": "Program Country", + "cercarbono": null, "climate-action-reserve": "Project Site Country", "gold-standard": "Country", + "isometric": "location.country_name", "verra": "Country/Area" }, "listed_at": { "american-carbon-registry": null, "art-trees": null, + "cercarbono": null, "climate-action-reserve": "Project Listed Date", "gold-standard": null, + "isometric": null, "verra": null }, "name": { "american-carbon-registry": "Project Name", "art-trees": "Program Name", + "cercarbono": "name", "climate-action-reserve": "Project Name", "gold-standard": "Project Name", + "isometric": "name", "verra": "Name" }, "original_protocol": { "american-carbon-registry": "Project Methodology/Protocol", "art-trees": null, + "cercarbono": "evaluation_criteria", "climate-action-reserve": "Project Type", "gold-standard": "Methodology", + "isometric": "protocol_slug", "verra": "Methodology" }, "project_id": { "american-carbon-registry": "Project ID", "art-trees": "Program ID", + "cercarbono": "code", "climate-action-reserve": "Project ID", "gold-standard": "GSID", + "isometric": "short_code", "verra": "ID" }, "proponent": { "american-carbon-registry": null, "art-trees": "Sovereign Program Developer", + "cercarbono": "owner", "climate-action-reserve": "Project Owner", "gold-standard": "Project Developer Name", + "isometric": 
"supplier.organisation.name", "verra": "Proponent" }, "status": { "american-carbon-registry": null, "art-trees": "Status", + "cercarbono": "stage", "climate-action-reserve": "Status", "gold-standard": "Status", + "isometric": "status", "verra": "Status" } } diff --git a/offsets_db_data/configs/type-category-mapping.json b/offsets_db_data/configs/type-category-mapping.json index 4f2b20b..bcd8a17 100644 --- a/offsets_db_data/configs/type-category-mapping.json +++ b/offsets_db_data/configs/type-category-mapping.json @@ -302,5 +302,41 @@ "wind": { "category": "renewable-energy", "project-type-display-name": "Wind" + }, + "bio-oil": { + "category": "carbon-capture", + "project-type-display-name": "Bio-Oil Storage" + }, + "biomass geological storage": { + "category": "carbon-capture", + "project-type-display-name": "Biomass Geological Storage" + }, + "biomass burial": { + "category": "carbon-capture", + "project-type-display-name": "Biomass Burial" + }, + "biogenic ccs": { + "category": "carbon-capture", + "project-type-display-name": "Biogenic CCS" + }, + "direct air capture": { + "category": "carbon-capture", + "project-type-display-name": "Direct Air Capture" + }, + "enhanced weathering": { + "category": "carbon-capture", + "project-type-display-name": "Enhanced Weathering" + }, + "ocean alkalinity enhancement": { + "category": "carbon-capture", + "project-type-display-name": "Ocean Alkalinity Enhancement" + }, + "wastewater alkalinity enhancement": { + "category": "carbon-capture", + "project-type-display-name": "Wastewater Alkalinity Enhancement" + }, + "river alkalinity enhancement": { + "category": "carbon-capture", + "project-type-display-name": "River Alkalinity Enhancement" } } diff --git a/offsets_db_data/isometric.py b/offsets_db_data/isometric.py new file mode 100644 index 0000000..125c15b --- /dev/null +++ b/offsets_db_data/isometric.py @@ -0,0 +1,230 @@ +import pandas as pd +import pandas_flavor as pf + +from offsets_db_data.common import ( + 
@pf.register_dataframe_method
def infer_isometric_project_type(df: pd.DataFrame) -> pd.DataFrame:
    """Infer project types for Isometric projects based on protocol.

    The first entry of each row's 'protocol' list is looked up in a fixed
    protocol-code -> project-type table. Rows whose 'protocol' value is not a
    non-empty list, or whose first protocol is not in the table, are labelled
    'unknown'. 'project_type_source' is set to 'carbonplan' for every row.

    The input dataframe is modified in place and also returned.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Isometric project data with 'protocol' column.

    Returns
    -------
    pd.DataFrame
        Dataframe with inferred 'project_type' and 'project_type_source' columns.
    """
    # Mapping from Isometric protocol codes to project types
    iso_protocol_to_type = {
        'iso-biochar': 'biochar',
        'iso-dac': 'direct air capture',
        'iso-bio-oil': 'bio-oil',
        'iso-bio-geo': 'biomass geological storage',
        'iso-erw': 'enhanced weathering',
        'iso-oae': 'ocean alkalinity enhancement',
        'iso-wastewater': 'wastewater alkalinity enhancement',
        'iso-river': 'river alkalinity enhancement',
        'iso-bio-burial': 'biomass burial',
        'iso-bio-ccs': 'biogenic ccs',
        'iso-refor': 'afforestation/reforestation',
    }

    def _type_for(protocols) -> str:
        # Only the first protocol in the list decides the project type
        if isinstance(protocols, list) and protocols:
            return iso_protocol_to_type.get(protocols[0], 'unknown')
        return 'unknown'

    df['project_type'] = 'unknown'
    df['project_type_source'] = 'carbonplan'

    # Element-wise map instead of iterrows() + df.at[label]: label-based writes
    # hit every row sharing an index label, which mislabels rows when the index
    # is not unique.
    if 'protocol' in df.columns and not df.empty:
        df['project_type'] = df['protocol'].map(_type_for)

    return df


@pf.register_dataframe_method
def add_isometric_project_url(df: pd.DataFrame) -> pd.DataFrame:
    """Add project URL column for Isometric projects.

    The value is copied unchanged from the 'url' column. The input dataframe
    is modified in place and also returned.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Isometric project data with a 'url' column.

    Returns
    -------
    pd.DataFrame
        Dataframe with added 'project_url' column.
    """
    df['project_url'] = df['url']
    return df


@pf.register_dataframe_method
def add_isometric_project_id(df: pd.DataFrame, prefix: str = 'ISO') -> pd.DataFrame:
    """Rewrite 'project_id' as the registry prefix plus the existing id.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Isometric data with a 'project_id' column.
    prefix : str, optional
        Prefix to add to project IDs, by default "ISO"

    Returns
    -------
    pd.DataFrame
        Copy of the dataframe with the prefixed 'project_id' column.
    """
    df = df.copy()
    df['project_id'] = prefix + df['project_id'].astype(str)

    return df


@pf.register_dataframe_method
def process_isometric_credits(
    df: pd.DataFrame,
    *,
    download_type: str,
    prj_id_to_short_code: dict | None = None,
    registry_name: str = 'isometric',
    prefix: str = 'ISO',
    harmonize_beneficiary_info: bool = False,
) -> pd.DataFrame:
    """Process Isometric credits dataframe to conform to offsets-db schema.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Isometric credit transactions data.
    download_type : str
        Type of downloaded data, either 'issuances' or 'retirements'.
    prj_id_to_short_code : dict | None, optional
        Lookup applied to the 'project_id' column before the prefix is added.
        When None, 'project_id' is left untouched (no prefix is added).
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "isometric"
    prefix : str, optional
        Prefix to add to mapped project IDs, by default "ISO"
    harmonize_beneficiary_info : bool, optional
        Whether to pipe the result through ``harmonize_beneficiary_data``,
        by default False

    Returns
    -------
    pd.DataFrame
        Dataframe conforming to offsets-db credit schema.
    """
    column_mapping = load_column_mapping(
        registry_name=registry_name, download_type=download_type, mapping_path=CREDIT_SCHEMA_UPATH
    )

    # Invert to raw-name -> schema-name; unmapped (null) fields are dropped so
    # they do not collapse into a single meaningless None key.
    columns = {v: k for k, v in column_mapping.items() if v is not None}
    df = df.copy()
    # Add project ID with prefix using the prj_id_to_short_code mapping if provided.
    # Ids missing from the mapping become NaN and are rejected by schema validation.
    if prj_id_to_short_code is not None:
        df['project_id'] = prefix + df['project_id'].map(prj_id_to_short_code)

    if df.empty:
        # Nothing to transform: return an empty, schema-shaped frame
        return (
            pd.DataFrame(columns=credit_without_id_schema.columns.keys())
            .add_missing_columns(schema=credit_without_id_schema)
            .convert_to_datetime(columns=['transaction_date'], format='%Y-%m-%d')
            .validate(schema=credit_without_id_schema)
        )
    if download_type == 'issuances':
        df['transaction_type'] = 'issuance'
    elif download_type == 'retirements':
        # The vintage of a retirement is the year of its 'sequestered_on' date
        df = df.convert_to_datetime(columns=['sequestered_on'])
        df['sequestered_on'] = df['sequestered_on'].dt.year
        df['transaction_type'] = 'retirement'
    data = (
        df.rename(columns=columns)
        .set_registry(registry_name=registry_name)
        .convert_to_datetime(columns=['transaction_date'], format='%Y-%m-%d')
        .add_missing_columns(schema=credit_without_id_schema)
        .validate(schema=credit_without_id_schema)
    )

    if harmonize_beneficiary_info:
        data = data.pipe(
            harmonize_beneficiary_data, registry_name=registry_name, download_type=download_type
        )

    return data


@pf.register_dataframe_method
def process_isometric_projects(
    df: pd.DataFrame,
    *,
    credits: pd.DataFrame,
    registry_name: str = 'isometric',
) -> pd.DataFrame:
    """Process Isometric projects dataframe to conform to offsets-db schema.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Isometric project data.
    credits : pd.DataFrame
        Dataframe containing credit transactions data, used for the
        issued/retired totals and the first issuance/retirement dates.
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "isometric"

    Returns
    -------
    pd.DataFrame
        Dataframe conforming to offsets-db project schema.
    """

    registry_project_column_mapping = load_registry_project_column_mapping(
        registry_name=registry_name, file_path=PROJECT_SCHEMA_UPATH
    )
    # Invert to raw-name -> schema-name, skipping fields with no raw column (null)
    inverted_column_mapping = {
        value: key for key, value in registry_project_column_mapping.items() if value is not None
    }
    type_category_mapping = load_type_category_mapping()
    inverted_protocol_mapping = load_inverted_protocol_mapping()

    # Copies keep the caller's frames untouched by the in-place pipeline steps
    df = df.copy()
    credits = credits.copy()

    data = (
        df.rename(columns=inverted_column_mapping)
        .set_registry(registry_name=registry_name)
        .add_isometric_project_id()
        .add_isometric_project_url()
        .harmonize_country_names()
        .harmonize_status_codes()
        .map_protocol(inverted_protocol_mapping=inverted_protocol_mapping)
        .infer_isometric_project_type()  # Use Isometric-specific inference
        .override_project_types(
            override_data_path=BERKELEY_PROJECT_TYPE_UPATH, source_str='berkeley'
        )
        .add_category(
            type_category_mapping=type_category_mapping
        )  # must come after types; type -> category
        .map_project_type_to_display_name(type_category_mapping=type_category_mapping)
        .add_retired_and_issued_totals(credits=credits)
        .add_first_issuance_and_retirement_dates(credits=credits)
        .add_missing_columns(schema=project_schema)
        .convert_to_datetime(columns=['listed_at', 'first_issuance_at', 'first_retirement_at'])
        .validate(schema=project_schema)
    )
    return data
renaming, adding, and validating columns, and merging with credits data. diff --git a/tests/test_vcs.py b/tests/test_vcs.py index 2009133..8fbcd1c 100644 --- a/tests/test_vcs.py +++ b/tests/test_vcs.py @@ -443,9 +443,7 @@ def test_add_vcs_compliance_projects(vcs_projects): def test_process_vcs_projects(vcs_projects, vcs_transactions): vcs_credits = process_vcs_credits(vcs_transactions, harmonize_beneficiary_info=False) - df = process_vcs_projects( - vcs_projects, credits=vcs_credits, registry_name='verra', download_type='projects' - ) + df = process_vcs_projects(vcs_projects, credits=vcs_credits, registry_name='verra') assert 'listed_at' in df.columns # check project_url series @@ -477,7 +475,7 @@ def test_process_vcs_projects_with_totals_and_dates(vcs_projects, vcs_transactio # Process the vcs_projects processed_projects = process_vcs_projects( - vcs_projects, credits=vcs_credits, registry_name='verra', download_type='projects' + vcs_projects, credits=vcs_credits, registry_name='verra' ) # Assertions for retired and issued totals, and first issuance/retirement dates