Skip to content
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
3a02bef
Add Cercarbono project processing and update raw columns mapping
andersy005 Dec 10, 2025
47829e2
Merge branch 'main' into add-Cercarbano
andersy005 Dec 11, 2025
95d9a1a
Update Cercarbono mappings in projects-raw-columns-mapping.json
andersy005 Dec 11, 2025
dbbc2c6
Add method to generate project URLs for Cercarbono projects
andersy005 Dec 11, 2025
8a06d33
Add processing method for Cercarbono transactions and update column m…
andersy005 Dec 11, 2025
7a1dcc4
Update transaction date conversion to use ISO8601 format
andersy005 Dec 11, 2025
a05fc97
Extract vintage year from vintage_of_credits in process_cercarbono_tr…
andersy005 Dec 11, 2025
328d074
Add missing columns handling in process_cercarbono_transactions
andersy005 Dec 11, 2025
2c2df90
Refactor process_cercarbono_projects to accept credits DataFrame and …
andersy005 Dec 11, 2025
3a36872
Remove unnecessary parameter from process_vcs_projects calls in tests
andersy005 Dec 11, 2025
3f6029a
Add process_isometric_projects function to handle Isometric project data
andersy005 Dec 11, 2025
121a275
Add isometric project mappings to projects-raw-columns-mapping.json
andersy005 Dec 11, 2025
e8d93cc
Add project URL handling and enhance isometric project processing
andersy005 Dec 11, 2025
53b461e
Rename process_cercarbono_transactions to process_cercarbono_credits …
andersy005 Dec 12, 2025
a7a7540
Enhance process_isometric_credits function to include datetime conver…
andersy005 Dec 12, 2025
156694b
Add project ID and vintage year extraction to process_isometric_credi…
andersy005 Dec 12, 2025
31b6cb4
Change integer columns to Float32 in project_schema and credit_withou…
andersy005 Dec 12, 2025
cf5ca9c
Uncomment methods to add retired and issued totals, and first issuanc…
andersy005 Dec 12, 2025
783b1d2
Refactor process_isometric_credits function to handle transaction typ…
andersy005 Dec 12, 2025
eaa2599
Add 'isometric' and 'cercarbono' to registry abbreviation mapping
andersy005 Jan 5, 2026
04072d5
Update project_id mapping in cercarbono retirements and remove redund…
andersy005 Jan 14, 2026
d97f43b
Add project ID methods for Cercarbono and Isometric credits dataframe…
andersy005 Jan 14, 2026
5efeea9
Fix project ID assignment order in process_cercarbono_projects and up…
andersy005 Jan 14, 2026
0b6d441
Refactor process_cercarbono_credits to streamline data handling for i…
andersy005 Jan 14, 2026
3966307
Merge branch 'main' into add-Cercarbano
andersy005 Jan 14, 2026
ad1804a
Enhance process_isometric_credits to support project ID mapping with …
andersy005 Jan 14, 2026
d2bc9a2
Add harmonization option for beneficiary data in process functions
andersy005 Jan 14, 2026
62153ae
Refactor process_isometric_credits to improve flow and readability by…
andersy005 Jan 14, 2026
75a1c69
Merge branch 'main' into add-Cercarbano
andersy005 Jan 29, 2026
1afb6e8
Retrigger CI
andersy005 Feb 12, 2026
0c1c6d9
Merge branch 'main' into add-Cercarbano
andersy005 Feb 12, 2026
adcfc70
Refactor import statements for pandera to use pandas submodule
andersy005 Feb 12, 2026
9816997
Add new project types and update isometric project type inference logic
andersy005 Feb 12, 2026
99b03bf
Add Cercarbono project type inference and update protocol mapping
andersy005 Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions offsets_db_data/cercarbono.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import pandas as pd
import pandas_flavor as pf

from offsets_db_data.common import (
BERKELEY_PROJECT_TYPE_UPATH,
CREDIT_SCHEMA_UPATH,
PROJECT_SCHEMA_UPATH,
load_column_mapping,
load_inverted_protocol_mapping,
load_registry_project_column_mapping,
load_type_category_mapping,
)
from offsets_db_data.credits import (
aggregate_issuance_transactions, # noqa: F401
filter_and_merge_transactions, # noqa: F401
merge_with_arb, # noqa: F401
)
from offsets_db_data.models import credit_without_id_schema, project_schema
from offsets_db_data.projects import (
add_category, # noqa: F401
add_first_issuance_and_retirement_dates, # noqa: F401
add_is_compliance_flag, # noqa: F401
add_retired_and_issued_totals, # noqa: F401
harmonize_country_names, # noqa: F401
harmonize_status_codes, # noqa: F401
map_protocol, # noqa: F401
)


@pf.register_dataframe_method
def add_cercarbono_project_url(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``project_url`` column for Cercarbono projects.

    Each URL is the EcoRegistry projects base URL followed by the row's
    ``project_id`` value.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono project data. It must have a
        ``project_id`` column. The input is not modified.

    Returns
    -------
    pd.DataFrame
        New dataframe with a ``project_url`` column added (or replaced if it
        already existed).

    Raises
    ------
    KeyError
        If ``df`` has no ``project_id`` column.
    """
    base_url = 'https://www.ecoregistry.io/projects'
    project_urls = df['project_id'].apply(lambda project_id: f'{base_url}/{project_id}')
    # ``assign`` returns a new frame, so the caller's dataframe is left untouched.
    return df.assign(project_url=project_urls)


@pf.register_dataframe_method
def process_cercarbono_credits(
    projects: pd.DataFrame,
    retirements: pd.DataFrame,
    download_type: str = 'retirements',
    registry_name: str = 'cercarbono',
) -> pd.DataFrame:
    """Process Cercarbono transactions to conform to the offsets-db credit schema.

    Issuances are taken from the ``serials`` entries nested in each project row,
    retirements come from ``retirements``; both are concatenated into a single
    transactions dataframe. Neither input dataframe is modified.

    Parameters
    ----------
    projects : pd.DataFrame
        Cercarbono project data. Must have ``code``, ``name`` and ``serials``
        columns, where ``serials`` holds an iterable of issuance records
        (mappings) per project.
    retirements : pd.DataFrame
        Cercarbono retirement data. Must have a ``project_id`` column holding
        the project identifier without the ``CDC-`` prefix.
    download_type : str, optional
        Key used to select the column mapping for the registry, by default
        "retirements".
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "cercarbono".

    Returns
    -------
    pd.DataFrame
        Processed dataframe conforming to offsets-db schema.

    Raises
    ------
    KeyError
        If a required column is missing from ``projects`` or ``retirements``,
        or if no issuance record provides ``vintage_of_credits``.
    """
    # Flatten the per-project issuance records into one list, tagging each with
    # its project. New dicts are built so the records owned by ``projects`` are
    # not modified.
    all_issuances = [
        {**issuance, 'project_id': code, 'name': name}
        for code, name, serials in zip(projects['code'], projects['name'], projects['serials'])
        for issuance in serials
    ]

    issuances = pd.json_normalize(all_issuances).rename(
        columns={'issued_quantity': 'quantity', 'issuance_date': 'date'}
    )
    # Extract vintage year from the last date in vintage_of_credits (format: "YYYY-MM-DD / YYYY-MM-DD")
    # TODO: @badgley, please confirm this is the correct way to extract vintage year for issuances
    issuances['vintage'] = (
        issuances['vintage_of_credits'].str.split(' / ').str[-1].str[:4].astype(int)
    )
    issuances['transaction_type'] = 'issuance'

    # Add the CDC- prefix to project IDs. ``assign`` works on a new frame, so
    # processing the same ``retirements`` input twice cannot yield "CDC-CDC-...".
    retirements = retirements.assign(
        project_id=retirements['project_id'].apply(lambda x: f'CDC-{x}'),
        transaction_type='retirement',
    )

    column_mapping = load_column_mapping(
        registry_name=registry_name, download_type=download_type, mapping_path=CREDIT_SCHEMA_UPATH
    )

    # Invert to raw-name -> schema-name. Fields mapped to null in the config have
    # no raw column to rename, so they are left out of the rename mapping.
    columns = {raw: target for target, raw in column_mapping.items() if raw is not None}

    df = pd.concat([issuances, retirements]).reset_index(drop=True).rename(columns=columns)
    data = (
        df.set_registry(registry_name=registry_name)
        .convert_to_datetime(columns=['transaction_date'], format='ISO8601')
        .add_missing_columns(schema=credit_without_id_schema)
        .validate(schema=credit_without_id_schema)
    )
    return data


@pf.register_dataframe_method
def process_cercarbono_projects(
    df: pd.DataFrame,
    *,
    credits: pd.DataFrame,
    registry_name: str = 'cercarbono',
) -> pd.DataFrame:
    """Process Cercarbono projects dataframe to conform to offsets-db schema.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe containing Cercarbono project data. A copy is taken
        before the ``country`` column is added.
    credits : pd.DataFrame
        Credits dataframe passed to the steps that add retired/issued totals
        and first issuance/retirement dates.
    registry_name : str, optional
        Name of the registry to be added to the dataframe, by default "cercarbono".

    Returns
    -------
    pd.DataFrame
        Processed dataframe conforming to offsets-db schema.
    """
    schema_to_raw_columns = load_registry_project_column_mapping(
        registry_name=registry_name, file_path=PROJECT_SCHEMA_UPATH
    )
    raw_to_schema_columns = {raw: target for target, raw in schema_to_raw_columns.items()}
    type_category_mapping = load_type_category_mapping()
    inverted_protocol_mapping = load_inverted_protocol_mapping()

    projects = df.copy()
    # Country comes from the first entry of each project's ``locations``.
    # NOTE(review): assumes every project has at least one location -- confirm.
    projects['country'] = projects.locations.map(lambda locations: locations[0]['country'])

    # Stage 1: rename raw columns and harmonize registry-level fields.
    harmonized = (
        projects.rename(columns=raw_to_schema_columns)
        .set_registry(registry_name=registry_name)
        .add_cercarbono_project_url()
        .harmonize_country_names()
        .harmonize_status_codes()
        .map_protocol(inverted_protocol_mapping=inverted_protocol_mapping)
    )

    # Stage 2: project types first, then category (type -> category depends on types).
    typed = (
        harmonized.infer_project_type()
        .override_project_types(
            override_data_path=BERKELEY_PROJECT_TYPE_UPATH, source_str='berkeley'
        )
        .add_category(type_category_mapping=type_category_mapping)
        .map_project_type_to_display_name(type_category_mapping=type_category_mapping)
    )

    # Stage 3: compliance flag and credit-derived totals/dates.
    enriched = (
        typed.add_is_compliance_flag()
        .add_retired_and_issued_totals(credits=credits)
        .add_first_issuance_and_retirement_dates(credits=credits)
    )

    # Stage 4: fill schema columns, coerce date columns and validate.
    return (
        enriched.add_missing_columns(schema=project_schema)
        .convert_to_datetime(columns=['listed_at', 'first_issuance_at', 'first_retirement_at'])
        .validate(schema=project_schema)
    )
4 changes: 3 additions & 1 deletion offsets_db_data/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def load_inverted_protocol_mapping() -> dict:
return store


def load_column_mapping(
    *, registry_name: str, download_type: str, mapping_path: upath.UPath | str
) -> dict:
    """Load the column mapping for one registry and download type.

    The file at ``mapping_path`` is parsed as JSON and indexed first by
    ``registry_name`` and then by ``download_type``.

    Parameters
    ----------
    registry_name : str
        Top-level key in the mapping file (e.g. "cercarbono").
    download_type : str
        Key under the registry entry (e.g. "retirements").
    mapping_path : upath.UPath | str
        Location of the JSON mapping file.

    Returns
    -------
    dict
        The mapping stored under ``[registry_name][download_type]``.

    Raises
    ------
    KeyError
        If the registry or download type is not present in the mapping file.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(mapping_path, encoding='utf-8') as f:
        registry_credit_column_mapping = json.load(f)
    return registry_credit_column_mapping[registry_name][download_type]
Expand Down
34 changes: 34 additions & 0 deletions offsets_db_data/configs/credits-raw-columns-mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@
"vintage": "Vintage"
}
},
"cercarbono": {
"retirements": {
"project_id": null,
"quantity": null,
"retirement_account": null,
"retirement_beneficiary": null,
"retirement_note": null,
"retirement_reason": null,
"transaction_date": "date",
"vintage": "vintage"
}
},
"climate-action-reserve": {
"cancellations": {
"project_id": "Project ID",
Expand Down Expand Up @@ -117,6 +129,28 @@
"vintage": "Vintage"
}
},
"isometric": {
"issuances": {
"project_id": "project_id",
"quantity": "credit_batch_size_total.credits",
"retirement_account": null,
"retirement_beneficiary": null,
"retirement_note": null,
"retirement_reason": null,
"transaction_date": "issued_at",
"vintage": null
},
"retirements": {
"project_id": null,
"quantity": "credit_batch_size_total.credits",
"retirement_account": "owner.name",
"retirement_beneficiary": "beneficiary.name",
"retirement_note": "notes",
"retirement_reason": "purposes",
"transaction_date": "retired_at",
"vintage": null
}
},
"verra": {
"transactions": {
"project_id": null,
Expand Down
14 changes: 14 additions & 0 deletions offsets_db_data/configs/projects-raw-columns-mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,64 @@
"country": {
"american-carbon-registry": "Project Site Country",
"art-trees": "Program Country",
"cercarbono": null,
"climate-action-reserve": "Project Site Country",
"gold-standard": "Country",
"isometric": "location.country_name",
"verra": "Country/Area"
},
"listed_at": {
"american-carbon-registry": null,
"art-trees": null,
"cercarbono": null,
"climate-action-reserve": "Project Listed Date",
"gold-standard": null,
"isometric": null,
"verra": null
},
"name": {
"american-carbon-registry": "Project Name",
"art-trees": "Program Name",
"cercarbono": "name",
"climate-action-reserve": "Project Name",
"gold-standard": "Project Name",
"isometric": "name",
"verra": "Name"
},
"original_protocol": {
"american-carbon-registry": "Project Methodology/Protocol",
"art-trees": null,
"cercarbono": "evaluation_criteria",
"climate-action-reserve": "Project Type",
"gold-standard": "Methodology",
"isometric": "protocol_slug",
"verra": "Methodology"
},
"project_id": {
"american-carbon-registry": "Project ID",
"art-trees": "Program ID",
"cercarbono": "code",
"climate-action-reserve": "Project ID",
"gold-standard": "GSID",
"isometric": "id",
"verra": "ID"
},
"proponent": {
"american-carbon-registry": null,
"art-trees": "Sovereign Program Developer",
"cercarbono": "owner",
"climate-action-reserve": "Project Owner",
"gold-standard": "Project Developer Name",
"isometric": "supplier.organisation.name",
"verra": "Proponent"
},
"status": {
"american-carbon-registry": null,
"art-trees": "Status",
"cercarbono": "stage",
"climate-action-reserve": "Status",
"gold-standard": "Status",
"isometric": "status",
"verra": "Status"
}
}
Loading