Skip to content

Commit

Permalink
Add raw retirement beneficiary data to the processed transactions
Browse files (browse the repository at this point in the history)
  • Loading branch information
andersy005 committed Aug 28, 2024
1 parent 7a18dd1 commit ef1c282
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 10 deletions.
4 changes: 3 additions & 1 deletion offsets_db_data/apx.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def process_apx_credits(
if download_type == 'issuances':
data = data.aggregate_issuance_transactions()

data = data.validate(schema=credit_without_id_schema)
data = data.add_missing_columns(schema=credit_without_id_schema).validate(
schema=credit_without_id_schema
)
if arb is not None and not arb.empty:
data = data.merge_with_arb(arb=arb)
return data
Expand Down
6 changes: 4 additions & 2 deletions offsets_db_data/arb.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,10 @@ def process_arb(df: pd.DataFrame) -> pd.DataFrame:
data['registry'] = data.project_id.apply(_get_registry)
data['vintage'] = data['vintage'].astype(int)

data = data.convert_to_datetime(columns=['transaction_date']).validate(
schema=credit_without_id_schema
data = (
data.add_missing_columns(schema=credit_without_id_schema)
.convert_to_datetime(columns=['transaction_date'])
.validate(schema=credit_without_id_schema)
)

return data
2 changes: 1 addition & 1 deletion offsets_db_data/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def add_missing_columns(df: pd.DataFrame, *, schema: pa.DataFrameSchema) -> pd.D
for column, value in schema.columns.items():
dtype = value.dtype.type
if column not in df.columns:
default_value = default_values.get(dtype, None)
default_value = default_values.get(dtype)
df[column] = pd.Series([default_value] * len(df), index=df.index, dtype=dtype)
return df

Expand Down
48 changes: 48 additions & 0 deletions offsets_db_data/configs/credits-raw-columns-mapping.json
Original file line number Diff line number Diff line change
@@ -1,82 +1,130 @@
{
"american-carbon-registry": {
"cancellations": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Project ID",
"quantity": "Quantity of Credits",
"reason": null,
"transaction_date": "Status Effective (GMT)",
"vintage": "Vintage"
},
"issuances": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Project ID",
"quantity": "Total Credits Issued",
"reason": null,
"transaction_date": "Date Issued (GMT)",
"vintage": "Vintage"
},
"retirements": {
"account": "Account Holder",
"beneficiary": null,
"note": "Purpose of Retirement",
"project_id": "Project ID",
"quantity": "Quantity of Credits",
"reason": "Retirement Reason",
"transaction_date": "Status Effective (GMT)",
"vintage": "Vintage"
}
},
"art-trees": {
"cancellations": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Program ID",
"quantity": "Quantity of Credits",
"reason": null,
"transaction_date": "Status Effective",
"vintage": "Vintage"
},
"issuances": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Program ID",
"quantity": "Credits Verified",
"reason": null,
"transaction_date": "Date Approved",
"vintage": "Vintage"
},
"retirements": {
"account": "Account Holder",
"beneficiary": null,
"note": "Retirement Reason Details",
"project_id": "Program ID",
"quantity": "Quantity of Credits",
"reason": "Retirement Reason",
"transaction_date": "Status Effective",
"vintage": "Vintage"
}
},
"climate-action-reserve": {
"cancellations": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Project ID",
"quantity": "Quantity of Offset Credits",
"reason": null,
"transaction_date": "Status Effective",
"vintage": "Vintage"
},
"issuances": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "Project ID",
"quantity": "Total Offset Credits Issued",
"reason": null,
"transaction_date": "Date Issued",
"vintage": "Vintage"
},
"retirements": {
"account": "Account Holder",
"beneficiary": null,
"note": "Retirement Reason Details",
"project_id": "Project ID",
"quantity": "Quantity of Offset Credits",
"reason": "Retirement Reason",
"transaction_date": "Status Effective",
"vintage": "Vintage"
}
},
"gold-standard": {
"issuances": {
"account": null,
"beneficiary": null,
"note": null,
"project_id": "GSID",
"quantity": "Quantity",
"reason": null,
"transaction_date": "Issuance Date",
"vintage": "Vintage"
},
"retirements": {
"account": null,
"beneficiary": "Using Entity",
"note": "Note",
"project_id": "GSID",
"quantity": "Quantity",
"reason": null,
"transaction_date": "Retirement Date",
"vintage": "Vintage"
}
},
"verra": {
"transactions": {
"account": null,
"beneficiary": "Retirement Beneficiary",
"note": "Retirement Details",
"project_id": null,
"quantity": null,
"reason": "Retirement Reason",
"transaction_date": null,
"vintage": null
}
Expand Down
5 changes: 4 additions & 1 deletion offsets_db_data/gld.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def process_gld_credits(
if download_type == 'issuances':
data = data.aggregate_issuance_transactions()

data = data.validate(schema=credit_without_id_schema)
data = data.add_missing_columns(schema=credit_without_id_schema).validate(
schema=credit_without_id_schema
)

if arb is not None and not arb.empty:
data = data.merge_with_arb(arb=arb)
Expand All @@ -136,6 +138,7 @@ def process_gld_credits(
pd.DataFrame(columns=credit_without_id_schema.columns.keys())
.add_missing_columns(schema=credit_without_id_schema)
.convert_to_datetime(columns=['transaction_date'], format='%Y-%m-%d')
.add_missing_columns(schema=credit_without_id_schema)
.validate(schema=credit_without_id_schema)
)

Expand Down
4 changes: 4 additions & 0 deletions offsets_db_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
'vintage': pa.Column(pa.Int, nullable=True, coerce=True),
'transaction_date': pa.Column(pd.DatetimeTZDtype(tz='UTC'), nullable=True),
'transaction_type': pa.Column(pa.String, nullable=True),
'account': pa.Column(pa.String, nullable=True),
'reason': pa.Column(pa.String, nullable=True),
'note': pa.Column(pa.String, nullable=True),
'beneficiary': pa.Column(pa.String, nullable=True),
}
)

Expand Down
1 change: 1 addition & 0 deletions offsets_db_data/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def process_vcs_credits(
data = (
pd.concat([issuances, retirements])
.reset_index(drop=True)
.add_missing_columns(schema=credit_without_id_schema)
.validate(schema=credit_without_id_schema)
)

Expand Down
10 changes: 5 additions & 5 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.fixture
def date() -> str:
return '2024-05-03'
return '2024-08-27'


@pytest.fixture
Expand All @@ -21,7 +21,7 @@ def bucket() -> str:
@pytest.fixture
def arb() -> pd.DataFrame:
data = pd.read_excel(
's3://carbonplan-offsets-db/raw/2024-05-03/arb/nc-arboc_issuance.xlsx', sheet_name=3
's3://carbonplan-offsets-db/raw/2024-08-27/arb/nc-arboc_issuance.xlsx', sheet_name=3
)
return data.process_arb()

Expand Down
Expand Up @@ -103,10 +103,10 @@ def test_gld(
pd.concat(
[
pd.read_csv(
's3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/issuances.csv.gz'
's3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/issuances.csv.gz'
).process_gld_credits(download_type='issuances'),
pd.read_csv(
's3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/retirements.csv.gz'
's3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/retirements.csv.gz'
).process_gld_credits(download_type='retirements'),
]
),
Expand All @@ -116,7 +116,7 @@ def test_gld(
'projects',
[
pd.DataFrame(),
pd.read_csv('s3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/projects.csv.gz'),
pd.read_csv('s3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/projects.csv.gz'),
],
)
def test_gld_empty(df_credits, projects):
Expand Down

0 comments on commit ef1c282

Please sign in to comment.