Add raw retirement beneficiary data to the processed transactions

carbonplan · Aug 28, 2024 · commit ef1c282 (1 parent: 7a18dd1)
Show file tree

Hide file tree

Showing 8 changed files with 70 additions and 10 deletions.
diff --git a/offsets_db_data/apx.py b/offsets_db_data/apx.py
@@ -89,7 +89,9 @@ def process_apx_credits(
     if download_type == 'issuances':
         data = data.aggregate_issuance_transactions()
 
-    data = data.validate(schema=credit_without_id_schema)
+    data = data.add_missing_columns(schema=credit_without_id_schema).validate(
+        schema=credit_without_id_schema
+    )
     if arb is not None and not arb.empty:
         data = data.merge_with_arb(arb=arb)
     return data

diff --git a/offsets_db_data/arb.py b/offsets_db_data/arb.py
@@ -132,8 +132,10 @@ def process_arb(df: pd.DataFrame) -> pd.DataFrame:
     data['registry'] = data.project_id.apply(_get_registry)
     data['vintage'] = data['vintage'].astype(int)
 
-    data = data.convert_to_datetime(columns=['transaction_date']).validate(
-        schema=credit_without_id_schema
+    data = (
+        data.add_missing_columns(schema=credit_without_id_schema)
+        .convert_to_datetime(columns=['transaction_date'])
+        .validate(schema=credit_without_id_schema)
     )
 
     return data
diff --git a/offsets_db_data/common.py b/offsets_db_data/common.py
@@ -141,7 +141,7 @@ def add_missing_columns(df: pd.DataFrame, *, schema: pa.DataFrameSchema) -> pd.D
     for column, value in schema.columns.items():
         dtype = value.dtype.type
         if column not in df.columns:
-            default_value = default_values.get(dtype, None)
+            default_value = default_values.get(dtype)
             df[column] = pd.Series([default_value] * len(df), index=df.index, dtype=dtype)
     return df
 

diff --git a/offsets_db_data/configs/credits-raw-columns-mapping.json b/offsets_db_data/configs/credits-raw-columns-mapping.json
@@ -1,82 +1,130 @@
 {
   "american-carbon-registry": {
     "cancellations": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Project ID",
       "quantity": "Quantity of Credits",
+      "reason": null,
       "transaction_date": "Status Effective  (GMT)",
       "vintage": "Vintage"
     },
     "issuances": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Project ID",
       "quantity": "Total Credits Issued",
+      "reason": null,
       "transaction_date": "Date Issued (GMT)",
       "vintage": "Vintage"
     },
     "retirements": {
+      "account": "Account Holder",
+      "beneficiary": null,
+      "note": "Purpose of Retirement",
       "project_id": "Project ID",
       "quantity": "Quantity of Credits",
+      "reason": "Retirement Reason",
       "transaction_date": "Status Effective (GMT)",
       "vintage": "Vintage"
     }
   },
   "art-trees": {
     "cancellations": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Program ID",
       "quantity": "Quantity of Credits",
+      "reason": null,
       "transaction_date": "Status Effective",
       "vintage": "Vintage"
     },
     "issuances": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Program ID",
       "quantity": "Credits Verified",
+      "reason": null,
       "transaction_date": "Date Approved",
       "vintage": "Vintage"
     },
     "retirements": {
+      "account": "Account Holder",
+      "beneficiary": null,
+      "note": "Retirement Reason Details",
       "project_id": "Program ID",
       "quantity": "Quantity of Credits",
+      "reason": "Retirement Reason",
       "transaction_date": "Status Effective",
       "vintage": "Vintage"
     }
   },
   "climate-action-reserve": {
     "cancellations": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Project ID",
       "quantity": "Quantity of Offset Credits",
+      "reason": null,
       "transaction_date": "Status Effective",
       "vintage": "Vintage"
     },
     "issuances": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "Project ID",
       "quantity": "Total Offset Credits Issued",
+      "reason": null,
       "transaction_date": "Date Issued",
       "vintage": "Vintage"
     },
     "retirements": {
+      "account": "Account Holder",
+      "beneficiary": null,
+      "note": "Retirement Reason Details",
       "project_id": "Project ID",
       "quantity": "Quantity of Offset Credits",
+      "reason": "Retirement Reason",
       "transaction_date": "Status Effective",
       "vintage": "Vintage"
     }
   },
   "gold-standard": {
     "issuances": {
+      "account": null,
+      "beneficiary": null,
+      "note": null,
       "project_id": "GSID",
       "quantity": "Quantity",
+      "reason": null,
       "transaction_date": "Issuance Date",
       "vintage": "Vintage"
     },
     "retirements": {
+      "account": null,
+      "beneficiary": "Using Entity",
+      "note": "Note",
       "project_id": "GSID",
       "quantity": "Quantity",
+      "reason": null,
       "transaction_date": "Retirement Date",
       "vintage": "Vintage"
     }
   },
   "verra": {
     "transactions": {
+      "account": null,
+      "beneficiary": "Retirement Beneficiary",
+      "note": "Retirement Details",
       "project_id": null,
       "quantity": null,
+      "reason": "Retirement Reason",
       "transaction_date": null,
       "vintage": null
     }

diff --git a/offsets_db_data/gld.py b/offsets_db_data/gld.py
@@ -126,7 +126,9 @@ def process_gld_credits(
         if download_type == 'issuances':
             data = data.aggregate_issuance_transactions()
 
-        data = data.validate(schema=credit_without_id_schema)
+        data = data.add_missing_columns(schema=credit_without_id_schema).validate(
+            schema=credit_without_id_schema
+        )
 
         if arb is not None and not arb.empty:
             data = data.merge_with_arb(arb=arb)
@@ -136,6 +138,7 @@ def process_gld_credits(
             pd.DataFrame(columns=credit_without_id_schema.columns.keys())
             .add_missing_columns(schema=credit_without_id_schema)
             .convert_to_datetime(columns=['transaction_date'], format='%Y-%m-%d')
+            .add_missing_columns(schema=credit_without_id_schema)
             .validate(schema=credit_without_id_schema)
         )
 

diff --git a/offsets_db_data/models.py b/offsets_db_data/models.py
@@ -48,6 +48,10 @@
         'vintage': pa.Column(pa.Int, nullable=True, coerce=True),
         'transaction_date': pa.Column(pd.DatetimeTZDtype(tz='UTC'), nullable=True),
         'transaction_type': pa.Column(pa.String, nullable=True),
+        'account': pa.Column(pa.String, nullable=True),
+        'reason': pa.Column(pa.String, nullable=True),
+        'note': pa.Column(pa.String, nullable=True),
+        'beneficiary': pa.Column(pa.String, nullable=True),
     }
 )
 

diff --git a/offsets_db_data/vcs.py b/offsets_db_data/vcs.py
@@ -225,6 +225,7 @@ def process_vcs_credits(
     data = (
         pd.concat([issuances, retirements])
         .reset_index(drop=True)
+        .add_missing_columns(schema=credit_without_id_schema)
         .validate(schema=credit_without_id_schema)
     )
 

diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -10,7 +10,7 @@
 
 @pytest.fixture
 def date() -> str:
-    return '2024-05-03'
+    return '2024-08-27'
 
 
 @pytest.fixture
@@ -21,7 +21,7 @@ def bucket() -> str:
 @pytest.fixture
 def arb() -> pd.DataFrame:
     data = pd.read_excel(
-        's3://carbonplan-offsets-db/raw/2024-05-03/arb/nc-arboc_issuance.xlsx', sheet_name=3
+        's3://carbonplan-offsets-db/raw/2024-08-27/arb/nc-arboc_issuance.xlsx', sheet_name=3
     )
     return data.process_arb()
 
@@ -103,10 +103,10 @@ def test_gld(
         pd.concat(
             [
                 pd.read_csv(
-                    's3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/issuances.csv.gz'
+                    's3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/issuances.csv.gz'
                 ).process_gld_credits(download_type='issuances'),
                 pd.read_csv(
-                    's3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/retirements.csv.gz'
+                    's3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/retirements.csv.gz'
                 ).process_gld_credits(download_type='retirements'),
             ]
         ),
@@ -116,7 +116,7 @@ def test_gld(
     'projects',
     [
         pd.DataFrame(),
-        pd.read_csv('s3://carbonplan-offsets-db/raw/2024-05-03/gold-standard/projects.csv.gz'),
+        pd.read_csv('s3://carbonplan-offsets-db/raw/2024-08-27/gold-standard/projects.csv.gz'),
     ],
 )
 def test_gld_empty(df_credits, projects):