6
6
from __future__ import print_function
7
7
from lxml import etree
8
8
from datetime import date , datetime , timedelta
9
- from collections import defaultdict , OrderedDict
9
+ from collections import Counter , defaultdict , OrderedDict
10
10
from decimal import Decimal , InvalidOperation
11
11
import os
12
12
import re
@@ -1370,6 +1370,7 @@ def _sum_transactions_by_type_by_year(self):
1370
1370
return out
1371
1371
1372
1372
@returns_numberdictdictdict
1373
+ @memoize
1373
1374
def sum_transactions_by_type_by_year_usd (self ):
1374
1375
out = defaultdict (lambda : defaultdict (lambda : defaultdict (Decimal )))
1375
1376
@@ -1378,6 +1379,10 @@ def sum_transactions_by_type_by_year_usd(self):
1378
1379
for transaction_type , data in list (self ._sum_transactions_by_type_by_year ().items ()):
1379
1380
for currency , years in list (data .items ()):
1380
1381
for year , value in list (years .items ()):
1382
+ # FIXME currently there's no currency data in this repo
1383
+ # after 2014, it is better to use 2014 than silently failing
1384
+ if year > 2014 :
1385
+ year = 2014
1381
1386
if None not in [currency , value , year ]:
1382
1387
out [transaction_type ]['USD' ][year ] += get_USD_value (currency , value , year )
1383
1388
return out
@@ -1444,6 +1449,24 @@ def activities_with_future_transactions(self):
1444
1449
return 1
1445
1450
return 0
1446
1451
1452
@returns_numberdict
def provider_activity_id(self):
    """Count the provider-activity-id references made by this activity.

    Returns a dict mapping each ``@provider-activity-id`` value found on a
    ``transaction/provider-org`` element to the number of times it occurs,
    with this activity's own identifier left out.
    """
    referenced_ids = self.element.xpath('transaction/provider-org/@provider-activity-id')
    counts = dict(Counter(referenced_ids))
    # Drop the entry for this activity's own identifier, if there is one.
    counts.pop(self.iati_identifier(), None)
    return counts
1458
+
1459
@returns_numberdict
def sum_commitments_and_disbursements_by_activity_id_usd(self):
    """Total this activity's commitments and disbursements in USD.

    Reads the per-type, per-year USD sums from
    ``sum_transactions_by_type_by_year_usd()`` and adds up every year for
    the commitment and disbursement transaction types.

    Returns:
        ``{iati_identifier: total}`` when the total is non-zero, otherwise
        an empty dict.
    """
    usd_by_type = self.sum_transactions_by_type_by_year_usd()
    # Transaction type codes differ between standard versions: 2.0x uses
    # '2' (commitment) and '3' (disbursement), 1.0x uses 'C' and 'D'.
    # Summing both spellings stops 1.0x activities being silently dropped.
    commitment_and_disbursement_codes = ('C', '2', 'D', '3')
    total_usd = sum(
        sum(usd_by_type.get(code, {}).get('USD', {}).values())
        for code in commitment_and_disbursement_codes
    )
    if total_usd:
        return {self.iati_identifier(): total_usd}
    return {}
1469
+
1447
1470
1448
1471
# Load helpers/ckan.json once at import time. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
with open('helpers/ckan.json') as _ckan_file:
    ckan = json.load(_ckan_file)
# Group 1 greedily captures the text before a hyphen that is followed by a
# non-hyphen character.
publisher_re = re.compile(r'(.*)\-[^\-]')
@@ -1808,6 +1831,28 @@ def _latest_transaction_date(self):
1808
1831
if transaction_dates :
1809
1832
return str (max (transaction_dates ))
1810
1833
1834
@returns_numberdict
def provider_activity_id_without_own(self):
    """Return the aggregated provider-activity-id counts for foreign ids.

    Keeps only the entries of ``aggregated['provider_activity_id']`` whose
    key is NOT among ``aggregated['iati_identifiers']``.
    """
    aggregated = self.aggregated
    foreign = {}
    for activity_id, count in aggregated['provider_activity_id'].items():
        if activity_id in aggregated['iati_identifiers']:
            continue
        foreign[activity_id] = count
    return foreign
1838
+
1839
@returns_numberdict
def provider_activity_id_own(self):
    """Return the aggregated provider-activity-id counts for own ids.

    Keeps only the entries of ``aggregated['provider_activity_id']`` whose
    key IS among ``aggregated['iati_identifiers']``.
    """
    aggregated = self.aggregated
    own = {}
    for activity_id, count in aggregated['provider_activity_id'].items():
        if activity_id not in aggregated['iati_identifiers']:
            continue
        own[activity_id] = count
    return own
1843
+
1844
@returns_numberdictdict
def sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd(self):
    """Nest the per-activity USD sums under ``self.folder``.

    Returns a single-entry dict whose key is ``self.folder`` (presumably
    the publisher id -- confirm against the caller) and whose value is
    ``aggregated['sum_commitments_and_disbursements_by_activity_id_usd']``.
    """
    # This and iati_identifiers_by_publisher_id produce similar data to the
    # invert step, but they have to be included here to make that data
    # available in the AllDataStats step.
    folder_key = self.folder
    sums_by_activity = self.aggregated['sum_commitments_and_disbursements_by_activity_id_usd']
    return {folder_key: sums_by_activity}
1850
+
1851
@returns_numberdictdict
def iati_identifiers_by_publisher_id(self):
    """Nest the aggregated identifier counts under ``self.folder``.

    Returns a single-entry dict whose key is ``self.folder`` (presumably
    the publisher id -- confirm against the caller) and whose value is
    ``aggregated['iati_identifiers']``.
    """
    # Produces similar data to the invert step, but has to be included here
    # to make that data available in the AllDataStats step.
    folder_key = self.folder
    identifier_counts = self.aggregated['iati_identifiers']
    return {folder_key: identifier_counts}
1855
+
1811
1856
1812
1857
class OrganisationFileStats (GenericFileStats ):
1813
1858
""" Stats calculated for an IATI Organisation XML file. """
@@ -1847,3 +1892,39 @@ def unique_identifiers(self):
1847
1892
@returns_numberdict
def _duplicate_identifiers(self):
    """Return the identifiers whose aggregated count is greater than one."""
    duplicates = {}
    for identifier, occurrences in self.aggregated['iati_identifiers'].items():
        if occurrences > 1:
            duplicates[identifier] = occurrences
    return duplicates
1895
+
1896
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id(self):
    """Sum, per publisher, the USD totals of activities referenced elsewhere.

    An activity's total is added to its publisher's sum only when the
    activity id appears in ``aggregated['provider_activity_id_without_own']``.
    Publishers with no such activity get no entry in the result.
    """
    aggregated = self.aggregated
    totals = defaultdict(Decimal)
    sums_by_publisher = aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, activity_sums in sums_by_publisher.items():
        for activity_id, amount in activity_sums.items():
            if activity_id not in aggregated['provider_activity_id_without_own']:
                continue
            totals[publisher_id] += amount
    return totals
1904
+
1905
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id_denominator(self):
    """Sum, per publisher, the USD totals of activities not in the own-id set.

    An activity's total is added to its publisher's sum unless the activity
    id appears in ``aggregated['provider_activity_id_own']``. Publishers
    with no qualifying activity get no entry in the result.
    """
    aggregated = self.aggregated
    totals = defaultdict(Decimal)
    sums_by_publisher = aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, activity_sums in sums_by_publisher.items():
        for activity_id, amount in activity_sums.items():
            if activity_id in aggregated['provider_activity_id_own']:
                continue
            totals[publisher_id] += amount
    return totals
1913
+
1914
@returns_numberdict
def traceable_activities_by_publisher_id(self):
    """Count, per publisher, the activities that are referenced elsewhere.

    An identifier's count is added to its publisher's tally only when the
    identifier appears in ``aggregated['provider_activity_id_without_own']``.
    Publishers with no such identifier get no entry in the result.
    """
    aggregated = self.aggregated
    tallies = defaultdict(int)
    for publisher_id, identifier_counts in aggregated['iati_identifiers_by_publisher_id'].items():
        for identifier, occurrences in identifier_counts.items():
            if identifier not in aggregated['provider_activity_id_without_own']:
                continue
            tallies[publisher_id] += occurrences
    return tallies
1922
+
1923
@returns_numberdict
def traceable_activities_by_publisher_id_denominator(self):
    """Count, per publisher, the activities not in the own-id set.

    An identifier's count is added to its publisher's tally unless the
    identifier appears in ``aggregated['provider_activity_id_own']``.
    Publishers with no qualifying identifier get no entry in the result.
    """
    aggregated = self.aggregated
    tallies = defaultdict(int)
    for publisher_id, identifier_counts in aggregated['iati_identifiers_by_publisher_id'].items():
        for identifier, occurrences in identifier_counts.items():
            if identifier in aggregated['provider_activity_id_own']:
                continue
            tallies[publisher_id] += occurrences
    return tallies
0 commit comments