66from __future__ import print_function
77from lxml import etree
88from datetime import date , datetime , timedelta
9- from collections import defaultdict , OrderedDict
9+ from collections import Counter , defaultdict , OrderedDict
1010from decimal import Decimal , InvalidOperation
1111import os
1212import re
@@ -1370,6 +1370,7 @@ def _sum_transactions_by_type_by_year(self):
13701370 return out
13711371
13721372 @returns_numberdictdictdict
1373+ @memoize
13731374 def sum_transactions_by_type_by_year_usd (self ):
13741375 out = defaultdict (lambda : defaultdict (lambda : defaultdict (Decimal )))
13751376
@@ -1378,6 +1379,10 @@ def sum_transactions_by_type_by_year_usd(self):
13781379 for transaction_type , data in list (self ._sum_transactions_by_type_by_year ().items ()):
13791380 for currency , years in list (data .items ()):
13801381 for year , value in list (years .items ()):
1382+ # FIXME currently there's no currency data in this repo
1383+ # after 2014, it is better to use 2014 than silently failing
1384+ if year > 2014 :
1385+ year = 2014
13811386 if None not in [currency , value , year ]:
13821387 out [transaction_type ]['USD' ][year ] += get_USD_value (currency , value , year )
13831388 return out
@@ -1444,6 +1449,24 @@ def activities_with_future_transactions(self):
14441449 return 1
14451450 return 0
14461451
@returns_numberdict
def provider_activity_id(self):
    """Count the provider-activity-id references made by this activity.

    Returns a dict mapping each value found at
    ``transaction/provider-org/@provider-activity-id`` to the number of
    times it occurs, with any entry for this activity's own identifier
    removed.
    """
    referenced_ids = self.element.xpath('transaction/provider-org/@provider-activity-id')
    counts = dict(Counter(referenced_ids))
    # Drop a reference that points back at this very activity.
    counts.pop(self.iati_identifier(), None)
    return counts
1458+
@returns_numberdict
def sum_commitments_and_disbursements_by_activity_id_usd(self):
    """Return this activity's combined USD commitments and disbursements.

    Adds up, across all years, the USD totals recorded under transaction
    type '2' (commitments) and type '3' (disbursements).

    Returns ``{iati_identifier: total}`` when the total is truthy,
    otherwise an empty dict.
    """
    # Handle 1.0x?  (only the codes '2' and '3' are looked up here)
    usd_by_type = self.sum_transactions_by_type_by_year_usd()
    commitments_by_year = usd_by_type.get('2', {}).get('USD', {})
    disbursements_by_year = usd_by_type.get('3', {}).get('USD', {})
    combined_total = sum(commitments_by_year.values()) + sum(disbursements_by_year.values())
    if not combined_total:
        return {}
    return {self.iati_identifier(): combined_total}
1469+
14471470
# Load helpers/ckan.json once at import time. The context manager makes sure
# the file handle is closed as soon as parsing finishes instead of being left
# for the garbage collector.
with open('helpers/ckan.json') as ckan_file:
    ckan = json.load(ckan_file)

# Group 1 captures everything before the last hyphen that is immediately
# followed by a non-hyphen character.
publisher_re = re.compile(r'(.*)\-[^\-]')
@@ -1808,6 +1831,28 @@ def _latest_transaction_date(self):
18081831 if transaction_dates :
18091832 return str (max (transaction_dates ))
18101833
@returns_numberdict
def provider_activity_id_without_own(self):
    """Return provider-activity-id counts for identifiers not in this aggregate.

    Keeps the entries of ``aggregated['provider_activity_id']`` whose key
    is absent from ``aggregated['iati_identifiers']``.
    """
    foreign_refs = {}
    for activity_id, count in self.aggregated['provider_activity_id'].items():
        if activity_id in self.aggregated['iati_identifiers']:
            continue
        foreign_refs[activity_id] = count
    return foreign_refs
1838+
@returns_numberdict
def provider_activity_id_own(self):
    """Return provider-activity-id counts for identifiers present in this aggregate.

    Keeps the entries of ``aggregated['provider_activity_id']`` whose key
    also appears in ``aggregated['iati_identifiers']``.
    """
    own_refs = {}
    for activity_id, count in self.aggregated['provider_activity_id'].items():
        if activity_id not in self.aggregated['iati_identifiers']:
            continue
        own_refs[activity_id] = count
    return own_refs
1843+
@returns_numberdictdict
def sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd(self):
    """Key the per-activity commitment/disbursement totals by ``self.folder``.

    This and iati_identifiers_by_publisher_id produce data similar to the
    invert step, but they have to live here so that the data is available
    in the AllDataStats step.
    """
    totals_by_activity = self.aggregated['sum_commitments_and_disbursements_by_activity_id_usd']
    return {self.folder: totals_by_activity}
1850+
@returns_numberdictdict
def iati_identifiers_by_publisher_id(self):
    """Key the aggregated iati_identifiers counts by ``self.folder``.

    Produces data similar to the invert step, but it has to be computed
    here so that it is available in the AllDataStats step.
    """
    identifier_counts = self.aggregated['iati_identifiers']
    return {self.folder: identifier_counts}
1855+
18111856
18121857class OrganisationFileStats (GenericFileStats ):
18131858 """ Stats calculated for an IATI Organisation XML file. """
@@ -1847,3 +1892,39 @@ def unique_identifiers(self):
@returns_numberdict
def _duplicate_identifiers(self):
    """Return the aggregated iati_identifiers entries whose count exceeds one."""
    duplicates = {}
    for identifier, occurrences in self.aggregated['iati_identifiers'].items():
        if occurrences > 1:
            duplicates[identifier] = occurrences
    return duplicates
1895+
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id(self):
    """Sum, per publisher, the USD totals of activities referenced by others.

    An activity's total is counted when its identifier is a key of
    ``aggregated['provider_activity_id_without_own']``. Publishers with no
    such activity get no entry in the result.
    """
    totals = defaultdict(Decimal)
    sums_by_publisher = self.aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, sums_by_activity in sums_by_publisher.items():
        for activity_id, amount in sums_by_activity.items():
            if activity_id not in self.aggregated['provider_activity_id_without_own']:
                continue
            totals[publisher_id] += amount
    return totals
1904+
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id_denominator(self):
    """Sum, per publisher, the USD totals of activities outside provider_activity_id_own.

    An activity's total is counted unless its identifier is a key of
    ``aggregated['provider_activity_id_own']``. Publishers with no counted
    activity get no entry in the result.
    """
    totals = defaultdict(Decimal)
    sums_by_publisher = self.aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, sums_by_activity in sums_by_publisher.items():
        for activity_id, amount in sums_by_activity.items():
            if activity_id in self.aggregated['provider_activity_id_own']:
                continue
            totals[publisher_id] += amount
    return totals
1913+
@returns_numberdict
def traceable_activities_by_publisher_id(self):
    """Count, per publisher, the activities referenced by others.

    An identifier's count is added when the identifier is a key of
    ``aggregated['provider_activity_id_without_own']``. Publishers with no
    such identifier get no entry in the result.
    """
    tally = defaultdict(int)
    identifiers_by_publisher = self.aggregated['iati_identifiers_by_publisher_id']
    for publisher_id, identifier_counts in identifiers_by_publisher.items():
        for identifier, occurrences in identifier_counts.items():
            if identifier not in self.aggregated['provider_activity_id_without_own']:
                continue
            tally[publisher_id] += occurrences
    return tally
1922+
@returns_numberdict
def traceable_activities_by_publisher_id_denominator(self):
    """Count, per publisher, the activities outside provider_activity_id_own.

    An identifier's count is added unless the identifier is a key of
    ``aggregated['provider_activity_id_own']``. Publishers with no counted
    identifier get no entry in the result.
    """
    tally = defaultdict(int)
    identifiers_by_publisher = self.aggregated['iati_identifiers_by_publisher_id']
    for publisher_id, identifier_counts in identifiers_by_publisher.items():
        for identifier, occurrences in identifier_counts.items():
            if identifier in self.aggregated['provider_activity_id_own']:
                continue
            tally[publisher_id] += occurrences
    return tally
0 commit comments