Skip to content

Commit 9fe1e85

Browse files
committed
Calculate publishers spending that is traceable
#21
1 parent f4e3307 commit 9fe1e85

File tree

1 file changed

+82
-1
lines changed

1 file changed

+82
-1
lines changed

Diff for: stats/analytics.py

+82-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from __future__ import print_function
77
from lxml import etree
88
from datetime import date, datetime, timedelta
9-
from collections import defaultdict, OrderedDict
9+
from collections import Counter, defaultdict, OrderedDict
1010
from decimal import Decimal, InvalidOperation
1111
import os
1212
import re
@@ -1370,6 +1370,7 @@ def _sum_transactions_by_type_by_year(self):
13701370
return out
13711371

13721372
@returns_numberdictdictdict
1373+
@memoize
13731374
def sum_transactions_by_type_by_year_usd(self):
13741375
out = defaultdict(lambda: defaultdict(lambda: defaultdict(Decimal)))
13751376

@@ -1378,6 +1379,10 @@ def sum_transactions_by_type_by_year_usd(self):
13781379
for transaction_type, data in list(self._sum_transactions_by_type_by_year().items()):
13791380
for currency, years in list(data.items()):
13801381
for year, value in list(years.items()):
1382+
# FIXME currently there's no currency data in this repo
1383+
# after 2014, it is better to use 2014 than silently failing
1384+
if year > 2014:
1385+
year = 2014
13811386
if None not in [currency, value, year]:
13821387
out[transaction_type]['USD'][year] += get_USD_value(currency, value, year)
13831388
return out
@@ -1444,6 +1449,24 @@ def activities_with_future_transactions(self):
14441449
return 1
14451450
return 0
14461451

1452+
@returns_numberdict
def provider_activity_id(self):
    """Count the provider-activity-id values referenced by this activity.

    Returns a dict mapping each value of
    ``transaction/provider-org/@provider-activity-id`` found under
    ``self.element`` to the number of times it occurs. Any entry keyed by
    this activity's own ``iati_identifier()`` is removed.
    """
    referenced_ids = self.element.xpath('transaction/provider-org/@provider-activity-id')
    counts = dict(Counter(referenced_ids))
    # Drop a self-reference if there is one; a missing key is not an error.
    counts.pop(self.iati_identifier(), None)
    return counts
1458+
1459+
@returns_numberdict
def sum_commitments_and_disbursements_by_activity_id_usd(self):
    """Return this activity's combined commitment and disbursement total in USD.

    The USD per-year sums for transaction types '2' and '3' are read from
    ``sum_transactions_by_type_by_year_usd()`` and added together across
    all years. The result is ``{iati_identifier: total}``, or an empty
    dict when the total is zero/falsy.
    """
    # Handle 1.0x?
    # NOTE(review): only type codes '2' and '3' are looked up here; the
    # question above about 1.0x data is still open.
    usd_sums_by_type = self.sum_transactions_by_type_by_year_usd()
    commitments_by_year = usd_sums_by_type.get('2', {}).get('USD', {})
    disbursements_by_year = usd_sums_by_type.get('3', {}).get('USD', {})
    total_usd = sum(commitments_by_year.values()) + sum(disbursements_by_year.values())
    if not total_usd:
        return {}
    return {self.iati_identifier(): total_usd}
1469+
14471470

14481471
# Load helpers/ckan.json once at import time. The context manager closes the
# file handle as soon as parsing finishes; the previous bare open() call left
# it open until garbage collection.
with open('helpers/ckan.json') as _ckan_file:
    ckan = json.load(_ckan_file)
# Group 1 captures everything before a hyphen that is followed by one
# non-hyphen character (greedy, so the last such hyphen wins).
# NOTE(review): presumably used to derive a publisher name from a dataset
# name -- confirm against the call sites.
publisher_re = re.compile(r'(.*)\-[^\-]')
@@ -1808,6 +1831,28 @@ def _latest_transaction_date(self):
18081831
if transaction_dates:
18091832
return str(max(transaction_dates))
18101833

1834+
@returns_numberdict
def provider_activity_id_without_own(self):
    """Return the provider-activity-id counts that are not in this aggregate's identifiers.

    Keeps the entries of ``self.aggregated['provider_activity_id']`` whose
    key is absent from ``self.aggregated['iati_identifiers']``.
    """
    aggregated = self.aggregated
    not_own = {}
    for activity_id, count in aggregated['provider_activity_id'].items():
        if activity_id not in aggregated['iati_identifiers']:
            not_own[activity_id] = count
    return not_own
1838+
1839+
@returns_numberdict
def provider_activity_id_own(self):
    """Return the provider-activity-id counts that are also in this aggregate's identifiers.

    Keeps the entries of ``self.aggregated['provider_activity_id']`` whose
    key is present in ``self.aggregated['iati_identifiers']``.
    """
    aggregated = self.aggregated
    own = {}
    for activity_id, count in aggregated['provider_activity_id'].items():
        if activity_id in aggregated['iati_identifiers']:
            own[activity_id] = count
    return own
1843+
1844+
@returns_numberdictdict
def sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd(self):
    """Nest the per-activity USD sums under ``self.folder``.

    Returns ``{self.folder: aggregated sums by activity id}``.
    """
    # This function and iati_identifiers_by_publisher_id produce similar
    # data to the invert step, but we have to include them here to make
    # that data available in the AllDataStats step.
    folder = self.folder
    sums_by_activity_id = self.aggregated['sum_commitments_and_disbursements_by_activity_id_usd']
    return {folder: sums_by_activity_id}
1850+
1851+
@returns_numberdictdict
def iati_identifiers_by_publisher_id(self):
    """Nest the aggregated iati-identifier counts under ``self.folder``.

    Returns ``{self.folder: self.aggregated['iati_identifiers']}``.
    """
    # Like sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd,
    # this produces similar data to the invert step, but it has to be
    # included here to make that data available in the AllDataStats step.
    folder = self.folder
    identifier_counts = self.aggregated['iati_identifiers']
    return {folder: identifier_counts}
1855+
18111856

18121857
class OrganisationFileStats(GenericFileStats):
18131858
""" Stats calculated for an IATI Organisation XML file. """
@@ -1847,3 +1892,39 @@ def unique_identifiers(self):
18471892
@returns_numberdict
def _duplicate_identifiers(self):
    """Return the aggregated iati-identifiers that occur more than once.

    The result maps each identifier whose count exceeds 1 to that count.
    """
    duplicates = {}
    for identifier, count in self.aggregated['iati_identifiers'].items():
        if count > 1:
            duplicates[identifier] = count
    return duplicates
1895+
1896+
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id(self):
    """Sum, per publisher, the USD totals of activities referenced as a provider activity.

    An activity's total is added to its publisher's sum only when the
    activity id is a key of
    ``self.aggregated['provider_activity_id_without_own']``. Publishers
    with no such activity get no entry.
    """
    aggregated = self.aggregated
    totals = defaultdict(Decimal)
    sums_by_publisher = aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, sums_by_activity_id in sums_by_publisher.items():
        for activity_id, amount in sums_by_activity_id.items():
            if activity_id not in aggregated['provider_activity_id_without_own']:
                continue
            totals[publisher_id] += amount
    return totals
1904+
1905+
@returns_numberdict
def traceable_sum_commitments_and_disbursements_by_publisher_id_denominator(self):
    """Sum, per publisher, the USD totals of activities not listed in ``provider_activity_id_own``.

    An activity's total is skipped when the activity id is a key of
    ``self.aggregated['provider_activity_id_own']``; every other activity
    contributes to its publisher's sum. Publishers with nothing to add get
    no entry.
    """
    aggregated = self.aggregated
    totals = defaultdict(Decimal)
    sums_by_publisher = aggregated['sum_commitments_and_disbursements_by_activity_id_by_publisher_id_usd']
    for publisher_id, sums_by_activity_id in sums_by_publisher.items():
        for activity_id, amount in sums_by_activity_id.items():
            if activity_id in aggregated['provider_activity_id_own']:
                continue
            totals[publisher_id] += amount
    return totals
1913+
1914+
@returns_numberdict
def traceable_activities_by_publisher_id(self):
    """Count, per publisher, the activities referenced as a provider activity.

    An identifier's count is added to its publisher's tally only when the
    identifier is a key of
    ``self.aggregated['provider_activity_id_without_own']``. Publishers
    with no such identifier get no entry.
    """
    aggregated = self.aggregated
    tallies = defaultdict(int)
    for publisher_id, counts_by_identifier in aggregated['iati_identifiers_by_publisher_id'].items():
        for identifier, occurrences in counts_by_identifier.items():
            if identifier not in aggregated['provider_activity_id_without_own']:
                continue
            tallies[publisher_id] += occurrences
    return tallies
1922+
1923+
@returns_numberdict
def traceable_activities_by_publisher_id_denominator(self):
    """Count, per publisher, the activities not listed in ``provider_activity_id_own``.

    An identifier's count is skipped when the identifier is a key of
    ``self.aggregated['provider_activity_id_own']``; every other identifier
    contributes to its publisher's tally. Publishers with nothing to add
    get no entry.
    """
    aggregated = self.aggregated
    tallies = defaultdict(int)
    for publisher_id, counts_by_identifier in aggregated['iati_identifiers_by_publisher_id'].items():
        for identifier, occurrences in counts_by_identifier.items():
            if identifier in aggregated['provider_activity_id_own']:
                continue
            tallies[publisher_id] += occurrences
    return tallies

0 commit comments

Comments
 (0)