"""Class to interact with and query the EITI API."""
# Load Libraries
import os
import requests # So we can query a web API
from urllib.parse import urlencode # So we can manipulate the API requests and define the search
import pandas as pd
import json
import datetime
from termcolor import colored, cprint # to print errors in red!
import numpy as np
class EITIstream:
    """
    Container to manage EITI API

    A single GET request is issued on construction and the decoded JSON body
    is kept in ``self.response``. The ``get_*`` methods convert that payload
    into lists or DataFrames.

    Attributes
    ----
    headers: HTTP headers sent with every request
    key: API version string (e.g. "v2.0")
    section: API section appended to the version in the URL
    url: URL of the initial request
    response: decoded JSON of the most recently fetched page
    current: running number of entries collected by get_organisations
    """

    def __init__(self, version: str, section: str, headers, params=None):
        """
        Build the request URL and fetch the first page.
        ----
        version: API version, used as a URL path segment
        section: API section, used as a URL path segment
        headers: headers passed to requests.get
        params: optional query parameters; None/empty sends no parameters
        """
        self.headers = headers
        self.key = version
        self.section = section
        self.url = 'https://eiti.org/api/' + version + "/" + section
        query = params if params else {}
        self.response = dict(requests.get(self.url, params=query, headers=headers).json())
        self.current = 0

    def hasNext(self) -> bool:
        """
        Helper function to check if there exists a next page
        ----
        Return: True if the current response carries a non-empty "next" entry
        """
        # A missing or empty "next" both mean there is nothing left to fetch.
        return bool(self.response.get("next"))

    def nextPage(self) -> None:
        """
        Helper function that grabs the next page and stores it in self.response
        """
        next_url = self.response['next']['href']
        # Re-use the headers of the initial request so every page is requested
        # the same way.
        self.response = dict(requests.get(next_url, headers=self.headers).json())

    def get_organisations(self) -> list:
        """
        Method to collect information on all organizations later to be matched with
        revenue data. Follows the "next" links until the last page is reached.
        ----
        Return: List of Dictionaries (entries of all pages, in page order)
        """
        organisations = []
        while True:
            page = self.response['data']  # entries of the current page
            organisations.extend(page)
            self.current += len(page)  # update count by number of entries retrieved in this step
            if not self.hasNext():
                break
            self.nextPage()  # replaces self.response with the following page
        return organisations

    def get_countries(self):
        """
        Method to collect information on all countries in the EITI dataset
        ----
        Return: DataFrame, or None (after printing an error) if the reported
        count does not match the number of entries received
        """
        data = self.response['data']  # retrieve data
        count = self.response['count']  # number of countries reported by the API
        if count != len(data):  # sanity check
            cprint("Error: the response count does not equal the number of data reports retrieved", "red")
            return None

        countries = []
        for country in data:
            reports = country['reports']
            # report years in ascending order as comma separated string, None if no reports
            report_years = ",".join(sorted(reports)) if reports else None
            countries.append({
                'country_id': country['id'],
                'country': country['label'],
                'iso2': country['iso2'],
                'iso3': country['iso3'],
                'report_years': report_years,
                'latest_validation_date': country['latest_validation_date'],
            })
        return pd.DataFrame(countries)

    def get_JoinLeaveDates(self):
        """
        Method to collect join/leave dates of all countries (API version "v2.0" only)
        ----
        Return: DataFrame, or None (after printing an error) if the stream was
        not created for "v2.0" or the reported count does not match the data
        """
        # The version passed to the constructor is stored in self.key.
        if self.key != "v2.0":  # sanity check as data only retrievable if version 2
            cprint("Error: Requested Data only retrievable in 'v2.0'. Check Version!", "red")
            return None

        data = self.response['data']  # retrieve data
        count = self.response['count']  # number of countries reported by the API
        if count != len(data):  # sanity check
            cprint("Error: The response count does not equal the number of data reports retrieved", "red")
            return None

        info = [
            {
                "id_v2": country['id'],
                "country": country['label'],
                "iso2": country['iso2'],
                "iso3": country['iso3'],
                "join_date": country['join_date'],
                "leave_date": country['leave_date'],
                "latest_validation_date": country['latest_validation_date'],
            }
            for country in data
        ]
        return pd.DataFrame(info)

    def get_gfsCodes(self):
        """
        Method to collect information on all GFS Codes/Descriptions in the EITI dataset
        ----
        Return: DataFrame, or None (after printing an error) if the reported
        count does not match the number of entries received
        """
        data = self.response['data']  # retrieve data
        count = self.response['count']  # number of codes
        if count != len(data):  # sanity check
            cprint("Error: the response count does not equal the number of data reports retrieved", "red")
            return None

        codes = [
            {
                'gfs_code_id': code['id'],
                'gfs_code': code['code'],
                'gfs_description': code['label'],
                'gfs_parent': code['parent'],
            }
            for code in data
        ]
        return pd.DataFrame(codes)

    @staticmethod
    def _report_frame(report):
        """
        Build the revenue DataFrame of a single report.
        ----
        Return: DataFrame with one row per revenue entry plus the report's
        contextual columns, or None if the report has no revenue entries
        """
        # contextual info that is attached to every revenue row
        info = {
            'report_id': report['id'],
            'report_label': report['label'],
            'country_id': report['country']['id'],
            'country': report['country']['label'],
            'government_entities_nr': report['government_entities_nr'],  # number of government organizations
            'company_entities_nr': report['company_entities_nr'],  # number of company organizations
            'publication_date': report['publication_date_EITI_report'],
            'year_start': report['year_start'],
            'year_end': report['year_end'],
        }

        revenues = []
        for rev in (report['revenue_government'], report['revenue_company']):
            if rev:  # either source may be empty or missing a value
                revenues.extend(rev)
        if not revenues:
            return None

        df = pd.DataFrame(revenues)
        for key, value in info.items():
            # append as last column; insert refuses to overwrite an existing column
            df.insert(df.shape[1], column=key, value=value)
        return df

    def get_revenues(self):
        """
        Method to collect information on revenues paid by companies.
        ----
        Return: DataFrame with the revenue rows of all reports, or None if
        there is no data, no report contains revenue entries, or (after
        printing an error) the reported count does not match the data
        """
        data = self.response['data']  # retrieve data
        count = self.response['count']  # number of reports
        if count != len(data):  # sanity check
            cprint("Error: the response count does not equal the number of data reports retrieved", "red")
            return None

        frames = []
        for report in data:
            print(f"Working on {report['label']}")
            frame = self._report_frame(report)
            # Skip reports without revenue rows: pd.concat raises if it is
            # handed nothing but None values.
            if frame is not None:
                frames.append(frame)

        if not frames:
            return None
        return pd.concat(frames, ignore_index=True)