User:Raminagrobis/codeELM

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

Python source code used to generate all the oil balance graphs from all countries.

To run this, one needs to have the numpy, matplotlib, openpyxl and pywikibot libraries installed.

The XLSX file released by BP (BP statistical report) must be in the same folder.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 11 14:43:27 2021

@author: raming
"""

import matplotlib.pyplot as plt
import numpy as np

from openpyxl import load_workbook

import pywikibot
from pywikibot.bot import suggest_help
from pywikibot.specialbots import UploadRobot
# Bot account configuration, done in code rather than in a user-config file.
# User name registered under the 'commons' / 'commons' entry of pywikibot's usernames table.
pywikibot.config.usernames['commons']['commons'] = 'Raminagrobis'
# Name of the file pywikibot is told to use as its password file
# (presumably resolved relative to the working directory -- TODO confirm).
pywikibot.config.password_file = 'password.txt'


def convtostairs(xin, yin):
    """Add a leading point so that the series can be drawn as a stair plot.

    Both inputs are converted to numpy arrays.  The x values get one extra
    leading entry (first x minus one) and are all shifted by +0.5; the y
    values get their first entry duplicated at the front.  Each returned
    array is therefore one element longer than its input.

    Raises IndexError when either input is empty.
    """
    xs = np.array(xin)
    ys = np.array(yin)
    first_x = xs[0]
    first_y = ys[0]
    stair_x = np.insert(xs, 0, first_x - 1) + 0.5
    stair_y = np.insert(ys, 0, first_y)
    return stair_x, stair_y


class dataset:
    """Production and consumption figures of one resource for one country.

    One such object is created for each country/resource combination, so one
    country can have up to three such objects: oil, gas, coal.  Subclasses
    overload ``unit`` and ``ressource``.
    """

    unit = 'none'  # to be overloaded
    ressource = 'unknown'  # to be overloaded
    figsize = (11, 6)
    fontsize = 17  # for titles and labels

    def __init__(self, countryname,years, prodfig, consumpfig ):
        """Store the raw series.

        countryname -- name of the country, used in titles and file names
        years       -- time axis (a sequence supporting len() and indexing)
        prodfig     -- production figures, or None when not available
        consumpfig  -- consumption figures, or None when not available
        """
        self.countryname = countryname
        self.years = years
        self.prodfig = prodfig
        self.consumpfig = consumpfig

    def graph(self):
        """Draw the graph that matches the available data.

        Consumption-only, production-only or full balance graph depending on
        which of ``prodfig`` / ``consumpfig`` is None.  Axis labels are added
        through pyplot, i.e. on the axes the selected method just created.

        Returns a ``(figure, filename)`` tuple; the file name always ends in
        ".svg".
        """
        if self.prodfig is None:
            f = self.graph_cons()
        elif self.consumpfig is None:
            f = self.graph_prod()
        else:
            f = self.graph_bal()
        plt.xlabel('Years. Data from BP statistical report, 2021 edition', fontsize=self.fontsize)
        plt.ylabel(self.unit, fontsize=self.fontsize)
        figfilename = self.ressource+' Balance '+self.countryname+".svg"

        return f, figfilename

    def graph_bal(self):
        """Create the balance graph (both production and consumption known).

        Production is drawn as grey bars, consumption as a black stair line,
        and the difference production - consumption as green (positive) or
        red (negative) bars.  Returns the figure.
        """
        figure, _ = plt.subplots(figsize=self.figsize)

        bal = [prod - cons for prod, cons in zip(self.prodfig, self.consumpfig)]
        balpos = [max(0, b) for b in bal]
        balneg = [min(0, b) for b in bal]
        plt.bar(self.years, self.prodfig, color='darkgrey', edgecolor='darkgrey', label = 'Production')
        # Stair x values are offset by 0.5 by convtostairs.
        yrs, cstairs = convtostairs(self.years, self.consumpfig)
        plt.step(yrs, cstairs, linewidth=2, color = 'black', label = 'Consumption')
        plt.bar(self.years, balpos, color='green', edgecolor='green', label = 'Net Exports')
        plt.bar(self.years, balneg, color='red', edgecolor='red', label = 'Net Imports')
        plt.legend(loc='best', frameon=True )
        mytitle = self.ressource + ' Balance for '+self.countryname
        plt.title(mytitle, fontsize=self.fontsize)
        # Zero line, drawn after the legend so it gets no legend entry.  It
        # runs two years past the last data year (2022 for data ending 2020).
        plt.plot([self.years[0], self.years[-1] + 2], [0, 0], color='black')
        plt.grid(False)
        return figure

    def graph_cons(self):
        """Create the graph for countries where only consumption is available.

        Returns the figure.
        """
        figure, _ = plt.subplots(figsize=self.figsize)

        # NOTE(review): the x values are used as-is here, whereas graph_bal
        # offsets them by 0.5 and graph_prod by 1 -- confirm this is intended.
        # The label is attached to the artist itself: plt.step returns a list
        # of lines, which is not usable as a single legend handle.
        plt.step(self.years, self.consumpfig, linewidth=2, color = 'black',
                 label = self.ressource+" consumption")
        plt.legend(loc='best', frameon=True )
        mytitle = self.ressource + ' consumption for '+self.countryname
        plt.title(mytitle, fontsize=self.fontsize)
        plt.grid(True)
        return figure

    def graph_prod(self):
        """Create the graph for countries where only production is available.

        Returns the figure.
        """
        figure, _ = plt.subplots(figsize=self.figsize)

        # Same padding as a stair plot (first point duplicated), but with the
        # x values offset by 1.
        xs = 1 + np.insert(self.years, 0, self.years[0] - 1)
        ys = np.insert(self.prodfig, 0, self.prodfig[0])
        # Label attached to the artist: plt.step returns a list of lines,
        # which is not usable as a single legend handle.
        plt.step(xs, ys, linewidth=2, color = 'black',
                 label = self.ressource+" production")
        plt.legend(loc='best', frameon=True )
        mytitle = self.ressource + ' production for '+self.countryname
        plt.title(mytitle, fontsize=self.fontsize)
        plt.grid(True)
        return figure
        
        

class dataset_oil(dataset):
    """Oil variant of ``dataset``: labels plus the workbook locations of its figures."""

    # Labels used in titles, file names and on the y axis.
    ressource = 'Oil'
    unit = 'Thousand barrels a day'

    # Names of the worksheets holding production and consumption.
    prodtabname = 'Oil Production - Barrels'
    constabname = 'Oil Consumption - Barrels'

    # First and last column letters read on the country's row in each worksheet.
    prodrange = ('B', 'BE')
    consrange = ('B', 'BE')

    # Time axis handed to the dataset.
    yearsrange = range(1965, 2021)
class dataset_gas(dataset):
    """Gas variant of ``dataset``: labels plus the workbook locations of its figures."""

    # Labels used in titles, file names and on the y axis.
    ressource = 'Gas'
    unit = 'Billion cubic meters per year'

    # Names of the worksheets holding production and consumption.
    prodtabname = 'Gas Production - Bcm'
    constabname = 'Gas Consumption - Bcm'

    # First and last column letters read on the country's row in each worksheet.
    prodrange = ('B', 'AZ')
    consrange = ('G', 'BE')

    # Time axis handed to the dataset.
    yearsrange = range(1970, 2021)
class dataset_coal(dataset):
    """Coal variant of ``dataset``: labels plus the workbook locations of its figures."""

    # Labels used in titles, file names and on the y axis.
    ressource = 'Coal'
    unit = 'Exajoules per year'

    # Names of the worksheets holding production and consumption.
    prodtabname = 'Coal Production - EJ'
    constabname = 'Coal Consumption - EJ'

    # First and last column letters read on the country's row in each worksheet.
    prodrange = ('B', 'AO')
    consrange = ('R', 'BE')

    # Time axis handed to the dataset.
    yearsrange = range(1981, 2021)

class datasource:
    """Encapsulates the XLSX workbook and extracts per-country datasets from it."""

    def __init__(self, filepathname):
        """Open the workbook stored at *filepathname* in read-only mode."""
        self.wb = load_workbook(filepathname, read_only=True)

    def search_country(self, worksheet, countryname):
        """Search the first column of *worksheet* for *countryname*.

        Rows are scanned from the top down to (and including) the row whose
        first cell is 'Total World', or to the end of the sheet when there is
        no such row.  Returns the 1-based row number of the last match, or
        None if the name was not found.
        """
        foundplace = None
        # One streaming pass over column A; the loop is bounded by the sheet
        # itself, so it also ends on sheets that lack a 'Total World' row.
        first_column = worksheet.iter_rows(min_row=1, min_col=1, max_col=1,
                                           values_only=True)
        for line, row in enumerate(first_column, start=1):
            value = row[0] if row else None
            if value == countryname:
                foundplace = line
            if value == 'Total World':
                break
        return foundplace

    def read_onedata(self, tabname, countryname, columnsrange):
        """Read one line of data for *countryname* from the tab *tabname*.

        columnsrange -- (first, last) column letters of the cells to read

        Returns a list with one entry per cell, where every non-numeric cell
        is replaced by 0, or None when the country is not in the worksheet.
        """
        worksheet = self.wb[tabname]
        line = self.search_country(worksheet, countryname)
        if line is None:
            return None

        strbegin = columnsrange[0] + str(line)
        strend = columnsrange[1] + str(line)
        figures = []
        # Only one row is selected, but the range is an iterable of rows.
        for row in worksheet[strbegin:strend]:
            for cell in row:
                content = cell.value
                # bool is a subclass of int; it is not treated as a figure.
                is_number = (isinstance(content, (int, float))
                             and not isinstance(content, bool))
                figures.append(content if is_number else 0)
        return figures

    def extract_any(self, countryname, datasetclass):
        """Build a *datasetclass* object for *countryname*.

        Production and consumption are read from the tabs and column ranges
        declared on *datasetclass*.  Returns None only when both series are
        missing; when just one is missing, the dataset is still created with
        None in place of that series.
        """
        # production
        dataprod = self.read_onedata(datasetclass.prodtabname, countryname, datasetclass.prodrange)
        # consumption
        datacons = self.read_onedata(datasetclass.constabname, countryname, datasetclass.consrange)

        if dataprod is None and datacons is None:  # missing data
            return None
        return datasetclass(countryname, datasetclass.yearsrange, dataprod, datacons)

    def extract_oil(self, countryname):
        """Return the oil dataset for *countryname*, or None if there is no data."""
        return self.extract_any(countryname, dataset_oil)

    def extract_gas(self, countryname):
        """Return the gas dataset for *countryname*, or None if there is no data."""
        return self.extract_any(countryname, dataset_gas)

    def extract_coal(self, countryname):
        """Return the coal dataset for *countryname*, or None if there is no data."""
        return self.extract_any(countryname, dataset_coal)


def uploadgraph(dataset):
    """Make the graph of *dataset*, save it to disk and upload it to Commons.

    dataset -- a dataset object (not None); note that this parameter name
               hides the ``dataset`` class inside this function.

    The figure is written to the file name returned by ``dataset.graph()``
    in the current directory and then uploaded under that same name.
    """
    figure, fname = dataset.graph()
    try:
        figure.savefig(fname, transparent=False)
    finally:
        # Release the figure: pyplot keeps every figure alive until it is
        # closed, which adds up when many graphs are generated in one run.
        plt.close(figure)
    url = [ fname ]#=={{int:filedesc}}==
    desc1 = r"""
    {{Information
    |description={{en|1="""
    # The trailing '. ' separates the title sentence from the text that follows.
    desc2 = dataset.ressource + ' balance for '+dataset.countryname + '. '
    desc3 = r"""This is meant to supersede old "ELM" graphs uploaded in 2008 by Jonathan Callahan , that are out-of-date, non-vectorial and of dubious license. 
    The data are from BP statistical report (2021 Edition). 
    note that imports or exports are approximated using the difference between production and consumption. This is a good approximation, but not 100% accurate because some countries can import and export petroleum in the same time, and it ignores refinery gains and stock changes. 
    The graphs are generated automagically from BP's excel file using a PYTHON script. It can be found [[User:Raminagrobis/codeELM|here]] for future updates.}}
    |date=2021-09-11
    |source={{own}}
    |author=[[User:Raminagrobis|Raminagrobis]]
    |permission=
    |other versions=
    }}

    {{self|cc-by-sa-4.0}}
    
    [[Category:Petroleum statistics]]"""  #=={{int:license-header}}==
    description= desc1+desc2+desc3
    keepFilename = True        #set to True to skip double-checking/editing destination filename
    verifyDescription = False    #set to False to skip double-checking/editing description => change to bot-mode
    targetSite = pywikibot.Site('commons', 'commons')

    bot = UploadRobot(url, description=description, use_filename=fname, keep_filename=keepFilename, verify_description=verifyDescription, target_site=targetSite)
    bot.run()

def runcountry(mydatasource,countryname):
    """Generate and upload every graph available for one country.

    The oil, gas and coal datasets are all extracted first, in that order;
    each one that is not None is then passed to uploadgraph, in the same
    order.
    """
    extractors = (
        mydatasource.extract_oil,
        mydatasource.extract_gas,
        mydatasource.extract_coal,
    )
    extracted = [extract(countryname) for extract in extractors]
    for found in extracted:
        if found is None:
            continue
        uploadgraph(found)


# Script entry point: only runs when the file is executed directly, so that
# importing the module does not open the workbook or upload anything.
if __name__ == "__main__":
    # The BP workbook is looked up by this relative file name.
    BPstat = datasource('bp-stats-review-2021-all-data.xlsx')
    runcountry(BPstat, 'Tunisia')