analysis.py (43 lines of code) (raw):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
since_date = "2010-01-01"
#%%
# Components by index
df = pd.read_csv('cpi-index.csv')
df[['series1','category','city']] = df['series'].str.split(" ; ", expand=True)
cols = list(df.columns)
df = df[['date','value','category','city']]
df['city'] = df['city'].str.replace(" ;","")
df['city'] = df['city'].str.strip()
df['category'] = df['category'].str.strip()
unique_cats = list(df['category'].unique())
exp_class = pd.read_csv("expenditure_classes.csv")
exp_class_list = list(exp_class['expenditure_class'])
exp_class_list.append("All groups CPI")
# exp_class_list.remove("Tobacco")
# removes = [
# "All groups CPI excluding 'volatile items'",
# "All groups CPI excluding Alcohol and tobacco",
# "All groups CPI excluding Clothing and footwear",
# "All groups CPI excluding Communication",
# "All groups CPI excluding Education",
# "All groups CPI excluding Food and non-alcoholic beverages",
# "All groups CPI excluding Furnishings, household equipment and services",
# "All groups CPI excluding Health",
# "All groups CPI excluding Housing",
# "All groups CPI excluding Housing and Insurance and financial services",
# "All groups CPI excluding Insurance and financial services",
# "All groups CPI excluding Medical and hospital services",
# "All groups CPI excluding Recreation and culture",
# "All groups CPI excluding Transport",
# "All groups CPI excluding food and energy",
# "All groups CPI including deposit and loan facilities (indirect charges)"
# ]
short = df[df['category'].isin(exp_class_list)]
short = short[short['date'] >= since_date]
# Split by cities
cities = list(short['city'].unique())
for city in cities:
temp = short[short['city'] == city]
temp = temp.drop_duplicates(subset=['date','category','value'])
temp_pvt = temp.pivot(index='date',columns='category', values='value')
temp_pvt.to_csv(f'inflation-explorer/assets/{city}-index.csv')
#%%
# Components by year-on-year change
df = pd.read_csv('cpi-pct.csv')
df[['series1','category','city']] = df['series'].str.split(" ; ", expand=True)
cols = list(df.columns)
df = df[['date','value','category','city']]
df['city'] = df['city'].str.replace(" ;","")
df['city'] = df['city'].str.strip()
df['category'] = df['category'].str.strip()
unique_cats = list(df['category'].unique())
exp_class = pd.read_csv("expenditure_classes.csv")
exp_class_list = list(exp_class['expenditure_class'])
exp_class_list.append("All groups CPI")
# exp_class_list.remove("Tobacco")
short = df[df['category'].isin(exp_class_list)]
short = short[short['date'] >= since_date]
# Split by cities
cities = list(short['city'].unique())
for city in cities:
temp = short[short['city'] == city]
temp = temp.drop_duplicates(subset=['date','category','value'])
temp_pvt = temp.pivot(index='date',columns='category', values='value')
temp_pvt.to_csv(f'inflation-explorer/assets/{city}-pct_year.csv')
#%%
current = short[short['date'] == since_date]
current_gp = current.groupby(['category']).count()
current.to_csv('current-pct-change.csv', index=False)