in experiments/notebooks/covid/covid.py [0:0]
def _find_stats_table(html):
    """Return the first ``table-dark`` table whose header starts with 'S. No.'.

    Args:
        html: Parsed ``bs4.BeautifulSoup`` document.

    Returns:
        The matching ``<table>`` tag, or ``None`` when no table qualifies.
    """
    # The page has multiple tables with this class; the statistics table is
    # recognised by its first header cell, a bold 'S. No.'.
    for table in html.find_all("table", {"class": "table-dark"}):
        try:
            first_header = table.thead.tr.th.strong.string
        except AttributeError:
            # A missing <thead>/<tr>/<th>/<strong> yields None part-way down
            # the chain; such a table cannot be the statistics table.
            continue
        if first_header == 'S. No.':
            return table
    return None


def _parse_stats_table(table):
    """Convert the statistics ``<table>`` tag into a DataFrame of raw strings.

    Column names come from the header row; every complete group of
    ``len(columns)`` cells in a body row becomes one DataFrame row. Body rows
    with fewer cells than columns are dropped.
    """
    columns = []
    for th in table.thead.tr:
        # Whitespace between tags appears as string children of the row; they
        # strip to '' and are skipped. A header cell with several children has
        # no single ``.string``, so fall back to the text of its <strong> child.
        label = th.string.strip() if th.string else th.strong.text.strip()
        if label:
            columns.append(label)

    width = len(columns)
    rows = []
    for tr in table.tbody:
        if not isinstance(tr, bs4.element.Tag):
            continue
        # Copy each cell to a plain str so the DataFrame does not hold
        # references into the parse tree; cells without a single string stay
        # None.
        cells = [
            None if td.string is None else str(td.string)
            for td in tr
            if isinstance(td, bs4.element.Tag)
        ]
        # Keep every complete group of cells, including the last one even when
        # no further child node follows it.
        for start in range(0, len(cells) - width + 1, width):
            rows.append(cells[start:start + width])
    return pd.DataFrame(rows, columns=columns)


def _tidy_stats(raw_df):
    """Drop the serial-number column, shorten names, and add derived counts."""
    tidy = raw_df.drop(columns=['S. No.']).rename(
        columns={'Name of State / UT': 'State',
                 'Total Confirmed cases (Indian National)': 'Indian',
                 'Total Confirmed cases ( Foreign National )': 'Foreign',
                 'Cured/Discharged/Migrated': 'Discharged'})
    for column in ('Indian', 'Foreign', 'Discharged', 'Death'):
        tidy[column] = tidy[column].astype(int)
    tidy['Confirmed'] = tidy['Indian'] + tidy['Foreign']
    tidy['Active'] = tidy['Confirmed'] - tidy['Discharged'] - tidy['Death']
    return tidy


def get_today_stats(force=False):
    """Return today's state-wise COVID statistics as a DataFrame.

    The result is cached in ``<YYYY-MM-DD>-covid-india-stats.csv`` (relative
    path, local date). When that file exists and ``force`` is false it is read
    back; otherwise https://www.mohfw.gov.in/ is downloaded, its statistics
    table is parsed, and the CSV is (re)written.

    Args:
        force: When true, ignore an existing CSV for today and scrape again.

    Returns:
        A DataFrame with the columns ``State``, ``Indian``, ``Foreign``,
        ``Discharged``, ``Death``, ``Confirmed`` (Indian + Foreign) and
        ``Active`` (Confirmed - Discharged - Death).

    Raises:
        ValueError: If the downloaded page contains no statistics table.
    """
    today_file = datetime.now().strftime('%Y-%m-%d') + '-covid-india-stats.csv'
    if path.exists(today_file) and not force:
        stats_df = pd.read_csv(today_file)
        print('Stats file exists: ' + today_file)
        return stats_df

    print('Creating stats for today...')
    with urllib.request.urlopen('https://www.mohfw.gov.in/') as response:
        page = response.read()
    html = bs4.BeautifulSoup(page, 'lxml')

    table = _find_stats_table(html)
    if table is None:
        raise ValueError(
            "No 'table-dark' table with an 'S. No.' header was found on the page")

    stats_df = _tidy_stats(_parse_stats_table(table))
    stats_df.to_csv(today_file, index=False)
    print('Stats file for today saved: ' + today_file)
    return stats_df