def get_today_stats()

in experiments/notebooks/covid/covid.py [0:0]


def get_today_stats(force = False):
    today_file = datetime.now().strftime('%Y-%m-%d') + '-covid-india-stats.csv'

    if path.exists(today_file) and not force:
        stats_df= pd.read_csv(today_file)
        print('Stats file exists: ' + today_file)
    else:
        print('Creating stats for today...')
        with urllib.request.urlopen('https://www.mohfw.gov.in/') as response:
            page = response.read()
            html = bs4.BeautifulSoup(page, 'lxml')

        df_cols = []
        # stats page has multiple tables
        tables = html.findAll("table", {"class": "table-dark"})
        for table in tables:
            # only stats table has rows (tr tag)
            if table.thead.tr.th.strong.string == 'S. No.':
                for th in table.thead.tr:
                    if th.string:
                        df_cols.append(th.string.strip())
                    else:
                        df_cols.append(th.strong.text.strip())

                while '' in df_cols:
                    df_cols.remove('')

                stats_df = pd.DataFrame(columns = df_cols)

                i = 0
                for tr in table.tbody:
                    df_row = []
                    df_data = []
                    for td in tr:
                        if len(df_row) == len(df_cols):
                            # print(df_row)
                            stats_df.loc[i] = df_row
                            i = i + 1
                            df_row = []
                        if type(td) is bs4.element.Tag:
                            df_row.append(td.string)

        stats_df = stats_df.drop(columns=['S. No.'])
        stats_df = stats_df.rename(columns={'Name of State / UT': 'State',
                    'Total Confirmed cases (Indian National)': 'Indian', 
                    'Total Confirmed cases ( Foreign National )': 'Foreign',
                    'Cured/Discharged/Migrated': 'Discharged'})
        stats_df['Indian'] = stats_df['Indian'].astype(int)
        stats_df['Foreign'] = stats_df['Foreign'].astype(int)
        stats_df['Discharged'] = stats_df['Discharged'].astype(int)
        stats_df['Death'] = stats_df['Death'].astype(int)
        stats_df['Confirmed'] = stats_df['Indian'] + stats_df['Foreign']
        stats_df['Active'] = stats_df['Indian'] + stats_df['Foreign'] - stats_df['Discharged'] - stats_df['Death']

        stats_df.to_csv(today_file, index=False)
        print('Stats file for today saved: ' + today_file)
    
    return stats_df