def get_browser_data()

in sql/moz-fx-data-shared-prod/cloudflare_derived/browser_usage_v1/query.py [0:0]


def _browser_error_row(
    start_date, end_date, loc, user_type, device_type, operating_system
):
    """Build one errors-table row identifying a combination whose API call failed."""
    return {
        "StartTime": start_date,
        "EndTime": end_date,
        "Location": loc,
        "UserType": user_type,
        "DeviceType": device_type,
        "OperatingSystem": operating_system,
    }


def _browser_result_rows(result, device_type, loc, user_type, operating_system):
    """Flatten the ``result`` payload of a successful response into one row per browser.

    Raises KeyError/IndexError/TypeError if the payload does not have the
    expected ``meta`` / ``top_0`` structure; the caller treats that as a failure.
    """
    meta = result["meta"]
    date_range = meta["dateRange"][0]
    confidence_level = meta["confidenceInfo"]["level"]
    normalization = meta["normalization"]
    last_updated = meta["lastUpdated"]
    start_time = date_range["startTime"]
    end_time = date_range["endTime"]

    return [
        {
            "StartTime": start_time,
            "EndTime": end_time,
            "DeviceType": device_type,
            "Location": loc,
            "UserType": user_type,
            "Browser": browser["name"],
            "OperatingSystem": operating_system,
            "PercentShare": browser["value"],
            "ConfLevel": confidence_level,
            "Normalization": normalization,
            "LastUpdated": last_updated,
        }
        for browser in result["top_0"]
    ]


def get_browser_data(date_of_interest, auth_token):
    """Pull browser data for each combination of the configs from the Cloudflare API, always runs with a lag of 4 days.

    For every (device type, location, operating system, user type) combination
    in ``brwsr_usg_configs`` one API request is made for the one-day window that
    starts 4 days before ``date_of_interest``. Successful responses are
    flattened to one row per browser; unsuccessful responses and any exception
    raised while requesting or parsing are recorded as one error row for that
    combination. Both tables are then written as CSV files to the staging
    paths built from ``brwsr_usg_configs``.

    Args:
        date_of_interest: Logical run date as a ``YYYY-MM-DD`` string.
        auth_token: Token sent as ``Authorization: Bearer <token>``.

    Returns:
        A two-element list of strings: ``[number of result rows, number of error rows]``.

    Raises:
        ValueError: If ``date_of_interest`` is not in ``YYYY-MM-DD`` format.
    """
    # Calculate start date and end date
    logical_dag_dt = date_of_interest
    logical_dag_dt_as_date = datetime.strptime(logical_dag_dt, "%Y-%m-%d").date()
    start_date = logical_dag_dt_as_date - timedelta(days=4)
    end_date = start_date + timedelta(days=1)
    print("Start Date: ", start_date)
    print("End Date: ", end_date)

    # Configure request headers
    headers = {"Authorization": f"Bearer {auth_token}"}
    limit = brwsr_usg_configs["max_limit"]
    timeout = brwsr_usg_configs["timeout_limit"]

    # Rows are accumulated in plain lists and converted to DataFrames once at
    # the end; concatenating DataFrames inside the loop re-copies every
    # previously collected row on each iteration.
    result_rows = []
    error_rows = []

    # Loop through the combinations
    for device_type in brwsr_usg_configs["device_types"]:
        for loc in brwsr_usg_configs["locations"]:
            # Named operating_system (not os) to avoid shadowing the stdlib module name.
            for operating_system in brwsr_usg_configs["operating_systems"]:
                for user_type in brwsr_usg_configs["user_types"]:
                    curr_combo = f"DeviceType: {device_type}, Loc: {loc}, OS: {operating_system}, UserType: {user_type}, Limit: {limit}"
                    print(curr_combo)

                    # Generate the URL & call the API
                    brwsr_usg_api_url = generate_browser_api_call(
                        start_date,
                        end_date,
                        device_type,
                        loc,
                        operating_system,
                        user_type,
                        limit,
                    )
                    error_row = _browser_error_row(
                        start_date,
                        end_date,
                        loc,
                        user_type,
                        device_type,
                        operating_system,
                    )
                    try:
                        response = requests.get(
                            brwsr_usg_api_url,
                            headers=headers,
                            timeout=timeout,
                        )
                        response_json = json.loads(response.text)

                        if response_json["success"] is True:
                            # Parse fully before extending so that a malformed
                            # payload never leaves partial rows in the results.
                            new_rows = _browser_result_rows(
                                response_json["result"],
                                device_type,
                                loc,
                                user_type,
                                operating_system,
                            )
                            result_rows.extend(new_rows)
                        else:
                            # The API answered but reported a failure.
                            error_rows.append(error_row)
                    except Exception as err:
                        # Best effort: one failing combination (network error,
                        # timeout, invalid JSON, unexpected payload shape) must
                        # not abort the remaining ones. Exception (rather than a
                        # bare except) lets KeyboardInterrupt/SystemExit through.
                        print("Request failed for: ", curr_combo, " - ", repr(err))
                        error_rows.append(error_row)

    # Explicit column lists keep the CSV header and column order stable even
    # when there are no rows at all.
    browser_results_df = pd.DataFrame(
        result_rows,
        columns=[
            "StartTime",
            "EndTime",
            "DeviceType",
            "Location",
            "UserType",
            "Browser",
            "OperatingSystem",
            "PercentShare",
            "ConfLevel",
            "Normalization",
            "LastUpdated",
        ],
    )
    browser_errors_df = pd.DataFrame(
        error_rows,
        columns=[
            "StartTime",
            "EndTime",
            "Location",
            "UserType",
            "DeviceType",
            "OperatingSystem",
        ],
    )

    # LOAD RESULTS & ERRORS TO STAGING GCS
    result_fpath = brwsr_usg_configs["bucket"] + brwsr_usg_configs[
        "results_stg_gcs_fpth"
    ] % (start_date, logical_dag_dt)
    error_fpath = brwsr_usg_configs["bucket"] + brwsr_usg_configs[
        "errors_stg_gcs_fpth"
    ] % (start_date, logical_dag_dt)
    browser_results_df.to_csv(result_fpath, index=False)
    browser_errors_df.to_csv(error_fpath, index=False)
    print("Wrote errors to: ", error_fpath)
    print("Wrote results to: ", result_fpath)

    # Return a summary to the console
    len_results = str(len(browser_results_df))
    len_errors = str(len(browser_errors_df))
    return [len_results, len_errors]