in sql/moz-fx-data-shared-prod/cloudflare_derived/browser_usage_v1/query.py [0:0]
def get_browser_data(date_of_interest, auth_token):
    """Pull browser data for each combination of the configs from the Cloudflare API.

    Always runs with a lag of 4 days: the requested window starts at
    ``date_of_interest - 4 days`` and ends one day after that start date.

    One request is issued per (device type, location, operating system,
    user type) combination listed in ``brwsr_usg_configs``. Successful
    responses contribute one result row per browser in ``result["top_0"]``;
    a combination whose response reports ``success`` other than ``True``, or
    whose request/parsing raises, contributes one row to the errors output.
    Both outputs are written as CSV to the paths built from
    ``brwsr_usg_configs`` (the staging GCS locations, per the config key
    names).

    Args:
        date_of_interest: Logical DAG date as a ``YYYY-MM-DD`` string.
        auth_token: Token sent as ``Authorization: Bearer <token>``.

    Returns:
        A two-element list ``[number_of_result_rows, number_of_error_rows]``
        with both counts rendered as strings.

    Raises:
        ValueError: If ``date_of_interest`` is not in ``YYYY-MM-DD`` format.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from itertools import product

    result_columns = [
        "StartTime",
        "EndTime",
        "DeviceType",
        "Location",
        "UserType",
        "Browser",
        "OperatingSystem",
        "PercentShare",
        "ConfLevel",
        "Normalization",
        "LastUpdated",
    ]
    error_columns = [
        "StartTime",
        "EndTime",
        "Location",
        "UserType",
        "DeviceType",
        "OperatingSystem",
    ]

    # Calculate start date and end date
    logical_dag_dt = date_of_interest
    logical_dag_dt_as_date = datetime.strptime(logical_dag_dt, "%Y-%m-%d").date()
    start_date = logical_dag_dt_as_date - timedelta(days=4)
    end_date = start_date + timedelta(days=1)
    print("Start Date: ", start_date)
    print("End Date: ", end_date)

    # Configure request headers
    headers = {"Authorization": f"Bearer {auth_token}"}
    limit = brwsr_usg_configs["max_limit"]

    # Rows are accumulated as plain dicts and turned into DataFrames once at
    # the end; concatenating DataFrames inside the loop re-copies all earlier
    # rows on every iteration.
    result_rows = []
    error_rows = []

    # Loop through the combinations (same order as four nested loops)
    combinations = product(
        brwsr_usg_configs["device_types"],
        brwsr_usg_configs["locations"],
        brwsr_usg_configs["operating_systems"],
        brwsr_usg_configs["user_types"],
    )
    for device_type, loc, operating_system, user_type in combinations:
        curr_combo = f"DeviceType: {device_type}, Loc: {loc}, OS: {operating_system}, UserType: {user_type}, Limit: {limit}"
        print(curr_combo)

        # Generate the URL & call the API
        brwsr_usg_api_url = generate_browser_api_call(
            start_date, end_date, device_type, loc, operating_system, user_type, limit
        )

        # Row recorded if this combination fails for any reason.
        error_row = {
            "StartTime": start_date,
            "EndTime": end_date,
            "Location": loc,
            "UserType": user_type,
            "DeviceType": device_type,
            "OperatingSystem": operating_system,
        }

        try:
            response = requests.get(
                brwsr_usg_api_url,
                headers=headers,
                timeout=brwsr_usg_configs["timeout_limit"],
            )
            response_json = json.loads(response.text)
            api_succeeded = response_json["success"] is True

            # Build this combination's rows into a temporary list so that a
            # malformed payload never leaves partial rows in the results.
            new_rows = []
            if api_succeeded:
                result = response_json["result"]
                meta = result["meta"]
                confidence_level = meta["confidenceInfo"]["level"]
                normalization = meta["normalization"]
                last_updated = meta["lastUpdated"]
                start_time = meta["dateRange"][0]["startTime"]
                end_time = meta["dateRange"][0]["endTime"]
                new_rows = [
                    {
                        "StartTime": start_time,
                        "EndTime": end_time,
                        "DeviceType": device_type,
                        "Location": loc,
                        "UserType": user_type,
                        "Browser": browser["name"],
                        "OperatingSystem": operating_system,
                        "PercentShare": browser["value"],
                        "ConfLevel": confidence_level,
                        "Normalization": normalization,
                        "LastUpdated": last_updated,
                    }
                    for browser in result["top_0"]
                ]
        except Exception as err:
            # Best-effort: one failing combination must not abort the run, but
            # KeyboardInterrupt/SystemExit are deliberately not caught here.
            print("Request failed for: ", curr_combo, " Error: ", repr(err))
            error_rows.append(error_row)
            continue

        if api_succeeded:
            result_rows.extend(new_rows)
        else:
            # The API answered but flagged the call as unsuccessful.
            error_rows.append(error_row)

    # Passing columns explicitly keeps the CSV header stable even when a
    # list is empty.
    browser_results_df = pd.DataFrame(result_rows, columns=result_columns)
    browser_errors_df = pd.DataFrame(error_rows, columns=error_columns)

    # LOAD RESULTS & ERRORS TO STAGING GCS
    result_fpath = brwsr_usg_configs["bucket"] + brwsr_usg_configs[
        "results_stg_gcs_fpth"
    ] % (start_date, logical_dag_dt)
    error_fpath = brwsr_usg_configs["bucket"] + brwsr_usg_configs[
        "errors_stg_gcs_fpth"
    ] % (start_date, logical_dag_dt)
    browser_results_df.to_csv(result_fpath, index=False)
    browser_errors_df.to_csv(error_fpath, index=False)
    print("Wrote errors to: ", error_fpath)
    print("Wrote results to: ", result_fpath)

    # Return a summary to the console
    len_results = str(len(browser_results_df))
    len_errors = str(len(browser_errors_df))
    return [len_results, len_errors]