in jobs/desktop-mobile-mau-2020/desktop_mau/desktop_mau_dau.py [0:0]
def _query_to_dataframe(bq_client, bq_storage_client, query):
    """Run ``query`` on ``bq_client`` and return the whole result as a DataFrame."""
    rows = bq_client.query(query).result()
    return rows.to_dataframe(bqstorage_client=bq_storage_client)


def _add_derived_metrics(desktop_data):
    """Return a copy of ``desktop_data`` with the derived columns appended.

    Reads the ``date``, ``country``, ``DAU`` and ``MAU`` columns and adds, in
    this order: ``year``, ``doy``, ``fakedate``, ``DAU_MA7d``, ``MAU_base``,
    ``DAU_MA7d_base``, ``dau_pcnt_Jan01`` and ``mau_pcnt_Jan01``.

    The rows of the returned frame are in the same order as the input rows.
    """
    # The rolling mean and the "first" baselines below are positional, so they
    # only mean "previous 7 days" / "start of year" when each country's rows
    # are visited in ascending date order.  Sort explicitly instead of relying
    # on the order the rows happened to arrive in; the original row order is
    # restored on return.
    ordered = desktop_data.sort_values(["country", "date"])

    dates = pd.DatetimeIndex(ordered["date"])
    ordered["year"] = dates.year
    ordered["doy"] = dates.dayofyear

    # Project every row onto one shared calendar starting at 2017-01-01.
    # NOTE(review): ``dayofyear`` is 1-based, so Jan 1 lands on 2017-01-02.
    # Kept as-is because callers outside this function may rely on the
    # existing offset -- confirm before changing.
    ordered["fakedate"] = pd.to_datetime("20170101") + pd.to_timedelta(
        ordered["doy"], unit="D"
    )

    # 7-row trailing mean of DAU within each country; the first six rows of
    # every country are NaN because the window is not yet full.
    ordered["DAU_MA7d"] = ordered.groupby("country")["DAU"].transform(
        lambda dau: dau.rolling(window=7).mean()
    )

    # Baselines: first value per (country, year).  pandas' "first" skips
    # missing values, so for a group that starts with NaN (e.g. the warm-up
    # rows of the rolling mean) the baseline is the first non-null value.
    by_country_year = ordered.groupby(["country", "year"])
    ordered["MAU_base"] = by_country_year["MAU"].transform("first")
    ordered["DAU_MA7d_base"] = by_country_year["DAU_MA7d"].transform("first")

    # Ratios of each row to its (country, year) baseline.
    ordered["dau_pcnt_Jan01"] = ordered["DAU_MA7d"] / ordered["DAU_MA7d_base"]
    ordered["mau_pcnt_Jan01"] = ordered["MAU"] / ordered["MAU_base"]

    # Undo the working sort so callers see rows in the original merge order.
    return ordered.sort_index()


def fetch_data(project):
    """Query the desktop usage data and return it with derived metrics.

    Runs ``DESKTOP_QUERY`` and ``DESKTOP_USER_STATE_QUERY`` against BigQuery,
    joins the two results on ``date`` and ``country`` (``pd.merge`` default
    join, i.e. only keys present in both results are kept), and adds the
    year / day-of-year columns, a 7-row rolling DAU mean per country, and
    DAU/MAU ratios relative to the first value of each (country, year).

    Side effect: creates ``IMG_DIR`` if it does not already exist.

    Args:
        project: Passed to ``bigquery.Client`` as the ``project`` argument.

    Returns:
        A pandas DataFrame containing the merged query columns plus ``year``,
        ``doy``, ``fakedate``, ``DAU_MA7d``, ``MAU_base``, ``DAU_MA7d_base``,
        ``dau_pcnt_Jan01`` and ``mau_pcnt_Jan01``.
    """
    bq_client = bigquery.Client(project=project)
    bq_storage_client = bigquery_storage_v1beta1.BigQueryStorageClient()
    IMG_DIR.mkdir(exist_ok=True)

    desktop_data = _query_to_dataframe(bq_client, bq_storage_client, DESKTOP_QUERY)
    desktop_user_state_data = _query_to_dataframe(
        bq_client, bq_storage_client, DESKTOP_USER_STATE_QUERY
    )
    desktop_data = pd.merge(
        desktop_data, desktop_user_state_data, on=["date", "country"]
    )
    return _add_derived_metrics(desktop_data)