def _get_quick_rating_stats()

in sourcecode/scoring/pflip_plus_model.py [0:0]


  def _get_quick_rating_stats(self, notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
    """Return counts and ratios of how many ratings occurred soon after note creation.

    For each bucket length (in minutes) in _RATING_TIME_BUCKETS, this computes per note:
      * FIRST_{cutoff}_TOTAL: number of ratings created strictly before the note's
        creation time plus `cutoff` minutes.
      * FIRST_{cutoff}_RATIO: that count divided by the note's total number of ratings.
    Notes without any ratings get counts and ratios of 0.

    Args:
      notes: DF specifying note creation timestamps.  Must contain the c.noteIdKey and
        _NOTE_CREATION_MILLIS columns.
      ratings: DF specifying local rating timestamps.  Must contain the c.noteIdKey and
        c.createdAtMillisKey columns.

    Returns:
      DF with one row per row of `notes`, holding c.noteIdKey followed by the columns
      listed in _QUICK_RATING_COLS.
    """

    def countPerNote(frame: pd.DataFrame, countCol: str) -> pd.DataFrame:
      # Number of rows in `frame` per note, as a two-column DF [noteId, countCol].
      # value_counts() names its result "count" (pandas >= 2.0), hence the rename.
      return (
        frame[[c.noteIdKey]]
        .value_counts()
        .to_frame()
        .reset_index(drop=False)
        .rename(columns={"count": countCol})
      )

    # Left-join onto notes so that notes with no ratings are kept with a total of 0.
    ratingTotals = notes[[c.noteIdKey]].merge(countPerNote(ratings, "total"), how="left")
    ratingTotals = ratingTotals.fillna({"total": 0}).astype(pd.Int64Dtype())
    # Pair every rating with its note's creation time once.  This join does not depend
    # on the cutoff, so it is done outside the loop rather than once per bucket.
    ratingTimes = ratings[[c.noteIdKey, c.createdAtMillisKey]].rename(
      columns={c.createdAtMillisKey: "ratingCreationMts"}
    )
    ratingTimes = ratingTimes.merge(notes[[c.noteIdKey, _NOTE_CREATION_MILLIS]])
    for cutoff in _RATING_TIME_BUCKETS:
      # Keep ratings made within `cutoff` minutes (converted to millis) of note creation.
      beforeCutoff = ratingTimes[
        ratingTimes["ratingCreationMts"]
        < (ratingTimes[_NOTE_CREATION_MILLIS] + (1000 * 60 * cutoff))
      ]
      cutoffCount = countPerNote(beforeCutoff, f"FIRST_{cutoff}_TOTAL")
      # Notes with no ratings inside the window come back as NaN; record them as 0.
      ratingTotals = ratingTotals.merge(cutoffCount, how="left").fillna(0)
    # The left merges above introduce float columns; restore nullable integer counts.
    ratingTotals = ratingTotals.astype(pd.Int64Dtype())
    # Clip the denominator to 1 so notes with zero ratings yield a ratio of 0, not NaN.
    totalOrOne = ratingTotals["total"].clip(lower=1)
    for cutoff in _RATING_TIME_BUCKETS:
      ratingTotals[f"FIRST_{cutoff}_RATIO"] = ratingTotals[f"FIRST_{cutoff}_TOTAL"] / totalOrOne
    return ratingTotals[[c.noteIdKey] + _QUICK_RATING_COLS]