def _transform_note_info()

in sourcecode/scoring/pflip_plus_model.py [0:0]


  def _transform_note_info(self, noteInfo: pd.DataFrame) -> pd.DataFrame:
    """Clip and rescale the feature columns of noteInfo using the stored thresholds.

    All work is done on a copy of noteInfo, which is what gets returned.  Every
    upper clipping bound is read from self._column_thresholds, keyed by column name.

    Transformations applied, by column group:
      * Tag ratio columns (every "{prefix}_{tag}" combination): missing values are
        filled with -0.25, then values are clipped to [-0.25, threshold].
      * Quick / burst / recent rating columns: clipped to [0, threshold]; columns
        without "RATIO" in their name must contain "TOTAL" and are additionally
        mapped through log2(1 + x).
      * Note writing latency: clipped to [0, threshold], then mapped through log2(x).
      * Peer note count columns: clipped to [0, threshold], then log2(1 + x).

    Args:
      noteInfo: DataFrame containing all of the columns listed above.

    Returns:
      A new DataFrame with the listed columns replaced by their transformed values.
    """
    transformed = noteInfo.copy()
    caps = self._column_thresholds

    def log2OnePlus(values):
      # log2(1 + x), expressed through natural logs.
      return np.log(1 + values) / np.log(2)

    # Tag ratio columns: NaN is mapped to the -0.25 floor before clipping.
    tagColumns = [
      f"{prefix}_{tag}"
      for prefix in [_LOCAL, _PEER_MISLEADING, _PEER_NON_MISLEADING]
      for tagset in [c.notHelpfulTagsTSVOrder, c.helpfulTagsTSVOrder]
      for tag in tagset
    ]
    for tagColumn in tagColumns:
      cap = caps[tagColumn]
      transformed[tagColumn] = transformed[tagColumn].fillna(-0.25).clip(-0.25, cap)

    # Rating burst columns: ratios are only clipped, totals are clipped and log-scaled.
    for colset in [_QUICK_RATING_COLS, _BURST_RATING_COLS, _RECENT_RATING_COLS]:
      for ratingColumn in colset:
        cap = caps[ratingColumn]
        isRatio = "RATIO" in ratingColumn
        if not isRatio:
          # Anything that is not a ratio column is expected to be a total column.
          assert "TOTAL" in ratingColumn
        capped = transformed[ratingColumn].clip(0, cap)
        transformed[ratingColumn] = capped if isRatio else log2OnePlus(capped)

    # Writing latency: plain log2 with no +1 offset.
    # NOTE(review): a latency that clips to 0 becomes -inf here -- confirm that
    # upstream data guarantees strictly positive latencies.
    latencyCap = caps[_NOTE_WRITING_LATENCY]
    cappedLatency = transformed[_NOTE_WRITING_LATENCY].clip(0, latencyCap)
    transformed[_NOTE_WRITING_LATENCY] = np.log(cappedLatency) / np.log(2)

    # Peer note counts: clipped and log-scaled.
    for peerColumn in (
      _TOTAL_PEER_NOTES,
      _TOTAL_PEER_MISLEADING_NOTES,
      _TOTAL_PEER_NON_MISLEADING_NOTES,
      _TOTAL_PEER_CRH_NOTES,
      _TOTAL_PEER_STABILIZATION_NOTES,
    ):
      cap = caps[peerColumn]
      transformed[peerColumn] = log2OnePlus(transformed[peerColumn].clip(0, cap))

    return transformed