in sourcecode/scoring/pflip_plus_model.py [0:0]
def _transform_note_info(self, noteInfo: pd.DataFrame) -> pd.DataFrame:
  """Return a copy of noteInfo with feature columns clipped and log-scaled.

  The input frame is left untouched; all work happens on a copy.  The upper
  clipping bound for every column is read from self._column_thresholds, keyed
  by column name.

  Args:
    noteInfo: DataFrame holding the tag ratio, rating burst, writing latency
      and peer note columns referenced below.

  Returns:
    pd.DataFrame: the transformed copy.
  """
  transformed = noteInfo.copy()
  # Dividing a natural log by ln(2) yields a base-2 logarithm.
  ln2 = np.log(2)

  # Tag ratio columns: missing values become -0.25 (which is also the lower
  # clipping bound), and values are capped at the per-column threshold.
  tagColumns = [
    f"{prefix}_{tag}"
    for prefix in [_LOCAL, _PEER_MISLEADING, _PEER_NON_MISLEADING]
    for tagset in [c.notHelpfulTagsTSVOrder, c.helpfulTagsTSVOrder]
    for tag in tagset
  ]
  for name in tagColumns:
    cap = self._column_thresholds[name]
    filled = transformed[name].fillna(-0.25)
    transformed[name] = filled.clip(-0.25, cap)

  # Rating burst columns: ratios are only clipped to [0, cap]; totals are
  # clipped and then mapped through log2(1 + x).
  for name in [*_QUICK_RATING_COLS, *_BURST_RATING_COLS, *_RECENT_RATING_COLS]:
    cap = self._column_thresholds[name]
    if "RATIO" not in name:
      # Every non-ratio column in these sets is expected to be a total.
      assert "TOTAL" in name
      transformed[name] = np.log(1 + transformed[name].clip(0, cap)) / ln2
      continue
    transformed[name] = transformed[name].clip(0, cap)

  # Writing latency: clipped to [0, cap], then log2 *without* the +1 offset
  # used for the count columns.
  # NOTE(review): a latency of exactly 0 therefore maps to -inf -- confirm that
  # zero latencies cannot occur upstream or that -inf is handled downstream.
  latencyCap = self._column_thresholds[_NOTE_WRITING_LATENCY]
  boundedLatency = transformed[_NOTE_WRITING_LATENCY].clip(0, latencyCap)
  transformed[_NOTE_WRITING_LATENCY] = np.log(boundedLatency) / ln2

  # Peer note counts: clipped to [0, cap], then log2(1 + x).
  peerNoteColumns = (
    _TOTAL_PEER_NOTES,
    _TOTAL_PEER_MISLEADING_NOTES,
    _TOTAL_PEER_NON_MISLEADING_NOTES,
    _TOTAL_PEER_CRH_NOTES,
    _TOTAL_PEER_STABILIZATION_NOTES,
  )
  for name in peerNoteColumns:
    cap = self._column_thresholds[name]
    transformed[name] = np.log(1 + transformed[name].clip(0, cap)) / ln2

  return transformed