sourcecode/scoring/pflip_model.py [739:775]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  def _get_label_vector(self, noteInfo: pd.DataFrame) -> np.array:
    """Return a binary vector: 1 where a note's LABEL equals FLIP, 0 otherwise.

    Args:
      noteInfo: pd.DataFrame containing a LABEL column.

    Returns:
      np.ndarray of int8 with one entry per row of noteInfo.
    """
    isFlip = noteInfo[LABEL] == FLIP
    return isFlip.to_numpy(dtype=np.int8)

  def _evaluate_model(
    self, noteInfo: pd.DataFrame, threshold: Optional[float] = None
  ) -> Tuple[float, float, float, float]:
    """Apply the fit pipeline to noteInfo and report threshold, TPR, FPR and AUC.

    Assumes that the pipeline has already been fit.  If a threshold is provided,
    the TPR and FPR are measured at that threshold.  Otherwise, the threshold is
    chosen from the ROC curve as the operating point whose FPR is closest to
    self._crhFpRate.

    Args:
      noteInfo: pd.DataFrame containing raw feature information and labels.
      threshold: optional decision threshold; selected automatically when None.

    Returns:
      Tuple containing the threshold, TPR, FPR and AUC, in that order.
    """
    assert self._pipeline is not None, "pipeline must be fit prior to evaluation"
    labels = self._get_label_vector(noteInfo)
    predictions = self._pipeline.decision_function(noteInfo)
    fpRates, tpRates, thresholds = roc_curve(labels, predictions)
    auc = area_under_curve(fpRates, tpRates)
    if threshold is None:
      # Select the ROC operating point whose FPR is closest to the target rate.
      idx = np.argmin(np.abs(fpRates - self._crhFpRate))
      threshold = thresholds[idx]
      fpr = fpRates[idx]
      tpr = tpRates[idx]
    else:
      # NOTE(review): "predictions > threshold" is strict, whereas roc_curve's
      # operating points use ">=" — confirm the asymmetry is intended.
      # labels=np.arange(2) forces a 2x2 matrix even if a class is absent.
      tn, fp, fn, tp = confusion_matrix(
        labels, (predictions > threshold).astype(np.int8), labels=np.arange(2)
      ).ravel()
      fpr = fp / (fp + tn)
      tpr = tp / (tp + fn)
    # Lazy %-style args avoid formatting work when INFO logging is disabled.
    logger.info("threshold=%s  tpr=%s  fpr=%s  auc=%s", threshold, tpr, fpr, auc)
    return (threshold, tpr, fpr, auc)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



sourcecode/scoring/pflip_plus_model.py [1453:1489]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  def _get_label_vector(self, noteInfo: pd.DataFrame) -> np.array:
    """Return a binary vector: 1 where a note's LABEL equals FLIP, 0 otherwise.

    Args:
      noteInfo: pd.DataFrame containing a LABEL column.

    Returns:
      np.ndarray of int8 with one entry per row of noteInfo.
    """
    isFlip = noteInfo[LABEL] == FLIP
    return isFlip.to_numpy(dtype=np.int8)

  def _evaluate_model(
    self, noteInfo: pd.DataFrame, threshold: Optional[float] = None
  ) -> Tuple[float, float, float, float]:
    """Apply the fit pipeline to noteInfo and report threshold, TPR, FPR and AUC.

    Assumes that the pipeline has already been fit.  If a threshold is provided,
    the TPR and FPR are measured at that threshold.  Otherwise, the threshold is
    chosen from the ROC curve as the operating point whose FPR is closest to
    self._crhFpRate.

    Args:
      noteInfo: pd.DataFrame containing raw feature information and labels.
      threshold: optional decision threshold; selected automatically when None.

    Returns:
      Tuple containing the threshold, TPR, FPR and AUC, in that order.
    """
    assert self._pipeline is not None, "pipeline must be fit prior to evaluation"
    labels = self._get_label_vector(noteInfo)
    predictions = self._pipeline.decision_function(noteInfo)
    fpRates, tpRates, thresholds = roc_curve(labels, predictions)
    auc = area_under_curve(fpRates, tpRates)
    if threshold is None:
      # Select the ROC operating point whose FPR is closest to the target rate.
      idx = np.argmin(np.abs(fpRates - self._crhFpRate))
      threshold = thresholds[idx]
      fpr = fpRates[idx]
      tpr = tpRates[idx]
    else:
      # NOTE(review): "predictions > threshold" is strict, whereas roc_curve's
      # operating points use ">=" — confirm the asymmetry is intended.
      # labels=np.arange(2) forces a 2x2 matrix even if a class is absent.
      tn, fp, fn, tp = confusion_matrix(
        labels, (predictions > threshold).astype(np.int8), labels=np.arange(2)
      ).ravel()
      fpr = fp / (fp + tn)
      tpr = tp / (tp + fn)
    # Lazy %-style args avoid formatting work when INFO logging is disabled.
    logger.info("threshold=%s  tpr=%s  fpr=%s  auc=%s", threshold, tpr, fpr, auc)
    return (threshold, tpr, fpr, auc)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



