def _get_pair_tuples_optimized()

in sourcecode/scoring/post_selection_similarity_old.py [0:0]


def _get_pair_tuples_optimized(ratings, windowMillis):
    """Collect rater pairs whose ratings on the same note fall within a time window.

    Ratings are ordered by note and creation time, then each note's ratings are
    scanned with a sliding window. Every two ratings on the same note whose
    creation times differ by at most ``windowMillis`` and whose raters differ
    contribute one tuple to the result.

    Args:
        ratings: DataFrame containing the columns named by ``c.noteIdKey``,
            ``c.createdAtMillisKey``, ``c.raterParticipantIdKey`` and
            ``c.tweetIdKey``.
        windowMillis: Maximum allowed difference between the creation times of
            two ratings, in the same units as the ``c.createdAtMillisKey``
            column. A negative value matches no pairs.

    Returns:
        A list of ``(leftRater, rightRater, tweetId)`` tuples. The two rater
        ids are in ascending order, and ``tweetId`` is read from the first row
        of the note's group after sorting. A tuple is emitted once per
        qualifying pair of ratings, so the same tuple may appear repeatedly.
    """
    # Sorting by note, then time, lets each note's ratings be scanned in
    # chronological order with a single forward-moving window.
    ratings_sorted = ratings.sort_values([c.noteIdKey, c.createdAtMillisKey])

    tuples = []

    # sort=False: rows are already ordered by note id, so groupby need not
    # re-sort the group keys.
    for _, group in ratings_sorted.groupby(c.noteIdKey, sort=False):
        # Plain arrays make the positional indexing in the inner loops cheap.
        times = group[c.createdAtMillisKey].values
        raters = group[c.raterParticipantIdKey].values
        priorTweet = group[c.tweetIdKey].iloc[0]

        window_start = 0  # Index of the earliest rating still inside the window
        for i in range(len(group)):
            # Drop ratings that are now too old relative to rating i. The
            # `window_start < i` bound keeps the index inside the array even
            # when windowMillis is negative (a rating's zero distance to
            # itself would otherwise exceed the window and push the index
            # past the end).
            while window_start < i and times[i] - times[window_start] > windowMillis:
                window_start += 1

            # Pair rating i with every earlier rating still inside the window.
            for j in range(window_start, i):
                if raters[i] != raters[j]:
                    # Order the two ids so a given pair always has the same
                    # tuple representation regardless of who rated first.
                    leftRater, rightRater = sorted((raters[i], raters[j]))
                    tuples.append((leftRater, rightRater, priorTweet))

    return tuples