tools/scd-bq-to-bq/scd_operations.py [214:266]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Rotate replacement values through each listed column of `scd_staging`
        # so that rows end up holding a value different from the one they had.
        # `scd_column_list` is split on "," only; surrounding whitespace in a
        # column name is NOT stripped (assumes a plain comma-separated string
        # -- TODO confirm against the caller).
        columns_to_process = scd_column_list.split(",")

        for current_column in columns_to_process:
            # Get unique values for SCD generation: the first
            # `unique_scd_keys_for_generation` distinct values of the source
            # column, taken in the order they appear in `df`.
            scd_array = (
                df[current_column]
                .drop_duplicates()
                .head(unique_scd_keys_for_generation)
                .values
            )
            if scd_array.size == 0:  # Handle potential empty array
                scd_array = [None]  # Use a list with None if the array is empty

            array_len = len(scd_array)
            print("For the Column:", current_column)
            print("Array Length:", array_len)  # Print the array length for debugging
            # Spread the records evenly over the available values: each value
            # gets a window of roughly total_records / array_len ids.
            batch_size = max(
                int(total_records / array_len), 1
            )  # Ensure batch_size is at least 1
            print("Batch Size:", batch_size)  # Print the batch size for debugging
            # `i` is the first id of the current window; starting at 1 presumes
            # the "id" column is 1-based -- TODO confirm.
            i = 1
            array_index = 0
            # `== False` is an element-wise Series comparison here, not a
            # scalar truth test.
            unporcessed_record_count = len(
                scd_staging[scd_staging["processed_flag"] == False]
            )

            # NOTE(review): termination relies on every unprocessed row
            # eventually meeting an array value different from its own. If
            # only one distinct non-null value is available (array_len == 1)
            # and some row already holds it, the mask below never selects that
            # row and this loop appears never to exit -- confirm whether
            # callers guarantee at least two distinct values.
            # NOTE(review): the same loop also appears in
            # tools/scd-file-to-file/scd_operations.py; keep the two in sync.
            while (
                unporcessed_record_count > 0
            ):  # Loop while there are unprocessed records
                # Window of ids [j, k] handled by this iteration; the array
                # value is chosen cyclically so it wraps once all are used.
                j = i
                array_value = scd_array[array_index % array_len]
                k = min(i + batch_size - 1, total_records)

                # Candidate rows: not yet processed AND currently holding a
                # value other than the one about to be written. Rows that
                # already hold `array_value` are left for a later iteration.
                mask = (scd_staging[current_column] != array_value) & (
                    scd_staging["processed_flag"] == False
                )

                # Once `i` has moved past total_records the id window is
                # dropped, so later iterations sweep up every remaining
                # unprocessed row regardless of its id.
                if i <= total_records:  # Only apply id filter if within range
                    mask = mask & (scd_staging["id"] >= j) & (scd_staging["id"] <= k)

                # Overwrite the value, stamp the change time and mark the rows
                # done. The timestamp is taken per iteration, so different
                # batches receive slightly different times.
                scd_staging.loc[mask, current_column] = array_value
                scd_staging.loc[mask, effective_to_date_column] = pd.Timestamp.now()
                scd_staging.loc[mask, "processed_flag"] = True

                # Recount after the update to decide whether to keep looping.
                unporcessed_record_count = len(
                    scd_staging[scd_staging["processed_flag"] == False]
                )
                i += batch_size
                array_index += 1

            scd_staging["processed_flag"] = (
                False  # Reset processed flag for next column
            )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


tools/scd-file-to-file/scd_operations.py [66:118]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Rotate replacement values through each listed column of `scd_staging`
        # so that rows end up holding a value different from the one they had.
        # `scd_column_list` is split on "," only; surrounding whitespace in a
        # column name is NOT stripped (assumes a plain comma-separated string
        # -- TODO confirm against the caller).
        columns_to_process = scd_column_list.split(",")

        for current_column in columns_to_process:
            # Get unique values for SCD generation: the first
            # `unique_scd_keys_for_generation` distinct values of the source
            # column, taken in the order they appear in `df`.
            scd_array = (
                df[current_column]
                .drop_duplicates()
                .head(unique_scd_keys_for_generation)
                .values
            )
            if scd_array.size == 0:  # Handle potential empty array
                scd_array = [None]  # Use a list with None if the array is empty

            array_len = len(scd_array)
            print("For the Column:", current_column)
            print("Array Length:", array_len)  # Print the array length for debugging
            # Spread the records evenly over the available values: each value
            # gets a window of roughly total_records / array_len ids.
            batch_size = max(
                int(total_records / array_len), 1
            )  # Ensure batch_size is at least 1
            print("Batch Size:", batch_size)  # Print the batch size for debugging
            # `i` is the first id of the current window; starting at 1 presumes
            # the "id" column is 1-based -- TODO confirm.
            i = 1
            array_index = 0
            # `== False` is an element-wise Series comparison here, not a
            # scalar truth test.
            unporcessed_record_count = len(
                scd_staging[scd_staging["processed_flag"] == False]
            )

            # NOTE(review): termination relies on every unprocessed row
            # eventually meeting an array value different from its own. If
            # only one distinct non-null value is available (array_len == 1)
            # and some row already holds it, the mask below never selects that
            # row and this loop appears never to exit -- confirm whether
            # callers guarantee at least two distinct values.
            # NOTE(review): the same loop also appears in
            # tools/scd-bq-to-bq/scd_operations.py; keep the two in sync.
            while (
                unporcessed_record_count > 0
            ):  # Loop while there are unprocessed records
                # Window of ids [j, k] handled by this iteration; the array
                # value is chosen cyclically so it wraps once all are used.
                j = i
                array_value = scd_array[array_index % array_len]
                k = min(i + batch_size - 1, total_records)

                # Candidate rows: not yet processed AND currently holding a
                # value other than the one about to be written. Rows that
                # already hold `array_value` are left for a later iteration.
                mask = (scd_staging[current_column] != array_value) & (
                    scd_staging["processed_flag"] == False
                )

                # Once `i` has moved past total_records the id window is
                # dropped, so later iterations sweep up every remaining
                # unprocessed row regardless of its id.
                if i <= total_records:  # Only apply id filter if within range
                    mask = mask & (scd_staging["id"] >= j) & (scd_staging["id"] <= k)

                # Overwrite the value, stamp the change time and mark the rows
                # done. The timestamp is taken per iteration, so different
                # batches receive slightly different times.
                scd_staging.loc[mask, current_column] = array_value
                scd_staging.loc[mask, effective_to_date_column] = pd.Timestamp.now()
                scd_staging.loc[mask, "processed_flag"] = True

                # Recount after the update to decide whether to keep looping.
                unporcessed_record_count = len(
                    scd_staging[scd_staging["processed_flag"] == False]
                )
                i += batch_size
                array_index += 1

            scd_staging["processed_flag"] = (
                False  # Reset processed flag for next column
            )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -