fn fix_repeated_string()

in amzn-smt-string-transformer/src/string_mappings.rs [197:306]


/// Perturbs the candidate mapped string `to_ret` until it no longer conflicts with the
/// mappings already stored in `char_map.string_map`, then returns it.
///
/// A candidate conflicts with an existing `(key, val)` entry when either:
/// * `val` is equal to the candidate, or
/// * `consider_substrings` is set, the entry's `string_lit_props` record has
///   `keep_substrings: true`, and the candidate and `val` are in a containment relation
///   (in either direction) that does not hold between `orig_string` and `key`.
///
/// Each round replaces the character at the current index with a character obtained from
/// `update_and_get_next_char`, then moves on to the next index. Once every index has been
/// visited the offset grows, and, when neither length nor substrings must be preserved
/// (`!len_bool && !consider_substrings`), so does the number of copies inserted.
///
/// # Errors
///
/// Returns `StringMapError::RemapRetryCountExceeded` once `retries_allowed` replacements
/// have been performed and the candidate still conflicts. Iterations that skip an index
/// without modifying the string do not count towards this limit.
///
/// # Panics
///
/// Panics if `to_ret` is empty, because the first character is read unconditionally.
///
/// NOTE(review): `retry_ind` is used both as a char position (`chars().nth`) and as a byte
/// offset (`replace_range`, `len`), which only agree for ASCII strings. This presumably
/// relies on mapped strings being ASCII; confirm against the callers.
fn fix_repeated_string(
    orig_string: String,
    to_ret: String,
    char_map: &mut CharMap,
    retries_allowed: u8,
    keep_ints: KeepInts,
    consider_substrings: bool,
    len_bool: bool,
) -> Result<String, StringMapError> {
    let mut to_ret = to_ret; // candidate string that is perturbed and eventually returned
    let mut retry_ind: usize = 0; // index in the candidate currently being perturbed
    let mut retry_add = 1; // offset added to the current char when asking for a replacement
    let mut cur_try: u8 = 0; // number of replacements performed so far
    let mut retry_char = to_ret.chars().nth(retry_ind).unwrap(); // char the replacement is derived from
    // number of copies of the replacement char written per replacement
    // (stays 1 whenever the length or substring properties need to be maintained)
    let mut reps = 1;
    while char_map.string_map.iter().any(|(key, val)| {
        // can't map to a string we already used
        val == &to_ret
            // more checks required if we need to keep substrings
            || (consider_substrings
                // containment between mapped strings must mirror containment between the
                // originals; both directions are checked because the length may have changed
                && ((!orig_string.contains(key) && to_ret.contains(val))
                    || (!key.contains(&orig_string) && val.contains(&to_ret)))
                // only string literals that take part in substring operations matter here
                && (matches!(
                    char_map.string_lit_props.get(key),
                    Some(StringSetProperties::Some {
                        len: _,
                        ranges: _,
                        keep_ints: _,
                        keep_substrings: true,
                        keep_prefix_suffix: _,
                    })
                )))
    }) {
        // get next non-range char in the charmap
        let candidate = char::from_u32(update_and_get_next_char(
            char_map,
            (retry_char as u32) + retry_add,
            false, // don't modify the charmap
        ));
        // an invalid or non-ASCII replacement cannot be used: restart from the first index
        // of the string with a larger offset
        let next_char = match candidate.filter(char::is_ascii) {
            Some(c) => c,
            None => {
                retry_ind = 0;
                retry_char = to_ret.chars().nth(retry_ind).unwrap();
                retry_add += 1;
                continue;
            }
        };

        // if the current char belongs to a range, the replacement has to stay inside it
        let outside_range = if let Some(RangeParams {
            start_char: s,
            end_char: e,
            ..
        }) = char_map.get_backwards_range_for_char(retry_char)
        {
            next_char < s || next_char > e
        } else {
            false
        };
        // respect ints properties if required: digits are left untouched
        let protected_digit = matches!(keep_ints, KeepInts::AsInts | KeepInts::ExactInts)
            && retry_char.is_ascii_digit();
        if outside_range || protected_digit {
            // this index can't be perturbed; move on to the next one, growing the offset
            // each time we wrap around to the start of the string
            retry_ind = (retry_ind + 1) % to_ret.len();
            if retry_ind == 0 {
                retry_add += 1;
            }
            retry_char = to_ret.chars().nth(retry_ind).unwrap();
            continue;
        }

        // error out if we have exceeded retries allowed; checking before incrementing keeps
        // the u8 counter from overflowing when `retries_allowed` is `u8::MAX`
        if cur_try >= retries_allowed {
            return Err(StringMapError::RemapRetryCountExceeded);
        }
        cur_try += 1;

        // actually modify the string
        // `reps` is only ever raised above 1 when neither length nor substrings are tracked,
        // so no special-casing of the first/last position is needed here
        to_ret.replace_range(
            retry_ind..(retry_ind + 1),
            &next_char.to_string().repeat(reps),
        );
        // NOTE(review): `retry_char` is not refreshed after a successful replacement, so the
        // next round still derives its replacement (and its range/digit checks) from the
        // previous char — confirm this is intended.
        retry_ind = (retry_ind + 1) % to_ret.len();
        if retry_ind == 0 {
            retry_add += 1;
            // if we don't need to keep length or substrings, can start repeating characters
            if !len_bool && !consider_substrings {
                reps += 1;
            }
        }
    }
    Ok(to_ret)
}