in lain/src/new_fuzzed.rs [495:603]
/// Produces a randomly selected [`Utf8Char`].
///
/// The selection strategy is adapted from the `char` generator in
/// BurntSushi's quickcheck library:
/// https://github.com/BurntSushi/quickcheck/blob/b3e50a5e7c85e19538cf8612d9fd6da32c588930/src/arbitrary.rs#L573-L637
///
/// A number is drawn via `mutator.gen_range(0, 100)` and selects one of six
/// strategies:
///
/// * `0..=49`  - a value drawn from `gen_range(0, 0xB0)`, narrowed to a byte
///   and then widened to a `char`
/// * `50..=59` - a value drawn from `gen_range(0, 0x10000)`, re-drawn until
///   it is a valid `char`
/// * `60..=84` - a character commonly seen in programming languages
/// * `85..=89` - a hand-picked "tricky" Unicode character
/// * `90..=94` - a value drawn from `gen_range(0x2000, 0x2070)`
/// * `95..=99` - whatever `mutator.gen()` produces for a `char`
///
/// `_constraints` is not consulted.
///
/// # Panics
///
/// Hits `unreachable!()` if the initial draw falls outside `0..=99`.
fn new_fuzzed<R: crate::rand::Rng>(
    mutator: &mut crate::mutator::Mutator<R>,
    _constraints: Option<&Constraints<Self::RangeType>>,
) -> Self {
    // Characters often used in programming languages. The space appears
    // three times, which makes it more likely to be picked than the others.
    const PROGRAMMING_CHARS: &[char] = &[
        // whitespace
        ' ', ' ', ' ', '\t', '\n',
        // punctuation and operators
        '~', '`', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '_', '-', '=', '+',
        '[', ']', '{', '}', ':', ';', '\'', '"', '\\', '|', ',', '<', '>', '.', '/', '?',
        // digits
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ];

    // Tricky Unicode, part 1.
    const TRICKY_CHARS: &[char] = &[
        // a deprecated character
        '\u{0149}',
        // some of the "Other, format" category
        '\u{fff0}', '\u{fff1}', '\u{fff2}', '\u{fff3}',
        '\u{fff4}', '\u{fff5}', '\u{fff6}', '\u{fff7}',
        '\u{fff8}', '\u{fff9}', '\u{fffA}', '\u{fffB}',
        '\u{fffC}', '\u{fffD}', '\u{fffE}', '\u{fffF}',
        '\u{0600}', '\u{0601}', '\u{0602}', '\u{0603}', '\u{0604}', '\u{0605}',
        '\u{061C}', '\u{06DD}', '\u{070F}', '\u{180E}',
        '\u{110BD}', '\u{1D173}',
        // tag, then tag space
        '\u{e0001}', '\u{e0020}',
        // private use
        // NOTE(review): U+EF8FF lies outside the BMP private-use range
        // (U+E000..=U+F8FF); it may be an upstream typo for U+F8FF - confirm.
        '\u{e000}', '\u{e001}', '\u{ef8ff}',
        '\u{f0000}', '\u{ffffd}', '\u{ffffe}', '\u{fffff}',
        '\u{100000}', '\u{10FFFD}', '\u{10FFFE}', '\u{10FFFF}',
        // "Other, surrogate" code points are not representable as a `char`
        // in safe Rust, so none are listed here.
        //
        // ideographic space
        '\u{3000}',
        '\u{1680}',
        // Other space characters are left to the remaining strategies.
    ];

    trace!("generating random UTF8 char");

    let strategy = mutator.gen_range(0, 100);
    let generated: char = match strategy {
        0..=49 => mutator.gen_range(0, 0xB0) as u8 as char,
        // Draw again whenever the value is not a valid `char`; this skips
        // the surrogate range.
        50..=59 => loop {
            match char::from_u32(mutator.gen_range(0, 0x10000)) {
                Some(valid) => break valid,
                None => continue,
            }
        },
        60..=84 => *PROGRAMMING_CHARS.choose(&mut mutator.rng).unwrap(),
        85..=89 => *TRICKY_CHARS.choose(&mut mutator.rng).unwrap(),
        // Tricky Unicode, part 2.
        90..=94 => char::from_u32(mutator.gen_range(0x2000, 0x2070)).unwrap(),
        // Completely arbitrary characters.
        95..=99 => mutator.gen(),
        _ => unreachable!(),
    };

    Utf8Char(generated)
}