core/fuzz/fuzz_reader.rs (196 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#![no_main]
use std::io::SeekFrom;
use bytes::Bytes;
use libfuzzer_sys::arbitrary::Arbitrary;
use libfuzzer_sys::arbitrary::Unstructured;
use libfuzzer_sys::fuzz_target;
use rand::prelude::*;
use sha2::Digest;
use sha2::Sha256;
use opendal::raw::oio::ReadExt;
use opendal::raw::BytesRange;
use opendal::Operator;
use opendal::Result;
mod utils;
/// Upper bound (16 MiB) on the size of the data generated for one fuzz case;
/// `FuzzInput::arbitrary` draws the total size from `1..=MAX_DATA_SIZE`.
const MAX_DATA_SIZE: usize = 16 * 1024 * 1024;
/// One operation that the fuzz driver replays against the reader under test.
#[derive(Debug, Clone)]
enum ReadAction {
/// Call `read` with a freshly allocated buffer of `size` bytes.
Read { size: usize },
/// Call `seek` with the given position.
Seek(SeekFrom),
/// Call `next` to pull the following chunk of bytes.
Next,
}
/// A complete fuzz case: how much data to write, which range to open the
/// reader on, and the sequence of actions to replay against that reader.
#[derive(Debug, Clone)]
struct FuzzInput {
// Number of random bytes written to storage before reading.
size: usize,
// Byte range used both to open the reader and to compute the expected data.
range: BytesRange,
// Actions replayed in order against the reader.
actions: Vec<ReadAction>,
}
impl Arbitrary<'_> for FuzzInput {
fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
let total_size = u.int_in_range(1..=MAX_DATA_SIZE)?;
// TODO: it's valid that size is larger than total_size.
let (offset, size) = match u.int_in_range(0..=3)? {
// Full range
0 => (None, None),
1 => {
let offset = u.int_in_range(0..=total_size as u64 - 1)?;
(Some(offset), None)
}
2 => {
let size = u.int_in_range(1..=total_size as u64)?;
(None, Some(size))
}
3 => {
let offset = u.int_in_range(0..=total_size as u64 - 1)?;
let size = u.int_in_range(1..=total_size as u64 - offset)?;
(Some(offset), Some(size))
}
_ => unreachable!("invalid int generated by arbitrary"),
};
let range = BytesRange::new(offset, size);
let count = u.int_in_range(1..=1024)?;
let mut actions = vec![];
for _ in 0..count {
let action = match u.int_in_range(0..=4)? {
// Read
0 => {
let size = u.int_in_range(0..=total_size * 2)?;
ReadAction::Read { size }
}
// Next
1 => ReadAction::Next,
// Seek Start
2 => {
// NOTE: seek out of the end of file is valid.
let offset = u.int_in_range(0..=total_size * 2)?;
ReadAction::Seek(SeekFrom::Start(offset as u64))
}
// Seek Current
3 => {
let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?;
ReadAction::Seek(SeekFrom::Current(offset))
}
// Seek End
4 => {
let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?;
ReadAction::Seek(SeekFrom::End(offset))
}
_ => unreachable!("invalid int generated by arbitrary"),
};
actions.push(action);
}
Ok(FuzzInput {
size: total_size,
range,
actions,
})
}
}
/// Reference model used to validate what the reader under test returns.
///
/// It keeps the bytes that were written, the bytes expected to be visible
/// through the ranged reader, and the position the reader is expected to be at.
struct ReadChecker {
/// Raw Data is the data we write to the storage.
raw_data: Bytes,
/// Ranged Data is the data that we read from the storage.
ranged_data: Bytes,
/// Expected reader position within `ranged_data`; updated by the
/// `check_read`, `check_seek` and `check_next` methods.
cur: usize,
}
impl ReadChecker {
    /// Creates a checker holding `size` freshly generated random bytes.
    ///
    /// `raw_data` is the full random payload, `ranged_data` is the result of
    /// applying `range` to that payload, and the expected position starts at 0.
    fn new(size: usize, range: BytesRange) -> Self {
        let mut rng = thread_rng();
        let mut data = vec![0; size];
        rng.fill_bytes(&mut data);

        let raw_data = Bytes::from(data);
        let ranged_data = range.apply_on_bytes(raw_data.clone());

        Self {
            raw_data,
            ranged_data,
            cur: 0,
        }
    }

    /// Validates a `read` call that reported `n` bytes, delivered in `output`.
    ///
    /// A zero-length read must carry no bytes and leaves the position
    /// untouched. Otherwise `output` must have the same SHA-256 digest as the
    /// next `n` bytes of `ranged_data`, and the position advances by `n`.
    ///
    /// # Panics
    ///
    /// Panics if the read extends past the end of `ranged_data` or if the
    /// returned bytes differ from the expected ones.
    fn check_read(&mut self, n: usize, output: &[u8]) {
        if n == 0 {
            assert_eq!(
                output.len(),
                0,
                "check read failed: output bs is not empty when read size is 0"
            );
            return;
        }

        // Diagnose an over-long read explicitly instead of letting the slice
        // below fail with a generic index-out-of-range panic.
        assert!(
            self.cur + n <= self.ranged_data.len(),
            "check read failed: output bs is larger than remaining bs",
        );

        let expected = &self.ranged_data[self.cur..self.cur + n];

        // Compare hex digests so a mismatch prints two short strings rather
        // than two potentially huge byte slices.
        assert_eq!(
            format!("{:x}", Sha256::digest(output)),
            format!("{:x}", Sha256::digest(expected)),
            "check read failed: output bs is different with expected bs",
        );

        // Update the current position
        self.cur += n;
    }

    /// Validates the result of seeking to `seek_from`.
    ///
    /// The expected absolute position is `offset` for `Start`,
    /// `ranged_data.len() + offset` for `End`, and `cur + offset` for
    /// `Current`. A negative expected position must be rejected with
    /// `ErrorKind::InvalidInput` and leaves the position unchanged. Any other
    /// position (including one past the end) must be returned verbatim and
    /// becomes the new position.
    ///
    /// # Panics
    ///
    /// Panics if the seek outcome does not match the expectation above.
    fn check_seek(&mut self, seek_from: SeekFrom, output: Result<u64>) {
        let expected = match seek_from {
            SeekFrom::Start(offset) => offset as i64,
            SeekFrom::End(offset) => self.ranged_data.len() as i64 + offset,
            SeekFrom::Current(offset) => self.cur as i64 + offset,
        };

        if expected < 0 {
            match output {
                Ok(_) => panic!("check seek failed: seek should fail"),
                Err(err) => assert_eq!(
                    err.kind(),
                    opendal::ErrorKind::InvalidInput,
                    "check seek failed: seek result is different with expected result"
                ),
            }
            return;
        }

        assert_eq!(
            output.expect("check seek failed: seek should succeed"),
            expected as u64,
            "check seek failed: seek result is different with expected result",
        );

        // only update the current position when seek succeed
        self.cur = expected as usize;
    }

    /// Validates the result of a `next` call.
    ///
    /// A returned chunk must fit in the remaining part of `ranged_data` and
    /// match it byte-for-byte (compared via SHA-256); the position then
    /// advances by the chunk length. `None` is only acceptable once the
    /// position is at or beyond the end of `ranged_data`.
    ///
    /// # Panics
    ///
    /// Panics if the chunk overruns or differs from the expected data, or if
    /// `None` is returned while bytes remain.
    fn check_next(&mut self, output: Option<Bytes>) {
        if let Some(output) = output {
            let end = self.cur + output.len();
            assert!(
                end <= self.ranged_data.len(),
                "check next failed: output bs is larger than remaining bs",
            );

            assert_eq!(
                format!("{:x}", Sha256::digest(&output)),
                format!("{:x}", Sha256::digest(&self.ranged_data[self.cur..end])),
                "check next failed: output bs is different with expected bs",
            );

            // update the current position
            self.cur = end;
        } else {
            assert!(
                self.cur >= self.ranged_data.len(),
                "check next failed: output bs is None, we still have bytes to read",
            )
        }
    }
}
async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> {
let path = uuid::Uuid::new_v4().to_string();
let mut checker = ReadChecker::new(input.size, input.range);
op.write(&path, checker.raw_data.clone()).await?;
let mut o = op.range_reader(&path, input.range.to_range()).await?;
for action in input.actions {
match action {
ReadAction::Read { size } => {
let mut buf = vec![0; size];
let n = o.read(&mut buf).await?;
checker.check_read(n, &buf[..n]);
}
ReadAction::Seek(seek_from) => {
let res = o.seek(seek_from).await;
checker.check_seek(seek_from, res);
}
ReadAction::Next => {
let res = o.next().await.transpose()?;
checker.check_next(res);
}
}
}
op.delete(&path).await?;
Ok(())
}
// Fuzz entry point: replays the generated `FuzzInput` against every operator
// returned by `utils::init_services()`.
fuzz_target!(|input: FuzzInput| {
    // Best-effort load of a `.env` file; the result is deliberately ignored.
    let _ = dotenvy::dotenv();

    let runtime = tokio::runtime::Runtime::new().expect("init runtime must succeed");

    for op in utils::init_services() {
        runtime.block_on(async {
            // `expect` keeps the underlying error in the panic message, so a
            // crash report shows why the case failed.
            fuzz_reader(op, input.clone())
                .await
                .expect("fuzz reader must succeed");
        })
    }
});