fn test_repair_and_delete_content()

in eden/scm/lib/indexedlog/src/log/tests.rs [1018:1333]


/// Exercises `OpenOptions::repair` and `OpenOptions::delete_content` against
/// a log directory that is damaged in many different ways.
///
/// The first half of the test overwrites, replaces or deletes the primary
/// log file, the index file and the metadata file, then checks both the
/// human-readable repair message and the number of entries that can be read
/// back afterwards (via iteration and via index lookup).
///
/// The second half checks that `delete_content` always leaves an empty log
/// behind, whether the directory is missing, healthy, corrupted, or has some
/// of its files deleted.
///
/// Throughout, a long-lived `Log` handle is kept around (on unix only) and is
/// poked after every repair / delete to make sure that reading through it
/// does not crash the process with SIGBUS and that `sync` recovers it.
///
/// The steps share on-disk state, so their order matters.
fn test_repair_and_delete_content() {
    let dir = tempdir().unwrap();
    let path = dir.path();
    let open_opts = OpenOptions::new().create(true).index_defs(vec![
        IndexDef::new("c", |_| vec![IndexOutput::Reference(0..1)]).lag_threshold(5000),
    ]);

    // Placeholder handle opened from `()` (presumably an in-memory log -
    // TODO confirm). It is swapped for an on-disk log on unix further down.
    let long_lived_log = RefCell::new(open_opts.open(()).unwrap());
    let open = || open_opts.open(path);
    // Overwrite two bytes of `name` at `offset`. Negative offsets are used
    // below to address positions relative to the end of the file.
    let corrupt = |name: &str, offset: i64| pwrite(&path.join(name), offset, b"cc");
    // Replace the whole file with a short garbage payload.
    let truncate = |name: &str| fs::write(path.join(name), "garbage").unwrap();
    let delete = |name: &str| fs::remove_file(path.join(name)).unwrap();
    let index_file = "index2-c";
    // Append three 50_000-byte entries, one per index key ('x', 'y', 'z').
    let append = || {
        let mut log = open().unwrap();
        log.append(&[b'x'; 50_000][..]).unwrap();
        log.append(&[b'y'; 50_000][..]).unwrap();
        log.append(&[b'z'; 50_000][..]).unwrap();
        log.sync().unwrap();
    };
    // Returns (entries seen by iterating the log, entries found through the
    // index for keys "x", "y" and "z"). Any read error is propagated.
    let count = || -> crate::Result<(usize, usize)> {
        let log = open()?;
        let log_len = log.iter().collect::<Result<Vec<_>, _>>()?.len();
        let mut index_len = 0;
        for key in [b"x", b"y", b"z"] {
            let iter = log.lookup(0, key)?;
            index_len += iter.into_vec()?.len();
        }
        Ok((log_len, index_len))
    };
    // Both the log and the index must agree on the expected entry count.
    let verify_len = |len: usize| {
        let (log_len, index_len) = count().unwrap();
        assert_eq!(log_len, len);
        assert_eq!(index_len, len);
    };
    // Reading must fail, and the failure must be classified as corruption.
    let verify_corrupted = || {
        let err = count().unwrap_err();
        assert!(err.is_corruption(), "not a corruption:\n {:?}", err);
    };
    let try_trigger_sigbus = || {
        // Check no SIGBUS: both lookup and materialization may succeed or
        // fail here; the only requirement is that the process survives.
        let log = long_lived_log.borrow();
        if let Ok(iter) = log.lookup(0, "z") {
            let _ = iter.into_vec();
        }
        // Check 'sync' on a long-lived log will load the right data and
        // resolve errors.
        let mut cloned_log = log.try_clone().unwrap();
        cloned_log.sync().unwrap();
        let _ = cloned_log.lookup(0, "z").unwrap().into_vec().unwrap();
    };
    // Run repair, probe the long-lived log, and return the repair message
    // with the unstable lines stripped so it can be compared verbatim.
    let repair = || {
        let message = open_opts.repair(path).unwrap();
        try_trigger_sigbus();
        message
            .lines()
            // Remove 'Backed up' lines since they have dynamic file names.
            .filter(|l| !l.contains("Backed up"))
            // 'Processing' lines are dropped as well; they are not part of
            // the messages asserted below.
            .filter(|l| !l.contains("Processing"))
            .collect::<Vec<_>>()
            .join("\n")
    };

    // Repair is a no-op if log and indexes pass integrity check.
    append();
    verify_len(3);
    assert_eq!(
        repair(),
        r#"Verified 3 entries, 150048 bytes in log
Index "c" passed integrity check"#
    );

    append();
    verify_len(6);
    assert_eq!(
        repair(),
        r#"Verified 6 entries, 300084 bytes in log
Index "c" passed integrity check"#
    );

    // Prepare long-lived log for SIGBUS check
    // (skip on Windows, since mmap makes it impossible to replace files)
    if cfg!(unix) {
        long_lived_log.replace(open().unwrap());
    }

    // Corrupt the end of log
    corrupt(PRIMARY_FILE, -1);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified first 5 entries, 250072 of 300084 bytes in log
Reset log size to 250072
Index "c" is incompatible with (truncated) log
Rebuilt index "c""#
    );
    verify_len(5);

    // Corrupt the middle of log
    corrupt(PRIMARY_FILE, 125_000);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified first 2 entries, 100036 of 250072 bytes in log
Reset log size to 100036
Index "c" is incompatible with (truncated) log
Rebuilt index "c""#
    );
    verify_len(2);

    append();
    verify_len(5);

    // Change the beginning of log. The log is still readable at this point
    // (verify_len passes); repair reports that it fixed the header.
    corrupt(PRIMARY_FILE, 1);
    verify_len(5);
    assert_eq!(
        repair(),
        r#"Fixed header in log
Verified 5 entries, 250072 bytes in log
Index "c" passed integrity check"#
    );

    // Corrupt the end of index
    corrupt(index_file, -1);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified 5 entries, 250072 bytes in log
Rebuilt index "c""#
    );
    verify_len(5);

    // Corrupt the beginning of index
    corrupt(index_file, 1);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified 5 entries, 250072 bytes in log
Rebuilt index "c""#
    );
    verify_len(5);

    // Replace index with garbage
    truncate(index_file);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified 5 entries, 250072 bytes in log
Rebuilt index "c""#
    );
    verify_len(5);

    // Replace log with garbage
    truncate(PRIMARY_FILE);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Fixed header in log
Extended log to 250072 bytes required by meta
Verified first 0 entries, 12 of 250072 bytes in log
Reset log size to 12
Index "c" is incompatible with (truncated) log
Rebuilt index "c""#
    );
    verify_len(0);

    append();
    verify_len(3);

    // Delete index
    delete(index_file);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified 3 entries, 150048 bytes in log
Rebuilt index "c""#
    );
    verify_len(3);

    // Delete log
    delete(PRIMARY_FILE);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Fixed header in log
Extended log to 150048 bytes required by meta
Verified first 0 entries, 12 of 150048 bytes in log
Reset log size to 12
Index "c" is incompatible with (truncated) log
Rebuilt index "c""#
    );
    verify_len(0);

    // Corrupt the middle of index. This test wants to be able
    // to make it okay to open Index, but not okay to use it at
    // some random place. The index checksum chunk size is 1MB
    // so the index has to be a few MBs to be able to pass checksum
    // check at Index open time.
    // To do that, insert a lot of entries to the log.
    //
    // Practically, this should show "Index .. failed integrity check".
    let append_many_entries = || {
        let mut log = open().unwrap();
        for _ in 0..200_000 {
            log.append(&[b'z'; 1][..]).unwrap();
        }
        log.sync().unwrap();
    };
    append_many_entries();
    corrupt(index_file, -1_000_000);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Verified 200000 entries, 1400012 bytes in log
Index "c" failed integrity check
Rebuilt index "c""#
    );
    verify_len(200_000);

    // Corrupt meta
    crate::utils::atomic_write(&path.join(META_FILE), b"xxx", false).unwrap();
    corrupt(PRIMARY_FILE, 1000);
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Rebuilt metadata
Verified first 141 entries, 999 of 1400012 bytes in log
Reset log size to 999
Rebuilt index "c""#
    );
    verify_len(141);

    crate::utils::atomic_write(&path.join(META_FILE), b"yyy", false).unwrap();
    verify_corrupted();
    assert_eq!(
        repair(),
        r#"Rebuilt metadata
Verified first 141 entries, 999 of 1400012 bytes in log
Reset log size to 999
Rebuilt index "c""#
    );
    verify_len(141);

    // Delete meta - as if the log directory does not exist.
    delete(META_FILE);
    assert_eq!(
        repair(),
        r#"Rebuilt metadata
Verified first 141 entries, 999 of 1400012 bytes in log
Reset log size to 999
Rebuilt index "c""#
    );
    verify_len(141);

    // ---- delete_content ----

    let len = |name: &str| path.join(name).metadata().unwrap().len();
    // Shadows the earlier `append`: same three entries, but additionally
    // asserts the resulting on-disk sizes. It is only valid on an empty log,
    // which is what `delete_content` leaves behind before each call.
    let append = || {
        let mut log = open().unwrap();
        log.append(&[b'x'; 50_000][..]).unwrap();
        log.append(&[b'y'; 50_000][..]).unwrap();
        log.append(&[b'z'; 50_000][..]).unwrap();
        log.sync().unwrap();
        assert_eq!(len(PRIMARY_FILE), PRIMARY_START_OFFSET + 150036);
        assert_eq!(len(index_file), 100);
    };
    // Wipe the log and check that the files shrink to their empty sizes,
    // the long-lived handle stays usable, and no entries remain.
    let delete_content = || {
        open_opts.delete_content(path).unwrap();
        assert_eq!(len(PRIMARY_FILE), PRIMARY_START_OFFSET);
        assert_eq!(len(index_file), 25);
        // Check SIGBUS
        try_trigger_sigbus();
        // Check log is empty
        verify_len(0);
    };

    // 'dir' does not exist - delete_content creates the log
    fs::remove_dir_all(path).unwrap();
    delete_content();

    // Normal log
    append();
    if cfg!(unix) {
        long_lived_log.replace(open().unwrap());
    }
    delete_content();

    // Corrupt log
    append();
    corrupt(PRIMARY_FILE, -75_000);
    delete_content();

    // Corrupt index
    append();
    corrupt(index_file, -10);
    delete_content();

    // Corrupt log and index
    append();
    corrupt(PRIMARY_FILE, -25_000);
    corrupt(index_file, -10);
    delete_content();

    // Deleted various files
    delete(index_file);
    delete_content();

    delete(PRIMARY_FILE);
    delete_content();

    delete(META_FILE);
    delete_content();
}