in rust/src/main.rs [368:406]
/// Command-line entry point for the CCQA WARC processor.
///
/// Takes two required positional arguments:
///
/// 1. `input_file`  - path of the WARC file handed to `minify`.
/// 2. `output_file` - path the pretty-printed JSON result is written to.
///
/// The output file is created if it does not exist and truncated if it does,
/// so that a shorter result never leaves stale bytes from a previous run
/// behind the freshly written JSON.
///
/// # Errors
///
/// Returns an error if the result of `minify` cannot be serialized to JSON,
/// or if the output file cannot be opened or written.
fn main() -> std::io::Result<()> {
    let matches = App::new("CCQA WARC Processor")
        .version("1.0")
        .author("Patrick Huber <huberpat@cs.ubc.ca> and Armen Aghajanyan <armenag@fb.com>")
        .about("Common Crawl Question Answering (CCQA) WARC processor for in-domain pre-training corpora")
        .arg(
            Arg::with_name("input_file")
                .help("WARC input file")
                .required(true)
                .index(1),
        )
        .arg(
            Arg::with_name("output_file")
                .help("Minified HTML (mhtml) output file path")
                .required(true)
                .index(2),
        )
        .get_matches();

    // Both positionals are declared `required(true)` above, so a missing value
    // here would be a programming error rather than a user error.
    let file_path = matches
        .value_of("input_file")
        .expect("`input_file` is declared as a required argument");
    let output_file_path = matches
        .value_of("output_file")
        .expect("`output_file` is declared as a required argument");

    // Main function of the script called here
    let minified = minify(file_path);
    let json_val = serde_json::to_string_pretty(&minified)?;

    // `truncate(true)` discards any previous content: without it, overwriting
    // a longer existing file would leave its tail after the new JSON.
    let mut file = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .open(output_file_path)
        .map_err(|err| {
            // Keep the original error kind, but say which step failed.
            std::io::Error::new(
                err.kind(),
                format!("Failed to open output file: {}", err),
            )
        })?;
    file.write_all(json_val.as_bytes())?;

    Ok(())
}