compiler/rustc_codegen_gcc/build_system/src/fuzz.rs - rust - Git at Google

 use std::ffi::OsStr;
 use std::path::Path;

 mod reduce;

 use crate::utils::run_command_with_output;

 /// Prints the help text of the `fuzz` command to stdout.
 fn show_usage() {
     // Kept as one raw string so the option table is printed exactly as laid out here.
     const USAGE: &str = r#"
 `fuzz` command help:
     --reduce               : Reduces a file generated by rustlantis
     --help                 : Show this help
     --start                : Start of the fuzzed range
     --count                : The number of cases to fuzz
     -j --jobs              : The number of threads to use during fuzzing"#;
     println!("{USAGE}");
 }

 /// Entry point of the `fuzz` command.
 ///
 /// Parses the command-line options, then either reduces a single file (`--reduce`),
 /// prints the help (`--help`), or clones, updates and builds rustlantis in
 /// `clones/rustlantis` and fuzzes the seeds `start..start + count`.
 ///
 /// # Errors
 ///
 /// Returns a message when an option is unknown, lacks its value or has a value that
 /// does not parse, when the thread count is zero, when `start + count` overflows,
 /// or when cloning, updating or building rustlantis fails.
 pub fn run() -> Result<(), String> {
     // We skip binary name and the `fuzz` command.
     let mut args = std::env::args().skip(2);
     let mut start: u64 = 0;
     let mut count: u64 = 100;
     let mut threads =
         std::thread::available_parallelism().map(|threads| threads.get()).unwrap_or(1);
     // `while let` (not `for`): option values are pulled from `args` inside the loop.
     while let Some(arg) = args.next() {
         match arg.as_str() {
             "--reduce" => {
                 let Some(path) = args.next() else {
                     return Err("--reduce must be provided with a path".into());
                 };
                 if !std::fs::exists(&path).unwrap_or(false) {
                     return Err("--reduce must be provided with a valid path".into());
                 }
                 reduce::reduce(&path);
                 return Ok(());
             }
             "--help" => {
                 show_usage();
                 return Ok(());
             }
             "--start" => start = parse_option_value(args.next(), "start")?,
             "--count" => count = parse_option_value(args.next(), "count")?,
             "-j" | "--jobs" => threads = parse_option_value(args.next(), "thread count")?,
             _ => return Err(format!("Unknown option {arg}")),
         }
     }

     // Without any worker nothing would ever advance the seed counter.
     if threads == 0 {
         return Err("Fuzz thread count must be at least 1!".to_string());
     }
     // Reject ranges whose end does not fit in a `u64` instead of overflowing.
     let end = start
         .checked_add(count)
         .ok_or_else(|| format!("Fuzz range {start} + {count} is too large!"))?;

     // Ensure that we have a cloned version of rustlantis on hand.
     crate::utils::git_clone(
         "https://github.com/cbeuw/rustlantis.git",
         Some("clones/rustlantis".as_ref()),
         true,
     )
     .map_err(|err| (format!("Git clone failed with message: {err:?}!")))?;

     // Ensure that we are on the newest rustlantis commit.
     let cmd: &[&dyn AsRef<OsStr>] = &[&"git", &"pull", &"origin"];
     run_command_with_output(cmd, Some(Path::new("clones/rustlantis")))?;

     // Build the release version of rustlantis
     let cmd: &[&dyn AsRef<OsStr>] = &[&"cargo", &"build", &"--release"];
     run_command_with_output(cmd, Some(Path::new("clones/rustlantis")))?;
     // Fuzz a given range
     fuzz_range(start, end, threads);
     Ok(())
 }

 /// Parses the value that follows a command-line option.
 ///
 /// `what` names the option in the error messages (e.g. `"start"`).
 fn parse_option_value<T>(value: Option<String>, what: &str) -> Result<T, String>
 where
     T: std::str::FromStr,
     T::Err: std::fmt::Debug,
 {
     value
         .ok_or_else(|| format!("Fuzz {what} not provided!"))?
         .parse::<T>()
         .map_err(|err| format!("Fuzz {what} not a number {err:?}!"))
 }

 /// Fuzzes a range `start..end` with `threads`.
 fn fuzz_range(start: u64, end: u64, threads: usize) {
     use std::sync::Arc;
     use std::sync::atomic::{AtomicU64, Ordering};
     use std::time::{Duration, Instant};
     // Total amount of files to fuzz
     let total = end - start;
     // Currently fuzzed element
     let start = Arc::new(AtomicU64::new(start));
     // Count time during fuzzing
     let start_time = Instant::now();
     let mut workers = Vec::with_capacity(threads);
     // Spawn `threads`..
     for _ in 0..threads {
         let start = start.clone();
         // .. which each will ..
         workers.push(std::thread::spawn(move || {
             // ... grab the next fuzz seed ...
             while start.load(Ordering::Relaxed) < end {
                 let next = start.fetch_add(1, Ordering::Relaxed);
                 // .. test that seed .
                 match test(next, false) {
                     Err(err) => {
                         // If the test failed at compile-time...
                         println!("test({next}) failed because {err:?}");
                         // ... copy that file to the directory `target/fuzz/compiletime_error`...
                         let mut out_path: std::path::PathBuf =
                             "target/fuzz/compiletime_error".into();
                         std::fs::create_dir_all(&out_path).unwrap();
                         // .. into a file named `fuzz{seed}.rs`.
                         out_path.push(format!("fuzz{next}.rs"));
                         std::fs::copy(err, out_path).unwrap();
                     }
                     Ok(Err(err)) => {
                         // If the test failed at run-time...
                         println!("The LLVM and GCC results don't match for {err:?}");
                         // ... generate a new file, which prints temporaries(instead of hashing them)...
                         let mut out_path: std::path::PathBuf = "target/fuzz/runtime_error".into();
                         std::fs::create_dir_all(&out_path).unwrap();
                         let Ok(Err(tmp_print_err)) = test(next, true) else {
                             // ... if that file does not reproduce the issue...
                             // ... save the original sample in a file named `fuzz{seed}.rs`...
                             out_path.push(format!("fuzz{next}.rs"));
                             std::fs::copy(err, &out_path).unwrap();
                             continue;
                         };
                         // ... if that new file still produces the issue, copy it to `fuzz{seed}.rs`..
                         out_path.push(format!("fuzz{next}.rs"));
                         std::fs::copy(tmp_print_err, &out_path).unwrap();
                         // ... and start reducing it, using some properties of `rustlantis` to speed up the process.
                         reduce::reduce(&out_path);
                     }
                     // If the test passed, do nothing
                     Ok(Ok(())) => (),
                 }
             }
         }));
     }
     // The "manager" thread loop.
     while start.load(Ordering::Relaxed) < end || !workers.iter().all(|t| t.is_finished()) {
         // Every 500 ms...
         let five_hundred_millis = Duration::from_millis(500);
         std::thread::sleep(five_hundred_millis);
         // ... calculate the remaining fuzz iters ...
         let remaining = end - start.load(Ordering::Relaxed);
         // ... fix the count(the start counter counts the cases that
         // begun fuzzing, and not only the ones that are done)...
         let fuzzed = (total - remaining).saturating_sub(threads as u64);
         // ... and the fuzz speed ...
         let iter_per_sec = fuzzed as f64 / start_time.elapsed().as_secs_f64();
         // .. and use them to display fuzzing stats.
         println!(
             "fuzzed {fuzzed} cases({}%), at rate {iter_per_sec} iter/s, remaining ~{}s",
             (100 * fuzzed) as f64 / total as f64,
             (remaining as f64) / iter_per_sec
         )
     }
     drop(workers);
 }

 /// Builds & runs a file with LLVM.
 ///
 /// Compiles `path` with plain `rustc` into a sibling file with the `llvm_elf`
 /// extension, runs it and returns its stdout followed by its stderr.
 fn debug_llvm(path: &std::path::Path) -> Result<Vec<u8>, String> {
     let exe_path = path.with_extension("llvm_elf");
     let mut compile = std::process::Command::new("rustc");
     compile.arg(path).arg("-o").arg(&exe_path);
     build_and_run(compile, "LLVM", path, &exe_path)
 }

 /// Builds & runs a file with GCC.
 ///
 /// Compiles `path` with `./y.sh rustc -O` into a sibling file with the `gcc_elf`
 /// extension, runs it and returns its stdout followed by its stderr.
 fn release_gcc(path: &std::path::Path) -> Result<Vec<u8>, String> {
     let exe_path = path.with_extension("gcc_elf");
     let mut compile = std::process::Command::new("./y.sh");
     compile.arg("rustc").arg(path).arg("-O").arg("-o").arg(&exe_path);
     build_and_run(compile, "GCC", path, &exe_path)
 }

 /// Runs the prepared `compile` command, executes the resulting `exe_path` and removes it.
 ///
 /// `backend` only labels the error messages. The executable is removed even when the
 /// program fails, so failing samples do not leave binaries behind.
 fn build_and_run(
     mut compile: std::process::Command,
     backend: &str,
     path: &Path,
     exe_path: &Path,
 ) -> Result<Vec<u8>, String> {
     let output = compile.output().map_err(|err| format!("{err:?}"))?;
     // ... check that the compilation succeeded ...
     if !output.status.success() {
         return Err(format!("{backend} compilation failed:{output:?}"));
     }
     // ... run the resulting executable ...
     let run = std::process::Command::new(exe_path).output();
     // ... cleanup that executable before inspecting the run, whatever its outcome ...
     let cleanup = std::fs::remove_file(exe_path);
     let output = run.map_err(|err| format!("{err:?}"))?;
     // ... check it run normally ...
     if !output.status.success() {
         return Err(format!(
             "The program at {path:?}, compiled with {backend}, exited unsuccessfully:{output:?}"
         ));
     }
     cleanup.map_err(|err| format!("{err:?}"))?;
     // ... and return the output(stdout + stderr - this allows UB checks to fire).
     let mut res = output.stdout;
     res.extend(output.stderr);
     Ok(res)
 }
 /// Cache filled by `test_cached`: the LLVM output paired with the GCC output that
 /// was observed when the cache was filled. `None` means nothing is cached yet.
 type ResultCache = Option<(Vec<u8>, Vec<u8>)>;
 /// Generates the rustlantis program for `seed` and checks that GCC and LLVM agree on it.
 ///
 /// The outer `Err` carries a message from generating, building or running the program;
 /// the inner `Err` carries the path of a source file on which the two backends disagree.
 fn test(seed: u64, print_tmp_vars: bool) -> Result<Result<(), std::path::PathBuf>, String> {
     // Generate the source, then compare the backends, removing the file when they agree.
     generate(seed, print_tmp_vars).and_then(|generated| test_file(&generated, true))
 }
 /// Tests a file against a cached LLVM result. Meant for reduction, where a
 /// transformation is known not to change the execution result.
 ///
 /// GCC is always run. LLVM is only run when `cache` is empty, in which case the
 /// cache is filled with the LLVM output and the current GCC output. A mismatch is
 /// reported only when GCC disagrees with the cached LLVM output while still
 /// matching the cached GCC output. Otherwise the file counts as passing and is
 /// deleted when `remove_tmps` is set.
 fn test_cached(
     source_file: &Path,
     remove_tmps: bool,
     cache: &mut ResultCache,
 ) -> Result<Result<(), std::path::PathBuf>, String> {
     // GCC runs first, exactly once per call.
     let gcc_res = release_gcc(source_file)?;
     // Reuse the cached pair, or build it from a fresh LLVM run.
     let (llvm_res, old_gcc) = match cache {
         Some(cached) => cached,
         None => cache.insert((debug_llvm(source_file)?, gcc_res.clone())),
     };
     let mismatch = *llvm_res != gcc_res && gcc_res == *old_gcc;
     if !mismatch {
         if remove_tmps {
             std::fs::remove_file(source_file).map_err(|err| format!("{err:?}"))?;
         }
         return Ok(Ok(()));
     }
     // The backends disagree: hand the offending file back to the caller.
     Ok(Err(source_file.to_path_buf()))
 }
 /// Tests `source_file` with both backends, starting from an empty cache.
 fn test_file(
     source_file: &Path,
     remove_tmps: bool,
 ) -> Result<Result<(), std::path::PathBuf>, String> {
     // A throw-away empty cache makes `test_cached` run LLVM as well as GCC.
     test_cached(source_file, remove_tmps, &mut None)
 }

 /// Generates a new rustlantis file for us to run tests on.
 ///
 /// Runs the `generate` binary of the rustlantis checkout in `clones/rustlantis` for
 /// `seed` and stores its stdout in `fuzz{seed}.rs` inside the system temporary
 /// directory. When `print_tmp_vars` is set, `--debug` is passed to the generator.
 ///
 /// # Errors
 ///
 /// Returns a message when the generator cannot be started, exits unsuccessfully,
 /// or when the source file cannot be written.
 fn generate(seed: u64, print_tmp_vars: bool) -> Result<std::path::PathBuf, String> {
     let out_path = std::env::temp_dir().join(format!("fuzz{seed}.rs"));
     // We need to get the command output here.
     let mut generate = std::process::Command::new("cargo");
     generate
         .args(["run", "--release", "--bin", "generate"])
         .arg(seed.to_string())
         .current_dir("clones/rustlantis");
     if print_tmp_vars {
         generate.arg("--debug");
     }
     let out = generate.output().map_err(|err| format!("{err:?}"))?;
     // A failed generator run has no usable program on stdout; do not save it as a sample.
     if !out.status.success() {
         return Err(format!("rustlantis failed to generate a program for seed {seed}:{out:?}"));
     }
     // Stuff the rustlantis output in a source file.
     std::fs::write(&out_path, &out.stdout).map_err(|err| format!("{err:?}"))?;
     Ok(out_path)
 }
	use std::ffi::OsStr;
	use std::path::Path;

	mod reduce;

	use crate::utils::run_command_with_output;

	/// Prints the help text of the `fuzz` command to stdout.
	fn show_usage() {
	    // Kept as one raw string so the text is printed exactly as written here.
	    const USAGE: &str = r#"
	`fuzz` command help:
	--reduce : Reduces a file generated by rustlantis
	--help : Show this help
	--start : Start of the fuzzed range
	--count : The number of cases to fuzz
	-j --jobs : The number of threads to use during fuzzing"#;
	    println!("{USAGE}");
	}

	/// Entry point of the `fuzz` command.
	///
	/// Parses the command-line options, then either reduces a single file (`--reduce`),
	/// prints the help (`--help`), or clones, updates and builds rustlantis in
	/// `clones/rustlantis` and fuzzes the seeds `start..start + count`.
	///
	/// # Errors
	///
	/// Returns a message when an option is unknown, lacks its value or has a value that
	/// does not parse, when the thread count is zero, when `start + count` overflows,
	/// or when cloning, updating or building rustlantis fails.
	pub fn run() -> Result<(), String> {
	    // We skip binary name and the `fuzz` command.
	    let mut args = std::env::args().skip(2);
	    let mut start: u64 = 0;
	    let mut count: u64 = 100;
	    let mut threads =
	        std::thread::available_parallelism().map(|threads| threads.get()).unwrap_or(1);
	    // `while let` (not `for`): option values are pulled from `args` inside the loop.
	    while let Some(arg) = args.next() {
	        match arg.as_str() {
	            "--reduce" => {
	                let Some(path) = args.next() else {
	                    return Err("--reduce must be provided with a path".into());
	                };
	                if !std::fs::exists(&path).unwrap_or(false) {
	                    return Err("--reduce must be provided with a valid path".into());
	                }
	                reduce::reduce(&path);
	                return Ok(());
	            }
	            "--help" => {
	                show_usage();
	                return Ok(());
	            }
	            "--start" => start = parse_option_value(args.next(), "start")?,
	            "--count" => count = parse_option_value(args.next(), "count")?,
	            "-j" | "--jobs" => threads = parse_option_value(args.next(), "thread count")?,
	            _ => return Err(format!("Unknown option {arg}")),
	        }
	    }

	    // Without any worker nothing would ever advance the seed counter.
	    if threads == 0 {
	        return Err("Fuzz thread count must be at least 1!".to_string());
	    }
	    // Reject ranges whose end does not fit in a `u64` instead of overflowing.
	    let end = start
	        .checked_add(count)
	        .ok_or_else(|| format!("Fuzz range {start} + {count} is too large!"))?;

	    // Ensure that we have a cloned version of rustlantis on hand.
	    crate::utils::git_clone(
	        "https://github.com/cbeuw/rustlantis.git",
	        Some("clones/rustlantis".as_ref()),
	        true,
	    )
	    .map_err(|err| (format!("Git clone failed with message: {err:?}!")))?;

	    // Ensure that we are on the newest rustlantis commit.
	    let cmd: &[&dyn AsRef<OsStr>] = &[&"git", &"pull", &"origin"];
	    run_command_with_output(cmd, Some(Path::new("clones/rustlantis")))?;

	    // Build the release version of rustlantis
	    let cmd: &[&dyn AsRef<OsStr>] = &[&"cargo", &"build", &"--release"];
	    run_command_with_output(cmd, Some(Path::new("clones/rustlantis")))?;
	    // Fuzz a given range
	    fuzz_range(start, end, threads);
	    Ok(())
	}

	/// Parses the value that follows a command-line option.
	///
	/// `what` names the option in the error messages (e.g. `"start"`).
	fn parse_option_value<T>(value: Option<String>, what: &str) -> Result<T, String>
	where
	    T: std::str::FromStr,
	    T::Err: std::fmt::Debug,
	{
	    value
	        .ok_or_else(|| format!("Fuzz {what} not provided!"))?
	        .parse::<T>()
	        .map_err(|err| format!("Fuzz {what} not a number {err:?}!"))
	}

	/// Fuzzes a range `start..end` with `threads`.
	fn fuzz_range(start: u64, end: u64, threads: usize) {
	use std::sync::Arc;
	use std::sync::atomic::{AtomicU64, Ordering};
	use std::time::{Duration, Instant};
	// Total amount of files to fuzz
	let total = end - start;
	// Currently fuzzed element
	let start = Arc::new(AtomicU64::new(start));
	// Count time during fuzzing
	let start_time = Instant::now();
	let mut workers = Vec::with_capacity(threads);
	// Spawn `threads`..
	for _ in 0..threads {
	let start = start.clone();
	// .. which each will ..
	workers.push(std::thread::spawn(move \|\| {
	// ... grab the next fuzz seed ...
	while start.load(Ordering::Relaxed) < end {
	let next = start.fetch_add(1, Ordering::Relaxed);
	// .. test that seed .
	match test(next, false) {
	Err(err) => {
	// If the test failed at compile-time...
	println!("test({next}) failed because {err:?}");
	// ... copy that file to the directory `target/fuzz/compiletime_error`...
	let mut out_path: std::path::PathBuf =
	"target/fuzz/compiletime_error".into();
	std::fs::create_dir_all(&out_path).unwrap();
	// .. into a file named `fuzz{seed}.rs`.
	out_path.push(format!("fuzz{next}.rs"));
	std::fs::copy(err, out_path).unwrap();
	}
	Ok(Err(err)) => {
	// If the test failed at run-time...
	println!("The LLVM and GCC results don't match for {err:?}");
	// ... generate a new file, which prints temporaries(instead of hashing them)...
	let mut out_path: std::path::PathBuf = "target/fuzz/runtime_error".into();
	std::fs::create_dir_all(&out_path).unwrap();
	let Ok(Err(tmp_print_err)) = test(next, true) else {
	// ... if that file does not reproduce the issue...
	// ... save the original sample in a file named `fuzz{seed}.rs`...
	out_path.push(format!("fuzz{next}.rs"));
	std::fs::copy(err, &out_path).unwrap();
	continue;
	};
	// ... if that new file still produces the issue, copy it to `fuzz{seed}.rs`..
	out_path.push(format!("fuzz{next}.rs"));
	std::fs::copy(tmp_print_err, &out_path).unwrap();
	// ... and start reducing it, using some properties of `rustlantis` to speed up the process.
	reduce::reduce(&out_path);
	}
	// If the test passed, do nothing
	Ok(Ok(())) => (),
	}
	}
	}));
	}
	// The "manager" thread loop.
	while start.load(Ordering::Relaxed) < end \|\| !workers.iter().all(\|t\| t.is_finished()) {
	// Every 500 ms...
	let five_hundred_millis = Duration::from_millis(500);
	std::thread::sleep(five_hundred_millis);
	// ... calculate the remaining fuzz iters ...
	let remaining = end - start.load(Ordering::Relaxed);
	// ... fix the count(the start counter counts the cases that
	// begun fuzzing, and not only the ones that are done)...
	let fuzzed = (total - remaining).saturating_sub(threads as u64);
	// ... and the fuzz speed ...
	let iter_per_sec = fuzzed as f64 / start_time.elapsed().as_secs_f64();
	// .. and use them to display fuzzing stats.
	println!(
	"fuzzed {fuzzed} cases({}%), at rate {iter_per_sec} iter/s, remaining ~{}s",
	(100 * fuzzed) as f64 / total as f64,
	(remaining as f64) / iter_per_sec
	)
	}
	drop(workers);
	}

	/// Builds & runs a file with LLVM.
	///
	/// Compiles `path` with plain `rustc` into a sibling file with the `llvm_elf`
	/// extension, runs it and returns its stdout followed by its stderr.
	fn debug_llvm(path: &std::path::Path) -> Result<Vec<u8>, String> {
	    let exe_path = path.with_extension("llvm_elf");
	    let mut compile = std::process::Command::new("rustc");
	    compile.arg(path).arg("-o").arg(&exe_path);
	    build_and_run(compile, "LLVM", path, &exe_path)
	}

	/// Builds & runs a file with GCC.
	///
	/// Compiles `path` with `./y.sh rustc -O` into a sibling file with the `gcc_elf`
	/// extension, runs it and returns its stdout followed by its stderr.
	fn release_gcc(path: &std::path::Path) -> Result<Vec<u8>, String> {
	    let exe_path = path.with_extension("gcc_elf");
	    let mut compile = std::process::Command::new("./y.sh");
	    compile.arg("rustc").arg(path).arg("-O").arg("-o").arg(&exe_path);
	    build_and_run(compile, "GCC", path, &exe_path)
	}

	/// Runs the prepared `compile` command, executes the resulting `exe_path` and removes it.
	///
	/// `backend` only labels the error messages. The executable is removed even when the
	/// program fails, so failing samples do not leave binaries behind.
	fn build_and_run(
	    mut compile: std::process::Command,
	    backend: &str,
	    path: &Path,
	    exe_path: &Path,
	) -> Result<Vec<u8>, String> {
	    let output = compile.output().map_err(|err| format!("{err:?}"))?;
	    // ... check that the compilation succeeded ...
	    if !output.status.success() {
	        return Err(format!("{backend} compilation failed:{output:?}"));
	    }
	    // ... run the resulting executable ...
	    let run = std::process::Command::new(exe_path).output();
	    // ... cleanup that executable before inspecting the run, whatever its outcome ...
	    let cleanup = std::fs::remove_file(exe_path);
	    let output = run.map_err(|err| format!("{err:?}"))?;
	    // ... check it run normally ...
	    if !output.status.success() {
	        return Err(format!(
	            "The program at {path:?}, compiled with {backend}, exited unsuccessfully:{output:?}"
	        ));
	    }
	    cleanup.map_err(|err| format!("{err:?}"))?;
	    // ... and return the output(stdout + stderr - this allows UB checks to fire).
	    let mut res = output.stdout;
	    res.extend(output.stderr);
	    Ok(res)
	}
	/// Cache filled by `test_cached`: the LLVM output paired with the GCC output that
	/// was observed when the cache was filled. `None` means nothing is cached yet.
	type ResultCache = Option<(Vec<u8>, Vec<u8>)>;
	/// Generates the rustlantis program for `seed` and checks that GCC and LLVM agree on it.
	///
	/// The outer `Err` carries a message from generating, building or running the program;
	/// the inner `Err` carries the path of a source file on which the two backends disagree.
	fn test(seed: u64, print_tmp_vars: bool) -> Result<Result<(), std::path::PathBuf>, String> {
	    // Generate the source, then compare the backends, removing the file when they agree.
	    generate(seed, print_tmp_vars).and_then(|generated| test_file(&generated, true))
	}
	/// Tests a file against a cached LLVM result. Meant for reduction, where a
	/// transformation is known not to change the execution result.
	///
	/// GCC is always run. LLVM is only run when `cache` is empty, in which case the
	/// cache is filled with the LLVM output and the current GCC output. A mismatch is
	/// reported only when GCC disagrees with the cached LLVM output while still
	/// matching the cached GCC output. Otherwise the file counts as passing and is
	/// deleted when `remove_tmps` is set.
	fn test_cached(
	    source_file: &Path,
	    remove_tmps: bool,
	    cache: &mut ResultCache,
	) -> Result<Result<(), std::path::PathBuf>, String> {
	    // GCC runs first, exactly once per call.
	    let gcc_res = release_gcc(source_file)?;
	    // Reuse the cached pair, or build it from a fresh LLVM run.
	    let (llvm_res, old_gcc) = match cache {
	        Some(cached) => cached,
	        None => cache.insert((debug_llvm(source_file)?, gcc_res.clone())),
	    };
	    // The cached values sit behind references and must be dereferenced to compare.
	    let mismatch = *llvm_res != gcc_res && gcc_res == *old_gcc;
	    if !mismatch {
	        if remove_tmps {
	            std::fs::remove_file(source_file).map_err(|err| format!("{err:?}"))?;
	        }
	        return Ok(Ok(()));
	    }
	    // The backends disagree: hand the offending file back to the caller.
	    Ok(Err(source_file.to_path_buf()))
	}
	/// Tests `source_file` with both backends, starting from an empty cache.
	fn test_file(
	    source_file: &Path,
	    remove_tmps: bool,
	) -> Result<Result<(), std::path::PathBuf>, String> {
	    // A throw-away empty cache makes `test_cached` run LLVM as well as GCC.
	    test_cached(source_file, remove_tmps, &mut None)
	}

	/// Generates a new rustlantis file for us to run tests on.
	///
	/// Runs the `generate` binary of the rustlantis checkout in `clones/rustlantis` for
	/// `seed` and stores its stdout in `fuzz{seed}.rs` inside the system temporary
	/// directory. When `print_tmp_vars` is set, `--debug` is passed to the generator.
	///
	/// # Errors
	///
	/// Returns a message when the generator cannot be started, exits unsuccessfully,
	/// or when the source file cannot be written.
	fn generate(seed: u64, print_tmp_vars: bool) -> Result<std::path::PathBuf, String> {
	    let out_path = std::env::temp_dir().join(format!("fuzz{seed}.rs"));
	    // We need to get the command output here.
	    let mut generate = std::process::Command::new("cargo");
	    generate
	        .args(["run", "--release", "--bin", "generate"])
	        .arg(seed.to_string())
	        .current_dir("clones/rustlantis");
	    if print_tmp_vars {
	        generate.arg("--debug");
	    }
	    let out = generate.output().map_err(|err| format!("{err:?}"))?;
	    // A failed generator run has no usable program on stdout; do not save it as a sample.
	    if !out.status.success() {
	        return Err(format!("rustlantis failed to generate a program for seed {seed}:{out:?}"));
	    }
	    // Stuff the rustlantis output in a source file.
	    std::fs::write(&out_path, &out.stdout).map_err(|err| format!("{err:?}"))?;
	    Ok(out_path)
	}