blob: 20715ab0e7c480101f58b48ea72cde58d50cceb1 [file] [log] [blame]
use std::io::Write;
use std::path::{Path, PathBuf};
use super::ResultCache;
/// Saves a reduced file for a given `stage`
fn save_reduction(lines: &[String], path: &Path, stage: &str) {
let mut path = path.to_path_buf();
path.set_extension(format!("rs.{stage}"));
let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file");
for line in lines {
file.write_all(line.as_bytes()).expect("Could not save the reduced example");
}
}
/// Checks if a given reduction is valid.
fn test_reduction(lines: &[String], path: &Path, cache: &mut ResultCache) -> bool {
let mut path = path.to_path_buf();
path.set_extension("rs_reduced");
let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file");
for line in lines {
file.write_all(line.as_bytes()).expect("Could not save the reduced example");
}
let res = super::test_cached(&path, false, cache);
let Ok(Err(_)) = res else {
return false;
};
true
}
/// Removes duplicate assignments in bulk.
/// If a line A = B is followed directly by A = C,
/// then removing the first line ought to be fully sound,
/// and not change the behaviour of the program at all. Detect & remove such lines.
fn remove_dup_assign(
file: &mut Vec<String>,
path: &PathBuf,
starts: usize,
ends: usize,
cache: &mut ResultCache,
) {
let mut file_copy = file.clone();
let mut reduction_count = 0;
// Not worth it.
if ends - starts < 8 {
return;
}
for index in starts..ends {
let Some((prefix, _)) = file_copy[index].split_once('=') else {
continue;
};
let Some((prefix2, postifx2)) = file_copy[index + 1].split_once('=') else {
continue;
};
let prefix = prefix.trim();
let prefix2 = prefix2.trim();
// FIXME: Right now, remove_dup_assign cares about assignments to the exact same place.
// However, given an assigemnt like this:
// ```
// A.0 = 1_u32;
// A = (2_u32, 3.0);
// ```
// The first assignment could be safely omitted.
// Additionally, we try to check if the second assignment could depend on the first one.
// In such cases, the result is likely to change, so we bail.
if prefix == prefix2 && !postifx2.contains(prefix) {
file_copy[index] = "".into();
reduction_count += 1;
}
}
// We have removed no lines - no point in testing.
if reduction_count == 0 {
return;
}
// Check if the removed lines affected the execution result in any way, shape or form.
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by {reduction_count} lines `remove_dup_assign`");
*file = file_copy;
} else {
// The execution result changed.
// This can occur if the second assignment depended on the first one.
// Eg.
// ```
// a = b + c;
// a = a + d;
// ```
remove_dup_assign(file, path, starts, (starts + ends) / 2, cache);
remove_dup_assign(file, path, (starts + ends) / 2, ends, cache);
}
save_reduction(file, path, "remove_dup_assign");
}
/// Removes all the unneeded calls to `dump_var`. This is not something tools like `cvise` can do,
/// but it greately speeds up MIR interpretation + native execution.
fn remove_dump_var(file: &mut Vec<String>, path: &PathBuf) {
let mut curr = 0;
// ... try disabling `dump_vars` one by one, until only the necessary ones are left.
while curr < file.len() {
let Some(line) = file[curr..].iter().position(|line| line.contains("dump_var")) else {
// No more `dump_var`s to remove - exit early.
break;
};
// Make the line absolute again.
let line = line + curr;
let mut file_copy = file.clone();
// Try removing 3 consecutive lines(the call, block end and block beginning). This effectively removes a `dump_var`.
file_copy.remove(line);
file_copy.remove(line);
file_copy.remove(line);
// Not cached - the execution result can change.
let mut uncached = None;
// Check if this reduction is valid.
if test_reduction(&file_copy, path, &mut uncached) {
println!("Reduced {path:?} by 3 lines `remove_dump_var`");
*file = file_copy;
curr = line;
} else {
curr = line + 1;
}
}
save_reduction(file, path, "remove_dump_var");
}
/// Replaces matches with gotos where possible.
/// This exploits some properties of rustlantis(match arm order),
/// and is only soundly applicable to MIR generated by it.
/// Still, it is not something `cvise` can do, but it simplifies the code a ton.
fn match_to_goto(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
while curr < file.len() {
let Some(match_starts) = file[curr..].iter().position(|line| line.contains("match")) else {
// No more `match`es to remove - exit early.
break;
};
let match_starts = match_starts + curr;
// Find the end of the match
let Some(match_ends) = file[match_starts..].iter().position(|line| line.contains('}'))
else {
// Can't find match end - exit early.
break;
};
let match_ends = match_ends + match_starts;
let match_body = &file[match_starts..match_ends];
// Find where this match should normally jump to.
// This *should* be the second-last arm of the match, as per the paper(the remaining blocks are decoys).
// If this ever changes, this reduction may not always be sound.
// This is not a problem, however: we NEED to use MIRI for reduction anwyway,
// and it will catch this issue.
let jumps_to = &match_body[match_body.len() - 2].trim();
let Some((_, bb_ident)) = jumps_to.split_once("bb") else {
break;
};
// We now have the number of the block we jump to at runtime.
let bb_ident = bb_ident.trim_matches(',');
// Try replacing this match with an unconditional jump.
let mut file_copy = file.clone();
for _ in match_starts..(match_ends + 1) {
file_copy.remove(match_starts);
}
file_copy.insert(match_starts, format!("Goto(bb{bb_ident})\n"));
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by {} lines `match_to_goto`", match_ends - match_starts);
*file = file_copy;
curr = match_starts;
} else {
curr = match_ends;
}
}
save_reduction(file, path, "match_to_goto");
}
/// At this point, we can try "killing" blocks, by replacing their bodies with calls to `abort`.
/// This is always sound(the program aborts, so no UB can occur after the block),
/// and allows us to safely remove *a lot* of unneeded blocks.
fn block_abort(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
while curr < file.len() {
let Some(block_starts) = file[curr..]
.iter()
.position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {"))
else {
// No more `block`s to kill - exit early.
break;
};
let block_starts = block_starts + curr;
// Find the beginning of the next block to find the end of this block.
let Some(block_ends) = file[(block_starts + 1)..]
.iter()
.position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {"))
else {
// No more `block`s to kill - exit early.
break;
};
let block_ends = block_starts + block_ends;
let block_starts = block_starts + 1;
let mut file_copy = file.clone();
// Remove the block body...
for _ in block_starts..(block_ends) {
file_copy.remove(block_starts);
}
// ..and insert an unconditional call to abort.
file_copy.insert(
block_starts,
"Call(tmp = core::intrinsics::abort(), ReturnTo(bb1), UnwindUnreachable())\n"
.to_string(),
);
file_copy.insert(block_starts, "let tmp = ();\n".to_string());
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by {} lines `block_abort`", block_ends - block_starts - 2);
*file = file_copy;
curr = block_starts;
} else {
curr = block_ends;
}
}
save_reduction(file, path, "block_abort");
}
/// Removes unreachable basic blocks
fn remove_block(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
// Next, we try to outright remove blocks.
while curr < file.len() {
let Some(block_starts) = file[curr..]
.iter()
.position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {"))
else {
// No more `block`s to remove - exit early.
break;
};
let block_starts = block_starts + curr;
// Find the beginning of the next block to find the end of this block.
let Some(block_ends) = file[(block_starts + 1)..]
.iter()
.position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {"))
else {
// No more `block`s to remove - exit early.
break;
};
let block_ends = block_starts + block_ends + 1;
// Large blocks are likely to be necessary.
if block_ends - block_starts > 6 {
curr = block_starts + 1;
continue;
}
let mut file_copy = file.clone();
file_copy.drain(block_starts..block_ends);
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by {} lines `remove_blocks`", block_ends - block_starts);
*file = file_copy;
curr = block_starts;
} else {
curr = block_starts + 1;
}
}
save_reduction(file, path, "remove_block");
}
/// Merges blocks ending with unconditional jumps.
fn linearize_cf(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
// Next, we try to linearize the control flow. What the does that mean?
// Given a sequence like this:
// Goto(bb22)
// }
// bb22 = {
// We remove those 3 lines, merging the blocks together. This is not something `cvise` can do,
// and it makes other transformations easier.
while curr < file.len() {
let Some(block_starts) = file[curr..]
.iter()
.position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {"))
else {
// No more `block`s to remove - exit early.
break;
};
let block_starts = block_starts + curr;
// Extract the block id.
let Some((block, _)) = file[block_starts].split_once('=') else {
curr = block_starts + 1;
continue;
};
let block = block.trim();
if file[block_starts - 2].trim() != format!("Goto({block})") {
curr = block_starts + 1;
continue;
}
let mut file_copy = file.clone();
// Try removing 3 consecutive lines(the goto, block end and block beginning). This effectively removes a `Goto(next)`.
file_copy.remove(block_starts - 2);
file_copy.remove(block_starts - 2);
file_copy.remove(block_starts - 2);
// Check if this reduction is valid.
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by 3 lines `linearize_cf`");
*file = file_copy;
curr = block_starts;
} else {
curr = block_starts + 1;
}
}
save_reduction(file, path, "linearize_cf");
}
/// Replaces a call to a given function with a 0 assignment to the destination place, and a Goto.
/// This is always sound, because:
/// 1. All the functions arguments are always initialized
/// 2. and point to initialized memory(the operand of &raw must be an initialized place in rustlantis).
fn remove_fn_calls(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
while curr < file.len() {
let Some(fn_call) =
file[curr..].iter().position(|line| line.contains("Call(") && line.contains(" = fn"))
else {
// No more calls to remove - exit early.
break;
};
let fn_call = fn_call + curr;
let line = file[fn_call].trim();
// Skip the Call(
let line = &line["Call(".len()..];
// Extract the destination place
let Some((place, line)) = line.split_once('=') else {
curr = fn_call + 1;
continue;
};
// Skip till the return block id.
let Some((_, line)) = line.split_once("ReturnTo(") else {
curr = fn_call + 1;
continue;
};
// Extract the full return block
let Some((block, _)) = line.split_once(')') else {
curr = fn_call + 1;
continue;
};
let mut file_copy = file.clone();
// Remove the call.
file_copy.remove(fn_call);
file_copy.insert(fn_call, format!("Goto({block})\n"));
file_copy.insert(fn_call, format!("{place} = 0;\n"));
// Check if this reduction is valid.
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} using `remove_fn_calls` {cache:?}");
*file = file_copy;
curr = fn_call;
} else {
curr = fn_call + 1;
}
}
save_reduction(file, path, "remove_fn_calls");
}
/// Fully removes unreachable functions.
fn remove_fns(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) {
let mut curr = 0;
while curr < file.len() {
// Find a function start
let Some(fn_start) = file[curr..].iter().position(|line| {
line.contains("#[custom_mir(dialect = \"runtime\", phase = \"initial\")]")
}) else {
// No more functions to remove - exit early.
break;
};
// Find the next function(and use that to find the end of this one).
// FIXME: this check is flawed: it will never remove the very last function(the one before main).
// The other checks will turn that function into a single call to abort, but it is still annoying that it is kept.
let fn_start = fn_start + curr;
let Some(fn_end) = file[(fn_start + 3)..].iter().position(|line| line.contains("fn fn"))
else {
// No more functions to remove - exit early.
break;
};
let fn_end = fn_start + 2 + fn_end;
let mut file_copy = file.clone();
// Remove the function.\\
file_copy.drain(fn_start..fn_end);
// Check if this reduction is valid.
if test_reduction(&file_copy, path, cache) {
println!("Reduced {path:?} by {} lines `remove_fns`", fn_end - fn_start);
*file = file_copy;
} else {
curr = fn_start + 1;
}
}
save_reduction(file, path, "remove_fns");
}
pub(super) fn reduce(path: impl AsRef<Path>) {
let path = path.as_ref().to_owned();
// ... read the file to a buffer ..
let file = std::fs::read_to_string(&path).expect("Could not open the file to reduce");
let mut file: Vec<_> = file.split_inclusive('\n').map(|s| s.to_string()).collect();
// ... and run reduction passes.
println!("running `remove_dump_var` on {path:?}.");
remove_dump_var(&mut file, &path);
// After `dump_var`, the execution results ought not to change. Cache them.
let mut cache = None;
// Fill the cache
assert!(
test_reduction(&file, &path, &mut cache),
"Reduction error: check that the input file is a valid reproducer."
);
println!("cache:{cache:?}");
println!("running `remove_fn_calls` on {path:?}.");
remove_fn_calls(&mut file, &path, &mut cache);
println!("running `remove_fns` on {path:?}.");
remove_fns(&mut file, &path, &mut cache);
let len = file.len();
println!("running `remove_dup_assign` on {path:?}.");
remove_dup_assign(&mut file, &path, 0, len, &mut cache);
file.retain(|line| !line.is_empty());
println!("running `match_to_goto` on {path:?}.");
match_to_goto(&mut file, &path, &mut cache);
println!("running `block_abort` on {path:?}.");
block_abort(&mut file, &path, &mut cache);
println!("running `remove_block` on {path:?}.");
remove_block(&mut file, &path, &mut cache);
println!("running `linearize_cf` on {path:?}.");
linearize_cf(&mut file, &path, &mut cache);
let mut out = std::fs::File::create(&path).expect("Could not save the reduction result.");
let file = file.into_iter().collect::<String>();
out.write_all(file.as_bytes()).expect("failed to write into file");
}