| use std::io::Write; |
| use std::path::{Path, PathBuf}; |
| |
| use super::ResultCache; |
| |
| /// Saves a reduced file for a given `stage` |
| fn save_reduction(lines: &[String], path: &Path, stage: &str) { |
| let mut path = path.to_path_buf(); |
| path.set_extension(format!("rs.{stage}")); |
| let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); |
| for line in lines { |
| file.write_all(line.as_bytes()).expect("Could not save the reduced example"); |
| } |
| } |
| |
| /// Checks if a given reduction is valid. |
| fn test_reduction(lines: &[String], path: &Path, cache: &mut ResultCache) -> bool { |
| let mut path = path.to_path_buf(); |
| path.set_extension("rs_reduced"); |
| let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); |
| for line in lines { |
| file.write_all(line.as_bytes()).expect("Could not save the reduced example"); |
| } |
| let res = super::test_cached(&path, false, cache); |
| let Ok(Err(_)) = res else { |
| return false; |
| }; |
| true |
| } |
| |
| /// Removes duplicate assignments in bulk. |
| /// If a line A = B is followed directly by A = C, |
| /// then removing the first line ought to be fully sound, |
| /// and not change the behaviour of the program at all. Detect & remove such lines. |
| fn remove_dup_assign( |
| file: &mut Vec<String>, |
| path: &PathBuf, |
| starts: usize, |
| ends: usize, |
| cache: &mut ResultCache, |
| ) { |
| let mut file_copy = file.clone(); |
| let mut reduction_count = 0; |
| // Not worth it. |
| if ends - starts < 8 { |
| return; |
| } |
| for index in starts..ends { |
| let Some((prefix, _)) = file_copy[index].split_once('=') else { |
| continue; |
| }; |
| let Some((prefix2, postifx2)) = file_copy[index + 1].split_once('=') else { |
| continue; |
| }; |
| let prefix = prefix.trim(); |
| let prefix2 = prefix2.trim(); |
| // FIXME: Right now, remove_dup_assign cares about assignments to the exact same place. |
| // However, given an assigemnt like this: |
| // ``` |
| // A.0 = 1_u32; |
| // A = (2_u32, 3.0); |
| // ``` |
| // The first assignment could be safely omitted. |
| // Additionally, we try to check if the second assignment could depend on the first one. |
| // In such cases, the result is likely to change, so we bail. |
| if prefix == prefix2 && !postifx2.contains(prefix) { |
| file_copy[index] = "".into(); |
| reduction_count += 1; |
| } |
| } |
| // We have removed no lines - no point in testing. |
| if reduction_count == 0 { |
| return; |
| } |
| // Check if the removed lines affected the execution result in any way, shape or form. |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by {reduction_count} lines `remove_dup_assign`"); |
| *file = file_copy; |
| } else { |
| // The execution result changed. |
| // This can occur if the second assignment depended on the first one. |
| // Eg. |
| // ``` |
| // a = b + c; |
| // a = a + d; |
| // ``` |
| remove_dup_assign(file, path, starts, (starts + ends) / 2, cache); |
| remove_dup_assign(file, path, (starts + ends) / 2, ends, cache); |
| } |
| save_reduction(file, path, "remove_dup_assign"); |
| } |
| |
| /// Removes all the unneeded calls to `dump_var`. This is not something tools like `cvise` can do, |
| /// but it greately speeds up MIR interpretation + native execution. |
| fn remove_dump_var(file: &mut Vec<String>, path: &PathBuf) { |
| let mut curr = 0; |
| // ... try disabling `dump_vars` one by one, until only the necessary ones are left. |
| while curr < file.len() { |
| let Some(line) = file[curr..].iter().position(|line| line.contains("dump_var")) else { |
| // No more `dump_var`s to remove - exit early. |
| break; |
| }; |
| // Make the line absolute again. |
| let line = line + curr; |
| let mut file_copy = file.clone(); |
| // Try removing 3 consecutive lines(the call, block end and block beginning). This effectively removes a `dump_var`. |
| file_copy.remove(line); |
| file_copy.remove(line); |
| file_copy.remove(line); |
| // Not cached - the execution result can change. |
| let mut uncached = None; |
| // Check if this reduction is valid. |
| if test_reduction(&file_copy, path, &mut uncached) { |
| println!("Reduced {path:?} by 3 lines `remove_dump_var`"); |
| *file = file_copy; |
| curr = line; |
| } else { |
| curr = line + 1; |
| } |
| } |
| save_reduction(file, path, "remove_dump_var"); |
| } |
| |
| /// Replaces matches with gotos where possible. |
| /// This exploits some properties of rustlantis(match arm order), |
| /// and is only soundly applicable to MIR generated by it. |
| /// Still, it is not something `cvise` can do, but it simplifies the code a ton. |
| fn match_to_goto(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| |
| while curr < file.len() { |
| let Some(match_starts) = file[curr..].iter().position(|line| line.contains("match")) else { |
| // No more `match`es to remove - exit early. |
| break; |
| }; |
| let match_starts = match_starts + curr; |
| // Find the end of the match |
| let Some(match_ends) = file[match_starts..].iter().position(|line| line.contains('}')) |
| else { |
| // Can't find match end - exit early. |
| break; |
| }; |
| let match_ends = match_ends + match_starts; |
| let match_body = &file[match_starts..match_ends]; |
| |
| // Find where this match should normally jump to. |
| // This *should* be the second-last arm of the match, as per the paper(the remaining blocks are decoys). |
| // If this ever changes, this reduction may not always be sound. |
| // This is not a problem, however: we NEED to use MIRI for reduction anwyway, |
| // and it will catch this issue. |
| let jumps_to = &match_body[match_body.len() - 2].trim(); |
| let Some((_, bb_ident)) = jumps_to.split_once("bb") else { |
| break; |
| }; |
| // We now have the number of the block we jump to at runtime. |
| let bb_ident = bb_ident.trim_matches(','); |
| // Try replacing this match with an unconditional jump. |
| let mut file_copy = file.clone(); |
| for _ in match_starts..(match_ends + 1) { |
| file_copy.remove(match_starts); |
| } |
| file_copy.insert(match_starts, format!("Goto(bb{bb_ident})\n")); |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by {} lines `match_to_goto`", match_ends - match_starts); |
| *file = file_copy; |
| curr = match_starts; |
| } else { |
| curr = match_ends; |
| } |
| } |
| save_reduction(file, path, "match_to_goto"); |
| } |
| |
| /// At this point, we can try "killing" blocks, by replacing their bodies with calls to `abort`. |
| /// This is always sound(the program aborts, so no UB can occur after the block), |
| /// and allows us to safely remove *a lot* of unneeded blocks. |
| fn block_abort(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| while curr < file.len() { |
| let Some(block_starts) = file[curr..] |
| .iter() |
| .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) |
| else { |
| // No more `block`s to kill - exit early. |
| break; |
| }; |
| let block_starts = block_starts + curr; |
| // Find the beginning of the next block to find the end of this block. |
| let Some(block_ends) = file[(block_starts + 1)..] |
| .iter() |
| .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) |
| else { |
| // No more `block`s to kill - exit early. |
| break; |
| }; |
| let block_ends = block_starts + block_ends; |
| let block_starts = block_starts + 1; |
| let mut file_copy = file.clone(); |
| // Remove the block body... |
| for _ in block_starts..(block_ends) { |
| file_copy.remove(block_starts); |
| } |
| // ..and insert an unconditional call to abort. |
| file_copy.insert( |
| block_starts, |
| "Call(tmp = core::intrinsics::abort(), ReturnTo(bb1), UnwindUnreachable())\n" |
| .to_string(), |
| ); |
| file_copy.insert(block_starts, "let tmp = ();\n".to_string()); |
| |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by {} lines `block_abort`", block_ends - block_starts - 2); |
| *file = file_copy; |
| curr = block_starts; |
| } else { |
| curr = block_ends; |
| } |
| } |
| save_reduction(file, path, "block_abort"); |
| } |
| |
| /// Removes unreachable basic blocks |
| fn remove_block(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| |
| // Next, we try to outright remove blocks. |
| while curr < file.len() { |
| let Some(block_starts) = file[curr..] |
| .iter() |
| .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) |
| else { |
| // No more `block`s to remove - exit early. |
| break; |
| }; |
| let block_starts = block_starts + curr; |
| // Find the beginning of the next block to find the end of this block. |
| let Some(block_ends) = file[(block_starts + 1)..] |
| .iter() |
| .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) |
| else { |
| // No more `block`s to remove - exit early. |
| break; |
| }; |
| let block_ends = block_starts + block_ends + 1; |
| // Large blocks are likely to be necessary. |
| if block_ends - block_starts > 6 { |
| curr = block_starts + 1; |
| continue; |
| } |
| let mut file_copy = file.clone(); |
| file_copy.drain(block_starts..block_ends); |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by {} lines `remove_blocks`", block_ends - block_starts); |
| *file = file_copy; |
| curr = block_starts; |
| } else { |
| curr = block_starts + 1; |
| } |
| } |
| save_reduction(file, path, "remove_block"); |
| } |
| |
| /// Merges blocks ending with unconditional jumps. |
| fn linearize_cf(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| |
| // Next, we try to linearize the control flow. What the does that mean? |
| // Given a sequence like this: |
| // Goto(bb22) |
| // } |
| // bb22 = { |
| // We remove those 3 lines, merging the blocks together. This is not something `cvise` can do, |
| // and it makes other transformations easier. |
| while curr < file.len() { |
| let Some(block_starts) = file[curr..] |
| .iter() |
| .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) |
| else { |
| // No more `block`s to remove - exit early. |
| break; |
| }; |
| let block_starts = block_starts + curr; |
| // Extract the block id. |
| let Some((block, _)) = file[block_starts].split_once('=') else { |
| curr = block_starts + 1; |
| continue; |
| }; |
| let block = block.trim(); |
| if file[block_starts - 2].trim() != format!("Goto({block})") { |
| curr = block_starts + 1; |
| continue; |
| } |
| let mut file_copy = file.clone(); |
| // Try removing 3 consecutive lines(the goto, block end and block beginning). This effectively removes a `Goto(next)`. |
| file_copy.remove(block_starts - 2); |
| file_copy.remove(block_starts - 2); |
| file_copy.remove(block_starts - 2); |
| // Check if this reduction is valid. |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by 3 lines `linearize_cf`"); |
| *file = file_copy; |
| curr = block_starts; |
| } else { |
| curr = block_starts + 1; |
| } |
| } |
| save_reduction(file, path, "linearize_cf"); |
| } |
| |
| /// Replaces a call to a given function with a 0 assignment to the destination place, and a Goto. |
| /// This is always sound, because: |
| /// 1. All the functions arguments are always initialized |
| /// 2. and point to initialized memory(the operand of &raw must be an initialized place in rustlantis). |
| fn remove_fn_calls(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| |
| while curr < file.len() { |
| let Some(fn_call) = |
| file[curr..].iter().position(|line| line.contains("Call(") && line.contains(" = fn")) |
| else { |
| // No more calls to remove - exit early. |
| break; |
| }; |
| let fn_call = fn_call + curr; |
| let line = file[fn_call].trim(); |
| // Skip the Call( |
| let line = &line["Call(".len()..]; |
| // Extract the destination place |
| let Some((place, line)) = line.split_once('=') else { |
| curr = fn_call + 1; |
| continue; |
| }; |
| // Skip till the return block id. |
| let Some((_, line)) = line.split_once("ReturnTo(") else { |
| curr = fn_call + 1; |
| continue; |
| }; |
| // Extract the full return block |
| let Some((block, _)) = line.split_once(')') else { |
| curr = fn_call + 1; |
| continue; |
| }; |
| let mut file_copy = file.clone(); |
| // Remove the call. |
| file_copy.remove(fn_call); |
| file_copy.insert(fn_call, format!("Goto({block})\n")); |
| file_copy.insert(fn_call, format!("{place} = 0;\n")); |
| // Check if this reduction is valid. |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} using `remove_fn_calls` {cache:?}"); |
| *file = file_copy; |
| curr = fn_call; |
| } else { |
| curr = fn_call + 1; |
| } |
| } |
| save_reduction(file, path, "remove_fn_calls"); |
| } |
| |
| /// Fully removes unreachable functions. |
| fn remove_fns(file: &mut Vec<String>, path: &PathBuf, cache: &mut ResultCache) { |
| let mut curr = 0; |
| |
| while curr < file.len() { |
| // Find a function start |
| let Some(fn_start) = file[curr..].iter().position(|line| { |
| line.contains("#[custom_mir(dialect = \"runtime\", phase = \"initial\")]") |
| }) else { |
| // No more functions to remove - exit early. |
| break; |
| }; |
| // Find the next function(and use that to find the end of this one). |
| // FIXME: this check is flawed: it will never remove the very last function(the one before main). |
| // The other checks will turn that function into a single call to abort, but it is still annoying that it is kept. |
| let fn_start = fn_start + curr; |
| let Some(fn_end) = file[(fn_start + 3)..].iter().position(|line| line.contains("fn fn")) |
| else { |
| // No more functions to remove - exit early. |
| break; |
| }; |
| let fn_end = fn_start + 2 + fn_end; |
| let mut file_copy = file.clone(); |
| // Remove the function.\\ |
| file_copy.drain(fn_start..fn_end); |
| // Check if this reduction is valid. |
| if test_reduction(&file_copy, path, cache) { |
| println!("Reduced {path:?} by {} lines `remove_fns`", fn_end - fn_start); |
| *file = file_copy; |
| } else { |
| curr = fn_start + 1; |
| } |
| } |
| save_reduction(file, path, "remove_fns"); |
| } |
| |
| pub(super) fn reduce(path: impl AsRef<Path>) { |
| let path = path.as_ref().to_owned(); |
| // ... read the file to a buffer .. |
| let file = std::fs::read_to_string(&path).expect("Could not open the file to reduce"); |
| let mut file: Vec<_> = file.split_inclusive('\n').map(|s| s.to_string()).collect(); |
| |
| // ... and run reduction passes. |
| println!("running `remove_dump_var` on {path:?}."); |
| remove_dump_var(&mut file, &path); |
| // After `dump_var`, the execution results ought not to change. Cache them. |
| let mut cache = None; |
| // Fill the cache |
| assert!( |
| test_reduction(&file, &path, &mut cache), |
| "Reduction error: check that the input file is a valid reproducer." |
| ); |
| println!("cache:{cache:?}"); |
| println!("running `remove_fn_calls` on {path:?}."); |
| remove_fn_calls(&mut file, &path, &mut cache); |
| println!("running `remove_fns` on {path:?}."); |
| remove_fns(&mut file, &path, &mut cache); |
| let len = file.len(); |
| println!("running `remove_dup_assign` on {path:?}."); |
| remove_dup_assign(&mut file, &path, 0, len, &mut cache); |
| file.retain(|line| !line.is_empty()); |
| println!("running `match_to_goto` on {path:?}."); |
| match_to_goto(&mut file, &path, &mut cache); |
| println!("running `block_abort` on {path:?}."); |
| block_abort(&mut file, &path, &mut cache); |
| println!("running `remove_block` on {path:?}."); |
| remove_block(&mut file, &path, &mut cache); |
| println!("running `linearize_cf` on {path:?}."); |
| linearize_cf(&mut file, &path, &mut cache); |
| let mut out = std::fs::File::create(&path).expect("Could not save the reduction result."); |
| let file = file.into_iter().collect::<String>(); |
| out.write_all(file.as_bytes()).expect("failed to write into file"); |
| } |