| #!/usr/bin/env python | 
 | """Calls C-Reduce to create a minimal reproducer for clang crashes. | 
 |  | 
 | Output files: | 
 |   *.reduced.sh -- crash reproducer with minimal arguments | 
 |   *.reduced.cpp -- the reduced file | 
 |   *.test.sh -- interestingness test for C-Reduce | 
 | """ | 
 |  | 
 | from __future__ import print_function | 
 | from argparse import ArgumentParser, RawTextHelpFormatter | 
 | import os | 
 | import re | 
 | import stat | 
 | import sys | 
 | import subprocess | 
 | import pipes | 
 | import shlex | 
 | import tempfile | 
 | import shutil | 
 | from distutils.spawn import find_executable | 
 | import multiprocessing | 
 |  | 
# Module-level settings. main() overwrites all three after parsing the
# command line; the rest of the file only reads them.

# When True, verbose_print() forwards its arguments to print().
verbose = False
# Path of the creduce executable as resolved by check_cmd() in main().
creduce_cmd = None
# Path of the clang executable as resolved by check_cmd() in main().
clang_cmd = None
 |  | 
 | def verbose_print(*args, **kwargs): | 
 |   if verbose: | 
 |     print(*args, **kwargs) | 
 |  | 
 | def check_file(fname): | 
 |   fname = os.path.normpath(fname) | 
 |   if not os.path.isfile(fname): | 
 |     sys.exit("ERROR: %s does not exist" % (fname)) | 
 |   return fname | 
 |  | 
 | def check_cmd(cmd_name, cmd_dir, cmd_path=None): | 
 |   """ | 
 |   Returns absolute path to cmd_path if it is given, | 
 |   or absolute path to cmd_dir/cmd_name. | 
 |   """ | 
 |   if cmd_path: | 
 |     # Make the path absolute so the creduce test can be run from any directory. | 
 |     cmd_path = os.path.abspath(cmd_path) | 
 |     cmd = find_executable(cmd_path) | 
 |     if cmd: | 
 |       return cmd | 
 |     sys.exit("ERROR: executable `%s` not found" % (cmd_path)) | 
 |  | 
 |   cmd = find_executable(cmd_name, path=cmd_dir) | 
 |   if cmd: | 
 |     return cmd | 
 |  | 
 |   if not cmd_dir: | 
 |     cmd_dir = "$PATH" | 
 |   sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir)) | 
 |  | 
 | def quote_cmd(cmd): | 
 |   return ' '.join(pipes.quote(arg) for arg in cmd) | 
 |  | 
 | def write_to_script(text, filename): | 
 |   with open(filename, 'w') as f: | 
 |     f.write(text) | 
 |   os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC) | 
 |  | 
 | class Reduce(object): | 
 |   def __init__(self, crash_script, file_to_reduce, core_number): | 
 |     crash_script_name, crash_script_ext = os.path.splitext(crash_script) | 
 |     file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce) | 
 |  | 
 |     self.testfile = file_reduce_name + '.test.sh' | 
 |     self.crash_script = crash_script_name + '.reduced' + crash_script_ext | 
 |     self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext | 
 |     shutil.copy(file_to_reduce, self.file_to_reduce) | 
 |  | 
 |     self.clang = clang_cmd | 
 |     self.clang_args = [] | 
 |     self.expected_output = [] | 
 |     self.needs_stack_trace = False | 
 |     self.creduce_flags = ["--tidy"] | 
 |     self.creduce_flags = ["--n", str(core_number)] | 
 |  | 
 |     self.read_clang_args(crash_script, file_to_reduce) | 
 |     self.read_expected_output() | 
 |  | 
 |   def get_crash_cmd(self, cmd=None, args=None, filename=None): | 
 |     if not cmd: | 
 |       cmd = self.clang | 
 |     if not args: | 
 |       args = self.clang_args | 
 |     if not filename: | 
 |       filename = self.file_to_reduce | 
 |  | 
 |     return [cmd] + args + [filename] | 
 |  | 
 |   def read_clang_args(self, crash_script, filename): | 
 |     print("\nReading arguments from crash script...") | 
 |     with open(crash_script) as f: | 
 |       # Assume clang call is the first non comment line. | 
 |       cmd = [] | 
 |       for line in f: | 
 |         if not line.lstrip().startswith('#'): | 
 |           cmd = shlex.split(line) | 
 |           break | 
 |     if not cmd: | 
 |       sys.exit("Could not find command in the crash script."); | 
 |  | 
 |     # Remove clang and filename from the command | 
 |     # Assume the last occurrence of the filename is the clang input file | 
 |     del cmd[0] | 
 |     for i in range(len(cmd)-1, -1, -1): | 
 |       if cmd[i] == filename: | 
 |         del cmd[i] | 
 |         break | 
 |     self.clang_args = cmd | 
 |     verbose_print("Clang arguments:", quote_cmd(self.clang_args)) | 
 |  | 
 |   def read_expected_output(self): | 
 |     print("\nGetting expected crash output...") | 
 |     p = subprocess.Popen(self.get_crash_cmd(), | 
 |                          stdout=subprocess.PIPE, | 
 |                          stderr=subprocess.STDOUT) | 
 |     crash_output, _ = p.communicate() | 
 |     result = [] | 
 |  | 
 |     # Remove color codes | 
 |     ansi_escape = r'\x1b\[[0-?]*m' | 
 |     crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8')) | 
 |  | 
 |     # Look for specific error messages | 
 |     regexes = [r"Assertion .+ failed", # Linux assert() | 
 |                r"Assertion failed: .+,", # FreeBSD/Mac assert() | 
 |                r"fatal error: error in backend: .+", | 
 |                r"LLVM ERROR: .+", | 
 |                r"UNREACHABLE executed at .+?!", | 
 |                r"LLVM IR generation of declaration '.+'", | 
 |                r"Generating code for declaration '.+'", | 
 |                r"\*\*\* Bad machine code: .+ \*\*\*", | 
 |                r"ERROR: .*Sanitizer: [^ ]+ "] | 
 |     for msg_re in regexes: | 
 |       match = re.search(msg_re, crash_output) | 
 |       if match: | 
 |         msg = match.group(0) | 
 |         result = [msg] | 
 |         print("Found message:", msg) | 
 |         break | 
 |  | 
 |     # If no message was found, use the top five stack trace functions, | 
 |     # ignoring some common functions | 
 |     # Five is a somewhat arbitrary number; the goal is to get a small number | 
 |     # of identifying functions with some leeway for common functions | 
 |     if not result: | 
 |       self.needs_stack_trace = True | 
 |       stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\(' | 
 |       filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal", | 
 |                  "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"] | 
 |       def skip_function(func_name): | 
 |         return any(name in func_name for name in filters) | 
 |  | 
 |       matches = re.findall(stacktrace_re, crash_output) | 
 |       result = [x for x in matches if x and not skip_function(x)][:5] | 
 |       for msg in result: | 
 |         print("Found stack trace function:", msg) | 
 |  | 
 |     if not result: | 
 |       print("ERROR: no crash was found") | 
 |       print("The crash output was:\n========\n%s========" % crash_output) | 
 |       sys.exit(1) | 
 |  | 
 |     self.expected_output = result | 
 |  | 
 |   def check_expected_output(self, args=None, filename=None): | 
 |     if not args: | 
 |       args = self.clang_args | 
 |     if not filename: | 
 |       filename = self.file_to_reduce | 
 |  | 
 |     p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename), | 
 |                          stdout=subprocess.PIPE, | 
 |                          stderr=subprocess.STDOUT) | 
 |     crash_output, _ = p.communicate() | 
 |     return all(msg in crash_output.decode('utf-8') for msg in | 
 |                self.expected_output) | 
 |  | 
 |   def write_interestingness_test(self): | 
 |     print("\nCreating the interestingness test...") | 
 |  | 
 |     # Disable symbolization if it's not required to avoid slow symbolization. | 
 |     disable_symbolization = '' | 
 |     if not self.needs_stack_trace: | 
 |       disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1' | 
 |  | 
 |     output = """#!/bin/bash | 
 | %s | 
 | if %s >& t.log ; then | 
 |   exit 1 | 
 | fi | 
 | """ % (disable_symbolization, quote_cmd(self.get_crash_cmd())) | 
 |  | 
 |     for msg in self.expected_output: | 
 |       output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg) | 
 |  | 
 |     write_to_script(output, self.testfile) | 
 |     self.check_interestingness() | 
 |  | 
 |   def check_interestingness(self): | 
 |     testfile = os.path.abspath(self.testfile) | 
 |  | 
 |     # Check that the test considers the original file interesting | 
 |     with open(os.devnull, 'w') as devnull: | 
 |       returncode = subprocess.call(testfile, stdout=devnull) | 
 |     if returncode: | 
 |       sys.exit("The interestingness test does not pass for the original file.") | 
 |  | 
 |     # Check that an empty file is not interesting | 
 |     # Instead of modifying the filename in the test file, just run the command | 
 |     with tempfile.NamedTemporaryFile() as empty_file: | 
 |       is_interesting = self.check_expected_output(filename=empty_file.name) | 
 |     if is_interesting: | 
 |       sys.exit("The interestingness test passes for an empty file.") | 
 |  | 
 |   def clang_preprocess(self): | 
 |     print("\nTrying to preprocess the source file...") | 
 |     with tempfile.NamedTemporaryFile() as tmpfile: | 
 |       cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name] | 
 |       cmd_preprocess_no_lines = cmd_preprocess + ['-P'] | 
 |       try: | 
 |         subprocess.check_call(cmd_preprocess_no_lines) | 
 |         if self.check_expected_output(filename=tmpfile.name): | 
 |           print("Successfully preprocessed with line markers removed") | 
 |           shutil.copy(tmpfile.name, self.file_to_reduce) | 
 |         else: | 
 |           subprocess.check_call(cmd_preprocess) | 
 |           if self.check_expected_output(filename=tmpfile.name): | 
 |             print("Successfully preprocessed without removing line markers") | 
 |             shutil.copy(tmpfile.name, self.file_to_reduce) | 
 |           else: | 
 |             print("No longer crashes after preprocessing -- " | 
 |                   "using original source") | 
 |       except subprocess.CalledProcessError: | 
 |         print("Preprocessing failed") | 
 |  | 
 |   @staticmethod | 
 |   def filter_args(args, opts_equal=[], opts_startswith=[], | 
 |                   opts_one_arg_startswith=[]): | 
 |     result = [] | 
 |     skip_next = False | 
 |     for arg in args: | 
 |       if skip_next: | 
 |         skip_next = False | 
 |         continue | 
 |       if any(arg == a for a in opts_equal): | 
 |         continue | 
 |       if any(arg.startswith(a) for a in opts_startswith): | 
 |         continue | 
 |       if any(arg.startswith(a) for a in opts_one_arg_startswith): | 
 |         skip_next = True | 
 |         continue | 
 |       result.append(arg) | 
 |     return result | 
 |  | 
 |   def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs): | 
 |     new_args = self.filter_args(args, **kwargs) | 
 |  | 
 |     if extra_arg: | 
 |       if extra_arg in new_args: | 
 |         new_args.remove(extra_arg) | 
 |       new_args.append(extra_arg) | 
 |  | 
 |     if (new_args != args and | 
 |         self.check_expected_output(args=new_args)): | 
 |       if msg: | 
 |         verbose_print(msg) | 
 |       return new_args | 
 |     return args | 
 |  | 
 |   def try_remove_arg_by_index(self, args, index): | 
 |     new_args = args[:index] + args[index+1:] | 
 |     removed_arg = args[index] | 
 |  | 
 |     # Heuristic for grouping arguments: | 
 |     # remove next argument if it doesn't start with "-" | 
 |     if index < len(new_args) and not new_args[index].startswith('-'): | 
 |       del new_args[index] | 
 |       removed_arg += ' ' + args[index+1] | 
 |  | 
 |     if self.check_expected_output(args=new_args): | 
 |       verbose_print("Removed", removed_arg) | 
 |       return new_args, index | 
 |     return args, index+1 | 
 |  | 
 |   def simplify_clang_args(self): | 
 |     """Simplify clang arguments before running C-Reduce to reduce the time the | 
 |     interestingness test takes to run. | 
 |     """ | 
 |     print("\nSimplifying the clang command...") | 
 |  | 
 |     # Remove some clang arguments to speed up the interestingness test | 
 |     new_args = self.clang_args | 
 |     new_args = self.try_remove_args(new_args, | 
 |                                     msg="Removed debug info options", | 
 |                                     opts_startswith=["-gcodeview", | 
 |                                                      "-debug-info-kind=", | 
 |                                                      "-debugger-tuning="]) | 
 |  | 
 |     new_args = self.try_remove_args(new_args, | 
 |                                     msg="Removed --show-includes", | 
 |                                     opts_startswith=["--show-includes"]) | 
 |     # Not suppressing warnings (-w) sometimes prevents the crash from occurring | 
 |     # after preprocessing | 
 |     new_args = self.try_remove_args(new_args, | 
 |                                     msg="Replaced -W options with -w", | 
 |                                     extra_arg='-w', | 
 |                                     opts_startswith=["-W"]) | 
 |     new_args = self.try_remove_args(new_args, | 
 |                                     msg="Replaced optimization level with -O0", | 
 |                                     extra_arg="-O0", | 
 |                                     opts_startswith=["-O"]) | 
 |  | 
 |     # Try to remove compilation steps | 
 |     new_args = self.try_remove_args(new_args, msg="Added -emit-llvm", | 
 |                                     extra_arg="-emit-llvm") | 
 |     new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only", | 
 |                                     extra_arg="-fsyntax-only") | 
 |  | 
 |     # Try to make implicit int an error for more sensible test output | 
 |     new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int", | 
 |                                     opts_equal=["-w"], | 
 |                                     extra_arg="-Werror=implicit-int") | 
 |  | 
 |     self.clang_args = new_args | 
 |     verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd())) | 
 |  | 
 |   def reduce_clang_args(self): | 
 |     """Minimize the clang arguments after running C-Reduce, to get the smallest | 
 |     command that reproduces the crash on the reduced file. | 
 |     """ | 
 |     print("\nReducing the clang crash command...") | 
 |  | 
 |     new_args = self.clang_args | 
 |  | 
 |     # Remove some often occurring args | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -D options", | 
 |                                     opts_startswith=["-D"]) | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -D options", | 
 |                                     opts_one_arg_startswith=["-D"]) | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -I options", | 
 |                                     opts_startswith=["-I"]) | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -I options", | 
 |                                     opts_one_arg_startswith=["-I"]) | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -W options", | 
 |                                     opts_startswith=["-W"]) | 
 |  | 
 |     # Remove other cases that aren't covered by the heuristic | 
 |     new_args = self.try_remove_args(new_args, msg="Removed -mllvm", | 
 |                                     opts_one_arg_startswith=["-mllvm"]) | 
 |  | 
 |     i = 0 | 
 |     while i < len(new_args): | 
 |       new_args, i = self.try_remove_arg_by_index(new_args, i) | 
 |  | 
 |     self.clang_args = new_args | 
 |  | 
 |     reduced_cmd = quote_cmd(self.get_crash_cmd()) | 
 |     write_to_script(reduced_cmd, self.crash_script) | 
 |     print("Reduced command:", reduced_cmd) | 
 |  | 
 |   def run_creduce(self): | 
 |     print("\nRunning C-Reduce...") | 
 |     try: | 
 |       p = subprocess.Popen([creduce_cmd] + self.creduce_flags + | 
 |                            [self.testfile, self.file_to_reduce]) | 
 |       p.communicate() | 
 |     except KeyboardInterrupt: | 
 |       # Hack to kill C-Reduce because it jumps into its own pgid | 
 |       print('\n\nctrl-c detected, killed creduce') | 
 |       p.kill() | 
 |  | 
 | def main(): | 
 |   global verbose | 
 |   global creduce_cmd | 
 |   global clang_cmd | 
 |  | 
 |   parser = ArgumentParser(description=__doc__, | 
 |                           formatter_class=RawTextHelpFormatter) | 
 |   parser.add_argument('crash_script', type=str, nargs=1, | 
 |                       help="Name of the script that generates the crash.") | 
 |   parser.add_argument('file_to_reduce', type=str, nargs=1, | 
 |                       help="Name of the file to be reduced.") | 
 |   parser.add_argument('--llvm-bin', dest='llvm_bin', type=str, | 
 |                       help="Path to the LLVM bin directory.") | 
 |   parser.add_argument('--clang', dest='clang', type=str, | 
 |                       help="The path to the `clang` executable. " | 
 |                       "By default uses the llvm-bin directory.") | 
 |   parser.add_argument('--creduce', dest='creduce', type=str, | 
 |                       help="The path to the `creduce` executable. " | 
 |                       "Required if `creduce` is not in PATH environment.") | 
 |   parser.add_argument('--n', dest='core_number', type=int,  | 
 |                       default=max(4, multiprocessing.cpu_count() / 2), | 
 |                       help="Number of cores to use.") | 
 |   parser.add_argument('-v', '--verbose', action='store_true') | 
 |   args = parser.parse_args() | 
 |  | 
 |   verbose = args.verbose | 
 |   llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None | 
 |   creduce_cmd = check_cmd('creduce', None, args.creduce) | 
 |   clang_cmd = check_cmd('clang', llvm_bin, args.clang) | 
 |   core_number = args.core_number | 
 |  | 
 |   crash_script = check_file(args.crash_script[0]) | 
 |   file_to_reduce = check_file(args.file_to_reduce[0]) | 
 |  | 
 |   r = Reduce(crash_script, file_to_reduce, core_number) | 
 |  | 
 |   r.simplify_clang_args() | 
 |   r.write_interestingness_test() | 
 |   r.clang_preprocess() | 
 |   r.run_creduce() | 
 |   r.reduce_clang_args() | 
 |  | 
 | if __name__ == '__main__': | 
 |   main() |