# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with the correct options needed to
# demangle symbols in the object file, and collect statistics about failed /
# crashed demanglings.  Useful for stress testing the demangler against a
# large corpus of inputs.
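#
# A typical invocation looks something like this (the paths shown are
# illustrative, not defaults baked into the tool):
#
#   python demangle_tree.py --objdump=/path/to/llvm-objdump path/to/build/dir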

from __future__ import print_function

import argparse
import functools
import os
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None


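# A symbol table line printed by `llvm-objdump -t -demangle` embeds the
# Microsoft-mangled name followed by its demangling in parentheses, roughly
# like this (the symbol shown is only an illustration):
#
#   ... ?foo@@YAXXZ (void __cdecl foo(void))
#
# parse_line() extracts the (mangled, demangled) pair from such a line and
# returns (None, None) for anything that doesn't match this shape.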
def parse_line(line):
    question = line.find("?")
    if question == -1:
        return None, None

    open_paren = line.find("(", question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(")", open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question:open_paren]
    demangled = line[open_paren + 1 : close_paren]
    return mangled.strip(), demangled.strip()


class Result(object):
    """Statistics for one object file, one directory, or a whole run."""

    def __init__(self):
        self.crashed = []  # files on which objdump itself failed
        self.file = None  # the file or directory these stats describe
        self.nsymbols = 0  # number of mangled symbols encountered
        self.errors = set()  # mangled names which failed to demangle
        self.nfiles = 0  # number of object files processed


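# Shared bookkeeping for the chunked map below: the queue of pending
# (directory, [object files]) work items, a running cumulative Result, and
# the partially-accumulated Result for a directory whose files were split
# across a chunk boundary.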
class MapContext(object):
    def __init__(self):
        self.rincomplete = None  # Result for a directory split across chunks
        self.rcumulative = Result()  # running totals for the whole run
        self.pending_objs = []  # queue of (directory, [object files]) pairs
        self.npending = 0  # total object files across pending_objs


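# Dump one object file's symbol table with demangling enabled, counting the
# mangled symbols seen and recording any whose demangling comes back flagged
# as invalid.  A nonzero exit from objdump is recorded as a crash of that file.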
def process_file(path, objdump):
    r = Result()
    r.file = path

    popen_args = [objdump, "-t", "-demangle", path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Object files can contain arbitrary bytes in symbol names; don't let a
    # stray byte abort the whole run.
    output = stdout.decode("utf-8", errors="replace")

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    print(
        "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
            result.nfiles,
            len(result.crashed),
            len(result.errors),
            result.nsymbols,
            directory,
        )
    )


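# Pull up to `chunk_size` object files off the front of the pending queue
# (possibly spanning several directories), demangle them in parallel across
# the pool, then fold the per-file results back into per-directory results.
# A directory whose files didn't all fit in this chunk is carried forward in
# context.rincomplete rather than being reported early.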
def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible for a single item to be incomplete, and it has to be
    # the last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for d in ordered_dirs:
        dir_result = dir_results[d]
        add_results(context.rcumulative, dir_result)
        print_result_row(d, dir_result)


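# Keep issuing full chunks while enough files are queued; the final partial
# chunk is flushed by the caller once the directory walk finishes.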
def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


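# Walk the tree rooted at args.dir, batching object files by directory and
# draining them through the pool `pool_size` files at a time, then print
# per-directory rows followed by a cumulative summary.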
def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(",")
    extensions = [x if x[0] == "." else "." + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                _, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until fewer than
            # `pool_size` tasks remain.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Flush whatever is left over after the walk.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # Guard the denominators so an empty tree doesn't divide by zero.
        nsymbols = max(total.nsymbols, 1)
        spct = float(nsuccess) / float(nsymbols)
        fpct = float(nfailed) / float(nsymbols)
        cpct = float(ncrashed) / float(max(nfiles, 1))
        print("Processed {0} object files.".format(nfiles))
        print(
            "{0}/{1} symbols successfully demangled ({2:.4%})".format(
                nsuccess, total.nsymbols, spct
            )
        )
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        traceback.print_exc()

    pool.close()
    pool.join()


if __name__ == "__main__":
    def_obj = "obj" if sys.platform == "win32" else "o"

    parser = argparse.ArgumentParser(
        description="Demangle all symbols in a tree of object files, looking for failures."
    )
    parser.add_argument(
        "dir", type=str, help="the root directory at which to start crawling"
    )
    parser.add_argument(
        "--objdump",
        type=str,
        default="llvm-objdump",  # resolved via PATH, as the help text promises
        help="path to llvm-objdump.  If not specified "
        + "the tool is located as if by `which llvm-objdump`.",
    )
    parser.add_argument(
        "--extensions",
        type=str,
        default=def_obj,
        help="comma separated list of extensions to demangle (e.g. `o,obj`).  "
        + "By default this will be `obj` on Windows and `o` otherwise.",
    )

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()