blob: 1312a717970322158959f68e000c59f3dda35706 [file] [log] [blame]
//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
//! linking errors.
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use object::read::archive::{ArchiveFile, ArchiveMember};
use object::{
File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection,
};
use serde_json::Value;
/// Name fragments identifying the build artifacts to inspect: an artifact reported by Cargo is
/// only checked if its file name contains one of these strings.
const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
/// File extensions an artifact must have in order to be considered for checking. The `None` entry
/// admits files that have no extension at all.
const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
/// Usage text, printed when the command line cannot be parsed and appended to the panic message
/// when `--target` is passed among the Cargo arguments.
const USAGE: &str = "Usage:
symbol-check build-and-check [TARGET] -- CARGO_BUILD_ARGS ...
Cargo will get invoked with `CARGO_BUILD_ARGS` and the specified target. All output
`compiler_builtins*.rlib` files will be checked.
If TARGET is not specified, the host target is used.
";
fn main() {
// Create a `&str` vec so we can match on it.
let args = std::env::args().collect::<Vec<_>>();
let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
match &args_ref[1..] {
["build-and-check", target, "--", args @ ..] if !args.is_empty() => {
check_cargo_args(args);
run_build_and_check(target, args);
}
["build-and-check", "--", args @ ..] if !args.is_empty() => {
check_cargo_args(args);
run_build_and_check(&host_target(), args);
}
_ => {
println!("{USAGE}");
std::process::exit(1);
}
}
}
/// Make sure `--target` isn't passed to avoid confusion (since it should be provided only once,
/// positionally).
///
/// Only the `--target` flag itself is rejected, either as a standalone argument or in the
/// `--target=<triple>` form. Unrelated flags that merely share the prefix, such as
/// `--target-dir`, are accepted.
///
/// # Panics
///
/// Panics (including the usage text in the message) if any argument is `--target` or starts
/// with `--target=`.
fn check_cargo_args(args: &[&str]) {
    for arg in args {
        let is_target_flag = *arg == "--target" || arg.starts_with("--target=");
        assert!(
            !is_target_flag,
            "target must be passed positionally. {USAGE}"
        );
    }
}
/// Build for `target` with the extra Cargo `args`, then run every symbol check on each artifact
/// that the build reported.
fn run_build_and_check(target: &str, args: &[&str]) {
    exec_cargo_with_args(target, args)
        .into_iter()
        .for_each(|lib_path| {
            println!("Checking {}", lib_path.display());
            let archive = Archive::from_path(&lib_path);
            verify_no_duplicates(&archive);
            verify_core_symbols(&archive);
        });
}
/// Ask `rustc` for the host target triple.
///
/// Runs `rustc --version --verbose` and returns the text following `host: ` on the first line
/// that starts with that prefix. Panics if `rustc` cannot be run, exits unsuccessfully, prints
/// non-UTF-8 output, or prints no `host: ` line.
fn host_target() -> String {
    let mut rustc = Command::new("rustc");
    rustc.args(["--version", "--verbose"]);

    let output = rustc.output().unwrap();
    assert!(output.status.success());

    let text = String::from_utf8(output.stdout).unwrap();
    let host = text.lines().find_map(|line| line.strip_prefix("host: "));
    String::from(host.unwrap())
}
/// Run `cargo build` with the provided additional arguments, collecting the list of created
/// libraries.
///
/// Cargo is invoked as `cargo build --target <target>
/// --message-format=json-diagnostic-rendered-ansi` followed by `args`. Its stdout is read line
/// by line as JSON messages:
///
/// * `compiler-message`, `build-finished` and `build-script-executed` messages are echoed to
///   stdout for the user.
/// * `compiler-artifact` messages are scanned for files whose extension is in
///   `CHECK_EXTENSIONS` and whose file name contains one of `CHECK_LIBRARIES`.
///
/// Returns the matching artifact paths in the order Cargo reported them.
///
/// # Panics
///
/// Panics if Cargo cannot be launched, if a stdout line cannot be read or is not valid JSON, if
/// an artifact message is malformed, if Cargo exits unsuccessfully, or if no matching artifact
/// was produced.
fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec<PathBuf> {
    let mut cmd = Command::new("cargo");
    cmd.args([
        "build",
        "--target",
        target,
        "--message-format=json-diagnostic-rendered-ansi",
    ])
    .args(args)
    .stdout(Stdio::piped());

    println!("running: {cmd:?}");
    let mut child = cmd.spawn().expect("failed to launch Cargo");

    // Stdout was configured as piped above, so the handle is present.
    let stdout = child.stdout.take().unwrap();
    let reader = BufReader::new(stdout);
    let mut check_files = Vec::new();

    for line in reader.lines() {
        let line = line.expect("failed to read line");
        let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
        let reason = &j["reason"];

        // Forward output that is meant to be user-facing
        if reason == "compiler-message" {
            println!("{}", j["message"]["rendered"].as_str().unwrap());
        } else if reason == "build-finished" {
            println!("build finished. success: {}", j["success"]);
        } else if reason == "build-script-executed" {
            let pretty = serde_json::to_string_pretty(&j).unwrap();
            println!("build script output: {pretty}",);
        }

        // Only interested in the artifact list now
        if reason != "compiler-artifact" {
            continue;
        }

        // Find rlibs in the created file list that match our expected library names and
        // extensions.
        for fpath in j["filenames"].as_array().expect("filenames not an array") {
            let path = fpath.as_str().expect("file name not a string");
            let path = PathBuf::from(path);

            if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
                let fname = path.file_name().unwrap().to_str().unwrap();

                if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
                    check_files.push(path);
                }
            }
        }
    }

    // Reading to EOF above means Cargo has closed stdout; now make sure it also succeeded.
    assert!(child.wait().expect("failed to wait on Cargo").success());
    assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");

    println!("Collected the following rlibs to check: {check_files:#?}");
    check_files
}
/// Information collected from `object`, for convenience.
///
/// This is an owned snapshot of a single symbol: every field except `object` holds the result
/// of the same-named accessor on `object`'s `Symbol`, captured by `SymInfo::new`. Most fields are
/// never read directly and exist only so they show up in `Debug` output.
#[expect(unused)] // only for printing
#[derive(Clone, Debug)]
struct SymInfo {
/// Symbol name as reported by `object`.
name: String,
/// Result of `Symbol::kind()`.
kind: SymbolKind,
/// Result of `Symbol::scope()`.
scope: SymbolScope,
/// Result of `Symbol::section()`.
section: SymbolSection,
/// Result of `Symbol::is_undefined()`.
is_undefined: bool,
/// Result of `Symbol::is_global()`.
is_global: bool,
/// Result of `Symbol::is_local()`.
is_local: bool,
/// Result of `Symbol::is_weak()`.
is_weak: bool,
/// Result of `Symbol::is_common()`.
is_common: bool,
/// Result of `Symbol::address()`.
address: u64,
/// Name of the archive member the symbol was found in (lossily converted to UTF-8).
object: String,
}
impl SymInfo {
    /// Snapshot `sym`, which was found in the archive member `member`, into an owned record.
    ///
    /// Panics if the symbol's name cannot be read.
    fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
        let name = sym.name().expect("missing name").to_owned();
        // Member names are raw bytes; convert lossily since this is only used for display.
        let object = String::from_utf8_lossy(member.name()).into_owned();

        Self {
            name,
            kind: sym.kind(),
            scope: sym.scope(),
            section: sym.section(),
            is_undefined: sym.is_undefined(),
            is_global: sym.is_global(),
            is_local: sym.is_local(),
            is_weak: sym.is_weak(),
            is_common: sym.is_common(),
            address: sym.address(),
            object,
        }
    }
}
/// Check that no global symbol is defined by more than one object file in the archive.
///
/// Weak definitions that appear in several places are reported as well. They would not cause a
/// linker error, but avoiding them keeps the binary more straightforward and saves some
/// distribution size.
///
/// Panics if no candidate symbol is found at all, or if any duplicate is found.
fn verify_no_duplicates(archive: &Archive) {
    // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
    // relocations. These are allowed to have repeated definitions.
    const WINDOWS_DUP_PREFIXES: [&str; 4] = ["__real@", "__xmm@", "??_C@_", ".refptr"];

    let mut first_seen = BTreeMap::<String, SymInfo>::new();
    let mut dups = Vec::new();
    let mut found_any = false;

    archive.for_each_symbol(|symbol, member| {
        // Only defined globals are of interest.
        let defined_global = symbol.is_global() && !symbol.is_undefined();
        if !defined_global {
            return;
        }

        let sym = SymInfo::new(&symbol, member);

        // x86-32 includes multiple copies of thunk symbols, so those are exempt as well.
        let exempt = sym.name.starts_with("__x86.get_pc_thunk")
            || WINDOWS_DUP_PREFIXES
                .iter()
                .any(|pfx| sym.name.starts_with(pfx));
        if exempt {
            return;
        }

        if let Some(existing) = first_seen.get(&sym.name).cloned() {
            // Report both the new definition and the one recorded first.
            dups.push(sym);
            dups.push(existing);
        } else {
            first_seen.insert(sym.name.clone(), sym);
        }

        found_any = true;
    });

    assert!(found_any, "no symbols found");

    if !dups.is_empty() {
        dups.sort_unstable_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
        panic!("found duplicate symbols: {dups:#?}");
    }

    println!(" success: no duplicate symbols found");
}
/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
fn verify_core_symbols(archive: &Archive) {
let mut defined = BTreeSet::new();
let mut undefined = Vec::new();
let mut has_symbols = false;
archive.for_each_symbol(|symbol, member| {
has_symbols = true;
// Find only symbols from `core`
if !symbol.name().unwrap().contains("_ZN4core") {
return;
}
let sym = SymInfo::new(&symbol, member);
if sym.is_undefined {
undefined.push(sym);
} else {
defined.insert(sym.name);
}
});
assert!(has_symbols, "no symbols found");
// Discard any symbols that are defined somewhere in the archive
undefined.retain(|sym| !defined.contains(&sym.name));
if !undefined.is_empty() {
undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
panic!("found undefined symbols from core: {undefined:#?}");
}
println!(" success: no undefined references to core found");
}
/// Owns the raw bytes of a file so that the views created by `object` can borrow from them.
struct Archive {
    data: Vec<u8>,
}

impl Archive {
    /// Load the whole file at `path` into memory. Panics if it cannot be read.
    fn from_path(path: &Path) -> Self {
        let data = fs::read(path).expect("reading file failed");
        Self { data }
    }

    /// Parse the owned bytes as an archive. Panics if parsing fails.
    fn file(&self) -> ArchiveFile<'_> {
        let bytes: &[u8] = &self.data;
        ArchiveFile::parse(bytes).expect("archive parse failed")
    }

    /// Call `f` with every member of the archive, parsed as an object file, together with the
    /// member it came from. Panics if a member cannot be accessed or parsed.
    fn for_each_object(&self, mut f: impl FnMut(ObjFile, &ArchiveMember)) {
        let bytes: &[u8] = &self.data;
        let archive = self.file();

        for entry in archive.members() {
            let member = entry.expect("failed to access member");
            let raw = member.data(bytes).expect("failed to access object");
            let obj = ObjFile::parse(raw).expect("failed to parse object");
            f(obj, &member);
        }
    }

    /// Call `f` with every symbol of every object file in the archive, together with the member
    /// that contains it.
    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) {
        self.for_each_object(|obj, member| {
            for sym in obj.symbols() {
                f(sym, member);
            }
        });
    }
}