From 8a609e704ed3d430ad4920863e76e930f0cb6268 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 6 Jun 2026 23:11:35 -0700 Subject: [PATCH] feat(bloat): per-symbol forward refs + bidirectional graph + dual-rank sub-table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The motivating scenario (asked by FastLED user): When AI sees a Top-25 symbol like `ClocklessIdf5` it should be able to tell what `ClocklessIdf5` actually calls — not what its TU siblings call. cref-inversion is TU-level and mis-attributes sibling callees (e.g. `fl::sort` showed up as a callee of `ClocklessIdf5` when the cref-inverted view ran). Approach: per-symbol forward references via `objdump -d` on the linked ELF. Same source `cargo install` / `cargo bloat` use, no extra build-graph dependency on `.o` files (which fbuild deletes post-link), and the analyzer falls back gracefully when objdump is absent. Changes ------- - New `fbuild_core::symbol_analysis::callgraph` module: textual `objdump -d` disassembly parser. Handles ARM Thumb (`bl`), Xtensa (`call4`/`call8`), RISC-V (`jal`/`tail`), and AVR (`call`/`rcall`/`jmp`). Filters ARM `$a`/`$t`/`$d` mapping markers, PLT shims, hex-only annotations, self-refs. 14 unit tests cover every flavour + edge cases. - `FineGrainedSymbol` grows `references_to: Vec` (mangled callee names). `serde(default)` so older `report.json` files keep parsing. - `BackrefGraph` gains a `Direction` enum (`Backward` default, `Forward`, `Bidirectional`) and a per-edge `EdgeDirection` so the `.dot` renderer styles backward (solid) vs forward (dashed, blue, labelled `calls`) distinctly. Forward walking is per-symbol via `references_to`; the existing backward walk over `referenced_by` is unchanged so every pre-#471 caller sees zero behaviour change. - Markdown report's per-symbol section now uses the bidirectional graph (callers ← root → callees) and embeds a "Top callees (dual ranking)" sub-table next to the existing back-ref table. Side-by- side columns: heaviest callees (by flash bytes) + most-shared callees (by `referenced_by` length), with an `(… and N more)` bucket row matching the user's ask. - `AnalyzeConfig` gains `objdump_path: Option<&Path>`. The CLI's `ToolPaths` resolver reads `objdump_path` from `build_info.json` (already populated by #428) or derives it from `nm` via the GCC cross-tool naming convention. When neither resolves, the analyzer skips the objdump pass and `references_to` stays empty — backref graphs are unaffected. - Section heading renamed from "Top N back-reference graphs" to "Top N symbol graphs" to reflect the bidirectional content. Implementation caveats called out in code comments: - **TU-level for cref**, **per-symbol for objdump.** Forward edges are the per-symbol view the user wanted; backward edges remain TU-granular because `--cref` itself is. - **LTO hides edges.** Inlined helpers don't appear in disassembly, so listed edges are real but missing edges are common. - **Indirect calls dropped.** `blx r3` / `jalr` on a register have no symbol annotation; the parser silently skips them. 23 new tests across the 3 crates (disassembly parser × 14, forward graph traversal × 6, dual-rank sub-table × 1, sibling-exclusion regression × 1, bidirectional dot styling × 1). Closes the AI-misattributing-`fl::sort` reproducer. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/fbuild-build/src/symbol_analyzer.rs | 328 ++++++- crates/fbuild-cli/src/cli/graph_cmd.rs | 9 +- crates/fbuild-cli/src/cli/symbols_cmd.rs | 81 +- .../src/symbol_analysis/callgraph.rs | 427 +++++++++ .../fbuild-core/src/symbol_analysis/graph.rs | 834 +++++++++++++++++- crates/fbuild-core/src/symbol_analysis/mod.rs | 14 + .../fbuild-core/src/symbol_analysis/tests.rs | 1 + 7 files changed, 1634 insertions(+), 60 deletions(-) create mode 100644 crates/fbuild-core/src/symbol_analysis/callgraph.rs diff --git a/crates/fbuild-build/src/symbol_analyzer.rs b/crates/fbuild-build/src/symbol_analyzer.rs index 40a9d0bb..2860094f 100644 --- a/crates/fbuild-build/src/symbol_analyzer.rs +++ b/crates/fbuild-build/src/symbol_analyzer.rs @@ -13,6 +13,7 @@ use std::path::{Path, PathBuf}; use std::collections::BTreeMap; use fbuild_core::subprocess::run_command_with_stdin; +use fbuild_core::symbol_analysis::graph::{rank_callees_dual, Direction}; use fbuild_core::symbol_analysis::{ build_fine_grained_map_with_synth, collect_map_derived_owners, parse_cref_table, parse_linker_map, parse_nm_output, sanitize_filename, BackrefGraph, FineGrainedSymbolMap, @@ -190,6 +191,12 @@ pub struct AnalyzeConfig<'a> { pub map_path: Option<&'a Path>, pub nm_path: &'a Path, pub cppfilt_path: Option<&'a Path>, + /// Optional objdump used to populate per-symbol forward refs + /// (`references_to`). When absent, the analyzer skips the + /// forward-call extraction and leaves `references_to` empty — + /// existing backref-only consumers don't see a behaviour change. + /// Wire from `build_info.json::objdump_path` (#428). + pub objdump_path: Option<&'a Path>, } /// Run nm + c++filt + map-file parse and return the fully-attributed @@ -298,9 +305,82 @@ pub fn analyze_elf(cfg: AnalyzeConfig<'_>) -> Result { ); } } + + // #471: per-symbol forward edges from `objdump -d`. When the + // analyzer was wired with an objdump path (typically from + // build_info.json::objdump_path), run it once on the linked ELF + // and pull `` annotations out of the disassembly. The + // resulting per-symbol callee map populates each row's + // `references_to` field, which the bidirectional graph + the + // dual-ranked callees sub-table consume. Failures are non-fatal + // — we'd rather ship a report without forward edges than fail + // the whole symbol-analysis post-link step. + if let Some(objdump_path) = cfg.objdump_path { + match run_objdump_and_attribute(objdump_path, cfg.elf_path, &mut map) { + Ok(edge_count) => { + tracing::info!( + "objdump: extracted {edge_count} forward edges from {}", + cfg.elf_path.display() + ); + } + Err(e) => { + tracing::warn!( + "objdump forward-edge extraction failed for {} ({e}); \ + references_to will be empty", + cfg.elf_path.display() + ); + } + } + } + Ok(map) } +/// Run `objdump -d --no-show-raw-insn ` and populate +/// `references_to` on every symbol in `map` that the parser found +/// outgoing call edges for. Returns the total edge count surfaced +/// (across all symbols) so the caller can log a one-liner. +fn run_objdump_and_attribute( + objdump_path: &Path, + elf_path: &Path, + map: &mut FineGrainedSymbolMap, +) -> Result { + use fbuild_core::subprocess::run_command; + use fbuild_core::symbol_analysis::callgraph::parse_disasm; + + let objdump_s = objdump_path.to_string_lossy().to_string(); + let elf_s = elf_path.to_string_lossy().to_string(); + let args = [ + objdump_s.as_str(), + "-d", + "--no-show-raw-insn", + elf_s.as_str(), + ]; + let result = run_command(&args, None, None, None)?; + if !result.success() { + return Err(FbuildError::BuildFailed(format!( + "objdump exit={}: {}", + result.exit_code, result.stderr + ))); + } + + let edges = parse_disasm(&result.stdout); + let mut total = 0usize; + for sym in &mut map.symbols { + if let Some(callees) = edges.get(&sym.mangled) { + sym.references_to = callees.clone(); + total += callees.len(); + } else if let Some(callees) = edges.get(&sym.demangled) { + // Some toolchains demangle in-place when emitting the + // disassembly, so the function header uses the demangled + // name. Match against either. + sym.references_to = callees.clone(); + total += callees.len(); + } + } + Ok(total) +} + /// Format a fine-grained per-symbol map as a human-readable text report /// suitable for streaming to a terminal or stashing in a log artifact. /// Shows the top `top_n` Flash symbols and top `top_n` Ram symbols with @@ -584,14 +664,25 @@ fn emit_backref_graph_section( } let _ = writeln!( out, - "## Top {limit} back-reference graphs\n\n\ - For each symbol below, the embedded `dot` block traces \"who pulled \ - this in?\" outward from the symbol's `referenced_by` data. \ - See fbuild #463 for the walker design (cross-archive termination + \ - fan-out cap + collapse rules)." + "## Top {limit} symbol graphs\n\n\ + For each symbol below: a bidirectional `dot` block (callers on \ + the back-edge side, callees on the forward-edge side), plus a \ + dual-ranked \"Top callees\" sub-table. The forward edges come \ + from per-symbol `references_to` (objdump-derived), so the AI \ + can tell what `ClocklessIdf5` actually calls vs. what its \ + sibling symbols call. See fbuild #463 (backref walker) + \ + #471 (forward edges)." ); let _ = writeln!(out); let index = TuIndex::build(map); + // Per-symbol section: use a bidirectional config so the graph + // surfaces both axes when forward data is available. If + // references_to is empty on every row (older fbuild build with no + // objdump in the chain), the bidirectional walker degenerates + // gracefully into the same picture the backref-only config would + // have rendered. + let mut bidir_cfg = graph_opts.config.clone(); + bidir_cfg.direction = Direction::Bidirectional; for (rank, s) in syms.iter().take(limit).enumerate() { let archive = s.archive.as_deref().unwrap_or("(none)"); let object = s.object.as_deref().unwrap_or("-"); @@ -607,10 +698,21 @@ fn emit_backref_graph_section( let _ = writeln!(out, "- **Object**: `{object}`"); let _ = writeln!(out, "- **Section**: `{sect}`"); let _ = writeln!(out, "- **Referenced by**: {} TUs", s.referenced_by.len()); + let _ = writeln!( + out, + "- **References (calls)**: {} symbols", + s.references_to.len() + ); let _ = writeln!(out); - let graph = BackrefGraph::build_with_index(map, &index, &s.mangled, &graph_opts.config); + + emit_dual_callees_subtable(out, map, s); + + let graph = BackrefGraph::build_with_index(map, &index, &s.mangled, &bidir_cfg); let _ = writeln!(out, "
"); - let _ = writeln!(out, "Back-reference graph (Graphviz)"); + let _ = writeln!( + out, + "Bidirectional graph (callers ← root → callees, Graphviz)" + ); let _ = writeln!(out); let _ = writeln!(out, "```dot"); out.push_str(&graph.to_dot()); @@ -621,6 +723,80 @@ fn emit_backref_graph_section( } } +/// Emit the "Top callees" sub-table for one symbol: two side-by-side +/// rankings (by callee flash bytes, by how widely the callee is +/// shared) plus an `(… and N more)` other-bucket row when applicable. +/// This is the data the user asked for in #471 so an AI optimization +/// pass sees a symbol's actual heavy hitters, not just the symbol +/// itself. +fn emit_dual_callees_subtable( + out: &mut String, + map: &FineGrainedSymbolMap, + caller: &fbuild_core::symbol_analysis::FineGrainedSymbol, +) { + use std::fmt::Write as _; + if caller.references_to.is_empty() { + let _ = writeln!( + out, + "_No forward-call data for this symbol — either the \ + analyzer wasn't wired with an objdump or the symbol \ + contains no recognised call instructions (data / weak \ + ref / vtable-only dispatch)._" + ); + let _ = writeln!(out); + return; + } + let (by_size, by_pop, other) = rank_callees_dual(map, caller, 3); + let _ = writeln!(out, "#### Top callees (dual ranking)"); + let _ = writeln!(out); + let _ = writeln!( + out, + "| # | by callee size (B) | shared × | by callees \ + shared with (×) | size (B) |" + ); + let _ = writeln!(out, "|---:|---|---:|---|---:|"); + for i in 0..3 { + let size_cell = by_size + .get(i) + .map(|c| format!("`{}` — {} B", c.demangled.replace('|', "\\|"), c.size)) + .unwrap_or_else(|| "—".into()); + let size_shared = by_size + .get(i) + .map(|c| c.callers_count.to_string()) + .unwrap_or_else(|| "—".into()); + let pop_cell = by_pop + .get(i) + .map(|c| { + format!( + "`{}` — shared ×{}", + c.demangled.replace('|', "\\|"), + c.callers_count + ) + }) + .unwrap_or_else(|| "—".into()); + let pop_size = by_pop + .get(i) + .map(|c| c.size.to_string()) + .unwrap_or_else(|| "—".into()); + let _ = writeln!( + out, + "| {} | {} | {} | {} | {} |", + i + 1, + size_cell, + size_shared, + pop_cell, + pop_size + ); + } + if other > 0 { + let _ = writeln!( + out, + "| — | _(… and {other} more callees, see graph below)_ | | | |" + ); + } + let _ = writeln!(out); +} + /// Configuration for [`write_sidecar_dot_files`]. #[derive(Debug, Clone)] pub struct SidecarOptions { @@ -951,6 +1127,7 @@ mod tests { output_section: Some(".flash.text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }, FineGrainedSymbol { mangled: "_Z3barv".into(), @@ -964,6 +1141,7 @@ mod tests { output_section: Some(".dram0.bss".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }, ], sections: Vec::::new(), @@ -1002,6 +1180,7 @@ mod tests { output_section: None, source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }], sections: Vec::::new(), }; @@ -1055,6 +1234,7 @@ mod tests { object: "sha512.c.obj".into(), }, ], + references_to: Vec::new(), }], sections: Vec::::new(), }; @@ -1100,6 +1280,7 @@ mod tests { archive: Some("liblog.a".into()), object: "log_write.c.obj".into(), }], + references_to: Vec::new(), }], sections: Vec::::new(), }; @@ -1112,12 +1293,16 @@ mod tests { config: GraphConfig::default(), }, ); + // #471 renamed this from "back-reference graphs" to + // "symbol graphs" because the section now shows bidirectional + // graphs (callers ← root → callees), not just backref. assert!( - md.contains("## Top 1 back-reference graphs"), - "missing back-ref section header in:\n{md}" + md.contains("## Top 1 symbol graphs"), + "missing symbol-graph section header in:\n{md}" ); assert!( - md.contains("
") && md.contains("Back-reference graph (Graphviz)"), + md.contains("
") + && md.contains("Bidirectional graph (callers ← root → callees,"), "missing details summary in:\n{md}" ); assert!( @@ -1129,6 +1314,124 @@ mod tests { assert!(md.contains("
")); } + /// #471: the per-symbol section MUST embed a "Top callees" + /// dual-ranking sub-table when `references_to` is populated. + /// The sub-table shows the heaviest and the most-shared callees + /// side-by-side, so an AI optimisation pass can tell whether + /// to chase a fat callee or a popular hub. + #[test] + fn markdown_report_emits_dual_ranked_callees_subtable() { + use fbuild_core::symbol_analysis::{FineGrainedSymbol, FineGrainedSymbolMap, SectionBytes}; + let root = FineGrainedSymbol { + mangled: "_Z13ClocklessIdf5v".into(), + demangled: "ClocklessIdf5".into(), + address: 0x1000, + size: 10_000, + sym_type: 'T', + region: fbuild_core::MemoryRegion::Flash, + archive: Some("libFastLED.a".into()), + object: Some("clockless_idf5.cpp.o".into()), + output_section: Some(".flash.text".into()), + source: "nm".into(), + referenced_by: Vec::new(), + references_to: vec![ + "esp_log_write".into(), + "rmt_tx_start".into(), + "fl_lerp8".into(), + "small_helper_4".into(), + "small_helper_5".into(), + ], + }; + let callees = vec![ + FineGrainedSymbol { + mangled: "esp_log_write".into(), + demangled: "esp_log_write".into(), + address: 0x2000, + size: 2_000, + sym_type: 'T', + region: fbuild_core::MemoryRegion::Flash, + archive: Some("libesp.a".into()), + object: Some("log.c.o".into()), + output_section: Some(".flash.text".into()), + source: "nm".into(), + referenced_by: (0..10) + .map(|i| SymbolReference { + archive: None, + object: format!("caller_{i}.o"), + }) + .collect(), + references_to: Vec::new(), + }, + FineGrainedSymbol { + mangled: "rmt_tx_start".into(), + demangled: "rmt_tx_start".into(), + address: 0x3000, + size: 800, + sym_type: 'T', + region: fbuild_core::MemoryRegion::Flash, + archive: Some("libesp.a".into()), + object: Some("rmt.c.o".into()), + output_section: Some(".flash.text".into()), + source: "nm".into(), + referenced_by: vec![SymbolReference { + archive: None, + object: "main.cpp.o".into(), + }], + references_to: Vec::new(), + }, + FineGrainedSymbol { + mangled: "fl_lerp8".into(), + demangled: "fl_lerp8".into(), + address: 0x4000, + size: 60, + sym_type: 'T', + region: fbuild_core::MemoryRegion::Flash, + archive: Some("libFastLED.a".into()), + object: Some("math.cpp.o".into()), + output_section: Some(".flash.text".into()), + source: "nm".into(), + referenced_by: Vec::new(), + references_to: Vec::new(), + }, + ]; + let mut all = vec![root]; + all.extend(callees); + let map = FineGrainedSymbolMap { + elf_path: "test.elf".into(), + map_path: None, + total_flash: 12_860, + total_ram: 0, + symbols: all, + sections: Vec::::new(), + }; + let md = format_markdown_report_with_graphs( + &map, + 5, + &MarkdownGraphOptions { + enabled: true, + graph_top: 10, + config: GraphConfig::default(), + }, + ); + assert!( + md.contains("#### Top callees (dual ranking)"), + "missing dual-ranking sub-table header in:\n{md}" + ); + // esp_log_write is the heaviest callee (2000 B) AND the most + // shared (10 callers). Must appear on both axes. + assert!(md.contains("esp_log_write")); + // The "by callee size" column header must mention size; the + // "shared with" column header must mention sharing. + assert!(md.contains("by callee size (B)")); + assert!(md.contains("by callees shared with")); + // The "other" bucket: 5 callees, top-3 each, intersection at + // least esp_log_write, so other > 0. + assert!( + md.contains("more callees, see graph below"), + "missing 'other' bucket row in:\n{md}" + ); + } + /// `format_markdown_report` (without `_with_graphs`) MUST NOT /// embed graphs — protects pre-#463 markdown for callers that /// haven't opted in. @@ -1152,6 +1455,7 @@ mod tests { output_section: Some(".flash.text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }], sections: Vec::::new(), }; @@ -1184,6 +1488,7 @@ mod tests { output_section: Some(".text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }, FineGrainedSymbol { mangled: "tiny".into(), @@ -1197,6 +1502,7 @@ mod tests { output_section: Some(".text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }, ], sections: Vec::::new(), @@ -1250,6 +1556,7 @@ mod tests { output_section: Some(".text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }], sections: Vec::::new(), }; @@ -1288,6 +1595,7 @@ mod tests { output_section: Some(".flash.text".into()), source: "nm".into(), referenced_by: Vec::new(), + references_to: Vec::new(), }], sections: Vec::::new(), }; diff --git a/crates/fbuild-cli/src/cli/graph_cmd.rs b/crates/fbuild-cli/src/cli/graph_cmd.rs index f62f36d3..19852cbe 100644 --- a/crates/fbuild-cli/src/cli/graph_cmd.rs +++ b/crates/fbuild-cli/src/cli/graph_cmd.rs @@ -50,7 +50,7 @@ pub fn run_bloat_graph( input_path }; - let (nm_path, cppfilt_path) = resolve_tool_paths_public( + let (nm_path, cppfilt_path, objdump_path) = resolve_tool_paths_public( &elf_path, nm.as_deref(), cppfilt.as_deref(), @@ -66,6 +66,7 @@ pub fn run_bloat_graph( map_path: map_path_owned.as_deref(), nm_path: &nm_path, cppfilt_path: cppfilt_path.as_deref(), + objdump_path: objdump_path.as_deref(), }; let report = analyze_elf(cfg)?; @@ -128,6 +129,12 @@ pub fn parse_graph_config( max_depth: max_depth.max(1), collapse_archives, exclude_archives, + // The `fbuild bloat graph` CLI hasn't grown a `--direction` + // flag yet; preserve the pre-#471 behaviour (backref-only). + // A `--direction forward|both` flag is a small follow-up + // once Phase 4 (the markdown sub-table) demonstrates the + // forward graph is actually useful in practice. + direction: Default::default(), }) } diff --git a/crates/fbuild-cli/src/cli/symbols_cmd.rs b/crates/fbuild-cli/src/cli/symbols_cmd.rs index ccf6b3fe..eea434c6 100644 --- a/crates/fbuild-cli/src/cli/symbols_cmd.rs +++ b/crates/fbuild-cli/src/cli/symbols_cmd.rs @@ -83,6 +83,7 @@ pub fn run_symbols( map_path: map_path_ref, nm_path: &nm_path, cppfilt_path: cppfilt_path.as_deref(), + objdump_path: tool_paths.objdump.as_deref(), }; let report = analyze_elf(cfg)?; @@ -224,12 +225,20 @@ fn find_nm_on_path() -> Result { struct ToolPaths { nm: PathBuf, cppfilt: Option, + /// `objdump`, when discoverable. Populated from build_info.json + /// (`objdump_path`) or by deriving it from the nm path using the + /// GCC cross-tool naming convention. `None` when neither path + /// can be found — the analyzer falls back to an empty + /// `references_to` (forward graphs are unavailable; backref + /// graphs are unaffected). + objdump: Option, } impl ToolPaths { - /// Resolve `nm` / `c++filt` using the precedence documented in the - /// module header. `build_info_arg` is the explicit `--build-info` - /// path; when absent, walk up from `elf_path`. + /// Resolve `nm` / `c++filt` / `objdump` using the precedence + /// documented in the module header. `build_info_arg` is the + /// explicit `--build-info` path; when absent, walk up from + /// `elf_path`. fn resolve( elf_path: &Path, nm: Option<&str>, @@ -241,11 +250,15 @@ impl ToolPaths { .map(PathBuf::from) .or_else(|| elf_path.parent().and_then(find_build_info_near)); - let (bi_nm, bi_cppfilt) = match build_info_path { + let (bi_nm, bi_cppfilt, bi_objdump) = match build_info_path { Some(path) => match load_build_info(&path) { Ok((_env, info)) => { tracing::info!("symbols: read toolchain paths from {}", path.display()); - (option_path(&info.nm_path), option_path(&info.cppfilt_path)) + ( + option_path(&info.nm_path), + option_path(&info.cppfilt_path), + option_path(&info.objdump_path), + ) } Err(e) => { tracing::warn!( @@ -253,10 +266,10 @@ impl ToolPaths { path.display(), e ); - (None, None) + (None, None, None) } }, - None => (None, None), + None => (None, None, None), }; let nm = match nm { @@ -279,20 +292,66 @@ impl ToolPaths { }), }; - Ok(Self { nm, cppfilt }) + // objdump: prefer build_info, else derive from nm using the + // GCC cross-tool prefix (`-nm` → `-objdump`). + // Same prefix-replacement strategy `derive_cppfilt_path` + // uses; inlined here to keep symbol_analyzer's public surface + // minimal — objdump derivation isn't useful outside this CLI. + let objdump = bi_objdump.or_else(|| derive_sibling_tool(&nm, "objdump")); + + Ok(Self { + nm, + cppfilt, + objdump, + }) + } +} + +/// Replace the `nm` suffix on the file stem with `target` (e.g. +/// `arm-none-eabi-nm` → `arm-none-eabi-objdump`). Returns `None` +/// when the result doesn't exist on disk — caller treats absent as +/// "skip the feature", not as an error. +fn derive_sibling_tool(nm_path: &Path, target: &str) -> Option { + let parent = nm_path.parent().unwrap_or(Path::new(".")); + let stem = nm_path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_default(); + let ext = nm_path + .extension() + .map(|e| e.to_string_lossy().to_string()) + .unwrap_or_default(); + let new_stem = if let Some(prefix) = stem.strip_suffix("nm") { + format!("{prefix}{target}") + } else { + target.to_string() + }; + let candidate = if ext.is_empty() { + parent.join(new_stem) + } else { + parent.join(format!("{new_stem}.{ext}")) + }; + if candidate.exists() { + Some(candidate) + } else { + None } } /// Public wrapper around the internal toolchain resolver — used by /// `fbuild bloat graph` (`graph_cmd.rs`) so it shares the exact /// `--nm` / `--cppfilt` / `--build-info` resolution semantics as -/// `fbuild symbols`. Returns `(nm_path, optional_cppfilt_path)`. +/// `fbuild symbols`. Returns `(nm_path, optional_cppfilt_path, +/// optional_objdump_path)` — the third field added in #471 carries +/// the objdump used to populate per-symbol forward refs +/// (`references_to`). Callers that don't care about forward graphs +/// can discard the third field. pub fn resolve_tool_paths_public( elf_path: &Path, nm: Option<&str>, cppfilt: Option<&str>, build_info_arg: Option<&str>, -) -> Result<(PathBuf, Option)> { +) -> Result<(PathBuf, Option, Option)> { let resolved = ToolPaths::resolve(elf_path, nm, cppfilt, build_info_arg)?; if !resolved.nm.exists() { return Err(FbuildError::BuildFailed(format!( @@ -301,7 +360,7 @@ pub fn resolve_tool_paths_public( resolved.nm.display() ))); } - Ok((resolved.nm, resolved.cppfilt)) + Ok((resolved.nm, resolved.cppfilt, resolved.objdump)) } /// Treat an empty BuildInfo path field (the schema's "missing" diff --git a/crates/fbuild-core/src/symbol_analysis/callgraph.rs b/crates/fbuild-core/src/symbol_analysis/callgraph.rs new file mode 100644 index 00000000..8fc6d693 --- /dev/null +++ b/crates/fbuild-core/src/symbol_analysis/callgraph.rs @@ -0,0 +1,427 @@ +//! Parser for `objdump -d` disassembly that extracts a per-symbol call +//! graph (forward references). +//! +//! Complements [`super::cref`], which gives us *back*-references +//! (TU-level granularity). For the forward direction we need +//! per-symbol precision — the cref data structure can't give us +//! "which symbols does `ClocklessIdf5` call?" because cref is keyed by +//! `.o`, not by symbol. +//! +//! We parse the textual disassembly format because it's +//! architecture-neutral and stable across binutils versions: +//! +//! ```text +//! 00400500 : +//! 400500: bl 400600 +//! 400504: call4 400700 +//! 400508: jal ra, 400800 +//! ``` +//! +//! Each function header has the form ` :`; subsequent +//! indented lines are instructions. We record an edge whenever an +//! instruction's textual tail ends in `` (objdump +//! normalises this regardless of mnemonic: bl/b/call4/call8/jal/jalr/ +//! tail/rcall/rjmp all emit the same `<...>` annotation). +//! +//! ## Why not relocations on .o files? +//! +//! `objdump -r` over each `.o` gives strictly-correct per-symbol +//! relocation entries. Two reasons we don't go that route: +//! +//! 1. fbuild deletes per-TU `.o` files after the link succeeds; the +//! only artefact retained is the linked ELF. +//! 2. The relocation entries reflect the **pre-link** symbol graph. +//! After LTO + `-Os` the linker collapses helpers into their +//! callers; the relocation graph over-reports edges that no +//! longer exist in the final binary. +//! +//! The textual disassembly of the final ELF is therefore *more* +//! accurate for "what's actually called in the shipped binary", at +//! the cost of being conservative — calls indirected through a +//! function pointer (e.g. through a vtable load + `blx r3`) show no +//! `` annotation and are silently dropped. That mirrors the +//! `--cref` back-reference contract: listed edges are real, missing +//! edges may exist. + +use std::collections::BTreeMap; + +/// One edge: `caller` calls `callee`. Both names match +/// `FineGrainedSymbol::mangled` (objdump emits the mangled name in +/// the angle-bracket annotation when the binary hasn't been demangled +/// post-link; we tell the analyzer to invoke objdump without `-C` for +/// exactly this reason). +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct CallEdge { + pub caller: String, + pub callee: String, +} + +/// Parse `objdump -d` output into a `caller -> Vec` map. +/// +/// Skips: +/// - self-references (`f` calling `f` from a tail call back into +/// itself; not interesting for bloat analysis). +/// - PLT trampolines (callee name ends in `@plt`) — for size +/// analysis the actual import is what matters. +/// - the `$x`/`$d`/`$t` ARM mapping symbols (objdump injects these +/// between code and data; they're not real call targets). +/// - duplicate edges from the same caller (multiple calls to the +/// same callee collapse to one edge). +/// +/// Always succeeds — malformed lines are silently skipped. The +/// downstream contract is "empty list = no information", not +/// "empty list = error". +#[must_use] +pub fn parse_disasm(disasm: &str) -> BTreeMap> { + let mut out: BTreeMap> = BTreeMap::new(); + let mut current: Option = None; + let mut seen: std::collections::BTreeSet<(String, String)> = std::collections::BTreeSet::new(); + + for line in disasm.lines() { + // 1. Function header: " :" + if let Some(name) = parse_function_header(line) { + current = Some(name); + continue; + } + // 2. Empty line resets nothing (a function body can contain + // blank lines between basic blocks on some platforms). + let trimmed = line.trim_start(); + if trimmed.is_empty() { + continue; + } + // 3. Section header: "Disassembly of section .text:" — clears + // current so we don't attribute the next blob to the wrong + // symbol. + if trimmed.starts_with("Disassembly of section") { + current = None; + continue; + } + // 4. Instruction line: hex offset followed by mnemonic + args. + let Some(caller) = current.as_deref() else { + continue; + }; + let Some(callee) = parse_call_target(line) else { + continue; + }; + if !is_real_call_target(&callee) || callee == caller { + continue; + } + let key = (caller.to_string(), callee.clone()); + if seen.insert(key) { + out.entry(caller.to_string()).or_default().push(callee); + } + } + out +} + +/// Parse a function header line. Returns the symbol name if the line +/// has the shape ` :`. Lines like +/// `00400500 :` produce `Some("ClocklessIdf5")`. +fn parse_function_header(line: &str) -> Option { + let trimmed = line.trim_end(); + // Must end with ":" and contain "" preceded by a hex addr. + let body = trimmed.strip_suffix(':')?; + let (addr_part, name_part) = body.split_once(" <")?; + // Address must be all hex digits (drop leading 0x/zero padding). + if !addr_part + .trim_start_matches("0x") + .chars() + .all(|c| c.is_ascii_hexdigit()) + { + return None; + } + let name = name_part.strip_suffix('>')?; + if name.is_empty() { + return None; + } + Some(name.to_string()) +} + +/// Extract the call-target symbol name from an instruction line, if +/// any. Looks for the last `<...>` token (the symbol annotation +/// objdump appends to call/branch instructions). Returns the inner +/// name without the angle brackets. +/// +/// Examples that yield `Some(...)`: +/// - ` 400500: bl 400600 ` → `esp_log_write` +/// - ` 400500: call8 400700 ` → `FastLED::lerp8by8` +/// - ` 400500: jal ra, 400800 ` → `fl::sort` +/// - ` 400500: tail <__udivdi3>` → `__udivdi3` +/// +/// Examples that yield `None`: +/// - ` 400500: add r0, r1, r2` (no `<...>`) +/// - ` 400500: blx r3` (indirect, no symbol) +/// - ` 400500: .word 0x00000000` (data) +fn parse_call_target(line: &str) -> Option { + // We need *the last* `<...>` on the line because some encodings + // emit two: e.g. `bl 400600 +0x10 ` (rare but seen on + // big binaries with --visualize-jumps disabled). The last one is + // always the call target. + let close = line.rfind('>')?; + let before_close = &line[..close]; + let open = before_close.rfind('<')?; + let inner = &line[open + 1..close]; + if inner.is_empty() { + return None; + } + Some(inner.to_string()) +} + +/// Filter out tokens that look like symbols but aren't real call +/// targets for size analysis. The disassembler injects mapping +/// markers and PLT shims; we don't want those polluting the graph. +fn is_real_call_target(name: &str) -> bool { + // ARM ELF mapping symbols. The toolchain inserts `$a` / `$t` / + // `$d` at the boundary between ARM code, Thumb code, and data. + if matches!(name, "$a" | "$t" | "$d") { + return false; + } + // PLT shims. `printf@plt` is a trampoline; the real `printf` + // lives elsewhere. For backref + forward attribution we want + // the real callee, but objdump on a fully-linked ELF rarely + // emits `@plt` annotations (no PLT in statically-linked + // firmware). Filter conservatively. + if name.ends_with("@plt") { + return false; + } + // Hex literals dressed up as `<0x4040_1234>` (an absolute + // address with no known symbol). These look like edges but + // don't tell us anything useful. + if name.starts_with("0x") && name[2..].chars().all(|c| c.is_ascii_hexdigit() || c == '_') { + return false; + } + true +} + +/// Build the inverse of [`parse_disasm`]'s output: `callee -> Vec`. +/// Useful when the caller wants to surface "who calls X?" via the +/// forward map (a redundant view to `--cref` back-references, but at +/// per-symbol granularity rather than TU). +#[must_use] +pub fn invert(forward: &BTreeMap>) -> BTreeMap> { + let mut out: BTreeMap> = BTreeMap::new(); + let mut seen: std::collections::BTreeSet<(String, String)> = std::collections::BTreeSet::new(); + for (caller, callees) in forward { + for callee in callees { + let key = (callee.clone(), caller.clone()); + if seen.insert(key) { + out.entry(callee.clone()).or_default().push(caller.clone()); + } + } + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_arm_thumb_bl_call() { + let disasm = "\ +00000500
: + 500: b500 push {lr} + 502: f000 f800 bl 506 + 506: bd00 pop {pc} + +00000506 : + 506: 4770 bx lr +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("main"), Some(&vec!["delay".to_string()])); + // delay has no outgoing edges (bx lr is a return). + assert!(!edges.contains_key("delay")); + } + + /// Xtensa ESP32 `call4` / `call8` / `l32r` flavours. + #[test] + fn parses_xtensa_call() { + let disasm = "\ +40080000 : +40080000: 004136 entry a1, 32 +40080003: 0c0006 call4 40080020 +40080006: 0c0006 call8 40080040 +40080009: f00c retw.n +"; + let edges = parse_disasm(disasm); + let calls = edges.get("app_main").expect("app_main has edges"); + assert!(calls.contains(&"esp_log_write".to_string())); + assert!(calls.contains(&"FastLED::lerp8by8".to_string())); + } + + /// RISC-V `jal` / `tail`. + #[test] + fn parses_riscv_jal_and_tail() { + let disasm = "\ +20000100 : +20000100: 1101 add sp,sp,-32 +20000102: c606 sw a1,12(sp) +20000104: 2eb000ef jal ra, 200001e8 +20000108: 2eb000ef tail 200001f0 +2000010c: 8082 ret +"; + let edges = parse_disasm(disasm); + let calls = edges.get("my_task").expect("my_task has edges"); + assert!(calls.contains(&"esp_log_write".to_string())); + assert!(calls.contains(&"fl::sort".to_string())); + } + + /// AVR `call` / `rcall` / `jmp`. + #[test] + fn parses_avr_call_rcall() { + let disasm = "\ +00000080 : + 80: 0f 93 push r16 + 82: 0e 94 c0 00 call 0x180 + 86: df cf rcall .-66 ; 0x46 + 88: 08 95 ret +"; + let edges = parse_disasm(disasm); + let calls = edges.get("setup").expect("setup has edges"); + assert!(calls.contains(&"FastLED_init".to_string())); + assert!(calls.contains(&"delay_ms".to_string())); + } + + /// Indirect calls (`blx r3`, `jalr` on a register) have no + /// symbol annotation — they're silently dropped, matching the + /// "missing edges may exist" contract. + #[test] + fn indirect_calls_are_dropped() { + let disasm = "\ +00000500 : + 500: f8d0 3008 ldr.w r3, [r0, #8] + 504: 4798 blx r3 + 506: bd00 pop {pc} +"; + let edges = parse_disasm(disasm); + assert!(!edges.contains_key("vtable_dispatch")); + } + + /// Duplicate calls to the same callee collapse to a single edge. + #[test] + fn duplicate_callees_dedupe() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 0 + 504: f7ff fffe bl 0 + 508: f7ff fffe bl 0 +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("loop"), Some(&vec!["step".to_string()])); + } + + /// Self-references (e.g. recursion) aren't edges for the + /// purposes of bloat analysis. + #[test] + fn self_references_dropped() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 500 + 504: bd00 pop {pc} +"; + let edges = parse_disasm(disasm); + assert!(!edges.contains_key("fib")); + } + + /// ARM `$a`/`$t`/`$d` mapping markers must not appear as callees. + #[test] + fn arm_mapping_symbols_filtered() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 520 <$a> + 504: f7ff fffe bl 520 <$d> + 508: f7ff fffe bl 520 +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("foo"), Some(&vec!["real_callee".to_string()])); + } + + /// PLT trampoline annotations don't appear in fully statically + /// linked firmware, but if a host-tool binary slipped through we + /// don't want them polluting the graph. + #[test] + fn plt_callees_filtered() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 520 + 504: f7ff fffe bl 520 +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("foo"), Some(&vec!["real_callee".to_string()])); + } + + /// `<0x40123456>`-style absolute-address annotations (no known + /// symbol) are noise — drop them. + #[test] + fn hex_address_annotations_filtered() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 520 <0x40123456> + 504: f7ff fffe bl 520 +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("foo"), Some(&vec!["real_callee".to_string()])); + } + + /// `Disassembly of section .text:` headers (and similar) reset + /// the current-function context so we don't attribute a stray + /// instruction from a later section to the previous function. + #[test] + fn section_header_resets_current_function() { + let disasm = "\ +00000500 : + 500: f7ff fffe bl 520 + +Disassembly of section .data: + + 600: f7ff fffe bl 520 +"; + let edges = parse_disasm(disasm); + assert_eq!(edges.get("foo"), Some(&vec!["foo_callee".to_string()])); + assert!(!edges + .values() + .any(|callees| callees.contains(&"should_not_attach_to_foo".to_string()))); + } + + /// C++ mangled names contain `::`, `<>`, and parentheses. We + /// must not split on them — the whole inner-bracket payload is + /// the symbol name. + #[test] + fn cpp_mangled_names_preserved() { + let disasm = "\ +00000500 <_ZN8FastLED5beginEv>: + 500: f7ff fffe bl 520 <_ZNSt6vectorIiSaIiEE9push_backERKi> +"; + let edges = parse_disasm(disasm); + assert_eq!( + edges.get("_ZN8FastLED5beginEv"), + Some(&vec!["_ZNSt6vectorIiSaIiEE9push_backERKi".to_string()]) + ); + } + + /// `invert()` produces a callee → callers map; round-tripping a + /// known small graph proves the inversion is byte-identical to + /// the original edge set. + #[test] + fn invert_is_bijective() { + let mut forward: BTreeMap> = BTreeMap::new(); + forward.insert("a".to_string(), vec!["b".to_string(), "c".to_string()]); + forward.insert("b".to_string(), vec!["c".to_string()]); + let inverse = invert(&forward); + assert_eq!(inverse.get("b"), Some(&vec!["a".to_string()])); + assert_eq!( + inverse.get("c"), + Some(&vec!["a".to_string(), "b".to_string()]) + ); + // Round-trip back equals the original. + let round_trip = invert(&inverse); + assert_eq!(round_trip, forward); + } + + /// Empty input ⇒ empty output, no panic. + #[test] + fn empty_input_yields_empty_map() { + assert!(parse_disasm("").is_empty()); + assert!(parse_disasm("\n\n\nrandom non-disasm text\n").is_empty()); + } +} diff --git a/crates/fbuild-core/src/symbol_analysis/graph.rs b/crates/fbuild-core/src/symbol_analysis/graph.rs index 036bacca..35a05032 100644 --- a/crates/fbuild-core/src/symbol_analysis/graph.rs +++ b/crates/fbuild-core/src/symbol_analysis/graph.rs @@ -51,6 +51,35 @@ pub enum GraphDepth { Fixed(u32), } +/// Which graph direction(s) to synthesize. +/// +/// `Backward` is the historical default and what every caller pre-#463 +/// asked for: walk from the root symbol *outward* along +/// `referenced_by` to surface "who pulled this in?". +/// +/// `Forward` walks the inverse: along `references_to` (populated from +/// `objdump -d`) to surface "what does this symbol call?". Forward +/// edges are per-symbol, not per-TU — the AI-assisted-optimization +/// use-case (see #471 motivation) wants exactly that precision so it +/// can tell the difference between `ClocklessIdf5` calling `fl::sort` +/// (which it doesn't) vs. its TU calling it (which it might). +/// +/// `Bidirectional` walks both. The rendered `.dot` shows backward +/// callers on one side of the root and forward callees on the other, +/// so a single picture answers both "who pulled this in?" and "what +/// did this end up dragging along?". +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Direction { + /// Backward only. Compatible with the pre-#471 default. + #[default] + Backward, + /// Forward only. Useful when you want a small `.dot` focused on + /// callees without the noise of the call-in side. + Forward, + /// Both directions from the root. + Bidirectional, +} + /// User-visible knobs for back-reference graph synthesis. #[derive(Debug, Clone)] pub struct GraphConfig { @@ -70,6 +99,9 @@ pub struct GraphConfig { /// Archives whose branches are dropped entirely. Use when the /// caller only cares about non-system referencers. pub exclude_archives: Vec, + /// Which direction(s) to walk from the root. See [`Direction`]. + /// Default `Backward` matches the pre-#471 behaviour exactly. + pub direction: Direction, } impl Default for GraphConfig { @@ -80,6 +112,7 @@ impl Default for GraphConfig { max_depth: 4, collapse_archives: vec!["libc.a".to_string(), "libgcc.a".to_string()], exclude_archives: Vec::new(), + direction: Direction::Backward, } } } @@ -108,17 +141,62 @@ pub enum NodeKind { /// `size_hint` is the sum of symbol sizes attributed to this TU /// in the report, used both for fan-out ranking and node sizing. TranslationUnit { size_hint: Option }, + /// A callee — a symbol the root (or one of its callees) calls. + /// Distinct from `TranslationUnit` because forward edges are + /// per-symbol, not per-TU. `size` is the callee's own flash + /// footprint (drives ranking + node sizing). + Callee { + demangled: String, + size: u64, + callers_count: usize, + }, /// A super-node bundling multiple TUs from the same archive /// because of `collapse_archives` OR fan-out overflow. Collapsed { archive: String, count: usize }, } -/// Directed edge from a referencer toward what it references. -/// (`from` referenced `to`.) +/// Direction of a single edge in the graph. +/// +/// `Backward`: caller → callee, drawn from a referencer toward the +/// root (matches the pre-#471 contract that `from referenced to`). +/// +/// `Forward`: root → callee, drawn from the symbol toward the things +/// it calls. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum EdgeDirection { + #[default] + Backward, + Forward, +} + +/// Directed edge. For `Backward` edges `from` referenced `to`; for +/// `Forward` edges `from` calls `to`. The renderer styles the two +/// flavours differently so a bidirectional graph stays readable. #[derive(Debug, Clone, PartialEq, Eq)] pub struct GraphEdge { pub from: String, pub to: String, + pub direction: EdgeDirection, +} + +impl GraphEdge { + /// A back-edge from a referencer toward what it references. + pub fn backward(from: String, to: String) -> Self { + Self { + from, + to, + direction: EdgeDirection::Backward, + } + } + + /// A forward edge from a caller toward what it calls. + pub fn forward(from: String, to: String) -> Self { + Self { + from, + to, + direction: EdgeDirection::Forward, + } + } } /// Back-reference graph rooted at a single target symbol. @@ -246,14 +324,28 @@ impl BackrefGraph { // BFS queue. Each entry: (current TU, target node id it points to, depth). let mut queue: VecDeque<((Option, String), String, u32)> = VecDeque::new(); - // Seed level 1: every TU that directly references the root. - let level1 = rank_and_cap_referencers( - &root.referenced_by, - index, - config, - &root_archive, - /*depth=*/ 1, + // ---- Backward direction: seed level 1 with the TUs that + // ---- directly reference the root. + // + // Skipped entirely when the caller asked for `Direction::Forward` + // only — the BFS expansion below short-circuits naturally on an + // empty queue, so we get a clean forward-only graph without + // editing the rest of this method. + let want_backward = matches!( + config.direction, + Direction::Backward | Direction::Bidirectional ); + let level1: Vec = if want_backward { + rank_and_cap_referencers( + &root.referenced_by, + index, + config, + &root_archive, + /*depth=*/ 1, + ) + } else { + Vec::new() + }; for entry in level1 { match entry { CappedReferencer::Tu(tu_ref) => { @@ -271,10 +363,7 @@ impl BackrefGraph { depth: 1, }); node_id_by_tu.insert(key.clone(), node_id.clone()); - edges.push(GraphEdge { - from: node_id.clone(), - to: root_id.clone(), - }); + edges.push(GraphEdge::backward(node_id.clone(), root_id.clone())); queue.push_back((key, node_id, 1)); } } @@ -290,10 +379,7 @@ impl BackrefGraph { depth: 1, }); } - edges.push(GraphEdge { - from: node_id, - to: root_id.clone(), - }); + edges.push(GraphEdge::backward(node_id, root_id.clone())); } CappedReferencer::FanOutOverflow { count } => { let node_id = format!("ovf__d1__{}", count); @@ -308,10 +394,7 @@ impl BackrefGraph { }, depth: 1, }); - edges.push(GraphEdge { - from: node_id, - to: root_id.clone(), - }); + edges.push(GraphEdge::backward(node_id, root_id.clone())); } } } @@ -363,10 +446,7 @@ impl BackrefGraph { if let Some(target_id) = node_id_by_tu.get(&key) { push_edge_dedup( &mut edges, - GraphEdge { - from: target_id.clone(), - to: current_id.clone(), - }, + GraphEdge::backward(target_id.clone(), current_id.clone()), ); } continue; @@ -383,10 +463,7 @@ impl BackrefGraph { depth: next_depth, }); node_id_by_tu.insert(key.clone(), node_id.clone()); - edges.push(GraphEdge { - from: node_id.clone(), - to: current_id.clone(), - }); + edges.push(GraphEdge::backward(node_id.clone(), current_id.clone())); queue.push_back((key, node_id, next_depth)); } CappedReferencer::CollapsedArchive { archive, count } => { @@ -403,10 +480,7 @@ impl BackrefGraph { } push_edge_dedup( &mut edges, - GraphEdge { - from: node_id, - to: current_id.clone(), - }, + GraphEdge::backward(node_id, current_id.clone()), ); } CappedReferencer::FanOutOverflow { count } => { @@ -427,15 +501,38 @@ impl BackrefGraph { }, depth: next_depth, }); - edges.push(GraphEdge { - from: node_id, - to: current_id.clone(), - }); + edges.push(GraphEdge::backward(node_id, current_id.clone())); } } } } + // ---- Forward direction: walk root.references_to outward. + // + // Forward edges are per-symbol (not TU-level), so the node + // identity here is `sym__` instead of `tu____`. + // This is the side of the graph that answers "what does + // ClocklessIdf5 actually call?" — the motivating ask in #471 + // when an AI optimization pass mistook `fl::sort` (a sibling + // in the same TU) for something the root symbol called. + if matches!( + config.direction, + Direction::Forward | Direction::Bidirectional + ) { + let mut visited_syms: BTreeSet = BTreeSet::new(); + visited_syms.insert(root.mangled.clone()); + walk_forward( + map, + config, + root, + &root_id, + /*depth=*/ 1, + &mut nodes, + &mut edges, + &mut visited_syms, + ); + } + Self { root_id, nodes, @@ -467,7 +564,23 @@ impl BackrefGraph { )); } for e in &self.edges { - out.push_str(&format!(" \"{}\" -> \"{}\";\n", e.from, e.to)); + // Forward edges (caller → callee) get a distinct dashed + // arrow + a `forward` label so a bidirectional graph + // doesn't blur the two directions together. Backward + // edges keep the historical solid arrow (no extra + // attributes) so existing `.dot` consumers don't see a + // diff from the visual they're used to. + match e.direction { + EdgeDirection::Backward => { + out.push_str(&format!(" \"{}\" -> \"{}\";\n", e.from, e.to)); + } + EdgeDirection::Forward => { + out.push_str(&format!( + " \"{}\" -> \"{}\" [style=dashed, color=\"#0066cc\", fontcolor=\"#0066cc\", label=\"calls\"];\n", + e.from, e.to + )); + } + } } out.push_str("}\n"); out @@ -555,6 +668,263 @@ fn push_edge_dedup(edges: &mut Vec, candidate: GraphEdge) { } } +/// BFS the per-symbol forward edges (`references_to`) outward from a +/// starting symbol. Adds one `Callee` node per surviving callee, one +/// `Forward` edge from caller → callee, and a single +/// `(… and N more)` overflow super-node when the fan-out exceeds +/// `config.fan_out`. Recurses to `config.max_depth`. +/// +/// Ranking is by callee flash size (heaviest first), which matches +/// the AI-optimization use-case (#471): when a model sees a top +/// symbol it wants to know "what's the biggest thing this calls?", +/// not "what's the most-shared thing this calls?". The +/// most-shared view is surfaced via [`rank_callees_dual`] for the +/// markdown sub-table — both axes coexist; the graph picks one. +#[allow(clippy::too_many_arguments)] +fn walk_forward( + map: &FineGrainedSymbolMap, + config: &GraphConfig, + caller_sym: &super::FineGrainedSymbol, + caller_node_id: &str, + depth: u32, + nodes: &mut Vec, + edges: &mut Vec, + visited_syms: &mut BTreeSet, +) { + if depth > config.max_depth { + return; + } + // Resolve callee names to FineGrainedSymbols where possible. Names + // that don't resolve (typically platform/libc symbols not pulled + // into the report) are kept as raw mangled strings with size=0 + // so the graph still surfaces the edge. + let mut callees: Vec> = Vec::new(); + for callee_mangled in &caller_sym.references_to { + if visited_syms.contains(callee_mangled) { + continue; + } + let resolved = map + .symbols + .iter() + .find(|s| s.mangled == *callee_mangled || s.demangled == *callee_mangled); + callees.push(CalleeCandidate { + mangled: callee_mangled.clone(), + sym: resolved, + }); + } + if callees.is_empty() { + return; + } + + // Apply exclude_archives to filter callees from system libs the + // caller doesn't want surfaced. + callees.retain(|c| match c.sym.and_then(|s| s.archive.as_ref()) { + Some(a) => !config.exclude_archives.iter().any(|x| x == a), + None => true, + }); + if callees.is_empty() { + return; + } + + // Rank by callee size (largest first); ties broken by + // referenced_by length (most-shared first) so the ranking is + // deterministic. + callees.sort_by(|a, b| { + b.size() + .cmp(&a.size()) + .then_with(|| b.callers_count().cmp(&a.callers_count())) + .then_with(|| a.mangled.cmp(&b.mangled)) + }); + + let total = callees.len(); + let take_n = config.fan_out.min(total); + let overflow = total.saturating_sub(take_n); + + for c in callees.iter().take(take_n) { + let callee_id = format!("sym__{}", sanitize_id(&c.mangled)); + if visited_syms.insert(c.mangled.clone()) { + let (demangled, size) = match c.sym { + Some(s) => (s.demangled.clone(), s.size), + None => (c.mangled.clone(), 0), + }; + let callers_count = c.callers_count(); + let archive = c.sym.and_then(|s| s.archive.clone()); + let object = c.sym.and_then(|s| s.object.clone()); + nodes.push(GraphNode { + id: callee_id.clone(), + label: format_callee_label(&demangled, size, callers_count), + archive, + object, + kind: NodeKind::Callee { + demangled, + size, + callers_count, + }, + depth, + }); + // Recurse — only if we have a resolved symbol with its + // own references_to. + if let Some(resolved) = c.sym { + if depth < config.max_depth && !resolved.references_to.is_empty() { + walk_forward( + map, + config, + resolved, + &callee_id, + depth + 1, + nodes, + edges, + visited_syms, + ); + } + } + } + push_edge_dedup( + edges, + GraphEdge::forward(caller_node_id.to_string(), callee_id), + ); + } + + if overflow > 0 { + let overflow_id = format!( + "fwd_ovf__{}__d{}__{}", + sanitize_id(caller_node_id), + depth, + overflow + ); + nodes.push(GraphNode { + id: overflow_id.clone(), + label: format!("(… and {overflow} more callees)"), + archive: None, + object: None, + kind: NodeKind::Collapsed { + archive: "(overflow)".to_string(), + count: overflow, + }, + depth, + }); + edges.push(GraphEdge::forward(caller_node_id.to_string(), overflow_id)); + } +} + +/// Working struct for ranking callees: carries the mangled name plus +/// an optional resolved symbol pointer for size/popularity lookup. +struct CalleeCandidate<'a> { + mangled: String, + sym: Option<&'a super::FineGrainedSymbol>, +} + +impl<'a> CalleeCandidate<'a> { + fn size(&self) -> u64 { + self.sym.map(|s| s.size).unwrap_or(0) + } + + fn callers_count(&self) -> usize { + self.sym.map(|s| s.referenced_by.len()).unwrap_or(0) + } +} + +/// Per-callee node label: `\n B\nshared with `. +fn format_callee_label(demangled: &str, size: u64, callers_count: usize) -> String { + let truncated = if demangled.len() > 64 { + format!("{}…", &demangled[..63]) + } else { + demangled.to_string() + }; + if callers_count > 1 { + format!("{truncated}\n{size} B\nshared ×{callers_count}") + } else { + format!("{truncated}\n{size} B") + } +} + +/// Rank a symbol's direct callees by two axes simultaneously and +/// return the top-`top_n` from each — the data the markdown +/// "Top N callees" sub-table renders side by side. +/// +/// `(by_size, by_popularity, other_count)`: +/// - `by_size`: heaviest callees (flash bytes), ranked by callee +/// size descending. +/// - `by_popularity`: most-shared callees (how many TUs / symbols +/// also call them), ranked by `referenced_by.len()` descending. +/// - `other_count`: number of unique callees that fell out of both +/// top-N buckets (the "other" pile the user asked for). +/// +/// Callees that don't resolve to a row in `map` (e.g. weak refs not +/// pulled in) get `size=0` and contribute to neither axis. +#[must_use] +pub fn rank_callees_dual<'a>( + map: &'a FineGrainedSymbolMap, + caller: &super::FineGrainedSymbol, + top_n: usize, +) -> (Vec>, Vec>, usize) { + // Resolve every callee once. + let mut resolved: Vec> = caller + .references_to + .iter() + .map(|m| { + let s = map + .symbols + .iter() + .find(|s| s.mangled == *m || s.demangled == *m); + CalleeRanked { + mangled: m.clone(), + demangled: s.map(|s| s.demangled.clone()).unwrap_or_else(|| m.clone()), + size: s.map(|s| s.size).unwrap_or(0), + callers_count: s.map(|s| s.referenced_by.len()).unwrap_or(0), + sym: s, + } + }) + .collect(); + + // Sort copies by each axis. We clone the small structs because + // they're cheap (4 fields + a `&FineGrainedSymbol`). + let mut by_size = resolved.clone(); + by_size.sort_by(|a, b| { + b.size + .cmp(&a.size) + .then_with(|| a.demangled.cmp(&b.demangled)) + }); + let mut by_popularity = resolved.clone(); + by_popularity.sort_by(|a, b| { + b.callers_count + .cmp(&a.callers_count) + .then_with(|| b.size.cmp(&a.size)) + .then_with(|| a.demangled.cmp(&b.demangled)) + }); + + let size_top = by_size.iter().take(top_n).cloned().collect::>(); + let pop_top = by_popularity + .iter() + .take(top_n) + .cloned() + .collect::>(); + + // "Other" is the unique callees NOT present in either top-N + // bucket. + let top_mangled: BTreeSet<&str> = size_top + .iter() + .chain(pop_top.iter()) + .map(|c| c.mangled.as_str()) + .collect(); + let other_count = resolved + .drain(..) + .filter(|c| !top_mangled.contains(c.mangled.as_str())) + .count(); + + (size_top, pop_top, other_count) +} + +/// Row in the dual-ranked callee table. +#[derive(Debug, Clone)] +pub struct CalleeRanked<'a> { + pub mangled: String, + pub demangled: String, + pub size: u64, + pub callers_count: usize, + pub sym: Option<&'a super::FineGrainedSymbol>, +} + /// Map an archive name to a Graphviz fill color. Coloring follows /// the ecosystem the archive lives in so the eye jumps to the /// non-system edges. @@ -584,6 +954,7 @@ fn node_width(n: &GraphNode) -> Option { NodeKind::TranslationUnit { size_hint: Some(b), .. } => *b, + NodeKind::Callee { size, .. } => *size, _ => return None, }; if bytes == 0 { @@ -683,6 +1054,7 @@ mod tests { output_section: Some(".flash.text".to_string()), source: "nm".to_string(), referenced_by: refs, + references_to: Vec::new(), } } @@ -983,4 +1355,390 @@ mod tests { assert_eq!(sanitize_filename("op<<(int)"), "op___int_"); assert_eq!(sanitize_filename(""), "sym"); } + + // ---- #471: forward + bidirectional traversal ---- + + /// Helper: build a symbol with explicit `references_to`. + fn sym_calls( + mangled: &str, + size: u64, + archive: Option<&str>, + object: &str, + calls: Vec<&str>, + ) -> FineGrainedSymbol { + let mut s = sym(mangled, mangled, size, archive, object, Vec::new()); + s.references_to = calls.into_iter().map(|c| c.to_string()).collect(); + s + } + + /// Forward-only build with no back-references: just root + its + /// direct callees, ranked by callee size. + #[test] + fn forward_only_emits_callee_nodes() { + let symbols = vec![ + sym_calls( + "ClocklessIdf5", + 10_000, + Some("libFastLED.a"), + "clockless_idf5.cpp.o", + vec!["esp_log_write", "rmt_tx_start", "fl_lerp8"], + ), + sym( + "esp_log_write", + "esp_log_write", + 2_000, + Some("libesp.a"), + "log.c.o", + Vec::new(), + ), + sym( + "rmt_tx_start", + "rmt_tx_start", + 800, + Some("libesp.a"), + "rmt.c.o", + Vec::new(), + ), + sym( + "fl_lerp8", + "fl_lerp8", + 60, + Some("libFastLED.a"), + "math.cpp.o", + Vec::new(), + ), + ]; + let cfg = GraphConfig { + direction: Direction::Forward, + collapse_archives: Vec::new(), + ..GraphConfig::default() + }; + let g = BackrefGraph::build(&map(symbols), "ClocklessIdf5", &cfg); + // Root + 3 callees. + let callees: Vec<_> = g + .nodes + .iter() + .filter(|n| matches!(n.kind, NodeKind::Callee { .. })) + .collect(); + assert_eq!(callees.len(), 3, "expected three forward callees"); + // All edges should be forward. + assert!(g + .edges + .iter() + .all(|e| e.direction == EdgeDirection::Forward)); + // Heaviest callee (esp_log_write @ 2000) ranks first by size. + let heaviest = callees + .iter() + .max_by_key(|n| match &n.kind { + NodeKind::Callee { size, .. } => *size, + _ => 0, + }) + .unwrap(); + assert!(heaviest.label.starts_with("esp_log_write")); + } + + /// The motivating bug from #471: when a symbol's TU has siblings, + /// the forward graph must show ONLY what the root symbol itself + /// calls — not what the sibling symbols call. The cref-inversion + /// approach would have surfaced `fl::sort` here (called by a + /// sibling in the same TU); the objdump-based per-symbol + /// `references_to` correctly omits it. + #[test] + fn forward_excludes_tu_siblings_callees() { + let symbols = vec![ + // Root: calls only esp_log_write. + sym_calls( + "ClocklessIdf5", + 10_000, + Some("libFastLED.a"), + "clockless_idf5.cpp.o", + vec!["esp_log_write"], + ), + // Sibling in the same TU: calls fl::sort. This must NOT + // appear on ClocklessIdf5's forward edges. + sym_calls( + "ClocklessIdf5_helper", + 500, + Some("libFastLED.a"), + "clockless_idf5.cpp.o", + vec!["fl::sort"], + ), + sym( + "esp_log_write", + "esp_log_write", + 2_000, + Some("libesp.a"), + "log.c.o", + Vec::new(), + ), + sym( + "fl::sort", + "fl::sort", + 1_500, + Some("libFastLED.a"), + "sort.cpp.o", + Vec::new(), + ), + ]; + let cfg = GraphConfig { + direction: Direction::Forward, + collapse_archives: Vec::new(), + ..GraphConfig::default() + }; + let g = BackrefGraph::build(&map(symbols), "ClocklessIdf5", &cfg); + let callees: Vec<&str> = g + .nodes + .iter() + .filter_map(|n| match &n.kind { + NodeKind::Callee { demangled, .. } => Some(demangled.as_str()), + _ => None, + }) + .collect(); + assert!(callees.contains(&"esp_log_write")); + assert!( + !callees.contains(&"fl::sort"), + "fl::sort is a sibling's callee, not the root's; got callees: {callees:?}" + ); + } + + /// Bidirectional: root has BOTH a TU caller (backref) and a + /// resolved callee (forward). + #[test] + fn bidirectional_walks_both_sides() { + let symbols = vec![ + // Root: called by main.cpp.o; itself calls esp_log_write. + { + let mut r = sym_calls( + "ClocklessIdf5", + 10_000, + Some("libFastLED.a"), + "clockless_idf5.cpp.o", + vec!["esp_log_write"], + ); + r.referenced_by = vec![SymbolReference { + archive: None, + object: "main.cpp.o".to_string(), + }]; + r + }, + sym( + "esp_log_write", + "esp_log_write", + 2_000, + Some("libesp.a"), + "log.c.o", + Vec::new(), + ), + sym("setup", "setup", 30, None, "main.cpp.o", Vec::new()), + ]; + let cfg = GraphConfig { + direction: Direction::Bidirectional, + collapse_archives: Vec::new(), + ..GraphConfig::default() + }; + let g = BackrefGraph::build(&map(symbols), "ClocklessIdf5", &cfg); + // Edges must include at least one of each direction. + let backward_count = g + .edges + .iter() + .filter(|e| e.direction == EdgeDirection::Backward) + .count(); + let forward_count = g + .edges + .iter() + .filter(|e| e.direction == EdgeDirection::Forward) + .count(); + assert!(backward_count >= 1, "expected at least 1 backward edge"); + assert!(forward_count >= 1, "expected at least 1 forward edge"); + } + + /// Forward fan-out cap: 10 callees, cap = 3 → 3 callee nodes + 1 + /// `(… and 7 more callees)` super-node. + #[test] + fn forward_fan_out_overflows_collapse() { + let mut callees: Vec<&'static str> = Vec::new(); + let leak: Vec = (0..10).map(|i| format!("callee_{i}")).collect(); + let leaked: Vec<&'static str> = leak + .iter() + .map(|s| Box::leak(s.clone().into_boxed_str()) as &'static str) + .collect(); + for c in &leaked { + callees.push(c); + } + let mut symbols = vec![sym_calls( + "hub", + 1_000, + Some("libcore.a"), + "core.o", + callees, + )]; + // Give each callee a distinct size so ranking is deterministic. + for (i, name) in leaked.iter().enumerate() { + symbols.push(sym( + name, + name, + (100 - i) as u64, + Some("libapp.a"), + &format!("{name}.o"), + Vec::new(), + )); + } + let cfg = GraphConfig { + direction: Direction::Forward, + fan_out: 3, + collapse_archives: Vec::new(), + ..GraphConfig::default() + }; + let g = BackrefGraph::build(&map(symbols), "hub", &cfg); + let callee_nodes = g + .nodes + .iter() + .filter(|n| matches!(n.kind, NodeKind::Callee { .. })) + .count(); + assert_eq!(callee_nodes, 3, "expected exactly 3 callee nodes after cap"); + let overflow = g.nodes.iter().find( + |n| matches!(&n.kind, NodeKind::Collapsed { archive, .. } if archive == "(overflow)"), + ); + assert!(overflow.is_some(), "expected an overflow super-node"); + assert!(overflow.unwrap().label.contains("7 more callees")); + } + + /// Forward edges in the .dot output must be styled distinctly + /// (dashed + blue) so a bidirectional graph stays readable. + #[test] + fn forward_edges_styled_in_dot() { + let symbols = vec![ + sym_calls("a", 100, Some("libA.a"), "a.o", vec!["b"]), + sym("b", "b", 50, Some("libB.a"), "b.o", Vec::new()), + ]; + let cfg = GraphConfig { + direction: Direction::Forward, + collapse_archives: Vec::new(), + ..GraphConfig::default() + }; + let g = BackrefGraph::build(&map(symbols), "a", &cfg); + let dot = g.to_dot(); + assert!( + dot.contains("style=dashed"), + "forward edge missing dashed style" + ); + assert!( + dot.contains("label=\"calls\""), + "forward edge missing label" + ); + } + + /// `rank_callees_dual` returns top-N by both size and popularity, + /// plus an "other" count that excludes both top-N pickups. + #[test] + fn rank_callees_dual_separates_size_and_popularity() { + // Caller has 5 callees: + // a: size=1000, shared by 1 caller (heavy, unpopular) + // b: size=500, shared by 10 callers (medium, popular) + // c: size=300, shared by 3 callers + // d: size=200, shared by 2 callers + // e: size=100, shared by 1 caller + let symbols = vec![ + sym_calls( + "root", + 10_000, + None, + "main.o", + vec!["a", "b", "c", "d", "e"], + ), + { + let mut s = sym("a", "a", 1_000, Some("libX.a"), "a.o", Vec::new()); + s.referenced_by = vec![SymbolReference { + archive: None, + object: "x.o".to_string(), + }]; + s + }, + { + let mut s = sym("b", "b", 500, Some("libX.a"), "b.o", Vec::new()); + s.referenced_by = (0..10) + .map(|i| SymbolReference { + archive: None, + object: format!("caller_{i}.o"), + }) + .collect(); + s + }, + { + let mut s = sym("c", "c", 300, Some("libX.a"), "c.o", Vec::new()); + s.referenced_by = (0..3) + .map(|i| SymbolReference { + archive: None, + object: format!("caller_{i}.o"), + }) + .collect(); + s + }, + { + let mut s = sym("d", "d", 200, Some("libX.a"), "d.o", Vec::new()); + s.referenced_by = (0..2) + .map(|i| SymbolReference { + archive: None, + object: format!("caller_{i}.o"), + }) + .collect(); + s + }, + sym("e", "e", 100, Some("libX.a"), "e.o", Vec::new()), + ]; + let m = map(symbols); + let root_sym = m.symbols.iter().find(|s| s.mangled == "root").unwrap(); + let (by_size, by_pop, other) = rank_callees_dual(&m, root_sym, 3); + // By size: a > b > c. + assert_eq!( + by_size + .iter() + .map(|c| c.mangled.as_str()) + .collect::>(), + vec!["a", "b", "c"] + ); + // By popularity: b (10) > c (3) > d (2). + assert_eq!( + by_pop + .iter() + .map(|c| c.mangled.as_str()) + .collect::>(), + vec!["b", "c", "d"] + ); + // Other = unique callees not in either bucket. Top picks + // union: {a, b, c, d}. Total: {a, b, c, d, e}. Other: {e}. + assert_eq!(other, 1, "expected exactly e in the 'other' bucket"); + } + + /// Default config still gives the pre-#471 backward-only + /// behaviour: no callee nodes, no forward edges. + #[test] + fn default_direction_is_backward_only() { + let symbols = vec![ + sym_calls( + "ClocklessIdf5", + 10_000, + Some("libFastLED.a"), + "clockless_idf5.cpp.o", + vec!["esp_log_write"], + ), + sym( + "esp_log_write", + "esp_log_write", + 2_000, + Some("libesp.a"), + "log.c.o", + Vec::new(), + ), + ]; + let g = BackrefGraph::build(&map(symbols), "ClocklessIdf5", &GraphConfig::default()); + assert!(g + .nodes + .iter() + .all(|n| !matches!(n.kind, NodeKind::Callee { .. }))); + assert!(g + .edges + .iter() + .all(|e| e.direction == EdgeDirection::Backward)); + } } diff --git a/crates/fbuild-core/src/symbol_analysis/mod.rs b/crates/fbuild-core/src/symbol_analysis/mod.rs index 84a4e4f1..6e494222 100644 --- a/crates/fbuild-core/src/symbol_analysis/mod.rs +++ b/crates/fbuild-core/src/symbol_analysis/mod.rs @@ -19,6 +19,7 @@ //! no fs I/O for tool invocation) so it can be unit-tested without a //! cross toolchain. Subprocess drivers live in `fbuild_build`. +pub mod callgraph; pub mod cref; pub mod graph; @@ -80,6 +81,17 @@ pub struct FineGrainedSymbol { /// per-symbol; that's a property of `ld --cref` itself. #[serde(default)] pub referenced_by: Vec, + /// Symbols that **this** symbol references — i.e. the callees of + /// this function. Parsed from the textual disassembly emitted by + /// `objdump -d` on the linked ELF; the strings here match the + /// `mangled` name of another row in the same report. Empty when + /// the analyzer didn't have an `objdump` to run, or when the + /// symbol contains no recognised call instructions (data, weak + /// references, indirect-only call sites, etc.). Per-symbol + /// granularity, complementing `referenced_by` which is TU-level. + /// See `symbol_analysis::callgraph`. + #[serde(default)] + pub references_to: Vec, } fn default_source_nm() -> String { @@ -667,6 +679,7 @@ pub fn build_fine_grained_map_with_synth( output_section: attribution.map(|r| r.output_section.clone()), source: "nm".to_string(), referenced_by, + references_to: Vec::new(), }); } @@ -710,6 +723,7 @@ pub fn build_fine_grained_map_with_synth( output_section: Some(r.output_section.clone()), source: "map-derived".to_string(), referenced_by, + references_to: Vec::new(), }); } FineGrainedSymbolMap { diff --git a/crates/fbuild-core/src/symbol_analysis/tests.rs b/crates/fbuild-core/src/symbol_analysis/tests.rs index f4c18e94..625de426 100644 --- a/crates/fbuild-core/src/symbol_analysis/tests.rs +++ b/crates/fbuild-core/src/symbol_analysis/tests.rs @@ -400,6 +400,7 @@ fn sample_symbol(addr: u64, size: u64, region: MemoryRegion, name: &str) -> Fine output_section: None, source: "nm".to_string(), referenced_by: Vec::new(), + references_to: Vec::new(), } }