Skip to content

Commit 1d3fb78

Browse files
authored
feat: support for rustdoc mergeable cross-crate info (#16309)
### What does this PR try to resolve?

This is an unstable feature that we designed to fix several performance problems with the old system:

1. You couldn't easily build crate docs in hermetic environments. This doesn't matter for Cargo, but it was one of the original reasons to implement the feature.
2. We have to build all the doc resources in their final form at every step, instead of delaying slow parts (mostly the search index) until the end and only doing them once.
3. It requires rustdoc to take a lock at the end. This reduces available concurrency for generating docs.

Part of

* rust-lang/rust#130676
* #16306

### How to test and review this PR?

Design decisions and questions:

* [ ] Doc parts are always stored in the new build dir layout: #16309 (comment)
* [ ] Doesn't take `cargo clean` into account yet: #16309 (comment)
* [x] A new `FileFlavor::DocParts` is added: #16309 (comment)
* [ ] One doc-merge per target platform (and run in serial): #16309 (comment)
* A new status "Merging" is added. Example output:

  ```
   Documenting ...
   Documenting cargo v0.94.0 (/Users/frodo/cargo)
      Finished `dev` profile [unoptimized + debuginfo] target(s) in 29.68s
       Merging 300 docs for host
      Finished documentation merge in 2.74s
     Generated /Users/frodo/cargo/target/doc/cargo/index.html
  ```

* Who should garbage collect unneeded crate docs under mergeable CCI mode: rustdoc or cargo? (Or should we not do any garbage collection at all?) #16309 (comment)
* `.rustdoc_fingerprint.json` now stores the doc part file paths from previous builds.
### Simple benchmark

* Tested against this PR on top of 15fde07
* rustdoc 1.93.0-nightly (1be6b13be 2025-11-26)
* macOS Sequoia 15.7.1
* Apple M1 Pro 2021
* 32GB RAM; 10 cores
* 300 crate doc merge
* Test steps:

  ```bash
  cargo check
  cargo doc
  cargo clean --doc && rm -rf target/debug/build/**/deps/*.json
  cargo doc -Zrustdoc-mergeable-info
  cargo clean --doc && rm -rf target/debug/build/**/deps/*.json
  ```

**With `-Zrustdoc-mergeable-info`**:

* <40s: ~30s documenting time + 3-5s merge time
* 76262 files, 970.5MiB total

**Without `-Zrustdoc-mergeable-info`**:

* ~500s: 8m10s - 8m30s
* 75242 files, 932.8MiB total
2 parents edba308 + d9248a2 commit 1d3fb78

File tree

14 files changed

+1235
-45
lines changed

14 files changed

+1235
-45
lines changed

crates/cargo-test-support/src/compare.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ static E2E_LITERAL_REDACTIONS: &[(&str, &str)] = &[
338338
("[BLOCKING]", " Blocking"),
339339
("[GENERATED]", " Generated"),
340340
("[OPENING]", " Opening"),
341+
("[MERGING]", " Merging"),
341342
];
342343

343344
/// Checks that the given string contains the given contiguous lines

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ pub enum FileFlavor {
7979
DebugInfo,
8080
/// SBOM (Software Bill of Materials precursor) file (e.g. cargo-sbom.json).
8181
Sbom,
82+
/// Cross-crate info JSON files generated by rustdoc.
83+
DocParts,
8284
}
8385

8486
/// Type of each file generated by a Unit.

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
275275
self.layout(unit.kind).build_dir().deps(&dir)
276276
}
277277

278+
/// Returns the directory where Rust crate dependencies are found for the
279+
/// specified unit. (new layout)
280+
///
281+
/// New features should prefer this so they do not need to be migrated to the new layout later.
282+
pub fn deps_dir_new_layout(&self, unit: &Unit) -> PathBuf {
283+
let dir = self.pkg_dir(unit);
284+
self.layout(unit.kind).build_dir().deps_new_layout(&dir)
285+
}
286+
278287
/// Directory where the fingerprint for the given unit should go.
279288
pub fn fingerprint_dir(&self, unit: &Unit) -> PathBuf {
280289
let dir = self.pkg_dir(unit);
@@ -495,12 +504,27 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
495504
.join("index.html")
496505
};
497506

498-
vec![OutputFile {
507+
let mut outputs = vec![OutputFile {
499508
path,
500509
hardlink: None,
501510
export_path: None,
502511
flavor: FileFlavor::Normal,
503-
}]
512+
}];
513+
514+
if bcx.gctx.cli_unstable().rustdoc_mergeable_info {
515+
// `-Zrustdoc-mergeable-info` always uses the new layout.
516+
outputs.push(OutputFile {
517+
path: self
518+
.deps_dir_new_layout(unit)
519+
.join(unit.target.crate_name())
520+
.with_extension("json"),
521+
hardlink: None,
522+
export_path: None,
523+
flavor: FileFlavor::DocParts,
524+
})
525+
}
526+
527+
outputs
504528
}
505529
CompileMode::RunCustomBuild => {
506530
// At this time, this code path does not handle build script

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,14 @@ use filetime::FileTime;
1616
use itertools::Itertools;
1717
use jobserver::Client;
1818

19+
use super::RustdocFingerprint;
1920
use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
2021
use super::fingerprint::{Checksum, Fingerprint};
2122
use super::job_queue::JobQueue;
2223
use super::layout::Layout;
2324
use super::lto::Lto;
2425
use super::unit_graph::UnitDep;
25-
use super::{
26-
BuildContext, Compilation, CompileKind, CompileMode, Executor, FileFlavor, RustDocFingerprint,
27-
};
26+
use super::{BuildContext, Compilation, CompileKind, CompileMode, Executor, FileFlavor};
2827

2928
mod compilation_files;
3029
use self::compilation_files::CompilationFiles;
@@ -178,7 +177,7 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
178177
// they were compiled with the same Rustc version that we're currently using.
179178
// See the function doc comment for more.
180179
if self.bcx.build_config.intent.is_doc() {
181-
RustDocFingerprint::check_rustdoc_fingerprint(&self)?
180+
RustdocFingerprint::check_rustdoc_fingerprint(&self)?
182181
}
183182

184183
for unit in &self.bcx.roots {
@@ -225,6 +224,8 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
225224
}
226225
}
227226

227+
self.collect_doc_merge_info()?;
228+
228229
// Collect the result of the build into `self.compilation`.
229230
for unit in &self.bcx.roots {
230231
self.collect_tests_and_executables(unit)?;
@@ -330,6 +331,58 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
330331
Ok(())
331332
}
332333

334+
fn collect_doc_merge_info(&mut self) -> CargoResult<()> {
335+
if !self.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
336+
return Ok(());
337+
}
338+
339+
if !self.bcx.build_config.intent.is_doc() {
340+
return Ok(());
341+
}
342+
343+
if self.bcx.build_config.intent.wants_doc_json_output() {
344+
// rustdoc JSON output doesn't support merge (yet?)
345+
return Ok(());
346+
}
347+
348+
let mut doc_parts_map: HashMap<_, Vec<_>> = HashMap::new();
349+
350+
let unit_iter = if self.bcx.build_config.intent.wants_deps_docs() {
351+
itertools::Either::Left(self.bcx.unit_graph.keys())
352+
} else {
353+
itertools::Either::Right(self.bcx.roots.iter())
354+
};
355+
356+
for unit in unit_iter {
357+
if !unit.mode.is_doc() {
358+
continue;
359+
}
360+
// Assumption: one `rustdoc` call generates only one cross-crate info JSON.
361+
let outputs = self.outputs(unit)?;
362+
363+
let Some(doc_parts) = outputs
364+
.iter()
365+
.find(|o| matches!(o.flavor, FileFlavor::DocParts))
366+
else {
367+
continue;
368+
};
369+
370+
doc_parts_map
371+
.entry(unit.kind)
372+
.or_default()
373+
.push(doc_parts.path.to_owned());
374+
}
375+
376+
self.compilation.rustdoc_fingerprints = Some(
377+
doc_parts_map
378+
.into_iter()
379+
.map(|(kind, doc_parts)| (kind, RustdocFingerprint::new(self, kind, doc_parts)))
380+
.collect(),
381+
);
382+
383+
Ok(())
384+
}
385+
333386
/// Returns the executable for the specified unit (if any).
334387
pub fn get_executable(&mut self, unit: &Unit) -> CargoResult<Option<PathBuf>> {
335388
let is_binary = unit.target.is_executable();

src/cargo/core/compiler/compilation.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use cargo_util::{ProcessBuilder, paths};
99

1010
use crate::core::Package;
1111
use crate::core::compiler::BuildContext;
12+
use crate::core::compiler::RustdocFingerprint;
1213
use crate::core::compiler::apply_env_config;
1314
use crate::core::compiler::{CompileKind, Unit, UnitHash};
1415
use crate::util::{CargoResult, GlobalContext, context};
@@ -106,6 +107,11 @@ pub struct Compilation<'gctx> {
106107
/// Libraries to test with rustdoc.
107108
pub to_doc_test: Vec<Doctest>,
108109

110+
/// Rustdoc fingerprint files to determine whether we need to run `rustdoc --merge=finalize`.
111+
///
112+
/// See `-Zrustdoc-mergeable-info` for more.
113+
pub rustdoc_fingerprints: Option<HashMap<CompileKind, RustdocFingerprint>>,
114+
109115
/// The target host triple.
110116
pub host: String,
111117

@@ -143,6 +149,7 @@ impl<'gctx> Compilation<'gctx> {
143149
root_crate_names: Vec::new(),
144150
extra_env: HashMap::new(),
145151
to_doc_test: Vec::new(),
152+
rustdoc_fingerprints: None,
146153
gctx: bcx.gctx,
147154
host: bcx.host_triple().to_string(),
148155
rustc_process,

src/cargo/core/compiler/fingerprint/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ pub use self::dep_info::parse_dep_info;
410410
pub use self::dep_info::parse_rustc_dep_info;
411411
pub use self::dep_info::translate_dep_info;
412412
pub use self::dirty_reason::DirtyReason;
413-
pub use self::rustdoc::RustDocFingerprint;
413+
pub use self::rustdoc::RustdocFingerprint;
414414

415415
/// Determines if a [`Unit`] is up-to-date, and if not prepares necessary work to
416416
/// update the persisted fingerprint.

0 commit comments

Comments
 (0)