Skip to content

Commit 8cf3ec7

Browse files
notriddle and weihanglo
committed
feat: support for rustdoc mergeable cross-crate info
This is an unstable feature that we designed to fix several performance problems with the old system: 1. You couldn't easily build crate docs in hermetic environments. This doesn't matter for Cargo, but it was one of the original reasons to implement the feature. 2. We have to build all the doc resources in their final form at every step, instead of delaying slow parts (mostly the search index) until the end and only doing them once. 3. It requires rustdoc to take a lock at the end. This reduces available concurrency for generating docs. A nightly feature `-Zrustdoc-mergeable-info` is added. Co-authored-by: Michael Howell <[email protected]> Co-authored-by: Weihang Lo <[email protected]>
1 parent 3a4485d commit 8cf3ec7

File tree

12 files changed

+908
-20
lines changed

12 files changed

+908
-20
lines changed

crates/cargo-test-support/src/compare.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ static E2E_LITERAL_REDACTIONS: &[(&str, &str)] = &[
338338
("[BLOCKING]", " Blocking"),
339339
("[GENERATED]", " Generated"),
340340
("[OPENING]", " Opening"),
341+
("[MERGING]", " Merging"),
341342
];
342343

343344
/// Checks that the given string contains the given contiguous lines

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ pub enum FileFlavor {
7474
DebugInfo,
7575
/// SBOM (Software Bill of Materials precursor) file (e.g. cargo-sbom.json).
7676
Sbom,
77+
/// Cross-crate info JSON files generated by rustdoc.
78+
DocParts,
7779
}
7880

7981
/// Type of each file generated by a Unit.
@@ -1191,6 +1193,19 @@ impl RustDocFingerprint {
11911193
})
11921194
.filter(|path| path.exists())
11931195
.try_for_each(|path| clean_doc(path))?;
1196+
1197+
// Clean docdeps directory as well for `-Zrustdoc-mergeable-info`.
1198+
//
1199+
// This could potentially have a rustdoc version prefix
1200+
// so that it can be retained across different toolchain versions.
1201+
build_runner
1202+
.bcx
1203+
.all_kinds
1204+
.iter()
1205+
.map(|kind| build_runner.files().layout(*kind).build_dir().docdeps())
1206+
.filter(|path| path.exists())
1207+
.try_for_each(std::fs::remove_dir_all)?;
1208+
11941209
write_fingerprint()?;
11951210
return Ok(());
11961211

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,13 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
328328
.build_script(&dir)
329329
}
330330

331+
/// Returns the directory where mergeable cross-crate info for docs is stored.
332+
pub fn docdeps_dir(&self, unit: &Unit) -> &Path {
333+
assert!(unit.mode.is_doc());
334+
assert!(self.metas.contains_key(unit));
335+
self.layout(unit.kind).build_dir().docdeps()
336+
}
337+
331338
/// Returns the directory for compiled artifacts files.
332339
/// `/path/to/target/{debug,release}/deps/artifact/KIND/PKG-HASH`
333340
fn artifact_dir(&self, unit: &Unit) -> PathBuf {
@@ -500,12 +507,26 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
500507
.join("index.html")
501508
};
502509

503-
vec![OutputFile {
510+
let mut outputs = vec![OutputFile {
504511
path,
505512
hardlink: None,
506513
export_path: None,
507514
flavor: FileFlavor::Normal,
508-
}]
515+
}];
516+
517+
if bcx.gctx.cli_unstable().rustdoc_mergeable_info {
518+
outputs.push(OutputFile {
519+
path: self
520+
.docdeps_dir(unit)
521+
.join(unit.target.crate_name())
522+
.with_extension("json"),
523+
hardlink: None,
524+
export_path: None,
525+
flavor: FileFlavor::DocParts,
526+
})
527+
}
528+
529+
outputs
509530
}
510531
CompileMode::RunCustomBuild => {
511532
// At this time, this code path does not handle build script

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! [`BuildRunner`] is the mutable state used during the build process.
22
33
use std::collections::{HashMap, HashSet};
4+
use std::ffi::OsStr;
45
use std::path::{Path, PathBuf};
56
use std::sync::{Arc, Mutex};
67

@@ -230,6 +231,8 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
230231
}
231232
}
232233

234+
self.collect_doc_merge_info()?;
235+
233236
// Collect the result of the build into `self.compilation`.
234237
for unit in &self.bcx.roots {
235238
self.collect_tests_and_executables(unit)?;
@@ -335,6 +338,132 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
335338
Ok(())
336339
}
337340

341+
fn collect_doc_merge_info(&mut self) -> CargoResult<()> {
342+
if !self.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
343+
return Ok(());
344+
}
345+
346+
if !self.bcx.build_config.intent.is_doc() {
347+
return Ok(());
348+
}
349+
350+
if self.bcx.build_config.intent.wants_doc_json_output() {
351+
// rustdoc JSON output doesn't support merge (yet?)
352+
return Ok(());
353+
}
354+
355+
let mut doc_merge_info = HashMap::new();
356+
357+
let unit_iter = if self.bcx.build_config.intent.wants_deps_docs() {
358+
itertools::Either::Left(self.bcx.unit_graph.keys())
359+
} else {
360+
itertools::Either::Right(self.bcx.roots.iter())
361+
};
362+
363+
for unit in unit_iter {
364+
let has_doc_parts = unit.mode.is_doc()
365+
&& self
366+
.outputs(unit)?
367+
.iter()
368+
.any(|o| matches!(o.flavor, FileFlavor::DocParts));
369+
if !has_doc_parts {
370+
continue;
371+
}
372+
373+
doc_merge_info.entry(unit.kind).or_insert_with(|| {
374+
let out_dir = self
375+
.files()
376+
.layout(unit.kind)
377+
.artifact_dir()
378+
.expect("artifact-dir was not locked")
379+
.doc()
380+
.to_owned();
381+
let docdeps_dir = self.files().docdeps_dir(unit);
382+
383+
let mut requires_merge = false;
384+
385+
// HACK: get mtime of crates.js to signal to the caller
385+
// whether we need to merge cross-crate info.
387+
// The content of `crates.js` looks like
388+
//
389+
// ```
390+
// window.ALL_CRATES = ["cargo","cargo_util","cargo_util_schemas","crates_io"]
391+
// ```
392+
//
393+
// and is updated whenever a new crate gets documented,
393+
// even with the legacy `--merge=shared` mode.
395+
let crates_js = out_dir.join("crates.js");
396+
let crates_js_mtime = paths::mtime(&crates_js);
397+
398+
let mut num_crates = 0;
399+
400+
for entry in walkdir::WalkDir::new(docdeps_dir).max_depth(1) {
401+
let Ok(entry) = entry else {
402+
tracing::debug!("failed to read entry at {}", docdeps_dir.display());
403+
continue;
404+
};
405+
406+
if !entry.file_type().is_file()
407+
|| entry.path().extension() != Some(OsStr::new("json"))
408+
{
409+
continue;
410+
}
411+
412+
num_crates += 1;
413+
414+
if requires_merge {
415+
continue;
416+
}
417+
418+
let crates_js_mtime = match crates_js_mtime {
419+
Ok(mtime) => mtime,
420+
Err(ref err) => {
421+
tracing::debug!(
422+
?err,
423+
"failed to read mtime of {}",
424+
crates_js.display()
425+
);
426+
requires_merge = true;
427+
continue;
428+
}
429+
};
430+
431+
let parts_mtime = match paths::mtime(entry.path()) {
432+
Ok(mtime) => mtime,
433+
Err(err) => {
434+
tracing::debug!(
435+
?err,
436+
"failed to read mtime of {}",
437+
entry.path().display()
438+
);
439+
requires_merge = true;
440+
continue;
441+
}
442+
};
443+
444+
if parts_mtime > crates_js_mtime {
445+
requires_merge = true;
446+
continue;
447+
}
448+
}
449+
450+
if requires_merge {
451+
compilation::DocMergeInfo::Merge {
452+
num_crates,
453+
parts_dir: docdeps_dir.to_owned(),
454+
out_dir,
455+
}
456+
} else {
457+
compilation::DocMergeInfo::Fresh
458+
}
459+
});
460+
}
461+
462+
self.compilation.doc_merge_info = doc_merge_info;
463+
464+
Ok(())
465+
}
466+
338467
/// Returns the executable for the specified unit (if any).
339468
pub fn get_executable(&mut self, unit: &Unit) -> CargoResult<Option<PathBuf>> {
340469
let is_binary = unit.target.is_executable();

src/cargo/core/compiler/compilation.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,11 @@ pub struct Compilation<'gctx> {
106106
/// Libraries to test with rustdoc.
107107
pub to_doc_test: Vec<Doctest>,
108108

109+
/// Compilation information for running `rustdoc --merge=finalize`.
110+
///
111+
/// See `-Zrustdoc-mergeable-info` for more.
112+
pub doc_merge_info: HashMap<CompileKind, DocMergeInfo>,
113+
109114
/// The target host triple.
110115
pub host: String,
111116

@@ -143,6 +148,7 @@ impl<'gctx> Compilation<'gctx> {
143148
root_crate_names: Vec::new(),
144149
extra_env: HashMap::new(),
145150
to_doc_test: Vec::new(),
151+
doc_merge_info: Default::default(),
146152
gctx: bcx.gctx,
147153
host: bcx.host_triple().to_string(),
148154
rustc_process,
@@ -383,6 +389,25 @@ impl<'gctx> Compilation<'gctx> {
383389
}
384390
}
385391

392+
/// Compilation information for running `rustdoc --merge=finalize`.
393+
#[derive(Default)]
394+
pub enum DocMergeInfo {
395+
/// Doc merge disabled.
396+
#[default]
397+
None,
398+
/// Nothing is stale.
399+
Fresh,
400+
/// Doc merge is required.
401+
Merge {
402+
/// Number of crates to merge.
403+
num_crates: u64,
404+
/// Output directory holding every cross-crate info JSON file.
405+
parts_dir: PathBuf,
406+
/// Output directory for rustdoc.
407+
out_dir: PathBuf,
408+
},
409+
}
410+
386411
/// Prepares a `rustc_tool` process with additional environment variables
387412
/// that are only relevant in a context that has a unit
388413
fn fill_rustc_tool_env(mut cmd: ProcessBuilder, unit: &Unit) -> ProcessBuilder {

src/cargo/core/compiler/layout.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ impl Layout {
192192
incremental: build_dest.join("incremental"),
193193
fingerprint: build_dest.join(".fingerprint"),
194194
examples: build_dest.join("examples"),
195+
docdeps: build_dest.join("docdeps"),
195196
tmp: build_root.join("tmp"),
196197
_lock: build_dir_lock,
197198
is_new_layout,
@@ -273,6 +274,8 @@ pub struct BuildDirLayout {
273274
fingerprint: PathBuf,
274275
/// The directory for pre-uplifted examples: `build-dir/debug/examples`
275276
examples: PathBuf,
277+
/// The directory with intermediate artifacts from rustdoc.
278+
docdeps: PathBuf,
276279
/// The directory for temporary data of integration tests and benches
277280
tmp: PathBuf,
278281
/// The lockfile for a build (`.cargo-lock`). Will be unlocked when this
@@ -290,6 +293,7 @@ impl BuildDirLayout {
290293
if !self.is_new_layout {
291294
paths::create_dir_all(&self.deps)?;
292295
paths::create_dir_all(&self.fingerprint)?;
296+
paths::create_dir_all(&self.docdeps)?;
293297
}
294298
paths::create_dir_all(&self.incremental)?;
295299
paths::create_dir_all(&self.examples)?;
@@ -344,6 +348,12 @@ impl BuildDirLayout {
344348
self.build().join(pkg_dir)
345349
}
346350
}
351+
/// Fetch the path storing intermediate artifacts from rustdoc.
352+
pub fn docdeps(&self) -> &Path {
353+
// This doesn't need to consider the new build-dir layout (yet?)
354+
// because rustdoc artifacts must be appendable.
355+
&self.docdeps
356+
}
347357
/// Fetch the build script execution path.
348358
pub fn build_script_execution(&self, pkg_dir: &str) -> PathBuf {
349359
if self.is_new_layout {

src/cargo/core/compiler/mod.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ pub use self::build_context::{
7676
BuildContext, FileFlavor, FileType, RustDocFingerprint, RustcTargetData, TargetInfo,
7777
};
7878
pub use self::build_runner::{BuildRunner, Metadata, UnitHash};
79+
pub use self::compilation::DocMergeInfo;
7980
pub use self::compilation::{Compilation, Doctest, UnitOutput};
8081
pub use self::compile_kind::{CompileKind, CompileKindFallback, CompileTarget};
8182
pub use self::crate_type::CrateType;
@@ -830,8 +831,13 @@ fn prepare_rustdoc(build_runner: &BuildRunner<'_, '_>, unit: &Unit) -> CargoResu
830831
if build_runner.bcx.gctx.cli_unstable().rustdoc_depinfo {
831832
// toolchain-shared-resources is required for keeping the shared styling resources
832833
// invocation-specific is required for keeping the original rustdoc emission
833-
let mut arg =
834-
OsString::from("--emit=toolchain-shared-resources,invocation-specific,dep-info=");
834+
let mut arg = if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
835+
// toolchain resources are written at the end, at the same time as merging
836+
OsString::from("--emit=invocation-specific,dep-info=")
837+
} else {
838+
// if not using mergeable CCI, everything is written every time
839+
OsString::from("--emit=toolchain-shared-resources,invocation-specific,dep-info=")
840+
};
835841
arg.push(rustdoc_dep_info_loc(build_runner, unit));
836842
rustdoc.arg(arg);
837843

@@ -840,6 +846,18 @@ fn prepare_rustdoc(build_runner: &BuildRunner<'_, '_>, unit: &Unit) -> CargoResu
840846
}
841847

842848
rustdoc.arg("-Zunstable-options");
849+
} else if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
850+
// toolchain resources are written at the end, at the same time as merging
851+
rustdoc.arg("--emit=invocation-specific");
852+
rustdoc.arg("-Zunstable-options");
853+
}
854+
855+
if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
856+
// write out mergeable data to be imported
857+
rustdoc.arg("--merge=none");
858+
let mut arg = OsString::from("--parts-out-dir=");
859+
arg.push(build_runner.files().docdeps_dir(unit));
860+
rustdoc.arg(arg);
843861
}
844862

845863
if let Some(trim_paths) = unit.profile.trim_paths.as_ref() {

src/cargo/core/features.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ unstable_cli_options!(
884884
rustc_unicode: bool = ("Enable `rustc`'s unicode error format in Cargo's error messages"),
885885
rustdoc_depinfo: bool = ("Use dep-info files in rustdoc rebuild detection"),
886886
rustdoc_map: bool = ("Allow passing external documentation mappings to rustdoc"),
887+
rustdoc_mergeable_info: bool = ("Use rustdoc mergeable cross-crate-info files"),
887888
rustdoc_scrape_examples: bool = ("Allows Rustdoc to scrape code examples from reverse-dependencies"),
888889
sbom: bool = ("Enable the `sbom` option in build config in .cargo/config.toml file"),
889890
script: bool = ("Enable support for single-file, `.rs` packages"),
@@ -1415,6 +1416,7 @@ impl CliUnstable {
14151416
"rustc-unicode" => self.rustc_unicode = parse_empty(k, v)?,
14161417
"rustdoc-depinfo" => self.rustdoc_depinfo = parse_empty(k, v)?,
14171418
"rustdoc-map" => self.rustdoc_map = parse_empty(k, v)?,
1419+
"rustdoc-mergeable-info" => self.rustdoc_mergeable_info = parse_empty(k, v)?,
14181420
"rustdoc-scrape-examples" => self.rustdoc_scrape_examples = parse_empty(k, v)?,
14191421
"sbom" => self.sbom = parse_empty(k, v)?,
14201422
"section-timings" => self.section_timings = parse_empty(k, v)?,

0 commit comments

Comments
 (0)