Skip to content

Commit 3163a6c

Browse files
authored
feat: Read and write metadata files (#1618)
This adds functionality to write and read the metadata structures introduced in #1617 to and from the filesystem. A cache file's metadata file has the same path, but with the extension `.metadata`. The read happens in check_expiry, which gives the cleanup job access to the metadata. All of the old logic is still there—if no metadata file exists, we use the mtime from the FS as before. The write happens in compute. It should be noted that currently metadata are not propagated through the shared cache; this is left as future work.
1 parent 540a444 commit 3163a6c

File tree

7 files changed

+439
-112
lines changed

7 files changed

+439
-112
lines changed

crates/symbolicator-native/tests/integration/e2e.rs

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
use std::fs::File;
12
use std::path::Path;
23
use std::sync::Arc;
34

45
use symbolicator_native::interface::{FrameStatus, SymbolicateStacktraces};
6+
use symbolicator_service::caching::Metadata;
57
use symbolicator_service::objects::ObjectDownloadInfo;
68
use symbolicator_service::types::{ObjectFileStatus, Scope};
79
use symbolicator_sources::{
@@ -480,11 +482,12 @@ async fn test_basic_windows() {
480482
// NOTE: the cache key depends on the exact location of the file, which is
481483
// random because it includes the [`Server`]s random port.
482484
cached_objects.sort_by_key(|(_, size)| *size);
483-
assert_eq!(cached_objects.len(), 4); // 2 filename patterns, 2 metadata files
485+
assert_eq!(cached_objects.len(), 6); // 2 filename patterns, 2 metadata files, 2 debug txt files
484486
assert_eq!(cached_objects[0].1, 0);
485-
assert_eq!(cached_objects[3].1, 846_848);
487+
assert_eq!(cached_objects[5].1, 846_848);
486488

487-
let metadata_file = &cached_objects[1].0;
489+
// Checks the .txt file that we only write in debug mode
490+
let metadata_text_file = &cached_objects[3].0;
488491
let cached_scope = if is_public { "global" } else { "myscope" };
489492
let mut expected_metadata = format!(
490493
"scope: {cached_scope}\n\nsource: microsoft\nlocation: {}",
@@ -493,26 +496,40 @@ async fn test_basic_windows() {
493496
)
494497
);
495498
let metadata =
496-
std::fs::read_to_string(objects_dir.join(metadata_file)).unwrap();
499+
std::fs::read_to_string(objects_dir.join(metadata_text_file)).unwrap();
497500
// NOTE: due to random sort order, we have either `.pdb` or `.pd_`,
498501
// thus we only check for the substring
499502
assert!(
500503
metadata.starts_with(&expected_metadata),
501504
"{metadata:?} == {expected_metadata:?}"
502505
);
503506

507+
// Checks the metadata file
508+
let metadata_file = &cached_objects[1].0;
509+
let cached_scope = if is_public { "global" } else { "myscope" };
510+
let file = File::open(objects_dir.join(metadata_file)).unwrap();
511+
let metadata: Metadata = serde_json::from_reader(&file).unwrap();
512+
assert_eq!(metadata.scope.as_ref(), cached_scope);
513+
504514
let symcaches_dir = cache_dir.path().join("symcaches");
505515
let mut cached_symcaches = get_cache_files(&symcaches_dir);
506516

507517
cached_symcaches.sort_by_key(|(_, size)| *size);
508-
assert_eq!(cached_symcaches.len(), 2); // 1 symcache, 1 metadata file
509-
assert_eq!(cached_symcaches[1].1, 142_365);
518+
assert_eq!(cached_symcaches.len(), 3); // 1 symcache, 1 metadata file, 1 debug text file
519+
assert_eq!(cached_symcaches[2].1, 142_365);
510520

511-
let metadata_file = &cached_symcaches[0].0;
521+
// Checks the .txt file that we only write in debug mode
522+
let metadata_text_file = &cached_symcaches[1].0;
512523
let metadata =
513-
std::fs::read_to_string(symcaches_dir.join(metadata_file)).unwrap();
524+
std::fs::read_to_string(symcaches_dir.join(metadata_text_file)).unwrap();
514525
expected_metadata.push_str("b\n"); // this truely ends in `.pdb` now
515526
assert_eq!(metadata, expected_metadata);
527+
528+
// Checks the metadata file
529+
let metadata_file = &cached_symcaches[0].0;
530+
let file = File::open(symcaches_dir.join(metadata_file)).unwrap();
531+
let metadata: Metadata = serde_json::from_reader(&file).unwrap();
532+
assert_eq!(metadata.scope.as_ref(), cached_scope);
516533
}
517534

518535
// our use of in-memory caching should make sure we only ever request each file once

crates/symbolicator-service/src/caching/cache_key.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,26 @@ use symbolicator_sources::RemoteFile;
66

77
use crate::types::Scope;
88

9+
/// The key of an item in an in-memory or on-disk
10+
/// cache.
11+
///
12+
/// Each key belongs to a [Scope], determined by
13+
/// the scope of the symbolication request and the symbol
14+
/// source in question.
915
#[derive(Debug, Clone, Eq)]
1016
pub struct CacheKey {
17+
scope: Scope,
1118
metadata: Arc<str>,
1219
hash: [u8; 32],
1320
}
1421

22+
impl CacheKey {
23+
/// Returns the scope of this cache key.
24+
pub fn scope(&self) -> &Scope {
25+
&self.scope
26+
}
27+
}
28+
1529
impl fmt::Display for CacheKey {
1630
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1731
write!(f, "{}", self.cache_path(1234))
@@ -63,24 +77,28 @@ impl CacheKey {
6377
/// contributing sources.
6478
pub fn scoped_builder(scope: &Scope) -> CacheKeyBuilder {
6579
let metadata = format!("scope: {scope}\n\n");
66-
CacheKeyBuilder { metadata }
80+
CacheKeyBuilder {
81+
scope: scope.clone(),
82+
metadata,
83+
}
6784
}
6885

6986
#[cfg(test)]
70-
pub fn for_testing(key: impl Into<String>) -> Self {
87+
pub fn for_testing(scope: Scope, key: impl Into<String>) -> Self {
7188
let metadata = key.into();
7289

73-
CacheKeyBuilder { metadata }.build()
90+
CacheKeyBuilder { scope, metadata }.build()
7491
}
7592
}
7693

7794
/// A builder for [`CacheKey`]s.
7895
///
7996
/// This builder implements the [`Write`] trait, and the intention of it is to
8097
/// accept human readable, but most importantly **stable**, input.
81-
/// This input in then being hashed to form the [`CacheKey`], and can also be serialized alongside
98+
/// This input is then hashed to form the [`CacheKey`], and can also be serialized alongside
8299
/// the cache files to help debugging.
83100
pub struct CacheKeyBuilder {
101+
scope: Scope,
84102
metadata: String,
85103
}
86104

@@ -99,6 +117,7 @@ impl CacheKeyBuilder {
99117
let hash = Sha256::digest(&self.metadata);
100118

101119
CacheKey {
120+
scope: self.scope,
102121
metadata: self.metadata.into(),
103122
hash: hash.into(),
104123
}

crates/symbolicator-service/src/caching/cleanup.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::ffi::OsStr;
12
use std::fs::{read_dir, remove_dir, remove_file};
23
use std::io;
34
use std::path::Path;
@@ -6,6 +7,7 @@ use anyhow::{anyhow, Result};
67
use rand::seq::SliceRandom;
78
use rand::thread_rng;
89

10+
use crate::caching::fs::{metadata_path, METADATA_EXTENSION};
911
use crate::config::Config;
1012
use crate::metric;
1113

@@ -166,6 +168,10 @@ impl Cache {
166168
let mut is_empty = true;
167169
for entry in entries {
168170
let path = entry?.path();
171+
// Skip metadata files—they will be handled together with their cache files.
172+
if path.extension().and_then(OsStr::to_str) == Some(METADATA_EXTENSION) {
173+
continue;
174+
}
169175
if path.is_dir() {
170176
let mut dir_is_empty = self.cleanup_directory_recursive(&path, stats, dry_run)?;
171177
if dir_is_empty {
@@ -204,6 +210,7 @@ impl Cache {
204210

205211
/// Tries to clean up the file at `path`, returning `true` if it was removed.
206212
///
213+
/// This also removes the file's corresponding metadata file, if it exists.
207214
/// If `dry_run` is `true`, the file will not actually be deleted.
208215
fn try_cleanup_path(
209216
&self,
@@ -222,16 +229,18 @@ impl Cache {
222229
tracing::debug!("Removing file `{}`", path.display());
223230
if !dry_run {
224231
catch_not_found(|| remove_file(path))?;
232+
catch_not_found(|| remove_file(metadata_path(path)))?;
225233
}
226234

227235
stats.removed_bytes += size;
228236
stats.removed_files += 1;
229237

230-
return Ok(true);
231-
}
232-
stats.retained_bytes += size;
233-
stats.retained_files += 1;
238+
Ok(true)
239+
} else {
240+
stats.retained_bytes += size;
241+
stats.retained_files += 1;
234242

235-
Ok(false)
243+
Ok(false)
244+
}
236245
}
237246
}

0 commit comments

Comments
 (0)