Skip to content

Commit 795c8d8

Browse files
noahgift and claude committed
feat(oracle): Enable alimentar data_store module (Refs #156)
- Add alimentar path dependency to depyler-oracle
- Enable data_store module for Parquet corpus storage
- Fix Array trait import for is_null method
- Fix synthetic corpus function name
- Tests: 313 passed, data_store roundtrip working

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 8985dad commit 795c8d8

File tree

3 files changed

+7
-9
lines changed

3 files changed

+7
-9
lines changed

crates/depyler-oracle/Cargo.toml

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,10 @@ depyler-core = { path = "../depyler-core" }
2323
# Features: parallel (rayon), gpu (wgpu via trueno)
2424
aprender = { path = "../../../aprender", default-features = true }
2525

26-
# Data loading and versioning (deferred - dependency conflicts)
27-
# TODO: Re-enable when aprender/alimentar are aligned
28-
# alimentar = "0.1.0"
29-
# arrow = { version = "53", default-features = false, features = ["prettyprint"] }
30-
# parquet = { version = "53", default-features = false, features = ["arrow", "snap"] }
26+
# Data loading and versioning
27+
alimentar = { path = "../../../alimentar" }
28+
arrow = { version = "53", default-features = false, features = ["prettyprint"] }
29+
parquet = { version = "53", default-features = false, features = ["arrow", "snap"] }
3130

3231
# Serialization
3332
serde.workspace = true

crates/depyler-oracle/src/data_store.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
use crate::classifier::ErrorCategory;
99
use crate::training::{TrainingDataset, TrainingSample};
1010
use alimentar::{ArrowDataset, Dataset};
11-
use arrow::array::{ArrayRef, RecordBatch, StringArray, UInt8Array};
11+
use arrow::array::{Array, ArrayRef, RecordBatch, StringArray, UInt8Array};
1212
use arrow::datatypes::{DataType, Field, Schema};
1313
use std::path::Path;
1414
use std::sync::Arc;
@@ -94,7 +94,6 @@ pub fn arrow_to_dataset(batch: &RecordBatch) -> TrainingDataset {
9494

9595
/// Save training corpus to Parquet file.
9696
pub fn save_corpus(dataset: &TrainingDataset, path: &Path) -> crate::Result<()> {
97-
use arrow::array::RecordBatchWriter;
9897
use parquet::arrow::ArrowWriter;
9998
use std::fs::File;
10099

@@ -145,7 +144,7 @@ pub fn load_or_create_corpus() -> crate::Result<TrainingDataset> {
145144
load_corpus(path)
146145
} else {
147146
// Generate from hardcoded + synthetic
148-
let dataset = crate::synthetic::build_augmented_corpus(0.5);
147+
let dataset = crate::synthetic::generate_synthetic_corpus();
149148

150149
// Save for next time
151150
if let Err(e) = save_corpus(&dataset, path) {

crates/depyler-oracle/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ pub mod citl_fixer;
2020
pub mod classifier;
2121
pub mod github_corpus;
2222
pub mod moe_oracle;
23-
// pub mod data_store; // TODO: Re-enable when alimentar integrated
23+
pub mod data_store;
2424
pub mod depyler_training;
2525
pub mod estimator;
2626
pub mod features;

0 commit comments

Comments
 (0)