Skip to content

Commit e9074b5

Browse files
noahgift and claude committed
[GREEN] feat(oracle): ML-powered compile error classification using aprender (Refs GH-105)
EXTREME TDD implementation with 128 tests:
- ngram.rs: N-gram fix pattern predictor with TF-IDF similarity
- tfidf.rs: TF-IDF feature extraction with combined handcrafted features
- patterns.rs: Fix template registry with 14 Rust error patterns
- training.rs: Curated training dataset with 50+ rustc error samples
- proptests.rs: 23 property-based tests verifying invariants

Features:
- Learn error→fix patterns from historical data
- Predict fixes based on N-gram similarity
- Combined TF-IDF + handcrafted feature extraction
- Fix templates with code transformations
- Drift detection for model retraining

Uses aprender v0.10.0 for ML models and text vectorization.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 8aa2c1a commit e9074b5

File tree

9 files changed

+4399
-0
lines changed

9 files changed

+4399
-0
lines changed

crates/depyler-oracle/Cargo.toml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Manifest for the depyler-oracle crate.
[package]
name = "depyler-oracle"
# Shared package metadata is inherited from the workspace root.
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
description = "ML-powered compile error classification and auto-fixing using aprender models"
keywords = ["depyler", "ml", "error-classification", "auto-fix"]
categories = ["development-tools"]

[dependencies]
# Core depyler crate, which supplies the error types
depyler-core = { path = "../depyler-core" }

# Machine-learning models provided by aprender
# NOTE(review): this relative path appears to resolve outside the repository
# checkout — confirm that a sibling `aprender` directory is intended.
aprender = { path = "../../../aprender" }

# (De)serialization support
serde.workspace = true
serde_json.workspace = true

# Error types and propagation
anyhow.workspace = true
thiserror.workspace = true

# Structured logging
tracing.workspace = true

[dev-dependencies]
tempfile.workspace = true
proptest.workspace = true
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
//! Error classification types and logic.
2+
3+
use serde::{Deserialize, Serialize};
4+
5+
/// Categories of transpilation errors.
6+
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
7+
pub enum ErrorCategory {
8+
/// Type mismatch errors (e.g., expected i32, found &str)
9+
TypeMismatch,
10+
/// Borrow checker violations
11+
BorrowChecker,
12+
/// Missing import or use statement
13+
MissingImport,
14+
/// Syntax errors
15+
SyntaxError,
16+
/// Lifetime annotation errors
17+
LifetimeError,
18+
/// Trait bound not satisfied
19+
TraitBound,
20+
/// Uncategorized errors
21+
Other,
22+
}
23+
24+
impl ErrorCategory {
25+
/// Get human-readable name.
26+
#[must_use]
27+
pub fn name(&self) -> &'static str {
28+
match self {
29+
Self::TypeMismatch => "Type Mismatch",
30+
Self::BorrowChecker => "Borrow Checker",
31+
Self::MissingImport => "Missing Import",
32+
Self::SyntaxError => "Syntax Error",
33+
Self::LifetimeError => "Lifetime Error",
34+
Self::TraitBound => "Trait Bound",
35+
Self::Other => "Other",
36+
}
37+
}
38+
39+
/// Get category index for ML model.
40+
#[must_use]
41+
pub fn index(&self) -> usize {
42+
match self {
43+
Self::TypeMismatch => 0,
44+
Self::BorrowChecker => 1,
45+
Self::MissingImport => 2,
46+
Self::SyntaxError => 3,
47+
Self::LifetimeError => 4,
48+
Self::TraitBound => 5,
49+
Self::Other => 6,
50+
}
51+
}
52+
53+
/// Create from index.
54+
#[must_use]
55+
pub fn from_index(idx: usize) -> Self {
56+
match idx {
57+
0 => Self::TypeMismatch,
58+
1 => Self::BorrowChecker,
59+
2 => Self::MissingImport,
60+
3 => Self::SyntaxError,
61+
4 => Self::LifetimeError,
62+
5 => Self::TraitBound,
63+
_ => Self::Other,
64+
}
65+
}
66+
67+
/// All categories.
68+
#[must_use]
69+
pub fn all() -> &'static [ErrorCategory] {
70+
&[
71+
Self::TypeMismatch,
72+
Self::BorrowChecker,
73+
Self::MissingImport,
74+
Self::SyntaxError,
75+
Self::LifetimeError,
76+
Self::TraitBound,
77+
Self::Other,
78+
]
79+
}
80+
}
81+
82+
/// Error classifier using rule-based and ML approaches.
///
/// Each field holds the lowercase keywords whose presence in an error message
/// points at one category.
#[derive(Debug, Clone)]
pub struct ErrorClassifier {
    /// Keywords indicating type mismatch
    type_keywords: Vec<&'static str>,
    /// Keywords indicating borrow issues
    borrow_keywords: Vec<&'static str>,
    /// Keywords indicating missing imports
    import_keywords: Vec<&'static str>,
    /// Keywords indicating lifetime issues
    lifetime_keywords: Vec<&'static str>,
    /// Keywords indicating trait bound issues
    trait_keywords: Vec<&'static str>,
}
95+
96+
impl ErrorClassifier {
97+
/// Create a new classifier.
98+
#[must_use]
99+
pub fn new() -> Self {
100+
Self {
101+
type_keywords: vec![
102+
"expected", "found", "mismatched types", "type mismatch",
103+
"cannot coerce", "incompatible types",
104+
],
105+
borrow_keywords: vec![
106+
"borrow", "borrowed", "move", "moved", "cannot move",
107+
"value used after move", "ownership",
108+
],
109+
import_keywords: vec![
110+
"not found", "unresolved", "cannot find",
111+
"no such", "undefined", "use of undeclared",
112+
],
113+
lifetime_keywords: vec![
114+
"lifetime", "'a", "'static", "does not live long enough",
115+
"borrowed value", "dangling",
116+
],
117+
trait_keywords: vec![
118+
"trait", "impl", "not implemented", "bound",
119+
"doesn't implement", "the trait bound",
120+
],
121+
}
122+
}
123+
124+
/// Classify an error message using keyword matching.
125+
#[must_use]
126+
pub fn classify_by_keywords(&self, message: &str) -> ErrorCategory {
127+
let lower = message.to_lowercase();
128+
129+
// Check lifetime first (more specific)
130+
if self.lifetime_keywords.iter().any(|k| lower.contains(k)) {
131+
return ErrorCategory::LifetimeError;
132+
}
133+
134+
// Check borrow checker
135+
if self.borrow_keywords.iter().any(|k| lower.contains(k)) {
136+
return ErrorCategory::BorrowChecker;
137+
}
138+
139+
// Check trait bounds
140+
if self.trait_keywords.iter().any(|k| lower.contains(k)) {
141+
return ErrorCategory::TraitBound;
142+
}
143+
144+
// Check type mismatch
145+
if self.type_keywords.iter().any(|k| lower.contains(k)) {
146+
return ErrorCategory::TypeMismatch;
147+
}
148+
149+
// Check missing import
150+
if self.import_keywords.iter().any(|k| lower.contains(k)) {
151+
return ErrorCategory::MissingImport;
152+
}
153+
154+
// Check for obvious syntax errors
155+
if lower.contains("syntax") || lower.contains("parse") || lower.contains("unexpected") {
156+
return ErrorCategory::SyntaxError;
157+
}
158+
159+
ErrorCategory::Other
160+
}
161+
162+
/// Get confidence score for classification.
163+
#[must_use]
164+
pub fn confidence(&self, message: &str, category: ErrorCategory) -> f32 {
165+
let lower = message.to_lowercase();
166+
167+
let keywords = match category {
168+
ErrorCategory::TypeMismatch => &self.type_keywords,
169+
ErrorCategory::BorrowChecker => &self.borrow_keywords,
170+
ErrorCategory::MissingImport => &self.import_keywords,
171+
ErrorCategory::LifetimeError => &self.lifetime_keywords,
172+
ErrorCategory::TraitBound => &self.trait_keywords,
173+
ErrorCategory::SyntaxError => return if lower.contains("syntax") { 0.9 } else { 0.5 },
174+
ErrorCategory::Other => return 0.3,
175+
};
176+
177+
let matches = keywords.iter().filter(|k| lower.contains(*k)).count();
178+
let confidence = (matches as f32 / keywords.len() as f32).min(1.0);
179+
180+
// Boost if multiple keywords match
181+
if matches > 1 {
182+
(confidence * 1.2).min(0.95)
183+
} else {
184+
confidence.max(0.5)
185+
}
186+
}
187+
}
188+
189+
impl Default for ErrorClassifier {
190+
fn default() -> Self {
191+
Self::new()
192+
}
193+
}
194+
195+
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies `message` by keywords with a freshly built classifier.
    fn classify(message: &str) -> ErrorCategory {
        ErrorClassifier::new().classify_by_keywords(message)
    }

    /// Scores `message` against `category` with a freshly built classifier.
    fn score(message: &str, category: ErrorCategory) -> f32 {
        ErrorClassifier::new().confidence(message, category)
    }

    #[test]
    fn test_category_index_roundtrip() {
        for &category in ErrorCategory::all() {
            assert_eq!(ErrorCategory::from_index(category.index()), category);
        }
    }

    #[test]
    fn test_classify_type_mismatch() {
        assert_eq!(
            classify("error: expected `i32`, found `&str`"),
            ErrorCategory::TypeMismatch
        );
    }

    #[test]
    fn test_classify_borrow_checker() {
        assert_eq!(
            classify("error: cannot move out of borrowed content"),
            ErrorCategory::BorrowChecker
        );
    }

    #[test]
    fn test_classify_missing_import() {
        assert_eq!(
            classify("error: cannot find type `HashMap` in this scope"),
            ErrorCategory::MissingImport
        );
    }

    #[test]
    fn test_classify_lifetime() {
        assert_eq!(
            classify("error: `x` does not live long enough"),
            ErrorCategory::LifetimeError
        );
    }

    #[test]
    fn test_classify_trait_bound() {
        assert_eq!(
            classify("error: the trait bound `Foo: Clone` is not satisfied"),
            ErrorCategory::TraitBound
        );
    }

    #[test]
    fn test_confidence_high() {
        let conf = score(
            "mismatched types: expected i32, found &str",
            ErrorCategory::TypeMismatch,
        );
        assert!(conf > 0.5);
    }

    #[test]
    fn test_confidence_low_for_wrong_category() {
        // Scoring a type-mismatch message as a borrow error must stay low.
        let conf = score("mismatched types", ErrorCategory::BorrowChecker);
        assert!(conf <= 0.7);
    }

    #[test]
    fn test_category_names() {
        let expected = [
            (ErrorCategory::TypeMismatch, "Type Mismatch"),
            (ErrorCategory::BorrowChecker, "Borrow Checker"),
        ];
        for (category, label) in expected {
            assert_eq!(category.name(), label);
        }
    }
}

0 commit comments

Comments
 (0)