llamastack
diff --git a/.stats.yml b/.stats.yml
Lines changed: 2 additions & 2 deletions
diff --git a/src/resources/alpha/eval/eval.ts b/src/resources/alpha/eval/eval.ts
Lines changed: 4 additions & 192 deletions
diff --git a/src/resources/beta/datasets.ts b/src/resources/beta/datasets.ts
Lines changed: 30 additions & 4 deletions
diff --git a/src/resources/scoring-functions.ts b/src/resources/scoring-functions.ts
Lines changed: 85 additions & 6 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 103
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-2b99a80543f8bc8fa164167693c214651ac8e710f4726fb5869183b4d6c71a03.yml
-openapi_spec_hash: a5632057f5e4d956a71c20a79c0d879c
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-aab1b331382f758fc255f765e73b62fedf463cf0748bc11b2b08974de9ac816a.yml
+openapi_spec_hash: f717a21f47419aa51e4d9298aa68cc45
 config_hash: 0017f6c419cbbf7b949f9b2842917a79
@@ -205,204 +205,16 @@ export interface EvalEvaluateRowsAlphaParams {
 
 export interface EvalRunEvalParams {
   /**
-   * A model candidate for evaluation.
-   */
-  eval_candidate: EvalRunEvalParams.EvalCandidate;
-
-  /**
-   * Number of examples to evaluate (useful for testing), if not provided, all
-   * examples in the dataset will be evaluated
-   */
-  num_examples?: number | null;
-
-  /**
-   * Map between scoring function id and parameters for each scoring function you
-   * want to run
-   */
-  scoring_params?: {
-    [key: string]:
-      | EvalRunEvalParams.LlmAsJudgeScoringFnParams
-      | EvalRunEvalParams.RegexParserScoringFnParams
-      | EvalRunEvalParams.BasicScoringFnParams;
-  };
-}
-
-export namespace EvalRunEvalParams {
-  /**
-   * A model candidate for evaluation.
-   */
-  export interface EvalCandidate {
-    model: string;
-
-    /**
-     * Sampling parameters.
-     */
-    sampling_params: Shared.SamplingParams;
-
-    /**
-     * A system message providing instructions or context to the model.
-     */
-    system_message?: Shared.SystemMessage | null;
-
-    type?: 'model';
-  }
-
-  /**
-   * Parameters for LLM-as-judge scoring function configuration.
-   */
-  export interface LlmAsJudgeScoringFnParams {
-    judge_model: string;
-
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    /**
-     * Regexes to extract the answer from generated response
-     */
-    judge_score_regexes?: Array<string>;
-
-    prompt_template?: string | null;
-
-    type?: 'llm_as_judge';
-  }
-
-  /**
-   * Parameters for regex parser scoring function configuration.
-   */
-  export interface RegexParserScoringFnParams {
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    /**
-     * Regex to extract the answer from generated response
-     */
-    parsing_regexes?: Array<string>;
-
-    type?: 'regex_parser';
-  }
-
-  /**
-   * Parameters for basic scoring function configuration.
+   * A benchmark configuration for evaluation.
    */
-  export interface BasicScoringFnParams {
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    type?: 'basic';
-  }
+  benchmark_config: BenchmarkConfig;
 }
 
 export interface EvalRunEvalAlphaParams {
   /**
-   * A model candidate for evaluation.
-   */
-  eval_candidate: EvalRunEvalAlphaParams.EvalCandidate;
-
-  /**
-   * Number of examples to evaluate (useful for testing), if not provided, all
-   * examples in the dataset will be evaluated
-   */
-  num_examples?: number | null;
-
-  /**
-   * Map between scoring function id and parameters for each scoring function you
-   * want to run
-   */
-  scoring_params?: {
-    [key: string]:
-      | EvalRunEvalAlphaParams.LlmAsJudgeScoringFnParams
-      | EvalRunEvalAlphaParams.RegexParserScoringFnParams
-      | EvalRunEvalAlphaParams.BasicScoringFnParams;
-  };
-}
-
-export namespace EvalRunEvalAlphaParams {
-  /**
-   * A model candidate for evaluation.
-   */
-  export interface EvalCandidate {
-    model: string;
-
-    /**
-     * Sampling parameters.
-     */
-    sampling_params: Shared.SamplingParams;
-
-    /**
-     * A system message providing instructions or context to the model.
-     */
-    system_message?: Shared.SystemMessage | null;
-
-    type?: 'model';
-  }
-
-  /**
-   * Parameters for LLM-as-judge scoring function configuration.
-   */
-  export interface LlmAsJudgeScoringFnParams {
-    judge_model: string;
-
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    /**
-     * Regexes to extract the answer from generated response
-     */
-    judge_score_regexes?: Array<string>;
-
-    prompt_template?: string | null;
-
-    type?: 'llm_as_judge';
-  }
-
-  /**
-   * Parameters for regex parser scoring function configuration.
-   */
-  export interface RegexParserScoringFnParams {
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    /**
-     * Regex to extract the answer from generated response
-     */
-    parsing_regexes?: Array<string>;
-
-    type?: 'regex_parser';
-  }
-
-  /**
-   * Parameters for basic scoring function configuration.
+   * A benchmark configuration for evaluation.
    */
-  export interface BasicScoringFnParams {
-    /**
-     * Aggregation functions to apply to the scores of each row
-     */
-    aggregation_functions?: Array<
-      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
-    >;
-
-    type?: 'basic';
-  }
+  benchmark_config: BenchmarkConfig;
 }
 
 Eval.Jobs = Jobs;
 
@@ -301,13 +301,39 @@ export interface DatasetIterrowsParams {
 }
 
 export interface DatasetRegisterParams {
-  purpose: unknown;
+  /**
+   * Purpose of the dataset. Each purpose has a required input data schema.
+   */
+  purpose: 'post-training/messages' | 'eval/question-answer' | 'eval/messages-answer';
+
+  /**
+   * Source of the dataset: either a URI data source or a rows data source.
+   */
+  source: DatasetRegisterParams.UriDataSource | DatasetRegisterParams.RowsDataSource;
+
+  dataset_id?: string | null;
+
+  metadata?: { [key: string]: unknown } | null;
+}
+
+export namespace DatasetRegisterParams {
+  /**
+   * A dataset that can be obtained from a URI.
+   */
+  export interface UriDataSource {
+    uri: string;
 
-  source: unknown;
+    type?: 'uri';
+  }
 
-  dataset_id?: unknown;
+  /**
+   * A dataset stored in rows.
+   */
+  export interface RowsDataSource {
+    rows: Array<{ [key: string]: unknown }>;
 
-  metadata?: unknown;
+    type?: 'rows';
+  }
 }
 
 export declare namespace Datasets {
 
@@ -240,17 +240,96 @@ export namespace ScoringFnParams {
 export type ScoringFunctionListResponse = Array<ScoringFn>;
 
 export interface ScoringFunctionRegisterParams {
-  description: unknown;
+  description: string;
 
-  return_type: unknown;
+  return_type: ScoringFunctionRegisterParams.ReturnType;
 
-  scoring_fn_id: unknown;
+  scoring_fn_id: string;
 
-  params?: unknown;
+  /**
+   * Scoring function configuration: LLM-as-judge, regex parser, or basic parameters.
+   */
+  params?:
+    | ScoringFunctionRegisterParams.LlmAsJudgeScoringFnParams
+    | ScoringFunctionRegisterParams.RegexParserScoringFnParams
+    | ScoringFunctionRegisterParams.BasicScoringFnParams
+    | null;
+
+  provider_id?: string | null;
+
+  provider_scoring_fn_id?: string | null;
+}
+
+export namespace ScoringFunctionRegisterParams {
+  export interface ReturnType {
+    type:
+      | 'string'
+      | 'number'
+      | 'boolean'
+      | 'array'
+      | 'object'
+      | 'json'
+      | 'union'
+      | 'chat_completion_input'
+      | 'completion_input'
+      | 'agent_turn_input';
+  }
+
+  /**
+   * Parameters for LLM-as-judge scoring function configuration.
+   */
+  export interface LlmAsJudgeScoringFnParams {
+    judge_model: string;
+
+    /**
+     * Aggregation functions to apply to the scores of each row
+     */
+    aggregation_functions?: Array<
+      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
+    >;
+
+    /**
+     * Regexes to extract the answer from generated response
+     */
+    judge_score_regexes?: Array<string>;
+
+    prompt_template?: string | null;
+
+    type?: 'llm_as_judge';
+  }
+
+  /**
+   * Parameters for regex parser scoring function configuration.
+   */
+  export interface RegexParserScoringFnParams {
+    /**
+     * Aggregation functions to apply to the scores of each row
+     */
+    aggregation_functions?: Array<
+      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
+    >;
+
+    /**
+     * Regex to extract the answer from generated response
+     */
+    parsing_regexes?: Array<string>;
 
-  provider_id?: unknown;
+    type?: 'regex_parser';
+  }
 
-  provider_scoring_fn_id?: unknown;
+  /**
+   * Parameters for basic scoring function configuration.
+   */
+  export interface BasicScoringFnParams {
+    /**
+     * Aggregation functions to apply to the scores of each row
+     */
+    aggregation_functions?: Array<
+      'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
+    >;
+
+    type?: 'basic';
+  }
 }
 
 export declare namespace ScoringFunctions {