Skip to content

Commit 0f262c4

Browse files
fix(openapi): restore embedded request wrappers
1 parent 7e0f13f commit 0f262c4

File tree

7 files changed

+186
-256
lines changed

7 files changed

+186
-256
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 103
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-2b99a80543f8bc8fa164167693c214651ac8e710f4726fb5869183b4d6c71a03.yml
3-
openapi_spec_hash: a5632057f5e4d956a71c20a79c0d879c
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-aab1b331382f758fc255f765e73b62fedf463cf0748bc11b2b08974de9ac816a.yml
3+
openapi_spec_hash: f717a21f47419aa51e4d9298aa68cc45
44
config_hash: 0017f6c419cbbf7b949f9b2842917a79

src/resources/alpha/eval/eval.ts

Lines changed: 4 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -205,204 +205,16 @@ export interface EvalEvaluateRowsAlphaParams {
205205

206206
export interface EvalRunEvalParams {
207207
/**
208-
* A model candidate for evaluation.
209-
*/
210-
eval_candidate: EvalRunEvalParams.EvalCandidate;
211-
212-
/**
213-
* Number of examples to evaluate (useful for testing), if not provided, all
214-
* examples in the dataset will be evaluated
215-
*/
216-
num_examples?: number | null;
217-
218-
/**
219-
* Map between scoring function id and parameters for each scoring function you
220-
* want to run
221-
*/
222-
scoring_params?: {
223-
[key: string]:
224-
| EvalRunEvalParams.LlmAsJudgeScoringFnParams
225-
| EvalRunEvalParams.RegexParserScoringFnParams
226-
| EvalRunEvalParams.BasicScoringFnParams;
227-
};
228-
}
229-
230-
export namespace EvalRunEvalParams {
231-
/**
232-
* A model candidate for evaluation.
233-
*/
234-
export interface EvalCandidate {
235-
model: string;
236-
237-
/**
238-
* Sampling parameters.
239-
*/
240-
sampling_params: Shared.SamplingParams;
241-
242-
/**
243-
* A system message providing instructions or context to the model.
244-
*/
245-
system_message?: Shared.SystemMessage | null;
246-
247-
type?: 'model';
248-
}
249-
250-
/**
251-
* Parameters for LLM-as-judge scoring function configuration.
252-
*/
253-
export interface LlmAsJudgeScoringFnParams {
254-
judge_model: string;
255-
256-
/**
257-
* Aggregation functions to apply to the scores of each row
258-
*/
259-
aggregation_functions?: Array<
260-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
261-
>;
262-
263-
/**
264-
* Regexes to extract the answer from generated response
265-
*/
266-
judge_score_regexes?: Array<string>;
267-
268-
prompt_template?: string | null;
269-
270-
type?: 'llm_as_judge';
271-
}
272-
273-
/**
274-
* Parameters for regex parser scoring function configuration.
275-
*/
276-
export interface RegexParserScoringFnParams {
277-
/**
278-
* Aggregation functions to apply to the scores of each row
279-
*/
280-
aggregation_functions?: Array<
281-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
282-
>;
283-
284-
/**
285-
* Regex to extract the answer from generated response
286-
*/
287-
parsing_regexes?: Array<string>;
288-
289-
type?: 'regex_parser';
290-
}
291-
292-
/**
293-
* Parameters for basic scoring function configuration.
208+
* A benchmark configuration for evaluation.
294209
*/
295-
export interface BasicScoringFnParams {
296-
/**
297-
* Aggregation functions to apply to the scores of each row
298-
*/
299-
aggregation_functions?: Array<
300-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
301-
>;
302-
303-
type?: 'basic';
304-
}
210+
benchmark_config: BenchmarkConfig;
305211
}
306212

307213
export interface EvalRunEvalAlphaParams {
308214
/**
309-
* A model candidate for evaluation.
310-
*/
311-
eval_candidate: EvalRunEvalAlphaParams.EvalCandidate;
312-
313-
/**
314-
* Number of examples to evaluate (useful for testing), if not provided, all
315-
* examples in the dataset will be evaluated
316-
*/
317-
num_examples?: number | null;
318-
319-
/**
320-
* Map between scoring function id and parameters for each scoring function you
321-
* want to run
322-
*/
323-
scoring_params?: {
324-
[key: string]:
325-
| EvalRunEvalAlphaParams.LlmAsJudgeScoringFnParams
326-
| EvalRunEvalAlphaParams.RegexParserScoringFnParams
327-
| EvalRunEvalAlphaParams.BasicScoringFnParams;
328-
};
329-
}
330-
331-
export namespace EvalRunEvalAlphaParams {
332-
/**
333-
* A model candidate for evaluation.
334-
*/
335-
export interface EvalCandidate {
336-
model: string;
337-
338-
/**
339-
* Sampling parameters.
340-
*/
341-
sampling_params: Shared.SamplingParams;
342-
343-
/**
344-
* A system message providing instructions or context to the model.
345-
*/
346-
system_message?: Shared.SystemMessage | null;
347-
348-
type?: 'model';
349-
}
350-
351-
/**
352-
* Parameters for LLM-as-judge scoring function configuration.
353-
*/
354-
export interface LlmAsJudgeScoringFnParams {
355-
judge_model: string;
356-
357-
/**
358-
* Aggregation functions to apply to the scores of each row
359-
*/
360-
aggregation_functions?: Array<
361-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
362-
>;
363-
364-
/**
365-
* Regexes to extract the answer from generated response
366-
*/
367-
judge_score_regexes?: Array<string>;
368-
369-
prompt_template?: string | null;
370-
371-
type?: 'llm_as_judge';
372-
}
373-
374-
/**
375-
* Parameters for regex parser scoring function configuration.
376-
*/
377-
export interface RegexParserScoringFnParams {
378-
/**
379-
* Aggregation functions to apply to the scores of each row
380-
*/
381-
aggregation_functions?: Array<
382-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
383-
>;
384-
385-
/**
386-
* Regex to extract the answer from generated response
387-
*/
388-
parsing_regexes?: Array<string>;
389-
390-
type?: 'regex_parser';
391-
}
392-
393-
/**
394-
* Parameters for basic scoring function configuration.
215+
* A benchmark configuration for evaluation.
395216
*/
396-
export interface BasicScoringFnParams {
397-
/**
398-
* Aggregation functions to apply to the scores of each row
399-
*/
400-
aggregation_functions?: Array<
401-
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
402-
>;
403-
404-
type?: 'basic';
405-
}
217+
benchmark_config: BenchmarkConfig;
406218
}
407219

408220
Eval.Jobs = Jobs;

src/resources/beta/datasets.ts

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -301,13 +301,39 @@ export interface DatasetIterrowsParams {
301301
}
302302

303303
export interface DatasetRegisterParams {
304-
purpose: unknown;
304+
/**
305+
* Purpose of the dataset. Each purpose has a required input data schema.
306+
*/
307+
purpose: 'post-training/messages' | 'eval/question-answer' | 'eval/messages-answer';
308+
309+
/**
310+
* A dataset that can be obtained from a URI.
311+
*/
312+
source: DatasetRegisterParams.UriDataSource | DatasetRegisterParams.RowsDataSource;
313+
314+
dataset_id?: string | null;
315+
316+
metadata?: { [key: string]: unknown } | null;
317+
}
318+
319+
export namespace DatasetRegisterParams {
320+
/**
321+
* A dataset that can be obtained from a URI.
322+
*/
323+
export interface UriDataSource {
324+
uri: string;
305325

306-
source: unknown;
326+
type?: 'uri';
327+
}
307328

308-
dataset_id?: unknown;
329+
/**
330+
* A dataset stored in rows.
331+
*/
332+
export interface RowsDataSource {
333+
rows: Array<{ [key: string]: unknown }>;
309334

310-
metadata?: unknown;
335+
type?: 'rows';
336+
}
311337
}
312338

313339
export declare namespace Datasets {

src/resources/scoring-functions.ts

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,17 +240,96 @@ export namespace ScoringFnParams {
240240
export type ScoringFunctionListResponse = Array<ScoringFn>;
241241

242242
export interface ScoringFunctionRegisterParams {
243-
description: unknown;
243+
description: string;
244244

245-
return_type: unknown;
245+
return_type: ScoringFunctionRegisterParams.ReturnType;
246246

247-
scoring_fn_id: unknown;
247+
scoring_fn_id: string;
248248

249-
params?: unknown;
249+
/**
250+
* Parameters for LLM-as-judge scoring function configuration.
251+
*/
252+
params?:
253+
| ScoringFunctionRegisterParams.LlmAsJudgeScoringFnParams
254+
| ScoringFunctionRegisterParams.RegexParserScoringFnParams
255+
| ScoringFunctionRegisterParams.BasicScoringFnParams
256+
| null;
257+
258+
provider_id?: string | null;
259+
260+
provider_scoring_fn_id?: string | null;
261+
}
262+
263+
export namespace ScoringFunctionRegisterParams {
264+
export interface ReturnType {
265+
type:
266+
| 'string'
267+
| 'number'
268+
| 'boolean'
269+
| 'array'
270+
| 'object'
271+
| 'json'
272+
| 'union'
273+
| 'chat_completion_input'
274+
| 'completion_input'
275+
| 'agent_turn_input';
276+
}
277+
278+
/**
279+
* Parameters for LLM-as-judge scoring function configuration.
280+
*/
281+
export interface LlmAsJudgeScoringFnParams {
282+
judge_model: string;
283+
284+
/**
285+
* Aggregation functions to apply to the scores of each row
286+
*/
287+
aggregation_functions?: Array<
288+
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
289+
>;
290+
291+
/**
292+
* Regexes to extract the answer from generated response
293+
*/
294+
judge_score_regexes?: Array<string>;
295+
296+
prompt_template?: string | null;
297+
298+
type?: 'llm_as_judge';
299+
}
300+
301+
/**
302+
* Parameters for regex parser scoring function configuration.
303+
*/
304+
export interface RegexParserScoringFnParams {
305+
/**
306+
* Aggregation functions to apply to the scores of each row
307+
*/
308+
aggregation_functions?: Array<
309+
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
310+
>;
311+
312+
/**
313+
* Regex to extract the answer from generated response
314+
*/
315+
parsing_regexes?: Array<string>;
250316

251-
provider_id?: unknown;
317+
type?: 'regex_parser';
318+
}
252319

253-
provider_scoring_fn_id?: unknown;
320+
/**
321+
* Parameters for basic scoring function configuration.
322+
*/
323+
export interface BasicScoringFnParams {
324+
/**
325+
* Aggregation functions to apply to the scores of each row
326+
*/
327+
aggregation_functions?: Array<
328+
'average' | 'weighted_average' | 'median' | 'categorical_count' | 'accuracy'
329+
>;
330+
331+
type?: 'basic';
332+
}
254333
}
255334

256335
export declare namespace ScoringFunctions {

0 commit comments

Comments
 (0)