This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 6405e64 (1 parent: 7212f18)

trying to fix llamacpp muxing

File tree: 5 files changed, +32 −11 lines

src/codegate/muxing/adapter.py

Lines changed: 13 additions & 0 deletions
@@ -9,6 +9,7 @@
 from litellm import ModelResponse
 from litellm.types.utils import Delta, StreamingChoices
 
+from codegate.config import Config
 from codegate.db import models as db_models
 from codegate.muxing import rulematcher
 from codegate.muxing.ollama_mappers import (
@@ -25,6 +26,16 @@ class MuxingAdapterError(Exception):
     pass
 
 
+
+# Note: this is yet another awful hack to get the correct folder where
+# llamacpp models are stored. This is currently retrieved inside the
+# providers, but it should probably be refactored and injected,
+# implementing a basic inversion-of-control pattern.
+def get_llamacpp_models_folder():
+    override = Config.get_config().provider_urls.get("llamacpp")
+    return override if override else "./codegate_volume/models"
+
+
 class BodyAdapter:
     """
     Format the body to the destination provider format.
@@ -42,6 +53,8 @@ def _get_provider_formatted_url(self, model_route: rulematcher.ModelRoute) -> st
             return urljoin(model_route.endpoint.endpoint, "/v1")
         if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
             return urljoin(model_route.endpoint.endpoint, "/api/v1")
+        if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
+            return get_llamacpp_models_folder()
         return model_route.endpoint.endpoint
 
     def get_destination_info(self, model_route: rulematcher.ModelRoute) -> dict:
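For context on the new helper: when a route targets a llama.cpp model, the "destination URL" the muxer hands back is really a filesystem folder of .gguf files, taken from the `llamacpp` entry in the configured provider URLs or falling back to the bundled volume path. A minimal standalone sketch of that fallback, using a plain dict in place of codegate's `Config.get_config().provider_urls` (the dict shape here is an assumption for illustration):

```python
# Standalone sketch of the fallback in get_llamacpp_models_folder().
# `provider_urls` is a plain dict standing in for codegate's Config object.
DEFAULT_LLAMACPP_FOLDER = "./codegate_volume/models"


def resolve_llamacpp_models_folder(provider_urls: dict) -> str:
    # An explicit "llamacpp" override wins; otherwise use the volume path.
    override = provider_urls.get("llamacpp")
    return override if override else DEFAULT_LLAMACPP_FOLDER


print(resolve_llamacpp_models_folder({}))                           # ./codegate_volume/models
print(resolve_llamacpp_models_folder({"llamacpp": "/srv/models"}))  # /srv/models
```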

src/codegate/muxing/router.py

Lines changed: 12 additions & 8 deletions
@@ -147,6 +147,10 @@ async def route_to_dest_provider(
             completion_function = anthropic.acompletion
             from_openai = anthropic_from_openai
             to_openai = anthropic_to_openai
+        case ProviderType.llamacpp:
+            completion_function = provider._completion_handler.execute_completion
+            from_openai = identity
+            to_openai = identity
         case ProviderType.ollama:
             if is_fim_request:
                 completion_function = ollama.generate_streaming
@@ -227,15 +231,15 @@ async def _inner(
         new_request = from_openai(request)
         new_request.model = model
 
+        # Execute e.g. acompletion from Anthropic types
+        response = completion_handler(
+            new_request,
+            api_key,
+            base_url,
+        )
+
         # Wrap with an async generator that maps from
         # e.g. Anthropic types to OpenAI's.
-        return to_openai(
-            # Execute e.g. acompletion from Anthropic types
-            completion_handler(
-                new_request,
-                api_key,
-                base_url,
-            ),
-        )
+        return to_openai(response)
 
     return _inner
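`identity` is not defined in this diff; assuming it is a plain pass-through helper, the new `llamacpp` case means no request or response translation happens at the mux boundary, because the llama.cpp completion handler already consumes and produces OpenAI-shaped objects. A sketch of what that pass-through amounts to:

```python
from typing import Any


def identity(x: Any) -> Any:
    # Pass-through adapter: llama.cpp already speaks the OpenAI request/response
    # shapes, so no conversion (unlike the Anthropic or Ollama cases) is needed.
    return x


request = {"model": "example-model", "messages": [{"role": "user", "content": "hi"}]}
assert identity(request) is request  # nothing is copied or rewritten
```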

src/codegate/providers/base.py

Lines changed: 3 additions & 0 deletions
@@ -319,6 +319,9 @@ async def complete(
             is_fim_request=is_fim_request,
         )
 
+        import asyncio
+        if asyncio.iscoroutine(model_response):
+            model_response = await model_response
         # Pass the request through the output pipeline
         if not streaming:
            return await self._run_output_pipeline(input_pipeline_result.context, model_response)
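This guard exists because the mux path can now hand back either an awaitable (for example llama.cpp's `execute_completion` called without awaiting) or an async iterator for streaming responses; `asyncio.iscoroutine` lets `complete()` normalize the first case without touching the second. A small self-contained illustration of that distinction:

```python
import asyncio


async def returns_value():
    # Calling this produces a coroutine object that must be awaited.
    return {"object": "chat.completion"}


async def streams_chunks():
    # Calling this produces an async generator, which iscoroutine() rejects.
    yield {"object": "chat.completion.chunk"}


async def main():
    for result in (returns_value(), streams_chunks()):
        if asyncio.iscoroutine(result):
            print("coroutine ->", await result)
        else:
            print("async generator ->", [chunk async for chunk in result])


asyncio.run(main())
```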

src/codegate/providers/llamacpp/completion_handler.py

Lines changed: 3 additions & 2 deletions
@@ -50,8 +50,9 @@ async def chat_to_async_iterator(
 
 
 class LlamaCppCompletionHandler(BaseCompletionHandler):
-    def __init__(self):
+    def __init__(self, base_url):
         self.inference_engine = LlamaCppInferenceEngine()
+        self.base_url = base_url
 
     async def execute_completion(
         self,
@@ -64,7 +65,7 @@ async def execute_completion(
         """
         Execute the completion request with inference engine API
         """
-        model_path = f"{base_url}/{request.get_model()}.gguf"
+        model_path = f"{self.base_url}/{request.get_model()}.gguf"
 
         # Create a copy of the request dict and remove stream_options
         # Reason - Request error as JSON:
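The point of stashing `base_url` on the handler is that, when a request arrives through the mux, the `base_url` argument passed into `execute_completion` is whatever the route resolved to and may not be the models folder; the folder captured at construction time is what the .gguf path must be built from. A toy sketch of that path construction (class and model names are made up for illustration):

```python
class ToyLlamaCppHandler:
    """Toy stand-in for LlamaCppCompletionHandler, only to show the path logic."""

    def __init__(self, base_url: str):
        self.base_url = base_url

    def model_path(self, model: str, ignored_base_url: str = "") -> str:
        # The per-request base_url is ignored; the folder fixed at init is used.
        return f"{self.base_url}/{model}.gguf"


handler = ToyLlamaCppHandler("./codegate_volume/models")
print(handler.model_path("example-model"))
# ./codegate_volume/models/example-model.gguf
```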

src/codegate/providers/llamacpp/provider.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ def __init__(
             self.base_url = self._get_base_url()
         else:
             self.base_url = "./codegate_volume/models"
-        completion_handler = LlamaCppCompletionHandler()
+        completion_handler = LlamaCppCompletionHandler(self.base_url)
         super().__init__(
             None,
             None,
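End to end, the wiring is now: config override (or the `./codegate_volume/models` default) → provider `base_url` → completion handler → `.gguf` path, and the same default is what `get_llamacpp_models_folder()` in the muxing adapter falls back to, so mux routes and the provider resolve the same folder. A compressed sketch of that chain with toy classes (not the real codegate constructors):

```python
DEFAULT_FOLDER = "./codegate_volume/models"


def resolve_folder(provider_urls: dict) -> str:
    return provider_urls.get("llamacpp") or DEFAULT_FOLDER


class ToyHandler:
    def __init__(self, base_url: str):
        self.base_url = base_url


class ToyLlamaCppProvider:
    def __init__(self, provider_urls: dict):
        # Mirrors provider.py: resolve the folder once, then hand it to the handler.
        self.base_url = resolve_folder(provider_urls)
        self.completion_handler = ToyHandler(self.base_url)


provider = ToyLlamaCppProvider({"llamacpp": "/srv/gguf-models"})
print(provider.completion_handler.base_url)  # /srv/gguf-models
```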
