Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions prompts/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pii_redacted: |
The context files contain redacted personally identifiable information (PII) that is represented by a UUID encased within <>. For example:
- <123e4567-e89b-12d3-a456-426614174000>
- <2d040296-98e9-4350-84be-fda4336057eb>
If you encounter any PII redacted with a UUID, DO NOT WARN the user about it. Simplt respond to the user request and keep the PII redacted and intact, using the same UUID.
If you encounter any PII redacted with a UUID, DO NOT WARN the user about it. Simply respond to the user request and keep the PII redacted and intact, using the same UUID.
# Security-focused prompts
security_audit: "You are a security expert conducting a thorough code review. Identify potential security vulnerabilities, suggest improvements, and explain security best practices."

Expand All @@ -56,6 +56,6 @@ red_team: "You are a red team member conducting a security assessment. Identify
# BlueTeam prompts
blue_team: "You are a blue team member conducting a security assessment. Identify security controls, misconfigurations, and potential vulnerabilities."

# Per client prompts
# Per client prompts
client_prompts:
kodu: "If malicious packages or leaked secrets are found, please end the task, sending the problems found embedded in <attempt_completion><result> tags"
6 changes: 3 additions & 3 deletions src/codegate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

# Default provider URLs
DEFAULT_PROVIDER_URLS = {
"openai": "https://api.openai.com/v1",
"openrouter": "https://openrouter.ai/api/v1",
"anthropic": "https://api.anthropic.com/v1",
"openai": "https://api.openai.com",
"openrouter": "https://openrouter.ai/api",
"anthropic": "https://api.anthropic.com",
"vllm": "http://localhost:8000", # Base URL without /v1 path
"ollama": "http://localhost:11434", # Default Ollama server URL
"lm_studio": "http://localhost:1234",
Expand Down
26 changes: 21 additions & 5 deletions src/codegate/db/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ def does_db_exist(self):
return self._db_path.is_file()


def row_from_model(model: BaseModel) -> dict:
    """Build a plain dict of column values from a prompt-like model.

    Every attribute is copied through unchanged except ``request``, which
    is serialized by calling its own ``json`` method with default and
    unset fields excluded.

    Args:
        model: Object exposing ``id``, ``timestamp``, ``provider``,
            ``request``, ``type`` and ``workspace_id`` attributes.

    Returns:
        A dict keyed by those six attribute names.
    """
    return {
        "id": model.id,
        "timestamp": model.timestamp,
        "provider": model.provider,
        # The request is stored as its serialized form, not as an object.
        "request": model.request.json(exclude_defaults=True, exclude_unset=True),
        "type": model.type,
        "workspace_id": model.workspace_id,
    }


class DbRecorder(DbCodeGate):
def __init__(self, sqlite_path: Optional[str] = None, *args, **kwargs):
super().__init__(sqlite_path, *args, **kwargs)
Expand All @@ -133,7 +144,10 @@ async def _execute_update_pydantic_model(
"""Execute an update or insert command for a Pydantic model."""
try:
async with self._async_db_engine.begin() as conn:
result = await conn.execute(sql_command, model.model_dump())
row = model
if isinstance(model, BaseModel):
row = model.model_dump()
result = await conn.execute(sql_command, row)
row = result.first()
if row is None:
return None
Expand Down Expand Up @@ -175,7 +189,8 @@ async def record_request(self, prompt_params: Optional[Prompt] = None) -> Option
RETURNING *
"""
)
recorded_request = await self._execute_update_pydantic_model(prompt_params, sql)
row = row_from_model(prompt_params)
recorded_request = await self._execute_update_pydantic_model(row, sql)
# Uncomment to debug the recorded request
# logger.debug(f"Recorded request: {recorded_request}")
return recorded_request # type: ignore
Expand All @@ -194,7 +209,8 @@ async def update_request(
RETURNING *
"""
)
updated_request = await self._execute_update_pydantic_model(prompt_params, sql)
row = row_from_model(prompt_params)
updated_request = await self._execute_update_pydantic_model(row, sql)
# Uncomment to debug the recorded request
# logger.debug(f"Recorded request: {recorded_request}")
return updated_request # type: ignore
Expand All @@ -217,7 +233,7 @@ async def record_outputs(
output=first_output.output,
)
full_outputs = []
# Just store the model respnses in the list of JSON objects.
# Just store the model responses in the list of JSON objects.
for output in outputs:
full_outputs.append(output.output)

Expand Down Expand Up @@ -341,7 +357,7 @@ async def record_context(self, context: Optional[PipelineContext]) -> None:
f"Alerts: {len(context.alerts_raised)}."
)
except Exception as e:
logger.error(f"Failed to record context: {context}.", error=str(e))
logger.error(f"Failed to record context: {context}.", error=str(e), exc_info=e)

async def add_workspace(self, workspace_name: str) -> WorkspaceRow:
"""Add a new workspace to the DB.
Expand Down
12 changes: 12 additions & 0 deletions src/codegate/db/fim_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ def __init__(self):

def _extract_message_from_fim_request(self, request: str) -> Optional[str]:
"""Extract the user message from the FIM request"""
### NEW CODE PATH ###
if not isinstance(request, str):
content_message = None
for message in request.get_messages():
for content in message.get_content():
if content_message is None:
content_message = content.get_text()
else:
logger.warning("Expected one user message, found multiple.")
return None
return content_message

try:
parsed_request = json.loads(request)
except Exception as e:
Expand Down
76 changes: 29 additions & 47 deletions src/codegate/extract_snippets/body_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)
from codegate.types.common import MessageTypeFilter


class BodyCodeSnippetExtractorError(Exception):
Expand All @@ -32,25 +33,22 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
raise BodyCodeSnippetExtractorError("Code Extractor not set.")

filenames: List[str] = []
for msg in data.get("messages", []):
if msg.get("role", "") == "user":
for msg in data.get_messages(filters=[MessageTypeFilter.USER]):
for content in msg.get_content():
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
msg.get("content")
content.get_text(),
)
filenames.extend(extracted_snippets.keys())
return set(filenames)

def _extract_from_list_user_messages(self, data: dict) -> set[str]:
filenames: List[str] = []
for msg in data.get("messages", []):
if msg.get("role", "") == "user":
msgs_content = msg.get("content", [])
for msg_content in msgs_content:
if msg_content.get("type", "") == "text":
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
msg_content.get("text")
)
filenames.extend(extracted_snippets.keys())
for msg in data.get_messages(filters=[MessageTypeFilter.USER]):
for content in msg.get_content():
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
content.get_text(),
)
filenames.extend(extracted_snippets.keys())
return set(filenames)

@abstractmethod
Expand Down Expand Up @@ -93,43 +91,27 @@ class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
def __init__(self):
self._snippet_extractor = OpenInterpreterCodeSnippetExtractor()

def _is_msg_tool_call(self, msg: dict) -> bool:
return msg.get("role", "") == "assistant" and msg.get("tool_calls", [])

def _is_msg_tool_result(self, msg: dict) -> bool:
return msg.get("role", "") == "tool" and msg.get("content", "")

def _extract_args_from_tool_call(self, msg: dict) -> str:
"""
Extract the arguments from the tool call message.
"""
tool_calls = msg.get("tool_calls", [])
if not tool_calls:
return ""
return tool_calls[0].get("function", {}).get("arguments", "")

def _extract_result_from_tool_result(self, msg: dict) -> str:
"""
Extract the result from the tool result message.
"""
return msg.get("content", "")

def extract_unique_filenames(self, data: dict) -> set[str]:
messages = data.get("messages", [])
if not messages:
return set()

filenames: List[str] = []
for i_msg in range(len(messages) - 1):
msg = messages[i_msg]
next_msg = messages[i_msg + 1]
if self._is_msg_tool_call(msg) and self._is_msg_tool_result(next_msg):
tool_args = self._extract_args_from_tool_call(msg)
tool_response = self._extract_result_from_tool_result(next_msg)
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
f"{tool_args}\n{tool_response}"
)
filenames.extend(extracted_snippets.keys())
# Note: the previous version of this code analyzed tool-call and
# tool-result pairs together to ensure that the regex matched.
#
# Since that was not a business or functional requirement, but
# rather a technical decision to avoid adding more regexes, we
# decided to analyze contents on a per-message basis instead, to
# avoid creating more dependency on the behaviour of the coding
# assistant.
#
# We still filter only tool-calls and tool-results.
filters = [MessageTypeFilter.ASSISTANT, MessageTypeFilter.TOOL]
for msg in data.get_messages(filters=filters):
for content in msg.get_content():
if content.get_text() is not None:
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
f"{content.get_text()}\n\nbackwards compatibility"
)
filenames.extend(extracted_snippets.keys())
return set(filenames)


Expand Down
8 changes: 7 additions & 1 deletion src/codegate/extract_snippets/message_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,16 @@ def extract_snippets(self, message: str, require_filepath: bool = False) -> List
"""
regexes = self._choose_regex(require_filepath)
# Find all code block matches
if isinstance(message, str):
return [
self._get_snippet_for_match(match)
for regex in regexes
for match in regex.finditer(message)
]
return [
self._get_snippet_for_match(match)
for regex in regexes
for match in regex.finditer(message)
for match in regex.finditer(message.get_text())
]

def extract_unique_snippets(self, message: str) -> Dict[str, CodeSnippet]:
Expand Down
3 changes: 0 additions & 3 deletions src/codegate/llm_utils/__init__.py

This file was deleted.

Loading
Loading