Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions prompts/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pii_redacted: |
The context files contain redacted personally identifiable information (PII) that is represented by a UUID encased within <>. For example:
- <123e4567-e89b-12d3-a456-426614174000>
- <2d040296-98e9-4350-84be-fda4336057eb>
If you encounter any PII redacted with a UUID, DO NOT WARN the user about it. Simplt respond to the user request and keep the PII redacted and intact, using the same UUID.
If you encounter any PII redacted with a UUID, DO NOT WARN the user about it. Simply respond to the user request and keep the PII redacted and intact, using the same UUID.
# Security-focused prompts
security_audit: "You are a security expert conducting a thorough code review. Identify potential security vulnerabilities, suggest improvements, and explain security best practices."

Expand All @@ -56,6 +56,6 @@ red_team: "You are a red team member conducting a security assessment. Identify
# BlueTeam prompts
blue_team: "You are a blue team member conducting a security assessment. Identify security controls, misconfigurations, and potential vulnerabilities."

# Per client prompts
# Per client prompts
client_prompts:
kodu: "If malicious packages or leaked secrets are found, please end the task, sending the problems found embedded in <attempt_completion><result> tags"
6 changes: 3 additions & 3 deletions src/codegate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@

# Default provider URLs
DEFAULT_PROVIDER_URLS = {
"openai": "https://api.openai.com/v1",
"openrouter": "https://openrouter.ai/api/v1",
"anthropic": "https://api.anthropic.com/v1",
"openai": "https://api.openai.com",
"openrouter": "https://openrouter.ai/api",
"anthropic": "https://api.anthropic.com",
"vllm": "http://localhost:8000", # Base URL without /v1 path
"ollama": "http://localhost:11434", # Default Ollama server URL
"lm_studio": "http://localhost:1234",
Expand Down
26 changes: 21 additions & 5 deletions src/codegate/db/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ def does_db_exist(self):
return self._db_path.is_file()


def row_from_model(model: BaseModel) -> dict:
    """Flatten a prompt-like model into a plain dict of column values.

    Every attribute is copied through unchanged except ``request``, which is
    serialized to a string by calling its own ``json()`` method with default
    and unset fields excluded.

    Args:
        model: Object exposing ``id``, ``timestamp``, ``provider``,
            ``request``, ``type`` and ``workspace_id`` attributes.

    Returns:
        A dict keyed by those six attribute names.
    """
    return {
        "id": model.id,
        "timestamp": model.timestamp,
        "provider": model.provider,
        # NOTE(review): presumably ``request`` is a Pydantic model; if the
        # project is on Pydantic v2, ``model_dump_json`` is the non-deprecated
        # spelling of this call -- confirm before switching.
        "request": model.request.json(exclude_defaults=True, exclude_unset=True),
        "type": model.type,
        "workspace_id": model.workspace_id,
    }


class DbRecorder(DbCodeGate):
def __init__(self, sqlite_path: Optional[str] = None, *args, **kwargs):
super().__init__(sqlite_path, *args, **kwargs)
Expand All @@ -133,7 +144,10 @@ async def _execute_update_pydantic_model(
"""Execute an update or insert command for a Pydantic model."""
try:
async with self._async_db_engine.begin() as conn:
result = await conn.execute(sql_command, model.model_dump())
row = model
if isinstance(model, BaseModel):
row = model.model_dump()
result = await conn.execute(sql_command, row)
row = result.first()
if row is None:
return None
Expand Down Expand Up @@ -175,7 +189,8 @@ async def record_request(self, prompt_params: Optional[Prompt] = None) -> Option
RETURNING *
"""
)
recorded_request = await self._execute_update_pydantic_model(prompt_params, sql)
row = row_from_model(prompt_params)
recorded_request = await self._execute_update_pydantic_model(row, sql)
# Uncomment to debug the recorded request
# logger.debug(f"Recorded request: {recorded_request}")
return recorded_request # type: ignore
Expand All @@ -194,7 +209,8 @@ async def update_request(
RETURNING *
"""
)
updated_request = await self._execute_update_pydantic_model(prompt_params, sql)
row = row_from_model(prompt_params)
updated_request = await self._execute_update_pydantic_model(row, sql)
# Uncomment to debug the recorded request
# logger.debug(f"Recorded request: {recorded_request}")
return updated_request # type: ignore
Expand All @@ -217,7 +233,7 @@ async def record_outputs(
output=first_output.output,
)
full_outputs = []
# Just store the model respnses in the list of JSON objects.
# Just store the model responses in the list of JSON objects.
for output in outputs:
full_outputs.append(output.output)

Expand Down Expand Up @@ -341,7 +357,7 @@ async def record_context(self, context: Optional[PipelineContext]) -> None:
f"Alerts: {len(context.alerts_raised)}."
)
except Exception as e:
logger.error(f"Failed to record context: {context}.", error=str(e))
logger.error(f"Failed to record context: {context}.", error=str(e), exc_info=e)

async def add_workspace(self, workspace_name: str) -> WorkspaceRow:
"""Add a new workspace to the DB.
Expand Down
12 changes: 12 additions & 0 deletions src/codegate/db/fim_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ def __init__(self):

def _extract_message_from_fim_request(self, request: str) -> Optional[str]:
"""Extract the user message from the FIM request"""
### NEW CODE PATH ###
if not isinstance(request, str):
content_message = None
for message in request.get_messages():
for content in message.get_content():
if content_message is None:
content_message = content.get_text()
else:
logger.warning("Expected one user message, found multiple.")
return None
return content_message

try:
parsed_request = json.loads(request)
except Exception as e:
Expand Down
76 changes: 29 additions & 47 deletions src/codegate/extract_snippets/body_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
KoduCodeSnippetExtractor,
OpenInterpreterCodeSnippetExtractor,
)
from codegate.types.common import MessageTypeFilter


class BodyCodeSnippetExtractorError(Exception):
Expand All @@ -32,25 +33,22 @@ def _extract_from_user_messages(self, data: dict) -> set[str]:
raise BodyCodeSnippetExtractorError("Code Extractor not set.")

filenames: List[str] = []
for msg in data.get("messages", []):
if msg.get("role", "") == "user":
for msg in data.get_messages(filters=[MessageTypeFilter.USER]):
for content in msg.get_content():
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
msg.get("content")
content.get_text(),
)
filenames.extend(extracted_snippets.keys())
return set(filenames)

def _extract_from_list_user_messages(self, data: dict) -> set[str]:
filenames: List[str] = []
for msg in data.get("messages", []):
if msg.get("role", "") == "user":
msgs_content = msg.get("content", [])
for msg_content in msgs_content:
if msg_content.get("type", "") == "text":
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
msg_content.get("text")
)
filenames.extend(extracted_snippets.keys())
for msg in data.get_messages(filters=[MessageTypeFilter.USER]):
for content in msg.get_content():
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
content.get_text(),
)
filenames.extend(extracted_snippets.keys())
return set(filenames)

@abstractmethod
Expand Down Expand Up @@ -93,43 +91,27 @@ class OpenInterpreterBodySnippetExtractor(BodyCodeSnippetExtractor):
def __init__(self):
    """Create the extractor and attach an ``OpenInterpreterCodeSnippetExtractor``."""
    self._snippet_extractor = OpenInterpreterCodeSnippetExtractor()

def _is_msg_tool_call(self, msg: dict) -> bool:
return msg.get("role", "") == "assistant" and msg.get("tool_calls", [])

def _is_msg_tool_result(self, msg: dict) -> bool:
return msg.get("role", "") == "tool" and msg.get("content", "")

def _extract_args_from_tool_call(self, msg: dict) -> str:
"""
Extract the arguments from the tool call message.
"""
tool_calls = msg.get("tool_calls", [])
if not tool_calls:
return ""
return tool_calls[0].get("function", {}).get("arguments", "")

def _extract_result_from_tool_result(self, msg: dict) -> str:
"""
Extract the result from the tool result message.
"""
return msg.get("content", "")

def extract_unique_filenames(self, data: dict) -> set[str]:
messages = data.get("messages", [])
if not messages:
return set()

filenames: List[str] = []
for i_msg in range(len(messages) - 1):
msg = messages[i_msg]
next_msg = messages[i_msg + 1]
if self._is_msg_tool_call(msg) and self._is_msg_tool_result(next_msg):
tool_args = self._extract_args_from_tool_call(msg)
tool_response = self._extract_result_from_tool_result(next_msg)
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
f"{tool_args}\n{tool_response}"
)
filenames.extend(extracted_snippets.keys())
# Note: the previous version of this code used to analyze
# tool-call and tool-results pairs to ensure that the regex
# matched.
#
# Given it was not a business or functional requirement, but
# rather a technical decision to avoid adding more regexes,
# we decided to analyze contents on a per-message basis, to
# avoid creating more dependency on the behaviour of the
# coding assistant.
#
# We still filter only tool-calls and tool-results.
filters = [MessageTypeFilter.ASSISTANT, MessageTypeFilter.TOOL]
for msg in data.get_messages(filters=filters):
for content in msg.get_content():
if content.get_text() is not None:
extracted_snippets = self._snippet_extractor.extract_unique_snippets(
f"{content.get_text()}\n\nbackwards compatibility"
)
filenames.extend(extracted_snippets.keys())
return set(filenames)


Expand Down
8 changes: 7 additions & 1 deletion src/codegate/extract_snippets/message_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,16 @@ def extract_snippets(self, message: str, require_filepath: bool = False) -> List
"""
regexes = self._choose_regex(require_filepath)
# Find all code block matches
if isinstance(message, str):
return [
self._get_snippet_for_match(match)
for regex in regexes
for match in regex.finditer(message)
]
return [
self._get_snippet_for_match(match)
for regex in regexes
for match in regex.finditer(message)
for match in regex.finditer(message.get_text())
]

def extract_unique_snippets(self, message: str) -> Dict[str, CodeSnippet]:
Expand Down
3 changes: 0 additions & 3 deletions src/codegate/llm_utils/__init__.py

This file was deleted.

Loading
Loading