Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"""

import asyncio
import base64
import binascii
import json
import logging
import os
Expand Down Expand Up @@ -350,6 +352,45 @@ def _should_include_tool_result_status(self) -> bool:
else: # "auto"
return any(model in self.config["model_id"] for model in _MODELS_INCLUDE_STATUS)

def _coerce_to_bytes(self, value: Any, *, expected_fmt: Optional[str] = None) -> bytes:
Copy link
Member

@pgrayy pgrayy Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not so sure this logic should exist in the BedrockModel provider. The expected type of ["source"]["bytes"] is bytes (src) and so users should be configuring this before passing the payload into Strands. Also, this would be a problem for other model providers as well.

I'm thinking that this logic should go into https://github.com/strands-agents/sdk-python/blob/main/src/strands/multiagent/a2a/executor.py if we are trying to resolve #850.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with executor.py but moved it to bedrock.py because this is a requirement of the Bedrock API and has nothing to do with the A2A protocol. The point about other model providers is a good one, so maybe we should check which provider it is before coercing. WDYT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would say this does have to do with A2A as the bytes coming in through the payloads are likely to be base64 encoded.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, the bytes are Base64-encoded since JSON-RPC is used as the serialization protocol for the A2A server. But their conversion to raw bytes is a requirement of Bedrock, which is why I implemented it on the Bedrock side. Or did I miss your point?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pgrayy any thoughts?

Copy link
Member

@pgrayy pgrayy Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I was proposing is now addressed in #1195. This solution will work for all model providers and does not affect the typing contract, which specifies that source is of type bytes.

Note that it may still be worthwhile to accept a base64 source string, but that would require more updates than are provided here. We would also need to update the source content block type, which in turn would require updates across all model providers.

"""Normalize bytes-like inputs to raw bytes for Bedrock requests.

Args:
value: Input that should represent binary data.
expected_fmt: Optional file format hint used for error messaging.

Returns:
Raw bytes suitable for Bedrock's `source` payloads.

Raises:
TypeError: If the provided value cannot be interpreted as bytes.
"""
if hasattr(value, "read") and callable(value.read):
data = value.read()
if isinstance(data, bytes):
return data
if isinstance(data, str):
return data.encode("utf-8")
return bytes(data)

if isinstance(value, (bytes, bytearray, memoryview)):
return bytes(value)

# Base64-encoded strings (optionally data URLs)
if isinstance(value, str):
data_str = value
if data_str.startswith("data:") and ";base64," in data_str:
data_str = data_str.split(",", 1)[1]

try:
return base64.b64decode(data_str, validate=True)
except binascii.Error as exc:
raise TypeError(
f"document.source.bytes must be raw bytes or a base64-encoded string (format={expected_fmt!r})."
) from exc

raise TypeError(f"Unsupported type for bytes conversion: {type(value).__name__}")

def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]:
"""Format a Bedrock content block.

Expand Down Expand Up @@ -382,7 +423,14 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An

# Handle source
if "source" in document:
result["source"] = {"bytes": document["source"]["bytes"]}
source = document["source"]

if "bytes" in source:
result["source"] = {
"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=document.get("format"))
}
else:
raise TypeError("document.source must include 'bytes'")

# Handle optional fields
if "citations" in document and document["citations"] is not None:
Expand All @@ -405,7 +453,7 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
source = image["source"]
formatted_source = {}
if "bytes" in source:
formatted_source = {"bytes": source["bytes"]}
formatted_source = {"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=image.get("format"))}
result = {"format": image["format"], "source": formatted_source}
return {"image": result}

Expand Down Expand Up @@ -470,7 +518,7 @@ def _format_request_message_content(self, content: ContentBlock) -> dict[str, An
source = video["source"]
formatted_source = {}
if "bytes" in source:
formatted_source = {"bytes": source["bytes"]}
formatted_source = {"bytes": self._coerce_to_bytes(source["bytes"], expected_fmt=video.get("format"))}
result = {"format": video["format"], "source": formatted_source}
return {"video": result}

Expand Down
68 changes: 68 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import sys
import unittest.mock
Expand Down Expand Up @@ -421,6 +422,73 @@ def test_format_request_tool_specs(model, messages, model_id, tool_spec):
assert tru_request == exp_request


def test_format_request_document_base64_bytes(model, model_id):
    """Check that a base64 string in a document's ``source.bytes`` comes back as raw bytes.

    The document payload carries the ASCII base64 encoding of a small PDF-like
    byte string; the formatted request is expected to contain the original bytes.
    """
    raw_pdf = b"%PDF-1.4 test pdf"
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        "source": {"bytes": base64.b64encode(raw_pdf).decode("ascii")},
    }
    user_messages = [{"role": "user", "content": [{"document": document}]}]

    formatted = model.format_request(user_messages)

    formatted_document = formatted["messages"][0]["content"][0]["document"]
    assert formatted_document["source"]["bytes"] == raw_pdf


def test_format_request_document_plain_text_raises(model):
    """Check that a non-base64 string in a document's ``source.bytes`` raises ``TypeError``."""
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        # Contains spaces, so it is not valid under strict base64 validation.
        "source": {"bytes": "this is not base64"},
    }
    user_messages = [{"role": "user", "content": [{"document": document}]}]

    with pytest.raises(TypeError):
        model.format_request(user_messages)


def test_format_request_document_raw_bytes(model):
    """Check that raw bytes in a document's ``source.bytes`` pass through unchanged."""
    raw_pdf = b"%PDF-1.4 test pdf"
    document = {
        "name": "testing.pdf",
        "format": "pdf",
        "source": {"bytes": raw_pdf},
    }
    user_messages = [{"role": "user", "content": [{"document": document}]}]

    formatted = model.format_request(user_messages)

    formatted_document = formatted["messages"][0]["content"][0]["document"]
    assert formatted_document["source"]["bytes"] == raw_pdf


def test_format_request_tool_choice_auto(model, messages, model_id, tool_spec):
tool_choice = {"auto": {}}
tru_request = model.format_request(messages, [tool_spec], tool_choice=tool_choice)
Expand Down
Loading