Closed as not planned
What happened?
LiteLLM: v1.57.1
Langfuse: v3.10.0
Model: llama-3.3-70b-specdec
The LiteLLM response comes back normally; however, no trace is created in Langfuse.
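A possible workaround (not yet verified on this deployment) is to point the Hugging Face cache at a writable path so the fallback tokenizer download no longer hits the read-only /root/.cache/huggingface. A minimal sketch, assuming /tmp is writable inside the container and that the environment variables are set before litellm is imported:

import os

# Hypothetical writable location; adjust to whatever volume the proxy can write to.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# huggingface_hub resolves its cache paths from HF_HOME / HF_HUB_CACHE at import time,
# so these must be set before importing litellm (which pulls in huggingface_hub).
os.environ["HF_HOME"] = cache_dir
os.environ["HF_HUB_CACHE"] = cache_dir

import litellm  # the Xenova/llama-3-tokenizer download should now land in /tmp/hf_cache

When running the Docker image, the same effect can presumably be achieved by passing the variables to the container, e.g. -e HF_HOME=/tmp/hf_cache.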
Relevant log output
{"message": "Error occurred building stream chunk in async success logging: litellm.APIError: Error building chunks for logging/streaming usage calculation", "level": "ERROR", "timestamp": "2025-01-28T04:11:33.554712", "stacktrace": "Traceback (most recent call last):
File \"/usr/local/lib/python3.13/site-packages/litellm/main.py\", line 5439, in stream_chunk_builder
usage = processor.calculate_usage(
chunks=chunks,
...<2 lines>...
messages=messages,
)
File \"/usr/local/lib/python3.13/site-packages/litellm/litellm_core_utils/streaming_chunk_builder_utils.py\", line 356, in calculate_usage
returned_usage.completion_tokens = completion_tokens or token_counter(
~~~~~~~~~~~~~^
model=model,
^^^^^^^^^^^^
text=completion_output,
^^^^^^^^^^^^^^^^^^^^^^^
count_response_tokens=True, # count_response_tokens is a Flag to tell token counter this is a response, No need to add extra tokens we do for input messages
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1605, in token_counter
tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1260, in _select_tokenizer
return _select_tokenizer_helper(model=model)
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1281, in _select_tokenizer_helper
tokenizer = Tokenizer.from_pretrained(\"Xenova/llama-3-tokenizer\")
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn
return fn(*args, **kwargs)
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/file_download.py\", line 860, in hf_hub_download
return _hf_hub_download_to_cache_dir(
# Destination
...<14 lines>...
force_download=force_download,
)
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/file_download.py\", line 977, in _hf_hub_download_to_cache_dir
os.makedirs(os.path.dirname(blob_path), exist_ok=True)
~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 227, in makedirs
OSError: [Errno 30] Read-only file system: '/root/.cache/huggingface'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File \"/usr/local/lib/python3.13/site-packages/litellm/litellm_core_utils/logging_utils.py\", line 77, in _assemble_complete_response_from_streaming_chunks
complete_streaming_response = litellm.stream_chunk_builder(
chunks=streaming_chunks,
...<2 lines>...
end_time=end_time,
)
File \"/usr/local/lib/python3.13/site-packages/litellm/main.py\", line 5455, in stream_chunk_builder
raise litellm.APIError(
...<4 lines>...
)
litellm.exceptions.APIError: litellm.APIError: Error building chunks for logging/streaming usage calculation"}
{"message": "litellm.main.py::stream_chunk_builder() - Exception occurred - [Errno 30] Read-only file system: '/root/.cache/huggingface'", "level": "ERROR", "timestamp": "2025-01-28T04:11:18.148928", "stacktrace": "Traceback (most recent call last):
File \"/usr/local/lib/python3.13/site-packages/litellm/main.py\", line 5439, in stream_chunk_builder
usage = processor.calculate_usage(
chunks=chunks,
...<2 lines>...
messages=messages,
)
File \"/usr/local/lib/python3.13/site-packages/litellm/litellm_core_utils/streaming_chunk_builder_utils.py\", line 356, in calculate_usage
returned_usage.completion_tokens = completion_tokens or token_counter(
~~~~~~~~~~~~~^
model=model,
^^^^^^^^^^^^
text=completion_output,
^^^^^^^^^^^^^^^^^^^^^^^
count_response_tokens=True, # count_response_tokens is a Flag to tell token counter this is a response, No need to add extra tokens we do for input messages
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1605, in token_counter
tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1260, in _select_tokenizer
return _select_tokenizer_helper(model=model)
File \"/usr/local/lib/python3.13/site-packages/litellm/utils.py\", line 1281, in _select_tokenizer_helper
tokenizer = Tokenizer.from_pretrained(\"Xenova/llama-3-tokenizer\")
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py\", line 114, in _inner_fn
return fn(*args, **kwargs)
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/file_download.py\", line 860, in hf_hub_download
return _hf_hub_download_to_cache_dir(
# Destination
...<14 lines>...
force_download=force_download,
)
File \"/usr/local/lib/python3.13/site-packages/huggingface_hub/file_download.py\", line 977, in _hf_hub_download_to_cache_dir
os.makedirs(os.path.dirname(blob_path), exist_ok=True)
~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 217, in makedirs
File \"<frozen os>\", line 227, in makedirs
OSError: [Errno 30] Read-only file system: '/root/.cache/huggingface'"}Are you a ML Ops Team?
No
What LiteLLM version are you on?
v1.57.1
Twitter / LinkedIn details
No response
os-groot