Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions src/openai/lib/streaming/_assistants.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,13 +980,20 @@ def accumulate_event(
def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
for key, delta_value in delta.items():
if key not in acc:
acc[key] = delta_value
continue
if is_list(delta_value):
acc_value = []
else:
acc[key] = delta_value
continue

acc_value = acc[key]
if acc_value is None:
acc[key] = delta_value
continue
else:
acc_value = acc[key]
if acc_value is None:
if is_list(delta_value):
acc_value = []
else:
acc[key] = delta_value
continue

# the `index` property is used in arrays of objects so it should
# not be accumulated like other values e.g.
Expand All @@ -1007,8 +1014,11 @@ def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) ->
elif is_list(acc_value) and is_list(delta_value):
# for lists of non-dictionary items we'll only ever get new entries
# in the array, existing entries will never be changed
if all(isinstance(x, (str, int, float)) for x in acc_value):
if all(isinstance(x, (str, int, float)) for x in acc_value) and all(
isinstance(x, (str, int, float)) for x in delta_value
):
acc_value.extend(delta_value)
acc[key] = acc_value
continue

for delta_entry in delta_value:
Expand Down
24 changes: 17 additions & 7 deletions src/openai/lib/streaming/_deltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@
def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
for key, delta_value in delta.items():
if key not in acc:
acc[key] = delta_value
continue
if is_list(delta_value):
acc_value = []
else:
acc[key] = delta_value
continue

acc_value = acc[key]
if acc_value is None:
acc[key] = delta_value
continue
else:
acc_value = acc[key]
if acc_value is None:
if is_list(delta_value):
acc_value = []
else:
acc[key] = delta_value
continue

# the `index` property is used in arrays of objects so it should
# not be accumulated like other values e.g.
Expand All @@ -33,8 +40,11 @@ def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) ->
elif is_list(acc_value) and is_list(delta_value):
# for lists of non-dictionary items we'll only ever get new entries
# in the array, existing entries will never be changed
if all(isinstance(x, (str, int, float)) for x in acc_value):
if all(isinstance(x, (str, int, float)) for x in acc_value) and all(
isinstance(x, (str, int, float)) for x in delta_value
):
acc_value.extend(delta_value)
acc[key] = acc_value
continue

for delta_entry in delta_value:
Expand Down
2 changes: 1 addition & 1 deletion src/openai/lib/streaming/chat/_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ def _convert_initial_chunk_into_snapshot(chunk: ChatCompletionChunk) -> ParsedCh
for choice in chunk.choices:
choices[choice.index] = {
**choice.model_dump(exclude_unset=True, exclude={"delta"}),
"message": choice.delta.to_dict(),
"message": accumulate_delta({}, choice.delta.to_dict()),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Normalize first chunks for choices added later

This only applies the duplicate-index merge to choices that are present in the stream's very first chunk. If a multi-choice stream first sees a choice through the `except IndexError` path in `_accumulate_chunk` (for example, the first SSE has choice 0 and a later SSE first introduces choice 1) and that choice's first delta contains duplicate `tool_calls` entries with the same `index`, line 418 still stores `choice.delta.to_dict()` directly, so the duplicate entries remain and subsequent chunks merge into only one of them. Please route that new-choice path through `accumulate_delta({}, choice.delta.to_dict())` as well.

Useful? React with 👍 / 👎.

}

return cast(
Expand Down
134 changes: 134 additions & 0 deletions tests/lib/test_streaming_deltas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from __future__ import annotations

import json
from copy import deepcopy
from collections.abc import Callable

import pytest

from openai.types.chat import ChatCompletionChunk
from openai.lib.streaming.chat import ChatCompletionStreamState
from openai.lib.streaming._deltas import accumulate_delta as accumulate_chat_delta
from openai.lib.streaming._assistants import accumulate_delta as accumulate_assistant_delta

# Call signature shared by the two `accumulate_delta` implementations imported
# above (chat and assistants): takes the accumulator dict and a delta dict and
# returns a dict. Used to annotate the parametrized `accumulate_delta` argument.
AccumulateDelta = Callable[[dict[object, object], dict[object, object]], dict[object, object]]


@pytest.mark.parametrize("accumulate_delta", [accumulate_chat_delta, accumulate_assistant_delta])
@pytest.mark.parametrize("initial_acc", [{}, {"tool_calls": None}])
def test_accumulate_delta_merges_duplicate_index_entries_in_initial_list(
    accumulate_delta: AccumulateDelta,
    initial_acc: dict[object, object],
) -> None:
    """Check that same-index `tool_calls` entries in the first delta are merged.

    The first delta carries two entries that both use ``index`` 0; a second
    delta then adds more argument text for that index. The accumulator must end
    up with a single tool call whose ``arguments`` is the concatenation of both
    fragments, whether ``tool_calls`` starts out missing or set to ``None``.
    """
    first_delta: dict[object, object] = {
        "tool_calls": [
            {
                "index": 0,
                "id": "call_abc",
                "function": {"name": "get_weather"},
                "type": "function",
            },
            {
                "index": 0,
                "function": {"arguments": '{"city"'},
            },
        ]
    }
    second_delta: dict[object, object] = {
        "tool_calls": [
            {
                "index": 0,
                "function": {"arguments": ': "London"}'},
            },
        ]
    }

    # Copy so the parametrized starting value is never mutated across cases.
    snapshot = deepcopy(initial_acc)
    for delta in (first_delta, second_delta):
        accumulate_delta(snapshot, delta)

    merged_calls = snapshot["tool_calls"]
    assert isinstance(merged_calls, list)
    assert len(merged_calls) == 1

    raw_arguments = merged_calls[0]["function"]["arguments"]
    assert raw_arguments == '{"city": "London"}'
    # The concatenated fragments must also form valid JSON.
    assert json.loads(raw_arguments) == {"city": "London"}


@pytest.mark.parametrize("accumulate_delta", [accumulate_chat_delta, accumulate_assistant_delta])
@pytest.mark.parametrize("initial_acc", [{}, {"content": None}])
def test_accumulate_delta_preserves_initial_primitive_lists(
    accumulate_delta: AccumulateDelta,
    initial_acc: dict[object, object],
) -> None:
    """Check that a first delta holding a list of strings is stored unchanged.

    Covers both starting states: ``content`` missing from the accumulator and
    ``content`` preset to ``None``.
    """
    expected = ["hello", " ", "world"]

    snapshot = deepcopy(initial_acc)
    # Pass a separate list so `expected` cannot be affected by the call.
    accumulate_delta(snapshot, {"content": list(expected)})

    assert snapshot["content"] == expected


def test_chat_stream_state_merges_duplicate_tool_call_indexes_in_first_chunk() -> None:
    """Check duplicate-index tool calls in the first chunk via the stream state.

    Feeds two chunks to a ``ChatCompletionStreamState``: the first has two
    ``tool_calls`` entries sharing ``index`` 0, the second adds more argument
    text for that index. The resulting snapshot must expose exactly one tool
    call whose arguments are the joined fragments.
    """

    def build_chunk(delta: dict[str, object]) -> ChatCompletionChunk:
        # Both chunks differ only in the delta of their single choice.
        return ChatCompletionChunk(
            id="chatcmpl_123",
            created=1,
            model="gpt-4o",
            object="chat.completion.chunk",
            choices=[
                {
                    "index": 0,
                    "finish_reason": None,
                    "delta": delta,
                }
            ],
        )

    opening_chunk = build_chunk(
        {
            "role": "assistant",
            "tool_calls": [
                {
                    "index": 0,
                    "id": "call_abc",
                    "function": {"name": "get_weather"},
                    "type": "function",
                },
                {
                    "index": 0,
                    "function": {"arguments": '{"city"'},
                },
            ],
        }
    )
    followup_chunk = build_chunk(
        {
            "tool_calls": [
                {
                    "index": 0,
                    "function": {"arguments": ': "London"}'},
                },
            ],
        }
    )

    stream_state = ChatCompletionStreamState[object]()
    stream_state.handle_chunk(opening_chunk)
    stream_state.handle_chunk(followup_chunk)

    merged_calls = stream_state.current_completion_snapshot.choices[0].message.tool_calls
    assert merged_calls is not None
    assert len(merged_calls) == 1
    assert merged_calls[0].function.arguments == '{"city": "London"}'