From 43e5ae3b23b386051b92d7dd3b9b0a54e96e510d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 1 Mar 2026 04:47:46 +0000 Subject: [PATCH] Handle all client app message event types Co-authored-by: Sahil Suman --- Examples/SwiftUICallView.swift | 28 +++ Sources/Models/AppMessage.swift | 78 +++++++- Sources/Models/ConversationUpdate.swift | 53 ++++- Sources/Vapi.swift | 251 +++++++++++++++++------- Tests/VapiTests.swift | 95 ++++++++- 5 files changed, 427 insertions(+), 78 deletions(-) diff --git a/Examples/SwiftUICallView.swift b/Examples/SwiftUICallView.swift index baf353b..ee3efb7 100644 --- a/Examples/SwiftUICallView.swift +++ b/Examples/SwiftUICallView.swift @@ -47,6 +47,34 @@ class CallManager: ObservableObject { print(event) case .voiceInput: print(event) + case .workflowNodeStarted: + print(event) + case .assistantStarted: + print(event) + case .toolCalls: + print(event) + case .toolCallsResult: + print(event) + case .transferUpdate: + print(event) + case .languageChangeDetected: + print(event) + case .chatCreated: + print(event) + case .chatDeleted: + print(event) + case .sessionCreated: + print(event) + case .sessionUpdated: + print(event) + case .sessionDeleted: + print(event) + case .callDeleted: + print(event) + case .callDeleteFailed: + print(event) + case .unknown: + print(event) case .error(let error): print("Error: \(error)") } diff --git a/Sources/Models/AppMessage.swift b/Sources/Models/AppMessage.swift index 0f9c545..9a3b499 100644 --- a/Sources/Models/AppMessage.swift +++ b/Sources/Models/AppMessage.swift @@ -7,8 +7,8 @@ import Foundation -struct AppMessage: Codable { - enum MessageType: String, Codable { +struct AppMessage: Decodable { + enum MessageType: String { case hang case functionCall = "function-call" case transcript @@ -19,7 +19,77 @@ struct AppMessage: Codable { case statusUpdate = "status-update" case voiceInput = "voice-input" case userInterrupted = "user-interrupted" + case assistantStarted = "assistant.started" + case workflowNodeStarted = "workflow.node.started" + case toolCalls = "tool-calls" + case toolCallsResult = "tool-calls-result" + case transferUpdate = "transfer-update" + case languageChangeDetected = "language-change-detected" + case chatCreated = "chat.created" + case chatDeleted = "chat.deleted" + case sessionCreated = "session.created" + case sessionUpdated = "session.updated" + case sessionDeleted = "session.deleted" + case callDeleted = "call.deleted" + case callDeleteFailed = "call.delete.failed" + case unknown + } + + let type: String + + var messageType: MessageType { + // Messages can be configured as transcript[transcriptType="final"]. + let normalizedType = String(type.split(separator: "[", maxSplits: 1).first ?? "") + + switch normalizedType { + case MessageType.functionCall.rawValue: + return .functionCall + case MessageType.hang.rawValue: + return .hang + case MessageType.transcript.rawValue: + return .transcript + case MessageType.speechUpdate.rawValue: + return .speechUpdate + case MessageType.metadata.rawValue: + return .metadata + case MessageType.conversationUpdate.rawValue: + return .conversationUpdate + case MessageType.modelOutput.rawValue: + return .modelOutput + case MessageType.statusUpdate.rawValue: + return .statusUpdate + case MessageType.voiceInput.rawValue: + return .voiceInput + case MessageType.userInterrupted.rawValue: + return .userInterrupted + case MessageType.assistantStarted.rawValue: + return .assistantStarted + case MessageType.workflowNodeStarted.rawValue: + return .workflowNodeStarted + case MessageType.toolCalls.rawValue: + return .toolCalls + case MessageType.toolCallsResult.rawValue, "function-call-result", "tool.completed", "assistant.tool.completed": + return .toolCallsResult + case MessageType.transferUpdate.rawValue: + return .transferUpdate + case MessageType.languageChangeDetected.rawValue, "language-changed": + return .languageChangeDetected + case MessageType.chatCreated.rawValue: + return .chatCreated + case MessageType.chatDeleted.rawValue: + return .chatDeleted + case MessageType.sessionCreated.rawValue: + return .sessionCreated + case MessageType.sessionUpdated.rawValue: + return .sessionUpdated + case MessageType.sessionDeleted.rawValue: + return .sessionDeleted + case MessageType.callDeleted.rawValue: + return .callDeleted + case MessageType.callDeleteFailed.rawValue: + return .callDeleteFailed + default: + return .unknown + } } - - let type: MessageType } diff --git a/Sources/Models/ConversationUpdate.swift b/Sources/Models/ConversationUpdate.swift index 917a3e8..d2042d3 100644 --- a/Sources/Models/ConversationUpdate.swift +++ b/Sources/Models/ConversationUpdate.swift @@ -5,12 +5,63 @@ public struct Message: Codable { case user = "user" case assistant = "assistant" case system = "system" + case tool = "tool" + case unknown + + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let rawValue = try container.decode(String.self) + self = Role(rawValue: rawValue) ?? .unknown + } } public let role: Role - public let content: String + public let content: String? + + private enum CodingKeys: String, CodingKey { + case role + case content + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + role = (try? container.decode(Role.self, forKey: .role)) ?? .unknown + content = try? container.decodeIfPresent(String.self, forKey: .content) + } } public struct ConversationUpdate: Codable { public let conversation: [Message] + + private enum CodingKeys: String, CodingKey { + case conversation + case messages + case messagesOpenAIFormatted + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + + if let conversation = try? container.decode([Message].self, forKey: .conversation) { + self.conversation = conversation + return + } + + if let messages = try? container.decode([Message].self, forKey: .messages) { + self.conversation = messages + return + } + + if let openAIFormattedMessages = try? container.decode([Message].self, forKey: .messagesOpenAIFormatted) { + self.conversation = openAIFormattedMessages + return + } + + self.conversation = [] + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(conversation, forKey: .conversation) + } } diff --git a/Sources/Vapi.swift b/Sources/Vapi.swift index 69bdaa3..7094dfd 100644 --- a/Sources/Vapi.swift +++ b/Sources/Vapi.swift @@ -49,6 +49,20 @@ public final class Vapi: CallClientDelegate { case modelOutput(ModelOutput) case userInterrupted(UserInterrupted) case voiceInput(VoiceInput) + case workflowNodeStarted([String: Any]) + case assistantStarted([String: Any]) + case toolCalls([String: Any]) + case toolCallsResult([String: Any]) + case transferUpdate([String: Any]) + case languageChangeDetected([String: Any]) + case chatCreated([String: Any]) + case chatDeleted([String: Any]) + case sessionCreated([String: Any]) + case sessionUpdated([String: Any]) + case sessionDeleted([String: Any]) + case callDeleted([String: Any]) + case callDeleteFailed([String: Any]) + case unknown(type: String, payload: [String: Any]) case hang case error(Swift.Error) } @@ -341,21 +355,171 @@ public final class Vapi: CallClientDelegate { } } - private func unescapeAppMessage(_ jsonData: Data) -> (Data, String?) { - guard let jsonString = String(data: jsonData, encoding: .utf8) else { - return (jsonData, nil) + private enum NormalizedAppMessage { + case listening + case json(Data) + } + + private func normalizeAppMessage(_ jsonData: Data) -> NormalizedAppMessage { + var currentData = jsonData + + for _ in 0..<5 { + if let rawString = String(data: currentData, encoding: .utf8)? + .trimmingCharacters(in: .whitespacesAndNewlines), + rawString == "listening" + { + return .listening + } + + guard let jsonObject = try? JSONSerialization.jsonObject(with: currentData, options: []) else { + break + } + + if let encodedString = jsonObject as? String { + let trimmed = encodedString.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed == "listening" { + return .listening + } + + currentData = Data(trimmed.utf8) + continue + } + + guard let dictionary = jsonObject as? [String: Any] else { + break + } + + if let nestedMessage = dictionary["message"] { + if let nestedMessageString = nestedMessage as? String { + let trimmed = nestedMessageString.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed == "listening" { + return .listening + } + + currentData = Data(trimmed.utf8) + continue + } + + if JSONSerialization.isValidJSONObject(nestedMessage), + let nestedData = try? JSONSerialization.data(withJSONObject: nestedMessage, options: []) + { + currentData = nestedData + continue + } + } + + return .json(currentData) + } + + // Compatibility fallback for escaped JSON strings. + if let jsonString = String(data: currentData, encoding: .utf8) { + let trimmedString = jsonString + .trimmingCharacters(in: .whitespacesAndNewlines) + .trimmingCharacters(in: CharacterSet(charactersIn: "\"")) + + if trimmedString == "listening" { + return .listening + } + + let unescapedString = trimmedString + .replacingOccurrences(of: "\\\\", with: "\\") + .replacingOccurrences(of: "\\\"", with: "\"") + + if let unescapedData = unescapedString.data(using: .utf8), + (try? JSONSerialization.jsonObject(with: unescapedData, options: [])) != nil + { + return .json(unescapedData) + } + } + + return .json(currentData) + } + + func decodeAppEvent(from jsonData: Data) throws -> Event? { + let normalizedMessage = normalizeAppMessage(jsonData) + + switch normalizedMessage { + case .listening: + return .callDidStart + case .json(let normalizedData): + let decoder = JSONDecoder() + let appMessage = try decoder.decode(AppMessage.self, from: normalizedData) + + guard let messageDictionary = try JSONSerialization.jsonObject(with: normalizedData, options: []) as? [String: Any] else { + throw VapiError.decodingError(message: "App message isn't a valid JSON object") + } + + switch appMessage.messageType { + case .functionCall: + guard let functionCallDictionary = messageDictionary["functionCall"] as? [String: Any] else { + throw VapiError.decodingError(message: "App message missing functionCall") + } + + guard let name = functionCallDictionary[FunctionCall.CodingKeys.name.stringValue] as? String else { + throw VapiError.decodingError(message: "App message missing name") + } + + guard let parameters = functionCallDictionary[FunctionCall.CodingKeys.parameters.stringValue] as? [String: Any] else { + throw VapiError.decodingError(message: "App message missing parameters") + } + + let functionCall = FunctionCall(name: name, parameters: parameters) + return .functionCall(functionCall) + case .hang: + return .hang + case .transcript: + let transcript = try decoder.decode(Transcript.self, from: normalizedData) + return .transcript(transcript) + case .speechUpdate: + let speechUpdate = try decoder.decode(SpeechUpdate.self, from: normalizedData) + return .speechUpdate(speechUpdate) + case .metadata: + let metadata = try decoder.decode(Metadata.self, from: normalizedData) + return .metadata(metadata) + case .conversationUpdate: + let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: normalizedData) + return .conversationUpdate(conversationUpdate) + case .statusUpdate: + let statusUpdate = try decoder.decode(StatusUpdate.self, from: normalizedData) + return .statusUpdate(statusUpdate) + case .modelOutput: + let modelOutput = try decoder.decode(ModelOutput.self, from: normalizedData) + return .modelOutput(modelOutput) + case .userInterrupted: + return .userInterrupted(UserInterrupted()) + case .voiceInput: + let voiceInput = try decoder.decode(VoiceInput.self, from: normalizedData) + return .voiceInput(voiceInput) + case .workflowNodeStarted: + return .workflowNodeStarted(messageDictionary) + case .assistantStarted: + return .assistantStarted(messageDictionary) + case .toolCalls: + return .toolCalls(messageDictionary) + case .toolCallsResult: + return .toolCallsResult(messageDictionary) + case .transferUpdate: + return .transferUpdate(messageDictionary) + case .languageChangeDetected: + return .languageChangeDetected(messageDictionary) + case .chatCreated: + return .chatCreated(messageDictionary) + case .chatDeleted: + return .chatDeleted(messageDictionary) + case .sessionCreated: + return .sessionCreated(messageDictionary) + case .sessionUpdated: + return .sessionUpdated(messageDictionary) + case .sessionDeleted: + return .sessionDeleted(messageDictionary) + case .callDeleted: + return .callDeleted(messageDictionary) + case .callDeleteFailed: + return .callDeleteFailed(messageDictionary) + case .unknown: + return .unknown(type: appMessage.type, payload: messageDictionary) + } } - - // Remove the leading and trailing double quotes - let trimmedString = jsonString.trimmingCharacters(in: CharacterSet(charactersIn: "\"")) - // Replace escaped backslashes - let unescapedString = trimmedString.replacingOccurrences(of: "\\\\", with: "\\") - // Replace escaped double quotes - let unescapedJSON = unescapedString.replacingOccurrences(of: "\\\"", with: "\"") - - let unescapedData = unescapedJSON.data(using: .utf8) ?? jsonData - - return (unescapedData, unescapedJSON) } public func startLocalAudioLevelObserver() async throws { @@ -431,67 +595,10 @@ public final class Vapi: CallClientDelegate { public func callClient(_ callClient: Daily.CallClient, appMessageAsJson jsonData: Data, from participantID: Daily.ParticipantID) { do { - let (unescapedData, unescapedString) = unescapeAppMessage(jsonData) - - // Detect listening message first since it's a string rather than JSON - guard unescapedString != "listening" else { - eventSubject.send(.callDidStart) + guard let event = try decodeAppEvent(from: jsonData) else { return } - // Parse the JSON data generically to determine the type of event - let decoder = JSONDecoder() - let appMessage = try decoder.decode(AppMessage.self, from: unescapedData) - // Parse the JSON data again, this time using the specific type - let event: Event - switch appMessage.type { - case .functionCall: - guard let messageDictionary = try JSONSerialization.jsonObject(with: unescapedData, options: []) as? [String: Any] else { - throw VapiError.decodingError(message: "App message isn't a valid JSON object") - } - - guard let functionCallDictionary = messageDictionary["functionCall"] as? [String: Any] else { - throw VapiError.decodingError(message: "App message missing functionCall") - } - - guard let name = functionCallDictionary[FunctionCall.CodingKeys.name.stringValue] as? String else { - throw VapiError.decodingError(message: "App message missing name") - } - - guard let parameters = functionCallDictionary[FunctionCall.CodingKeys.parameters.stringValue] as? [String: Any] else { - throw VapiError.decodingError(message: "App message missing parameters") - } - - - let functionCall = FunctionCall(name: name, parameters: parameters) - event = Event.functionCall(functionCall) - case .hang: - event = Event.hang - case .transcript: - let transcript = try decoder.decode(Transcript.self, from: unescapedData) - event = Event.transcript(transcript) - case .speechUpdate: - let speechUpdate = try decoder.decode(SpeechUpdate.self, from: unescapedData) - event = Event.speechUpdate(speechUpdate) - case .metadata: - let metadata = try decoder.decode(Metadata.self, from: unescapedData) - event = Event.metadata(metadata) - case .conversationUpdate: - let conv = try decoder.decode(ConversationUpdate.self, from: unescapedData) - event = Event.conversationUpdate(conv) - case .statusUpdate: - let statusUpdate = try decoder.decode(StatusUpdate.self, from: unescapedData) - event = Event.statusUpdate(statusUpdate) - case .modelOutput: - let modelOutput = try decoder.decode(ModelOutput.self, from: unescapedData) - event = Event.modelOutput(modelOutput) - case .userInterrupted: - let userInterrupted = UserInterrupted() - event = Event.userInterrupted(userInterrupted) - case .voiceInput: - let voiceInput = try decoder.decode(VoiceInput.self, from: unescapedData) - event = Event.voiceInput(voiceInput) - } eventSubject.send(event) } catch { let messageText = String(data: jsonData, encoding: .utf8) diff --git a/Tests/VapiTests.swift b/Tests/VapiTests.swift index 452dd24..14332b8 100644 --- a/Tests/VapiTests.swift +++ b/Tests/VapiTests.swift @@ -2,5 +2,98 @@ import XCTest @testable import Vapi final class VapiTests: XCTestCase { - func testExample() throws {} + private func makeVapi() -> Vapi { + Vapi(publicKey: "test-public-key") + } + + func testDecodeEscapedAssistantStartedMessage() throws { + let vapi = makeVapi() + let escapedMessage = "\"{\\\"type\\\":\\\"assistant.started\\\",\\\"newAssistant\\\":{\\\"id\\\":\\\"assistant-id\\\"}}\"" + let data = Data(escapedMessage.utf8) + + let event = try vapi.decodeAppEvent(from: data) + + guard case .assistantStarted(let payload)? = event else { + XCTFail("Expected assistantStarted event") + return + } + + XCTAssertEqual(payload["type"] as? String, "assistant.started") + } + + func testDecodeToolCompletedAliasAsToolCallsResult() throws { + let vapi = makeVapi() + let message = """ + { + "type": "tool.completed", + "toolCallResult": { + "name": "lookup", + "result": "ok" + } + } + """ + + let event = try vapi.decodeAppEvent(from: Data(message.utf8)) + + guard case .toolCallsResult(let payload)? = event else { + XCTFail("Expected toolCallsResult event") + return + } + + XCTAssertEqual(payload["type"] as? String, "tool.completed") + XCTAssertNotNil(payload["toolCallResult"]) + } + + func testDecodeListeningMessageAsCallDidStart() throws { + let vapi = makeVapi() + let event = try vapi.decodeAppEvent(from: Data("\"listening\"".utf8)) + + guard case .callDidStart? = event else { + XCTFail("Expected callDidStart event") + return + } + } + + func testDecodeUnknownMessageType() throws { + let vapi = makeVapi() + let message = """ + { + "type": "my-new-event", + "foo": "bar" + } + """ + + let event = try vapi.decodeAppEvent(from: Data(message.utf8)) + + guard case .unknown(let type, let payload)? = event else { + XCTFail("Expected unknown event") + return + } + + XCTAssertEqual(type, "my-new-event") + XCTAssertEqual(payload["foo"] as? String, "bar") + } + + func testDecodeWrappedMessagePayload() throws { + let vapi = makeVapi() + let message = """ + { + "message": { + "type": "workflow.node.started", + "node": { + "id": "node-1" + } + } + } + """ + + let event = try vapi.decodeAppEvent(from: Data(message.utf8)) + + guard case .workflowNodeStarted(let payload)? = event else { + XCTFail("Expected workflowNodeStarted event") + return + } + + XCTAssertEqual(payload["type"] as? String, "workflow.node.started") + } }