diff --git a/Dockerfile b/Dockerfile index de34cfe..74d6ab2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,11 @@ FROM golang:1.25-bookworm as builder RUN apt-get update && apt-get install -y \ build-essential \ protobuf-compiler \ - libvips-dev + libvips-dev \ + gcc \ + cmake \ + git \ + && rm -rf /var/lib/apt/lists/* # Set the working directory WORKDIR /app @@ -17,16 +21,48 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ # Copy the application source code COPY . . -# Build the binary +# Build libopus_mlow.so when not vendored in native/ (Linux Docker builds). +RUN mkdir -p native && \ + if [ ! -f native/libopus_mlow.so ]; then \ + git clone --depth 1 https://github.com/edgardmessias/opus_mlow.git /tmp/opus_mlow && \ + cmake -S /tmp/opus_mlow -B /tmp/opus_build \ + -DOPUS_BUILD_SHARED_LIBRARY=ON \ + -DOPUS_BUILD_TESTING=OFF \ + -DOPUS_BUILD_PROGRAMS=OFF \ + -DCMAKE_BUILD_TYPE=Release && \ + cmake --build /tmp/opus_build -j"$(nproc)" && \ + cp /tmp/opus_build/libopus.so native/libopus_mlow.so && \ + ln -sf libopus_mlow.so native/libopus.so.0 && \ + ln -sf libopus_mlow.so native/libopus-0.so; \ + fi + +# Build the binary with MLow codec when Linux libs are available +ENV CGO_ENABLED=1 +ENV LD_LIBRARY_PATH=/app/native:${LD_LIBRARY_PATH} RUN export GOPATH=$HOME/go && \ export PATH=$PATH:$GOPATH/bin && \ export PATH=$PATH:/usr/local/go/bin && \ - make all + make build-proto && \ + cd src && \ + if [ -f ../native/libopus_mlow.so ]; then \ + go build -tags mlow -o ../bin/gows .; \ + else \ + echo "WARN: native/libopus_mlow.so missing — building signaling-only (no live audio)" && \ + go build -o ../bin/gows .; \ + fi # Create a new minimal container to hold the binary FROM debian:bookworm-slim WORKDIR /release -# Copy the compiled binary from the builder stage +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libstdc++6 \ + && rm -rf /var/lib/apt/lists/* + +# Copy the compiled binary and native MLow 
libraries from the builder stage COPY --from=builder /app/bin/gows /release/gows +COPY --from=builder /app/native/ /release/native/ + +ENV LD_LIBRARY_PATH=/release/native diff --git a/Makefile b/Makefile index e0a6244..11fcd8f 100644 --- a/Makefile +++ b/Makefile @@ -24,3 +24,11 @@ tidy: build-proto build: cd src && \ go build -o ../bin/gows . + +build-mlow: + cd src && \ + CGO_ENABLED=1 go build -tags mlow -o ../bin/gows . + +build-mlow-docker: + cd src && \ + CGO_ENABLED=1 GOOS=linux go build -tags mlow -o ../bin/gows . diff --git a/docs/calls.md b/docs/calls.md new file mode 100644 index 0000000..8f608d0 --- /dev/null +++ b/docs/calls.md @@ -0,0 +1,70 @@ +# Chamadas VoIP nativas (GOWS) + +## Visão geral + +O gows-plus integra o stack VoIP do WaCalls (`src/voip/`) no mesmo processo e `whatsmeow.Client` por sessão WAHA. + +- **Signaling**: stanzas `<call>` via `DangerousInternalClient` +- **Mídia**: MLow 16 kHz + SRTP + relay SCTP (Meta) +- **Browser**: bridge WebRTC Opus 48 kHz (`src/callbridge/`) + +## Build + +### Sem áudio (signaling only) + +```bash +cd src && go build -o ../bin/gows . +``` + +### Com áudio (MLow via CGO) + +Requer compilador C e bibliotecas em `native/`: + +```bash +# Windows (MSYS2) +$env:PATH = "C:\msys64\mingw64\bin;$PWD\native;$env:PATH" +$env:CGO_ENABLED = "1" +go build -C src -tags mlow -o ../bin/gows . + +# Linux / Docker +CGO_ENABLED=1 go build -C src -tags mlow -o ../bin/gows . +``` + +### Docker + +A imagem usa `-tags mlow` quando `native/libopus_mlow.so` está presente. Se o `.so` não estiver vendorizado em `native/`, o Dockerfile clona e compila o projeto [opus_mlow](https://github.com/edgardmessias/opus_mlow) durante o build e cria os symlinks `libopus.so.0` e `libopus-0.so`. Para usar um binário próprio, coloque o `.so` em `native/` junto com `libopus-0.so` antes de construir a imagem. + +```bash +docker build -t gows-plus . 
+``` + +## Requisitos de rede (produção) + +| Requisito | Detalhe | +|-----------|---------| +| UDP saída | Relays STUN/SCTP da Meta (porta ~3480) | +| NAT | Container/VM deve conseguir binding UDP | +| Linked device | Sessão pareada como dispositivo vinculado | + +Sem conectividade UDP aos relays, a chamada pode chegar a `ringing` mas não a `active`. + +## Limitações + +- Uma chamada ativa por sessão +- 1:1 voz (vídeo: signaling apenas, sem pipeline VP8) +- Cliente WebRTC necessário para áudio (bots headless precisam de integração futura) + +## Teste E2E + +1. Suba WAHA com engine GOWS e este binário +2. Implemente REST conforme [`integrations/waha/README.md`](../integrations/waha/README.md) +3. Abra [`tools/call-test-client/index.html`](../tools/call-test-client/index.html) no browser +4. Inicie chamada para um número WhatsApp real + +## Variáveis de ambiente + +| Variável | Efeito | +|----------|--------| +| `WAHA_GOWS_DEVICE_HISTORY_SYNC_SUPPORT_CALL_LOG_HISTORY` | Registra capability de call log no device | + +`PatchDeviceProps()` é chamado no startup de `main.go`. diff --git a/integrations/waha/README.md b/integrations/waha/README.md new file mode 100644 index 0000000..7d393f7 --- /dev/null +++ b/integrations/waha/README.md @@ -0,0 +1,80 @@ +# Integração WAHA — Chamadas VoIP (GOWS) + +Este diretório descreve como o [WAHA](https://github.com/devlikeapro/waha) deve consumir os novos RPCs do gows-plus. 
+ +## Novos RPCs gRPC (`MessageService`) + +| RPC | Request | Response | +|-----|---------|----------| +| `StartCall` | `session`, `jid` (telefone ou JID), `video` | `call_id` | +| `AcceptCall` | `session`, `call_id`, `owner_id?` | `Empty` | +| `RejectCall` | `session`, `from`, `id` | `Empty` | +| `EndCall` | `session`, `call_id` | `Empty` | +| `ExchangeCallWebRTC` | `session`, `call_id`, `sdp_offer` | `sdp_answer` | +| `GetCallState` | `session` | `active`, `call_id`, `from`, `direction`, `status`, `event` | + +## REST WAHA (a implementar no NestJS) + +Espelhar as rotas do WaCalls: + +``` +POST /api/{session}/calls → StartCall +POST /api/{session}/calls/{id}/accept → AcceptCall (+ header X-Client-Id → owner_id) +POST /api/{session}/calls/reject → RejectCall (já existe) +DELETE /api/{session}/calls/{id} → EndCall +POST /api/{session}/calls/{id}/webrtc → ExchangeCallWebRTC +GET /api/{session}/calls/state → GetCallState (opcional) +``` + +### Exemplo `calls.controller.ts` + +```typescript +@Post(':session/calls') +start(@Param('session') session: string, @Body() body: { phone?: string; jid?: string; video?: boolean }) { + const jid = body.jid ?? `${normalizePhone(body.phone)}@c.us`; + return this.gows.startCall(session, { jid, video: body.video ?? 
false }); +} + +@Post(':session/calls/:id/webrtc') +webrtc(@Param('session') session: string, @Param('id') id: string, @Body() body: { sdp_offer: string }) { + return this.gows.exchangeCallWebRTC(session, { callId: id, sdpOffer: body.sdp_offer }); +} +``` + +## Webhooks / eventos + +O `EventStream` gRPC emite `CallLifecycleEvent` com o campo `event` já no formato WAHA: + +| `event` (stream) | Quando | +|------------------|--------| +| `call.received` | Chamada entrante (`OnIncoming`) | +| `call.ringing` | Estado ringing | +| `call.connecting` | Aceite / negociação | +| `call.active` | Mídia ativa | +| `call.ended` | Encerramento | + +Payload JSON: + +```json +{ + "event": "call.active", + "id": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + "from": "5511999999999@s.whatsapp.net", + "direction": "inbound", + "status": "active", + "reason": "", + "timestamp": 1710000000000 +} +``` + +No consumidor NestJS, mapear `EventJson.event === "call.active"` (ou tipo `gows.CallLifecycleEvent`) para webhook HTTP `call.active`. + +## Fluxo do cliente de áudio + +1. `POST /calls` ou receber webhook `call.received` +2. Browser/app cria `RTCPeerConnection`, gera SDP offer +3. `POST /calls/{id}/webrtc` com `{ "sdp_offer": "..." }` +4. Aplicar `sdp_answer` no peer connection +5. Áudio bidirecional via Opus 48 kHz + +Ver [`tools/call-test-client/`](../tools/call-test-client/) para teste manual. diff --git a/integrations/waha/gows-calls.types.ts b/integrations/waha/gows-calls.types.ts new file mode 100644 index 0000000..4b2fb39 --- /dev/null +++ b/integrations/waha/gows-calls.types.ts @@ -0,0 +1,40 @@ +/** + * Referência de cliente gRPC para os RPCs de chamada do gows-plus. + * Integrar no GowsGrpcClient existente do WAHA (NestJS). 
+ */ +export interface StartCallRequest { + session: { id: string }; + jid: string; + video?: boolean; +} + +export interface AcceptCallRequest { + session: { id: string }; + call_id: string; + owner_id?: string; +} + +export interface ExchangeCallWebRTCRequest { + session: { id: string }; + call_id: string; + sdp_offer: string; +} + +export interface CallLifecyclePayload { + event: 'call.received' | 'call.ringing' | 'call.connecting' | 'call.active' | 'call.ended' | 'call.rejected' | 'call.status'; // 'call.status' is the fallback the Go side emits for states without a dedicated event + id: string; + from: string; + direction: 'inbound' | 'outbound'; + status: string; + reason?: string; + timestamp: number; +} + +export const GOWS_CALL_RPCS = { + startCall: 'StartCall', + acceptCall: 'AcceptCall', + rejectCall: 'RejectCall', + endCall: 'EndCall', + exchangeCallWebRTC: 'ExchangeCallWebRTC', + getCallState: 'GetCallState', +} as const; diff --git a/native/README.md b/native/README.md new file mode 100644 index 0000000..4b4f0a7 --- /dev/null +++ b/native/README.md @@ -0,0 +1,27 @@ +# Bibliotecas nativas MLow (opus_mlow) + +## Windows (desenvolvimento) + +Incluídas no repositório: + +- `opus_mlow.dll` +- `libopus-0.dll` + +Build: `make build-mlow` ou `go build -tags mlow` com `CGO_ENABLED=1` e MinGW no PATH. + +## Linux (Docker / produção) + +O repositório **não inclui** `.so` por padrão (binários são específicos de plataforma). + +Para build Docker com áudio: + +1. Compile ou baixe `libopus_mlow.so` e `libopus-0.so` para linux/amd64 (ou arm64) +2. Coloque em `native/`: + - `native/libopus_mlow.so` + - `native/libopus.so.0` → symlink para `libopus_mlow.so` + - `native/libopus-0.so` → symlink para `libopus_mlow.so` +3. `docker build -t gows-plus .` + +Sem os `.so`, use build sem tag `mlow` — chamadas funcionam em modo **signaling-only** (sem áudio bidirecional). 
+ +Fonte: [github.com/edgardmessias/opus_mlow](https://github.com/edgardmessias/opus_mlow) diff --git a/native/libopus-0.dll b/native/libopus-0.dll new file mode 100644 index 0000000..9201059 Binary files /dev/null and b/native/libopus-0.dll differ diff --git a/native/opus_mlow.dll b/native/opus_mlow.dll new file mode 100644 index 0000000..9201059 Binary files /dev/null and b/native/opus_mlow.dll differ diff --git a/proto/gows.proto b/proto/gows.proto index 3b241aa..35a2db7 100644 --- a/proto/gows.proto +++ b/proto/gows.proto @@ -174,6 +174,11 @@ service MessageService { // Calls // rpc RejectCall(RejectCallRequest) returns (Empty); + rpc StartCall(StartCallRequest) returns (StartCallResponse); + rpc AcceptCall(AcceptCallRequest) returns (Empty); + rpc EndCall(EndCallRequest) returns (Empty); + rpc ExchangeCallWebRTC(ExchangeCallWebRTCRequest) returns (ExchangeCallWebRTCResponse); + rpc GetCallState(Session) returns (CallStateResponse); // // Storage @@ -783,6 +788,46 @@ message RejectCallRequest { string id = 3; } +message StartCallRequest { + Session session = 1; + string jid = 2; + bool video = 3; +} + +message StartCallResponse { + string call_id = 1; +} + +message AcceptCallRequest { + Session session = 1; + string call_id = 2; + optional string owner_id = 3; +} + +message EndCallRequest { + Session session = 1; + string call_id = 2; +} + +message ExchangeCallWebRTCRequest { + Session session = 1; + string call_id = 2; + string sdp_offer = 3; +} + +message ExchangeCallWebRTCResponse { + string sdp_answer = 1; +} + +message CallStateResponse { + bool active = 1; + string call_id = 2; + string from = 3; + string direction = 4; + string status = 5; + string event = 6; +} + // // Poll // diff --git a/scripts/wsl-build.sh b/scripts/wsl-build.sh new file mode 100644 index 0000000..b3e2906 --- /dev/null +++ b/scripts/wsl-build.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail + +GO_ROOT="$HOME/.local/go" +export PATH="$GO_ROOT/bin:$HOME/go/bin:$PATH" + +if 
[ ! -x "$GO_ROOT/bin/go" ]; then + mkdir -p "$HOME/.local" + curl -fsSL https://go.dev/dl/go1.26.2.linux-amd64.tar.gz -o /tmp/go.tar.gz + rm -rf "$GO_ROOT" + tar -C "$HOME/.local" -xzf /tmp/go.tar.gz +fi + +if ! pkg-config --exists vips 2>/dev/null; then + sudo apt-get update -qq + sudo apt-get install -y -qq build-essential libvips-dev protobuf-compiler pkg-config +fi + +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" +make build-proto +cd src +go mod tidy +go build -o ../bin/gows . +go test ./voip/... diff --git a/scripts/wsl-compile.sh b/scripts/wsl-compile.sh new file mode 100644 index 0000000..4d13918 --- /dev/null +++ b/scripts/wsl-compile.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail +export PATH="$HOME/.local/go/bin:$HOME/go/bin:$PATH" +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT/src" +go build -o ../bin/gows . diff --git a/scripts/wsl-proto.sh b/scripts/wsl-proto.sh new file mode 100644 index 0000000..ab11c16 --- /dev/null +++ b/scripts/wsl-proto.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +export PATH="$HOME/.local/go/bin:$HOME/go/bin:$PATH" +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" +make build-proto +grep -c StartCall src/proto/gows_grpc.pb.go diff --git a/scripts/wsl-test-voip.sh b/scripts/wsl-test-voip.sh new file mode 100644 index 0000000..2abcfad --- /dev/null +++ b/scripts/wsl-test-voip.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -euo pipefail + +GO_ROOT="$HOME/.local/go" +export PATH="$GO_ROOT/bin:$HOME/go/bin:$PATH" + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" +make build-proto +cd src +go test ./voip/... 
diff --git a/src/callbridge/bridge.go b/src/callbridge/bridge.go new file mode 100644 index 0000000..7eacf25 --- /dev/null +++ b/src/callbridge/bridge.go @@ -0,0 +1,96 @@ +package callbridge + +import ( + "log/slog" + "time" + + "github.com/pion/webrtc/v4" + "github.com/pion/webrtc/v4/pkg/media" +) + +type Bridge struct { + pc *webrtc.PeerConnection + localTrack *webrtc.TrackLocalStaticSample + log *slog.Logger + + OnBrowserRTP func(payload []byte) +} + +func NewBridge(offerSDP string, log *slog.Logger) (*Bridge, string, error) { + pc, err := webrtc.NewPeerConnection(webrtc.Configuration{}) + if err != nil { + return nil, "", err + } + + br := &Bridge{pc: pc, log: log} + + localTrack, err := webrtc.NewTrackLocalStaticSample( + webrtc.RTPCodecCapability{ + MimeType: webrtc.MimeTypeOpus, + ClockRate: 48000, + Channels: 2, + SDPFmtpLine: "minptime=10;useinbandfec=1", + }, + "audio", "gows", + ) + if err != nil { + pc.Close() + return nil, "", err + } + br.localTrack = localTrack + + pc.OnTrack(func(tr *webrtc.TrackRemote, _ *webrtc.RTPReceiver) { + go func() { + for { + pkt, _, err := tr.ReadRTP() + if err != nil { + return + } + if br.OnBrowserRTP != nil && len(pkt.Payload) > 0 { + br.OnBrowserRTP(pkt.Payload) + } + } + }() + }) + + pc.OnICEConnectionStateChange(func(s webrtc.ICEConnectionState) { + if log != nil { + log.Debug("browser ice state", "state", s.String()) + } + }) + + if err := pc.SetRemoteDescription(webrtc.SessionDescription{Type: webrtc.SDPTypeOffer, SDP: offerSDP}); err != nil { + pc.Close() + return nil, "", err + } + if _, err := pc.AddTrack(localTrack); err != nil { + pc.Close() + return nil, "", err + } + answer, err := pc.CreateAnswer(nil) + if err != nil { + pc.Close() + return nil, "", err + } + gatherComplete := webrtc.GatheringCompletePromise(pc) + if err := pc.SetLocalDescription(answer); err != nil { + pc.Close() + return nil, "", err + } + <-gatherComplete + + return br, pc.LocalDescription().SDP, nil +} + +func (b *Bridge) 
WriteOpus(payload []byte, dur time.Duration) error { + if b.localTrack == nil { + return nil + } + return b.localTrack.WriteSample(media.Sample{Data: payload, Duration: dur}) +} + +func (b *Bridge) Close() { + if b.pc != nil { + _ = b.pc.Close() + } +} diff --git a/src/go.mod b/src/go.mod index 1119a12..90d020c 100644 --- a/src/go.mod +++ b/src/go.mod @@ -15,6 +15,7 @@ require ( github.com/jackc/pgx/v5 v5.7.5 github.com/jmoiron/sqlx v1.4.0 github.com/mattn/go-sqlite3 v1.14.42 + github.com/pion/webrtc/v4 v4.2.15 github.com/stretchr/testify v1.11.1 github.com/u2takey/ffmpeg-go v0.5.0 go.mau.fi/whatsmeow v0.0.0-20250204095649-a75587ab11d7 // find "replace" for the project below with a fork project @@ -48,10 +49,26 @@ require ( github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/petermattis/goid v0.0.0-20260330135022-df67b199bc81 // indirect + github.com/pion/datachannel v1.6.0 // indirect + github.com/pion/dtls/v3 v3.1.4 // indirect + github.com/pion/ice/v4 v4.2.7 // indirect + github.com/pion/interceptor v0.1.45 // indirect + github.com/pion/logging v0.2.4 // indirect + github.com/pion/mdns/v2 v2.1.0 // indirect + github.com/pion/randutil v0.1.0 // indirect + github.com/pion/rtcp v1.2.16 // indirect + github.com/pion/rtp v1.10.2 // indirect + github.com/pion/sctp v1.10.0 // indirect + github.com/pion/sdp/v3 v3.0.18 // indirect + github.com/pion/srtp/v3 v3.0.11 // indirect + github.com/pion/stun/v3 v3.1.5 // indirect + github.com/pion/transport/v4 v4.0.2 // indirect + github.com/pion/turn/v5 v5.0.9 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rs/zerolog v1.35.0 // indirect github.com/u2takey/go-utils v0.3.1 // indirect github.com/vektah/gqlparser/v2 v2.5.27 // indirect + github.com/wlynxg/anet v0.0.5 // indirect go.mau.fi/libsignal v0.2.1 // indirect go.uber.org/atomic v1.11.0 // indirect golang.org/x/crypto v0.50.0 // indirect @@ -60,6 +77,7 @@ require ( golang.org/x/sync v0.20.0 // 
indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.36.0 // indirect + golang.org/x/time v0.14.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/src/go.sum b/src/go.sum index be96acb..a4742b2 100644 --- a/src/go.sum +++ b/src/go.sum @@ -130,6 +130,40 @@ github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2sz github.com/panjf2000/ants/v2 v2.4.2/go.mod h1:f6F0NZVFsGCp5A7QW/Zj/m92atWwOkY0OIhFxRNFr4A= github.com/petermattis/goid v0.0.0-20260330135022-df67b199bc81 h1:WDsQxOJDy0N1VRAjXLpi8sCEZRSGarLWQevDxpTBRrM= github.com/petermattis/goid v0.0.0-20260330135022-df67b199bc81/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= +github.com/pion/datachannel v1.6.0 h1:XecBlj+cvsxhAMZWFfFcPyUaDZtd7IJvrXqlXD/53i0= +github.com/pion/datachannel v1.6.0/go.mod h1:ur+wzYF8mWdC+Mkis5Thosk+u/VOL287apDNEbFpsIk= +github.com/pion/dtls/v3 v3.1.4 h1:QhvtMflMfu9Kf0RcDC5BJBle4caPskByrKQR6uuYqpY= +github.com/pion/dtls/v3 v3.1.4/go.mod h1:cr/qotLISUw/9C1m83ZPNZtj9WnXkYLpfCptPqbkInc= +github.com/pion/ice/v4 v4.2.7 h1:zDEbC6MiEdhQpF8TxBOTws+NU6ZgGpveHrQq4Lc1kao= +github.com/pion/ice/v4 v4.2.7/go.mod h1:9SNPaq0c7El/ki8leJzyCkK10zsskprR3zTNbO3monY= +github.com/pion/interceptor v0.1.45 h1:6PUo/5829bIfRFIPPJQzuDn8EjxRTSB/CSD7QVCOaqo= +github.com/pion/interceptor v0.1.45/go.mod h1:gNDYM/uFKcLe/B3gS2/7+aw6z+RDiMy2qKTnF1LO31w= +github.com/pion/logging v0.2.4 h1:tTew+7cmQ+Mc1pTBLKH2puKsOvhm32dROumOZ655zB8= +github.com/pion/logging v0.2.4/go.mod h1:DffhXTKYdNZU+KtJ5pyQDjvOAh/GsNSyv1lbkFbe3so= +github.com/pion/mdns/v2 v2.1.0 h1:3IJ9+Xio6tWYjhN6WwuY142P/1jA0D5ERaIqawg/fOY= +github.com/pion/mdns/v2 v2.1.0/go.mod h1:pcez23GdynwcfRU1977qKU0mDxSeucttSHbCSfFOd9A= +github.com/pion/randutil v0.1.0 h1:CFG1UdESneORglEsnimhUjf33Rwjubwj6xfiOXBa3mA= +github.com/pion/randutil v0.1.0/go.mod h1:XcJrSMMbbMRhASFVOlj/5hQial/Y8oH/HVo7TBZq+j8= +github.com/pion/rtcp v1.2.16 
h1:fk1B1dNW4hsI78XUCljZJlC4kZOPk67mNRuQ0fcEkSo= +github.com/pion/rtcp v1.2.16/go.mod h1:/as7VKfYbs5NIb4h6muQ35kQF/J0ZVNz2Z3xKoCBYOo= +github.com/pion/rtp v1.10.2 h1:l+f6tTDcAH6xwepaAoW791ddhuYsJlqRATOzirO04Mo= +github.com/pion/rtp v1.10.2/go.mod h1:Au8fc6cEByy8RLTwKTQTEeQqDB/SJDxwL4mZuxYA5Pk= +github.com/pion/sctp v1.10.0 h1:qeoD6swF/2M5bYRcAGayqSbTKX3m4AW29CiQxG1+Pfg= +github.com/pion/sctp v1.10.0/go.mod h1:N20Dq6LY+JvJDAh9VVh1JELngb2rQ8dPgds5yBWiPgw= +github.com/pion/sdp/v3 v3.0.18 h1:l0bAXazKHpepazVdp+tPYnrsy9dfh7ZbT8DxesH5ZnI= +github.com/pion/sdp/v3 v3.0.18/go.mod h1:ZREGo6A9ZygQ9XkqAj5xYCQtQpif0i6Pa81HOiAdqQ8= +github.com/pion/srtp/v3 v3.0.11 h1:GiESUr54/K4UuPigfq/CvWUed80JenQAHXn0C2MQQIQ= +github.com/pion/srtp/v3 v3.0.11/go.mod h1:EeZOi/sd6glM1EXapg051gdNWO9yWT1YSsgQ4SlJkns= +github.com/pion/stun/v3 v3.1.5 h1:Y1FHlhaI6+4UoC5i/zQf4F7JvdZtB24/05oyy/GF1x8= +github.com/pion/stun/v3 v3.1.5/go.mod h1:zRUghXSQU32Lx5orJsz3uYMkIihweXb3mu5gIns02fs= +github.com/pion/transport/v3 v3.1.1 h1:Tr684+fnnKlhPceU+ICdrw6KKkTms+5qHMgw6bIkYOM= +github.com/pion/transport/v3 v3.1.1/go.mod h1:+c2eewC5WJQHiAA46fkMMzoYZSuGzA/7E2FPrOYHctQ= +github.com/pion/transport/v4 v4.0.2 h1:ifYlPqNwsy6aKQ9y8yzxXlHae5431ZrH2avkD/Rn6Tk= +github.com/pion/transport/v4 v4.0.2/go.mod h1:06hFI+jCFcok2X2MekVufNZ/uzNZXivGBPfviSVcjgM= +github.com/pion/turn/v5 v5.0.9 h1:zNeBfRyzGn7MPyUTvmvxeltLEjlFdSLPT1tlakoaOXM= +github.com/pion/turn/v5 v5.0.9/go.mod h1:u3XjBqy2Z4+NhCUpDoOSsNuQDrPLvKStlCGWk6sTQ1E= +github.com/pion/webrtc/v4 v4.2.15 h1:Ir/MauNFCfg+kgyBYPQLiGdVWFlzEcLxqtuzAkYkky0= +github.com/pion/webrtc/v4 v4.2.15/go.mod h1:CPTcyLfIzC4scOkQ4UY4pj6WvbUGhcNLIpK28cP5h6M= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -158,6 +192,8 @@ github.com/u2takey/go-utils v0.3.1 h1:TaQTgmEZZeDHQFYfd+AdUT1cT4QJgJn/XVPELhHw4y 
github.com/u2takey/go-utils v0.3.1/go.mod h1:6e+v5vEZ/6gu12w/DC2ixZdZtCrNokVxD0JUklcqdCs= github.com/vektah/gqlparser/v2 v2.5.27 h1:RHPD3JOplpk5mP5JGX8RKZkt2/Vwj/PZv0HxTdwFp0s= github.com/vektah/gqlparser/v2 v2.5.27/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0uYCP/RI0gIeo= +github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= +github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.mau.fi/libsignal v0.2.1 h1:vRZG4EzTn70XY6Oh/pVKrQGuMHBkAWlGRC22/85m9L0= @@ -218,6 +254,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= diff --git a/src/gows/calls.go b/src/gows/calls.go new file mode 100644 index 0000000..da20352 --- /dev/null +++ b/src/gows/calls.go @@ -0,0 +1,280 @@ +package gows + +import ( + "context" + "log/slog" + "time" + + "github.com/devlikeapro/gows/callbridge" + "github.com/devlikeapro/gows/voip/call" + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/media" + voipwa "github.com/devlikeapro/gows/voip/wa" + waBinary 
"go.mau.fi/whatsmeow/binary" + "go.mau.fi/whatsmeow/types" + "go.mau.fi/whatsmeow/types/events" +) + +// CallLifecycleEvent is emitted to WAHA event stream with a stable webhook contract. +type CallLifecycleEvent struct { + Event string `json:"event"` + ID string `json:"id"` + From string `json:"from"` + Direction string `json:"direction"` + Status string `json:"status"` + Reason string `json:"reason,omitempty"` + Timestamp int64 `json:"timestamp"` +} + +type callBridgeState struct { + bridge *callbridge.Bridge + browserOpus media.Codec +} + +// initCallManager lazily creates the session's call manager on first use and +// wires its callbacks; later calls return immediately. +// NOTE(review): the nil check is not synchronized — confirm callers never race here. +func (gows *GoWS) initCallManager() { + if gows.callManager != nil { + return + } + gows.callManager = call.NewCallManager(voipwa.NewSocket(gows.Client), slog.Default()) + gows.wireCallManager() +} + +// wireCallManager installs the call manager callbacks that turn call state +// into CallLifecycleEvent payloads and forward peer audio to the browser bridge. 
+func (gows *GoWS) wireCallManager() { + cm := gows.callManager + cm.OnIncoming = func(c *call.CallInfo) { + gows.emitCallLifecycle("call.received", c, "") + } + cm.OnStateChange = func(c *call.CallInfo) { + if c.IsEnded() { + return + } + eventName := callStatusToEvent(c) + gows.emitCallLifecycle(eventName, c, "") + } + cm.OnEnded = func(c *call.CallInfo) { + reason := string(c.StateData.EndReason) + if reason == "" { + reason = string(core.EndCallReasonUnknown) + } + eventName := "call.ended" + if c.StateData.EndReason == core.EndCallReasonDeclined { + eventName = "call.rejected" + } + gows.emitCallLifecycle(eventName, c, reason) + gows.closeCallBridge() + // The call is over: forget its owner claim so callOwners does not grow + // by one entry for every accepted call over the life of the session. + gows.callOwnersMu.Lock() + delete(gows.callOwners, c.CallID) + gows.callOwnersMu.Unlock() + } + cm.OnPeerAudio = func(pcm16 []float32) { + gows.callBridgeMu.Lock() + br := gows.callBridge.bridge + oc := gows.callBridge.browserOpus + gows.callBridgeMu.Unlock() + if br == nil || oc == nil { + return + } + pcm48 := media.Upsample16to48(pcm16) + opus, err := oc.Encode(pcm48) + if err != nil || len(opus) == 0 { + return + } + _ = br.WriteOpus(opus, 60*time.Millisecond) + } +} + +func callStatusToEvent(c *call.CallInfo) string { + switch c.StateData.State { + case core.CallStateRinging, core.CallStateIncomingRinging: + return "call.ringing" + case core.CallStateConnecting: 
+ return "call.connecting" + case core.CallStateActive: + return "call.active" + default: + return "call.status" + } +} + +func (gows *GoWS) emitCallLifecycle(eventName string, c *call.CallInfo, reason string) { + dir := "outbound" + if c.Direction == core.CallDirectionIncoming { + dir = "inbound" + } + gows.emitEvent(&CallLifecycleEvent{ + Event: eventName, + ID: c.CallID, + From: c.PeerJid, + Direction: dir, + Status: string(c.StateData.State), + Reason: reason, + Timestamp: time.Now().UnixMilli(), + }) +} + +func wrapCallNode(from types.JID, inner *waBinary.Node) *waBinary.Node { + content := []waBinary.Node{} + if inner != nil { + content = append(content, *inner) + } + return &waBinary.Node{ + Tag: "call", + Attrs: waBinary.Attrs{"from": from}, + Content: content, + } +} + +func (gows *GoWS) routeCallEvent(event interface{}) { + gows.initCallManager() + ctx := gows.Context + if ctx == nil { + ctx = context.Background() + } + switch evt := event.(type) { + case *events.CallOffer: + gows.callManager.HandleCallOffer(ctx, wrapCallNode(evt.From, evt.Data), evt.From) + case *events.CallAccept: + gows.callManager.HandleCallAccept(ctx, wrapCallNode(evt.From, evt.Data), evt.From) + case *events.CallTransport: + gows.callManager.HandleCallTransport(ctx, wrapCallNode(evt.From, evt.Data), evt.From) + case *events.CallTerminate: + gows.callManager.HandleCallTerminate(wrapCallNode(evt.From, evt.Data)) + case *events.CallReject: + gows.callManager.HandleCallTerminate(wrapCallNode(evt.From, evt.Data)) + } +} + +func (gows *GoWS) StartVoipCall(ctx context.Context, peer types.JID, isVideo bool) (string, error) { + gows.initCallManager() + return gows.callManager.StartCall(ctx, peer, isVideo) +} + +func (gows *GoWS) AcceptVoipCall(ctx context.Context, callID, ownerID string) error { + gows.initCallManager() + if ownerID != "" { + if !gows.claimCallOwner(callID, ownerID) { + return &call.CallError{"claimed by another client"} + } + } + return gows.callManager.AcceptCall(ctx, 
callID) +} + +func (gows *GoWS) RejectVoipCall(ctx context.Context, from types.JID, callID string) error { + gows.initCallManager() + if cm := gows.callManager.CurrentCall(); cm != nil && cm.CallID == callID { + return gows.callManager.RejectCall(ctx, callID, core.EndCallReasonDeclined) + } + return gows.Client.RejectCall(ctx, from, callID) +} + +func (gows *GoWS) EndVoipCall(ctx context.Context, callID string) error { + gows.initCallManager() + cm := gows.callManager.CurrentCall() + if cm == nil || cm.CallID != callID { + return &call.CallError{"no call with id " + callID} + } + err := gows.callManager.EndCall(ctx, core.EndCallReasonUserEnded) + gows.closeCallBridge() + return err +} + +func (gows *GoWS) ExchangeCallWebRTC(offerSDP string) (string, error) { + gows.initCallManager() + bridge, answer, err := callbridge.NewBridge(offerSDP, slog.Default()) + if err != nil { + return "", err + } + browserOpus, ocErr := media.NewOpusCodec(48000, 960) + if ocErr != nil { + gows.Log.Warnf("browser Opus codec unavailable — call audio disabled: %v", ocErr) + browserOpus = nil + } + bridge.OnBrowserRTP = func(payload []byte) { + if browserOpus == nil { + return + } + pcm48, decErr := browserOpus.Decode(payload) + if decErr != nil { + return + } + gows.callManager.FeedCapturedPCM(media.Downsample48to16(pcm48)) + } + gows.setCallBridge(bridge, browserOpus) + return answer, nil +} + +func (gows *GoWS) GetCallState() *CallLifecycleEvent { + gows.initCallManager() + cm := gows.callManager.CurrentCall() + if cm == nil { + return nil + } + dir := "outbound" + if cm.Direction == core.CallDirectionIncoming { + dir = "inbound" + } + return &CallLifecycleEvent{ + Event: callStatusToEvent(cm), + ID: cm.CallID, + From: cm.PeerJid, + Direction: dir, + Status: string(cm.StateData.State), + Timestamp: time.Now().UnixMilli(), + } +} + +func (gows *GoWS) setCallBridge(b *callbridge.Bridge, oc media.Codec) { + gows.callBridgeMu.Lock() + old := gows.callBridge + gows.callBridge = 
callBridgeState{bridge: b, browserOpus: oc} + gows.callBridgeMu.Unlock() + if old.bridge != nil { + old.bridge.Close() + } + if old.browserOpus != nil { + old.browserOpus.Close() + } +} + +func (gows *GoWS) closeCallBridge() { + gows.callBridgeMu.Lock() + old := gows.callBridge + gows.callBridge = callBridgeState{} + gows.callBridgeMu.Unlock() + if old.bridge != nil { + old.bridge.Close() + } + if old.browserOpus != nil { + old.browserOpus.Close() + } +} + +func (gows *GoWS) claimCallOwner(callID, ownerID string) bool { + gows.callOwnersMu.Lock() + defer gows.callOwnersMu.Unlock() + if existing, ok := gows.callOwners[callID]; ok && existing != ownerID { + return false + } + gows.callOwners[callID] = ownerID + return true +} + +func (gows *GoWS) cleanupCalls() { + if gows.callManager != nil { + cm := gows.callManager.CurrentCall() + if cm != nil && !cm.IsEnded() { + _ = gows.callManager.EndCall(context.Background(), core.EndCallReasonUserEnded) + } + } + gows.closeCallBridge() + gows.callOwnersMu.Lock() + gows.callOwners = make(map[string]string) + gows.callOwnersMu.Unlock() +} diff --git a/src/gows/gows.go b/src/gows/gows.go index d811f0c..f888f23 100644 --- a/src/gows/gows.go +++ b/src/gows/gows.go @@ -8,6 +8,7 @@ import ( "github.com/devlikeapro/gows/storage" "github.com/devlikeapro/gows/storage/sqlstorage" + "github.com/devlikeapro/gows/voip/call" "github.com/jellydator/ttlcache/v3" _ "github.com/jackc/pgx/v5" // Import the Postgres driver _ "github.com/mattn/go-sqlite3" // Import the SQLite driver @@ -36,6 +37,12 @@ type GoWS struct { // This lets subsequent download attempts reuse the fresh DirectPath without // sending another receipt, even if the first waiter already timed out. 
mediaRetryEvents *ttlcache.Cache[types.MessageID, *events.MediaRetry] + + callManager *call.CallManager + callBridgeMu sync.Mutex + callBridge callBridgeState + callOwnersMu sync.Mutex + callOwners map[string]string } func (gows *GoWS) reissueEvent(event interface{}) { @@ -73,6 +80,15 @@ func (gows *GoWS) reissueEvent(event interface{}) { data = event } + case *CallLifecycleEvent: + data = event + + case *events.CallOffer, *events.CallReject, *events.CallAccept, + *events.CallTerminate, *events.CallTransport: + // Raw whatsmeow call events are still forwarded for debugging; + // WAHA should prefer CallLifecycleEvent payloads. + data = event + case *events.MediaRetry: evt := event.(*events.MediaRetry) // Always cache so that callers whose 60 s wait already expired can still @@ -96,6 +112,7 @@ func (gows *GoWS) reissueEvent(event interface{}) { func (gows *GoWS) handleEvent(event interface{}) { + gows.routeCallEvent(event) go gows.reissueEvent(event) go gows.storageEventHandler.handleEvent(event) } @@ -144,6 +161,8 @@ func (gows *GoWS) Stop() { gows.RemoveEventHandler(gows.eventHandlerID) } + gows.cleanupCalls() + gows.Disconnect() if gows.mediaRetryEvents != nil { gows.mediaRetryEvents.Stop() @@ -215,6 +234,11 @@ func BuildSession( 0, sync.Map{}, retryEventsCache, + nil, + sync.Mutex{}, + callBridgeState{}, + sync.Mutex{}, + make(map[string]string), } if storageCfg == (StorageConfig{}) { storageCfg = DefaultStorageConfig() diff --git a/src/main.go b/src/main.go index 0bcfeb6..32d816e 100644 --- a/src/main.go +++ b/src/main.go @@ -117,6 +117,7 @@ func main() { clientCfg := getClientConfig() log.Infof("Using device name: '%s', browser name: '%s'", clientCfg.DeviceName, clientCfg.BrowserName) gows.SetDeviceAndBrowser(clientCfg.DeviceName, clientCfg.BrowserName) + PatchDeviceProps(gows.GetDeviceProps()) // Build the server grpcServer := buildGrpcServer(log) diff --git a/src/server/calls.go b/src/server/calls.go index a44f5e4..aa3808e 100644 --- a/src/server/calls.go +++ 
b/src/server/calls.go @@ -3,6 +3,7 @@ package server import ( "context" "fmt" + "strings" "github.com/devlikeapro/gows/proto" "go.mau.fi/whatsmeow/types" @@ -17,8 +18,98 @@ func (s *Server) RejectCall(ctx context.Context, req *__.RejectCallRequest) (*__ if err != nil { return nil, fmt.Errorf("parse from JID '%s': %w", req.GetFrom(), err) } - if err = cli.RejectCall(ctx, from, req.GetId()); err != nil { + if err = cli.RejectVoipCall(ctx, from, req.GetId()); err != nil { return nil, err } return &__.Empty{}, nil } + +func (s *Server) StartCall(ctx context.Context, req *__.StartCallRequest) (*__.StartCallResponse, error) { + cli, err := s.Sm.Get(req.GetSession().GetId()) + if err != nil { + return nil, err + } + jidStr := strings.TrimSpace(req.GetJid()) + if jidStr == "" { + return nil, fmt.Errorf("jid is required") + } + peer, err := types.ParseJID(jidStr) + if err != nil { + peer = types.NewJID(normalizePhone(jidStr), types.DefaultUserServer) + } + callID, err := cli.StartVoipCall(ctx, peer, req.GetVideo()) + if err != nil { + return nil, err + } + return &__.StartCallResponse{CallId: callID}, nil +} + +func (s *Server) AcceptCall(ctx context.Context, req *__.AcceptCallRequest) (*__.Empty, error) { + cli, err := s.Sm.Get(req.GetSession().GetId()) + if err != nil { + return nil, err + } + ownerID := "" + if req.OwnerId != nil { + ownerID = req.GetOwnerId() + } + if err = cli.AcceptVoipCall(ctx, req.GetCallId(), ownerID); err != nil { + return nil, err + } + return &__.Empty{}, nil +} + +func (s *Server) EndCall(ctx context.Context, req *__.EndCallRequest) (*__.Empty, error) { + cli, err := s.Sm.Get(req.GetSession().GetId()) + if err != nil { + return nil, err + } + if err = cli.EndVoipCall(ctx, req.GetCallId()); err != nil { + return nil, err + } + return &__.Empty{}, nil +} + +func (s *Server) ExchangeCallWebRTC(ctx context.Context, req *__.ExchangeCallWebRTCRequest) (*__.ExchangeCallWebRTCResponse, error) { + cli, err := s.Sm.Get(req.GetSession().GetId()) + if err 
!= nil { + return nil, err + } + if strings.TrimSpace(req.GetSdpOffer()) == "" { + return nil, fmt.Errorf("sdp_offer is required") + } + cm := cli.GetCallState() + if cm != nil && req.GetCallId() != "" && cm.ID != req.GetCallId() { + return nil, fmt.Errorf("call_id mismatch: active call is %s", cm.ID) + } + answer, err := cli.ExchangeCallWebRTC(req.GetSdpOffer()) + if err != nil { + return nil, err + } + return &__.ExchangeCallWebRTCResponse{SdpAnswer: answer}, nil +} + +func (s *Server) GetCallState(ctx context.Context, req *__.Session) (*__.CallStateResponse, error) { + cli, err := s.Sm.Get(req.GetId()) + if err != nil { + return nil, err + } + state := cli.GetCallState() + if state == nil { + return &__.CallStateResponse{Active: false}, nil + } + return &__.CallStateResponse{ + Active: true, + CallId: state.ID, + From: state.From, + Direction: state.Direction, + Status: state.Status, + Event: state.Event, + }, nil +} + +func normalizePhone(phone string) string { + phone = strings.TrimSpace(phone) + phone = strings.TrimPrefix(phone, "+") + return phone +} diff --git a/src/server/events.go b/src/server/events.go index 11adab1..52301a6 100644 --- a/src/server/events.go +++ b/src/server/events.go @@ -2,6 +2,8 @@ package server import ( "encoding/json" + + "github.com/devlikeapro/gows/gows" "github.com/devlikeapro/gows/proto" "github.com/google/uuid" "google.golang.org/grpc" @@ -46,15 +48,11 @@ func (s *Server) StreamEvents(req *__.StreamEventsRequest, stream grpc.ServerStr if event == nil { continue } - // Remove * at the start if it's * - eventType := reflect.TypeOf(event).String() - eventType = strings.TrimPrefix(eventType, "*") - if _, ok := exclude[eventType]; ok { + eventType, jsonString := s.formatStreamEvent(event) + if jsonString == "" { continue } - - jsonString := s.safeMarshal(event) - if jsonString == "" { + if _, ok := exclude[eventType]; ok { continue } @@ -114,3 +112,12 @@ func (s *Server) removeListener(session string, id uuid.UUID) { } 
close(listener) } + +func (s *Server) formatStreamEvent(event interface{}) (eventType string, jsonString string) { + if ce, ok := event.(*gows.CallLifecycleEvent); ok { + return ce.Event, s.safeMarshal(ce) + } + eventType = reflect.TypeOf(event).String() + eventType = strings.TrimPrefix(eventType, "*") + return eventType, s.safeMarshal(event) +} diff --git a/src/voip/call/callmanager.go b/src/voip/call/callmanager.go new file mode 100644 index 0000000..58f5f8a --- /dev/null +++ b/src/voip/call/callmanager.go @@ -0,0 +1,217 @@ +package call + +import ( + "context" + "log/slog" + "sync" + "time" + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/media" + "github.com/devlikeapro/gows/voip/signaling" + "github.com/devlikeapro/gows/voip/transport" + "github.com/devlikeapro/gows/voip/wanode" + + "go.mau.fi/whatsmeow/types" +) + +type CallManager struct { + sock core.VoipSocket + log *slog.Logger + + mu sync.Mutex + currentCall *CallInfo + + rtpSession *media.RtpSession + srtpSession *media.SrtpSession + codec media.Codec + relay RelayTransport + + selfSsrc uint32 + peerSsrcs []uint32 + actualPeerSet bool + + firstPacketSent bool + initialTransportSent bool + outgoingPreacceptSent bool + acceptedByJid string + debeEnabled bool + + encodeBuf []float32 + encodeBufPos int + + lastCaptureAt time.Time + keepaliveStop chan struct{} + + OnStateChange func(*CallInfo) + OnIncoming func(*CallInfo) + OnEnded func(*CallInfo) + OnPeerAudio func([]float32) +} + +func NewCallManager(sock core.VoipSocket, log *slog.Logger) *CallManager { + if log == nil { + log = slog.Default() + } + m := &CallManager{ + sock: sock, + log: log, + debeEnabled: true, + } + relay := transport.NewSctpRelayManager(log) + relay.SetOnConnected(func(ip string, port int) { m.onRelayConnected() }) + relay.SetOnReceive(func(data []byte) { m.onRelayData(data) }) + m.relay = relay + return m +} + +func (m *CallManager) CurrentCall() *CallInfo { + m.mu.Lock() + defer m.mu.Unlock() + return 
m.currentCall
+}
+
+// emitState forwards the current call to OnStateChange when both are set.
+// It reads m.currentCall without locking; callers are expected to hold m.mu
+// (the call sites visible in this package do), which means the callback runs
+// under that lock and should not call back into methods that take m.mu.
+func (m *CallManager) emitState() {
+	if m.OnStateChange != nil && m.currentCall != nil {
+		m.OnStateChange(m.currentCall)
+	}
+}
+
+// StartCall places an outgoing call to peerJid and returns the generated call ID.
+//
+// It registers the call as current, prepares the RTP session and codec, and
+// then sends the offer stanza. A *CallError is returned when another call has
+// not ended yet; errors from building or sending the offer are returned as-is.
+// When the offer cannot be built or sent, the half-initialised call is rolled
+// back so the manager is not left blocked on a call that never started.
+func (m *CallManager) StartCall(ctx context.Context, peerJid types.JID, isVideo bool) (string, error) {
+	m.mu.Lock()
+	if m.currentCall != nil && !m.currentCall.IsEnded() {
+		m.mu.Unlock()
+		return "", &CallError{"a call is already in progress"}
+	}
+
+	callID := signaling.GenerateCallID()
+	mediaType := core.CallMediaTypeAudio
+	if isVideo {
+		mediaType = core.CallMediaTypeVideo
+	}
+	// Prefer our LID identity and fall back to the phone-number JID.
+	creator := m.sock.OwnLID()
+	if creator.IsEmpty() {
+		creator = m.sock.OwnPN()
+	}
+	resolved := m.sock.ResolveLIDForPN(ctx, peerJid)
+
+	call := NewOutgoingCall(callID, resolved.String(), creator.String(), mediaType)
+	callKey := media.GenerateCallKey()
+	call.EncryptionKey = callKey
+	m.currentCall = call
+	m.initialTransportSent = false
+	m.outgoingPreacceptSent = false
+
+	selfJid := creator.String()
+	m.selfSsrc = media.GenerateSecureSsrc(callID, selfJid, 0)
+	m.rtpSession = media.NewWhatsAppOpusSession(m.selfSsrc)
+	m.peerSsrcs = []uint32{media.GenerateSecureSsrc(callID, resolved.String(), 0)}
+	m.initCodec()
+	m.mu.Unlock()
+
+	offer, err := signaling.BuildOfferStanza(ctx, m.sock, callID, callKey, resolved, isVideo)
+	if err != nil {
+		m.discardFailedCall(call)
+		return "", err
+	}
+	ackNode, err := m.sock.Query(ctx, offer)
+	if err != nil {
+		m.discardFailedCall(call)
+		return "", err
+	}
+
+	m.mu.Lock()
+	// The lock was released while the offer was in flight, so only touch the
+	// state machine if this call is still the current one.
+	if m.currentCall == call {
+		// Fails only when the call already left the initiating state (for
+		// example a terminate raced with the offer); there is nothing to undo.
+		_ = call.ApplyTransition(Transition{Type: TransitionOfferSent})
+		m.emitState()
+	}
+	m.mu.Unlock()
+
+	if ackNode != nil {
+		// The ack is processed on its own goroutine with a background context
+		// so that relay setup is not tied to the lifetime of the caller's ctx.
+		go m.HandleCallAck(context.Background(), ackNode)
+	}
+
+	m.log.Info("call offer sent", "call_id", callID, "peer", resolved.String())
+	return callID, nil
+}
+
+// discardFailedCall drops call if it is still the current call and releases
+// the media resources that StartCall prepared for it.
+func (m *CallManager) discardFailedCall(call *CallInfo) {
+	m.mu.Lock()
+	current := m.currentCall == call
+	if current {
+		m.currentCall = nil
+	}
+	m.mu.Unlock()
+	if current {
+		// cleanupMedia takes m.mu itself, so it has to run after Unlock.
+		m.cleanupMedia()
+	}
+}
+
+// AcceptCall accepts the current incoming call identified by callID.
+// It returns a *CallError when callID does not match the current call or the
+// call is not in a state that can be accepted.
+func (m *CallManager) AcceptCall(ctx context.Context, callID string) error {
+	m.mu.Lock()
+	call := m.currentCall
+	if call == nil || call.CallID != callID {
+		m.mu.Unlock()
+		return &CallError{"no incoming call with id " + callID}
+	}
+	if !call.CanAccept() {
+		m.mu.Unlock()
+		return
&CallError{"call cannot be accepted in state " + string(call.StateData.State)}
+	}
+	// CanAccept was verified under this same lock, so the transition cannot fail.
+	_ = call.ApplyTransition(Transition{Type: TransitionLocalAccepted})
+	m.emitState()
+	key := call.EncryptionKey
+	peer := wanode.MustJID(call.PeerJid)
+	creator := wanode.MustJID(call.CallCreator)
+	isVideo := call.MediaType == core.CallMediaTypeVideo
+	relayData := call.RelayData
+	m.mu.Unlock()
+
+	if key != nil {
+		acceptNode, err := signaling.BuildAcceptStanza(ctx, m.sock, callID, key, peer, creator, isVideo)
+		if err != nil {
+			m.log.Error("build accept failed", "err", err)
+		} else if _, err := m.sock.Query(ctx, acceptNode); err != nil {
+			m.log.Error("accept query error", "err", err)
+		}
+	}
+
+	if relayData != nil {
+		m.connectRelays(relayData.Endpoints)
+	}
+	m.log.Info("call accepted", "call_id", callID)
+	return nil
+}
+
+// terminalStanzaTimeout bounds the background delivery of reject and
+// terminate stanzas once they are detached from the caller's context.
+const terminalStanzaTimeout = 15 * time.Second
+
+// RejectCall rejects the current call identified by callID, sends the reject
+// stanza in the background and releases all media resources.
+//
+// It returns a *CallError when callID does not match the current call.
+// Because media is always torn down, the call is forced into the ended state
+// even when it is no longer incoming-ringing; otherwise the state machine
+// would keep reporting a live call that has no media and blocks new calls.
+func (m *CallManager) RejectCall(ctx context.Context, callID string, reason core.EndCallReason) error {
+	m.mu.Lock()
+	call := m.currentCall
+	if call == nil || call.CallID != callID {
+		m.mu.Unlock()
+		return &CallError{"no call with id " + callID}
+	}
+	if err := call.ApplyTransition(Transition{Type: TransitionLocalRejected, Reason: reason}); err != nil {
+		// Not incoming-ringing: fall back to a plain termination. This only
+		// fails when the call has already ended, which needs no further action.
+		_ = call.ApplyTransition(Transition{Type: TransitionTerminated, Reason: reason})
+	}
+	node := signaling.BuildRejectStanza(wanode.MustJID(call.PeerJid), call.CallID, wanode.MustJID(call.CallCreator))
+	m.emitState()
+	m.mu.Unlock()
+
+	go func() {
+		// The caller's ctx is typically a request context that is cancelled as
+		// soon as this method returns; detach from it so the stanza still goes out.
+		sendCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), terminalStanzaTimeout)
+		defer cancel()
+		if _, err := m.sock.Query(sendCtx, node); err != nil {
+			m.log.Debug("reject stanza not delivered", "call_id", callID, "err", err)
+		}
+	}()
+	m.cleanupMedia()
+	return nil
+}
+
+// EndCall terminates the current call with the given reason, sends the
+// terminate stanza in the background, invokes OnEnded and releases all media
+// resources. It is a no-op returning nil when there is no call or the call
+// has already ended.
+func (m *CallManager) EndCall(ctx context.Context, reason core.EndCallReason) error {
+	m.mu.Lock()
+	call := m.currentCall
+	if call == nil || call.IsEnded() {
+		m.mu.Unlock()
+		return nil
+	}
+	// Terminated is rejected only for an ended call, which was excluded above.
+	_ = call.ApplyTransition(Transition{Type: TransitionTerminated, Reason: reason})
+	node := signaling.BuildTerminateStanza(wanode.MustJID(call.PeerJid), call.CallID, wanode.MustJID(call.CallCreator))
+	ended := call
+	callID := call.CallID
+	m.emitState()
+	m.mu.Unlock()
+
+	go func() {
+		// Detached for the same reason as in RejectCall: the terminate must
+		// outlive the caller's context.
+		sendCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), terminalStanzaTimeout)
+		defer cancel()
+		if _, err := m.sock.Query(sendCtx, node); err != nil {
+			m.log.Debug("terminate stanza not delivered", "call_id", callID, "err", err)
+		}
+	}()
+	if m.OnEnded != nil {
+		m.OnEnded(ended)
+	}
+	m.cleanupMedia()
+	return nil
+}
+
+func (m
*CallManager) ownCredJid() string { + lid := m.sock.OwnLID() + if !lid.IsEmpty() { + return lid.String() + } + return m.sock.OwnPN().String() +} + +type CallError struct{ Msg string } + +func (e *CallError) Error() string { return e.Msg } diff --git a/src/voip/call/callmanager_media.go b/src/voip/call/callmanager_media.go new file mode 100644 index 0000000..9a9f1df --- /dev/null +++ b/src/voip/call/callmanager_media.go @@ -0,0 +1,162 @@ +package call + +import ( + "time" + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/media" + "github.com/devlikeapro/gows/voip/transport" +) + +func (m *CallManager) initCodec() { + if m.codec != nil { + return + } + codec, err := media.NewMLowCodec(media.DefaultCodecOptions) + if err != nil { + m.log.Warn("MLow codec unavailable — call will run signaling-only (no audio)", "err", err) + return + } + m.codec = codec +} + +func (m *CallManager) FeedCapturedPCM(data []float32) { + m.mu.Lock() + defer m.mu.Unlock() + + if m.codec == nil || m.rtpSession == nil || m.srtpSession == nil || !m.relay.HasConnection() { + return + } + m.lastCaptureAt = time.Now() + frameSize := m.codec.FrameSize() + if m.encodeBuf == nil { + m.encodeBuf = make([]float32, frameSize) + m.encodeBufPos = 0 + } + + offset := 0 + for offset < len(data) { + toCopy := min(len(data)-offset, frameSize-m.encodeBufPos) + copy(m.encodeBuf[m.encodeBufPos:], data[offset:offset+toCopy]) + m.encodeBufPos += toCopy + offset += toCopy + if m.encodeBufPos < frameSize { + break + } + frame := make([]float32, frameSize) + copy(frame, m.encodeBuf) + m.encodeBufPos = 0 + + opus, err := m.codec.Encode(frame) + if err != nil { + m.log.Debug("encode error", "err", err) + continue + } + m.sendOpusFrameLocked(opus) + } +} + +func (m *CallManager) sendOpusFrameLocked(opus []byte) { + if m.rtpSession == nil || m.srtpSession == nil { + return + } + marker := !m.firstPacketSent + pkt := m.rtpSession.CreatePacketWithDuration(opus, m.codec.FrameSize(), marker) + if 
m.debeEnabled { + pkt.Header.Extension = true + pkt.Header.ExtensionProfile = 0xbede + pkt.Header.ExtensionData = nil + } + m.firstPacketSent = true + + srtp, err := m.srtpSession.Protect(pkt) + if err != nil { + m.log.Debug("srtp protect error", "err", err) + return + } + m.relay.Broadcast(srtp) +} + +func (m *CallManager) startSilenceKeepaliveLocked() { + if m.keepaliveStop != nil || m.codec == nil { + return + } + stop := make(chan struct{}) + m.keepaliveStop = stop + frameSize := m.codec.FrameSize() + go func() { + ticker := time.NewTicker(60 * time.Millisecond) + defer ticker.Stop() + silence := make([]float32, frameSize) + for { + select { + case <-stop: + return + case <-ticker.C: + m.mu.Lock() + ready := m.codec != nil && m.rtpSession != nil && m.srtpSession != nil && m.relay.HasConnection() + idle := time.Since(m.lastCaptureAt) > 120*time.Millisecond + if ready && idle { + if opus, err := m.codec.Encode(silence); err == nil { + m.sendOpusFrameLocked(opus) + } + } + m.mu.Unlock() + } + } + }() +} + +func (m *CallManager) onRelayData(data []byte) { + if transport.IsStunPacket(data) { + return + } + if !transport.IsRtpPacket(data) { + return + } + if len(data) < 12 { + return + } + pt := data[1] & 0x7f + if pt != core.PayloadTypeWhatsAppOpus { + return + } + + m.mu.Lock() + if m.srtpSession == nil || m.codec == nil { + m.mu.Unlock() + return + } + ssrc := uint32(data[8])<<24 | uint32(data[9])<<16 | uint32(data[10])<<8 | uint32(data[11]) + if ssrc == m.selfSsrc { + m.mu.Unlock() + return + } + if !m.actualPeerSet { + m.actualPeerSet = true + if !containsSsrc(m.peerSsrcs, ssrc) { + m.peerSsrcs = []uint32{ssrc} + m.relay.SetSubscriptionSsrc(ssrc) + go m.relay.ResendSubscriptions() + } + } + srtp := m.srtpSession + codec := m.codec + m.mu.Unlock() + + pkt, err := srtp.Unprotect(data) + if err != nil { + m.log.Debug("srtp unprotect error", "err", err) + return + } + if len(pkt.Payload) == 0 { + return + } + pcm, err := codec.Decode(pkt.Payload) + if err != nil { + 
return + } + pcm = media.NormalizeFrame(pcm, codec.FrameSize()) + if m.OnPeerAudio != nil { + m.OnPeerAudio(pcm) + } +} diff --git a/src/voip/call/callmanager_relay.go b/src/voip/call/callmanager_relay.go new file mode 100644 index 0000000..1b18085 --- /dev/null +++ b/src/voip/call/callmanager_relay.go @@ -0,0 +1,100 @@ +package call + +import ( + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/transport" +) + +type RelayTransport interface { + SetSsrc(ssrc uint32) + SetSubscriptionSsrc(ssrc uint32) + SetOnConnected(fn func(ip string, port int)) + SetOnReceive(fn func(data []byte)) + ResendSubscriptions() + ConfigureRelays(relays []transport.RelayConfig) + Broadcast(data []byte) + HasConnection() bool + ConnectedCount() int + Cleanup() +} + +var _ RelayTransport = (*transport.SctpRelayManager)(nil) + +func (m *CallManager) onRelayConnected() { + m.mu.Lock() + call := m.currentCall + if call != nil && call.StateData.State == core.CallStateConnecting { + if err := call.ApplyTransition(Transition{Type: TransitionMediaConnected}); err == nil { + m.emitState() + m.startSilenceKeepaliveLocked() + m.log.Info("relay connected → active", "call_id", call.CallID) + } + } + m.mu.Unlock() +} + +func buildRelayConfigs(endpoints []core.RelayEndpoint) []transport.RelayConfig { + seen := map[string]bool{} + var relays []transport.RelayConfig + for _, ep := range endpoints { + if ep.Protocol != 0 { + continue + } + if ep.Key == "" || ep.RawToken == nil { + continue + } + key := ep.IP + if seen[key] { + continue + } + seen[key] = true + name := ep.RelayName + if name == "" { + name = ep.IP + } + relays = append(relays, transport.RelayConfig{ + IP: ep.IP, Port: 3478, Token: ep.Token, AuthToken: ep.AuthToken, + RawAuthToken: ep.RawAuthToken, RawToken: ep.RawToken, Key: ep.Key, + RelayID: ep.RelayID, Name: name, AuthTokenID: ep.AuthTokenID, + }) + } + return relays +} + +func (m *CallManager) connectRelays(endpoints []core.RelayEndpoint) { + relays := 
buildRelayConfigs(endpoints)
+	if len(relays) == 0 {
+		m.log.Error("no usable relay configs")
+		return
+	}
+	m.mu.Lock()
+	m.relay.SetSsrc(m.selfSsrc)
+	m.relay.SetSubscriptionSsrc(firstSsrc(m.peerSsrcs))
+	m.mu.Unlock()
+	m.relay.ConfigureRelays(relays)
+	m.log.Info("relay configured", "connected", m.relay.ConnectedCount())
+}
+
+// cleanupMedia releases everything that belongs to the media path of the
+// current call: it stops the silence keepalive, drops the RTP/SRTP sessions,
+// resets the per-call flags and buffers, tears down the relay connections and
+// closes the codec. It takes m.mu itself and must be called without it held.
+//
+// Peer-related state (acceptedByJid and the SSRCs) is cleared as well, because
+// initSrtpKeysLocked prefers acceptedByJid over the call's own peer and would
+// otherwise derive the next call's keys for the previous call's device.
+//
+// NOTE(review): the codec is closed after the lock is released, while
+// onRelayData decodes on a copy of m.codec taken under the lock; confirm that
+// relay.Cleanup stops receive callbacks before Close can race with Decode.
+func (m *CallManager) cleanupMedia() {
+	m.mu.Lock()
+	codec := m.codec
+	m.codec = nil
+	if m.keepaliveStop != nil {
+		close(m.keepaliveStop)
+		m.keepaliveStop = nil
+	}
+	m.rtpSession = nil
+	m.srtpSession = nil
+	m.firstPacketSent = false
+	m.initialTransportSent = false
+	m.outgoingPreacceptSent = false
+	m.actualPeerSet = false
+	m.acceptedByJid = ""
+	m.selfSsrc = 0
+	m.peerSsrcs = nil
+	m.encodeBuf = nil
+	m.encodeBufPos = 0
+	m.mu.Unlock()
+
+	m.relay.Cleanup()
+	if codec != nil {
+		codec.Close()
+	}
+}
diff --git a/src/voip/call/callmanager_relay_test.go b/src/voip/call/callmanager_relay_test.go
new file mode 100644
index 0000000..c825faf
--- /dev/null
+++ b/src/voip/call/callmanager_relay_test.go
@@ -0,0 +1,44 @@
+package call
+
+import (
+	"testing"
+	"github.com/devlikeapro/gows/voip/core"
+)
+
+func TestBuildRelayConfigs(t *testing.T) {
+	endpoints := []core.RelayEndpoint{
+
+		{IP: "1.1.1.1", Protocol: 0, Key: "k1", RawToken: []byte{1}, RelayName: "relay-a", RelayID: 7, AuthTokenID: "a1"},
+
+		{IP: "1.1.1.1", Protocol: 0, Key: "k1b", RawToken: []byte{2}, RelayName: "relay-a-dup"},
+
+		{IP: "2.2.2.2", Protocol: 1, Key: "k2", RawToken: []byte{3}},
+
+		{IP: "3.3.3.3", Protocol: 0, Key: "", RawToken: []byte{4}},
+
+		{IP: "4.4.4.4", Protocol: 0, Key: "k4", RawToken: nil},
+
+		{IP: "5.5.5.5", Protocol: 0, Key: "k5", RawToken: []byte{5}, RelayName: ""},
+	}
+
+	got := buildRelayConfigs(endpoints)
+
+	if len(got) != 2 {
+		t.Fatalf("expected 2 usable configs, got %d: %+v", len(got), got)
+	}
+	if got[0].IP != "1.1.1.1" || got[0].Name != "relay-a" || got[0].Port != 3478 {
+		t.Errorf("first config wrong: %+v", got[0])
+	}
+	if got[0].RelayID != 7 || got[0].AuthTokenID != "a1" || got[0].Key !=
"k1" { + t.Errorf("first config fields not mapped: %+v", got[0]) + } + if got[1].IP != "5.5.5.5" || got[1].Name != "5.5.5.5" { + t.Errorf("expected name fallback to IP for second config: %+v", got[1]) + } +} + +func TestBuildRelayConfigsEmpty(t *testing.T) { + if got := buildRelayConfigs(nil); got != nil { + t.Errorf("expected nil for no endpoints, got %+v", got) + } +} diff --git a/src/voip/call/callmanager_signaling.go b/src/voip/call/callmanager_signaling.go new file mode 100644 index 0000000..64821eb --- /dev/null +++ b/src/voip/call/callmanager_signaling.go @@ -0,0 +1,259 @@ +package call + +import ( + "context" + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/media" + "github.com/devlikeapro/gows/voip/signaling" + "github.com/devlikeapro/gows/voip/wanode" + + waBinary "go.mau.fi/whatsmeow/binary" + "go.mau.fi/whatsmeow/types" +) + +func (m *CallManager) HandleCallOffer(ctx context.Context, node *waBinary.Node, peerJid types.JID) { + info := signaling.ExtractNodeInfo(node) + if info == nil { + return + } + callID := info.CallID + creator := wanode.AttrString(info.InnerNode.Attrs, "call-creator") + if creator == "" { + creator = peerJid.String() + } + isVideo := hasChildTag(info.InnerNode, "video") + + callKey, err := signaling.DecryptCallKeyInNode(ctx, m.sock, info.InnerNode, peerJid) + if err != nil { + m.log.Error("offer decrypt call key", "err", err) + } + relays := signaling.ExtractRelayEndpoints(info.InnerNode) + + mediaType := core.CallMediaTypeAudio + if isVideo { + mediaType = core.CallMediaTypeVideo + } + + m.mu.Lock() + call := NewIncomingCall(callID, peerJid.String(), creator, "", mediaType) + if callKey != nil { + call.EncryptionKey = callKey + } + if len(relays) > 0 { + call.RelayData = &core.RelayData{Endpoints: relays} + } + m.currentCall = call + m.initialTransportSent = false + + selfJid := m.sock.OwnLID() + sj := selfJid.String() + if selfJid.IsEmpty() { + sj = m.sock.OwnPN().String() + } + m.selfSsrc = 
media.GenerateSecureSsrc(callID, sj, 0) + m.rtpSession = media.NewWhatsAppOpusSession(m.selfSsrc) + m.peerSsrcs = []uint32{media.GenerateSecureSsrc(callID, peerJid.String(), 0)} + m.initCodec() + m.mu.Unlock() + + preaccept := signaling.BuildPreacceptStanza(peerJid, callID, wanode.MustJID(creator)) + if err := m.sock.SendNode(ctx, preaccept); err != nil { + m.log.Error("send preaccept", "err", err) + } + + if m.OnIncoming != nil { + m.OnIncoming(call) + } + m.mu.Lock() + m.emitState() + m.mu.Unlock() + m.log.Info("incoming call", "call_id", callID, "peer", peerJid.String(), "video", isVideo, "relays", len(relays)) +} + +func (m *CallManager) HandleCallAccept(ctx context.Context, node *waBinary.Node, peerJid types.JID) { + m.mu.Lock() + call := m.currentCall + m.mu.Unlock() + if call == nil { + return + } + info := signaling.ExtractNodeInfo(node) + if info == nil { + return + } + + if signaling.NeedsDecryption(info.Tag) { + if peerKey, err := signaling.DecryptCallKeyInNode(ctx, m.sock, info.InnerNode, peerJid); err == nil && peerKey != nil { + m.mu.Lock() + if call.EncryptionKey != nil && !equalBytes(call.EncryptionKey, peerKey) { + m.reinitSrtpLocked(peerKey, peerJid) + } + m.mu.Unlock() + } + } + + m.mu.Lock() + _ = call.ApplyTransition(Transition{Type: TransitionRemoteAccepted}) + m.emitState() + m.acceptedByJid = peerJid.String() + if m.peerSsrcs == nil || !m.actualPeerSet { + peerDeviceJid := ensureDeviceJid(peerJid.String()) + m.peerSsrcs = []uint32{media.GenerateSecureSsrc(call.CallID, peerDeviceJid, 0)} + } + m.relay.SetSubscriptionSsrc(firstSsrc(m.peerSsrcs)) + m.initSrtpKeysLocked() + hasConn := m.relay.HasConnection() + relayData := call.RelayData + m.mu.Unlock() + + m.log.Info("remote accepted call", "call_id", call.CallID, "peer", peerJid.String(), + "relay_connected", hasConn, "relay_endpoints", relayEndpointCount(relayData)) + + m.relay.ResendSubscriptions() + + callID := call.CallID + creator := wanode.MustJID(call.CallCreator) + transport := 
waBinary.Node{ + Tag: "call", + Attrs: waBinary.Attrs{"to": peerJid, "id": signaling.GenerateCallStanzaID()}, + Content: []waBinary.Node{{ + Tag: "transport", + Attrs: waBinary.Attrs{ + "call-id": callID, "call-creator": creator, + "transport-message-type": "1", "p2p-cand-round": "1", + }, + Content: []waBinary.Node{{Tag: "net", Attrs: waBinary.Attrs{"medium": "2", "protocol": "0"}}}, + }}, + } + _ = m.sock.SendNode(ctx, transport) + _ = m.sock.SendNode(ctx, signaling.BuildMuteV2Stanza(peerJid, callID, creator, 0)) + if acceptMsgID := wanode.AttrString(node.Attrs, "id"); acceptMsgID != "" { + ourJid := m.sock.OwnLID() + if ourJid.IsEmpty() { + ourJid = m.sock.OwnPN() + } + _ = m.sock.SendNode(ctx, signaling.BuildAcceptReceiptStanza(peerJid, acceptMsgID, callID, creator, ourJid)) + } + + if hasConn { + m.mu.Lock() + if err := call.ApplyTransition(Transition{Type: TransitionMediaConnected}); err == nil { + m.emitState() + m.startSilenceKeepaliveLocked() + m.log.Info("call ACTIVE (media path established)", "call_id", call.CallID, "audio", m.codec != nil) + } + m.mu.Unlock() + } else if relayData != nil { + m.connectRelays(relayData.Endpoints) + } +} + +func (m *CallManager) HandleCallTransport(ctx context.Context, node *waBinary.Node, peerJid types.JID) { + m.mu.Lock() + call := m.currentCall + m.mu.Unlock() + if call == nil { + return + } + info := signaling.ExtractNodeInfo(node) + if info == nil { + return + } + relays := signaling.ExtractRelayEndpoints(info.InnerNode) + if len(relays) > 0 && !m.relay.HasConnection() { + m.mu.Lock() + if call.RelayData == nil { + call.RelayData = &core.RelayData{} + } + call.RelayData.Endpoints = relays + m.mu.Unlock() + m.connectRelays(relays) + } +} + +func (m *CallManager) HandleCallAck(ctx context.Context, node *waBinary.Node) { + if t := wanode.AttrString(node.Attrs, "type"); t != "offer" { + return + } + if e := wanode.AttrString(node.Attrs, "error"); e != "" { + m.log.Error("offer ack error", "error", e) + return + } + parsed 
:= signaling.ParseRelayFromAck(node) + m.log.Info("offer ack received", "relays", len(parsed.Relays), "participants", len(parsed.ParticipantJids)) + if len(parsed.Relays) == 0 { + return + } + + m.mu.Lock() + call := m.currentCall + if call == nil { + m.mu.Unlock() + return + } + call.RelayData = &core.RelayData{ + Endpoints: parsed.Relays, + ParticipantJids: parsed.ParticipantJids, + UUID: parsed.UUID, + SelfPid: parsed.SelfPid, + PeerPid: parsed.PeerPid, + HbhKey: parsed.HbhKey, + } + + ourBase := wanode.CleanJID(m.ownCredJid()) + if len(parsed.ParticipantJids) > 0 { + ourDeviceJid := ensureDeviceJid(findOurDevice(parsed.ParticipantJids, ourBase, m.ownCredJid())) + newSelf := media.GenerateSecureSsrc(call.CallID, ourDeviceJid, 0) + if newSelf != m.selfSsrc { + m.selfSsrc = newSelf + m.rtpSession = media.NewWhatsAppOpusSession(newSelf) + } + if peer := firstPeerDevice(parsed.ParticipantJids, ourBase); peer != "" { + m.peerSsrcs = []uint32{media.GenerateSecureSsrc(call.CallID, ensureDeviceJid(peer), 0)} + } + if call.EncryptionKey != nil { + m.initSrtpKeysLocked() + } + } + isInitiator := call.IsInitiator() + peer := wanode.MustJID(call.PeerJid) + callID := call.CallID + creator := wanode.MustJID(call.CallCreator) + sendPreaccept := isInitiator && !m.outgoingPreacceptSent + if sendPreaccept { + m.outgoingPreacceptSent = true + } + endpoints := parsed.Relays + m.mu.Unlock() + + if sendPreaccept { + _ = m.sock.SendNode(ctx, signaling.BuildPreacceptStanza(peer, callID, creator)) + } + m.connectRelays(endpoints) +} + +func (m *CallManager) HandleCallTerminate(node *waBinary.Node) { + m.mu.Lock() + call := m.currentCall + if call == nil { + m.mu.Unlock() + return + } + info := signaling.ExtractNodeInfo(node) + reason := core.EndCallReasonUserEnded + if info != nil { + if r := wanode.AttrString(info.InnerNode.Attrs, "reason"); r != "" { + reason = core.EndCallReason(r) + } + } + m.log.Info("call terminated by peer", "call_id", call.CallID, "reason", string(reason)) + _ = 
call.ApplyTransition(Transition{Type: TransitionTerminated, Reason: reason}) + ended := call + m.emitState() + m.mu.Unlock() + + if m.OnEnded != nil { + m.OnEnded(ended) + } + m.cleanupMedia() +} diff --git a/src/voip/call/callmanager_srtp.go b/src/voip/call/callmanager_srtp.go new file mode 100644 index 0000000..bcbc4db --- /dev/null +++ b/src/voip/call/callmanager_srtp.go @@ -0,0 +1,67 @@ +package call + +import ( + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/media" + "github.com/devlikeapro/gows/voip/wanode" + + "go.mau.fi/whatsmeow/types" +) + +func (m *CallManager) initSrtpKeysLocked() { + call := m.currentCall + if call == nil || call.EncryptionKey == nil { + return + } + ourBase := wanode.CleanJID(m.ownCredJid()) + var participants []string + if call.RelayData != nil { + participants = call.RelayData.ParticipantJids + } + ourDeviceJid := ensureDeviceJid(findOurDevice(participants, ourBase, m.ownCredJid())) + + rawPeer := m.acceptedByJid + if rawPeer == "" { + rawPeer = call.PeerJid + if p := firstPeerDevice(participants, ourBase); p != "" { + rawPeer = p + } + } + peerDeviceJid := ensureDeviceJid(rawPeer) + + sendKM, err1 := media.DerivePerJidSrtpKey(call.EncryptionKey, ourDeviceJid) + recvKM, err2 := media.DerivePerJidSrtpKey(call.EncryptionKey, peerDeviceJid) + if err1 != nil || err2 != nil { + m.log.Error("srtp key derivation failed", "err1", err1, "err2", err2) + return + } + sess, err := media.NewSrtpSession(sendKM, recvKM, core.SRTPSendAuthTagLen, core.SRTPRecvAuthTagLen) + if err != nil { + m.log.Error("srtp session failed", "err", err) + return + } + m.srtpSession = sess + m.log.Debug("srtp per-jid keys set", "send", ourDeviceJid, "recv", peerDeviceJid) +} + +func (m *CallManager) reinitSrtpLocked(peerKey []byte, peerJid types.JID) { + call := m.currentCall + if call == nil || call.EncryptionKey == nil { + return + } + ourBase := wanode.CleanJID(m.ownCredJid()) + var participants []string + if call.RelayData != nil { + 
participants = call.RelayData.ParticipantJids + } + ourDeviceJid := ensureDeviceJid(findOurDevice(participants, ourBase, m.ownCredJid())) + sendKM, err1 := media.DerivePerJidSrtpKey(call.EncryptionKey, ourDeviceJid) + recvKM, err2 := media.DerivePerJidSrtpKey(peerKey, peerJid.String()) + if err1 != nil || err2 != nil { + return + } + if sess, err := media.NewSrtpSession(sendKM, recvKM, core.SRTPSendAuthTagLen, core.SRTPRecvAuthTagLen); err == nil { + m.srtpSession = sess + m.log.Debug("srtp re-initialized with peer call key") + } +} diff --git a/src/voip/call/callstate.go b/src/voip/call/callstate.go new file mode 100644 index 0000000..519514b --- /dev/null +++ b/src/voip/call/callstate.go @@ -0,0 +1,214 @@ +package call + +import ( + "fmt" + "time" + "github.com/devlikeapro/gows/voip/core" +) + +type CallStateData struct { + State core.CallState + ConnectedAt *time.Time + AcceptedAt *time.Time + EndedAt *time.Time + AudioMuted bool + VideoOff bool + Silenced bool + EndReason core.EndCallReason + DurationSecs int +} + +type CallInfo struct { + CallID string + PeerJid string + CallCreator string + Direction core.CallDirection + MediaType core.CallMediaType + StateData CallStateData + CreatedAt time.Time + GroupJid string + IsOffline bool + CallerPn string + EncryptionKey []byte + RelayData *core.RelayData + ElectedRelayIdx *int +} + +func NewOutgoingCall(callID, peerJid, ourJid string, mediaType core.CallMediaType) *CallInfo { + return &CallInfo{ + CallID: callID, + PeerJid: peerJid, + CallCreator: ourJid, + Direction: core.CallDirectionOutgoing, + MediaType: mediaType, + CreatedAt: time.Now(), + StateData: CallStateData{ + State: core.CallStateInitiating, + AudioMuted: false, + VideoOff: mediaType != core.CallMediaTypeVideo, + }, + } +} + +func NewIncomingCall(callID, peerJid, callCreator, callerPn string, mediaType core.CallMediaType) *CallInfo { + return &CallInfo{ + CallID: callID, + PeerJid: peerJid, + CallCreator: callCreator, + Direction: 
core.CallDirectionIncoming, + MediaType: mediaType, + CreatedAt: time.Now(), + CallerPn: callerPn, + StateData: CallStateData{ + State: core.CallStateIncomingRinging, + AudioMuted: false, + VideoOff: mediaType != core.CallMediaTypeVideo, + }, + } +} + +func (c *CallInfo) IsInitiator() bool { return c.Direction == core.CallDirectionOutgoing } + +func (c *CallInfo) IsActive() bool { return c.StateData.State == core.CallStateActive } + +func (c *CallInfo) IsRinging() bool { + return c.StateData.State == core.CallStateRinging || c.StateData.State == core.CallStateIncomingRinging +} + +func (c *CallInfo) IsEnded() bool { return c.StateData.State == core.CallStateEnded } + +func (c *CallInfo) CanAccept() bool { return c.StateData.State == core.CallStateIncomingRinging } + +func (c *CallInfo) CanReject() bool { + return c.StateData.State == core.CallStateIncomingRinging || c.StateData.State == core.CallStateRinging +} + +type InvalidTransition struct { + CurrentState string + Attempted string +} + +func (e *InvalidTransition) Error() string { + return fmt.Sprintf("invalid transition '%s' in state '%s'", e.Attempted, e.CurrentState) +} + +const ( + TransitionOfferSent = "offer_sent" + TransitionOfferReceived = "offer_received" + TransitionLocalAccepted = "local_accepted" + TransitionRemoteAccepted = "remote_accepted" + TransitionLocalRejected = "local_rejected" + TransitionRemoteRejected = "remote_rejected" + TransitionMediaConnected = "media_connected" + TransitionTerminated = "terminated" + TransitionHold = "hold" + TransitionResume = "resume" + TransitionAudioMuteChanged = "audio_mute_changed" + TransitionVideoStateChanged = "video_state_changed" +) + +type Transition struct { + Type string + Reason core.EndCallReason + Muted bool + Off bool + Silenced bool +} + +func (c *CallInfo) ApplyTransition(t Transition) error { + s := &c.StateData + now := time.Now() + + switch t.Type { + case TransitionOfferSent: + if s.State != core.CallStateInitiating { + return 
&InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateRinging + + case TransitionOfferReceived: + if s.State != core.CallStateInitiating { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateIncomingRinging + s.Silenced = t.Silenced + + case TransitionRemoteAccepted: + if s.State != core.CallStateRinging { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateConnecting + s.AcceptedAt = &now + + case TransitionLocalAccepted: + if s.State != core.CallStateIncomingRinging { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateConnecting + s.AcceptedAt = &now + + case TransitionRemoteRejected: + if s.State != core.CallStateRinging { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateEnded + s.EndedAt = &now + s.EndReason = t.Reason + + case TransitionLocalRejected: + if s.State != core.CallStateIncomingRinging { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateEnded + s.EndedAt = &now + s.EndReason = t.Reason + + case TransitionMediaConnected: + if s.State != core.CallStateConnecting { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateActive + s.ConnectedAt = &now + s.VideoOff = c.MediaType != core.CallMediaTypeVideo + + case TransitionTerminated: + if s.State == core.CallStateEnded { + return &InvalidTransition{string(s.State), t.Type} + } + if (s.State == core.CallStateActive || s.State == core.CallStateOnHold) && s.ConnectedAt != nil { + s.DurationSecs = int(now.Sub(*s.ConnectedAt).Seconds()) + } + s.State = core.CallStateEnded + s.EndedAt = &now + s.EndReason = t.Reason + + case TransitionHold: + if s.State != core.CallStateActive { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = core.CallStateOnHold + + case TransitionResume: + if s.State != core.CallStateOnHold { + return &InvalidTransition{string(s.State), t.Type} + } + s.State = 
core.CallStateActive + + case TransitionAudioMuteChanged: + if s.State != core.CallStateActive { + return &InvalidTransition{string(s.State), t.Type} + } + s.AudioMuted = t.Muted + + case TransitionVideoStateChanged: + if s.State != core.CallStateActive { + return &InvalidTransition{string(s.State), t.Type} + } + s.VideoOff = t.Off + + default: + return &InvalidTransition{string(s.State), t.Type} + } + + return nil +} diff --git a/src/voip/call/foundation_test.go b/src/voip/call/foundation_test.go new file mode 100644 index 0000000..a9ebf5c --- /dev/null +++ b/src/voip/call/foundation_test.go @@ -0,0 +1,42 @@ +package call + +import ( + "testing" + + "github.com/devlikeapro/gows/voip/core" +) + +func TestCallStateMachine(t *testing.T) { + c := NewOutgoingCall("CID", "peer@lid", "me@lid", core.CallMediaTypeAudio) + if c.StateData.State != core.CallStateInitiating { + t.Fatal("should start Initiating") + } + if err := c.ApplyTransition(Transition{Type: TransitionOfferSent}); err != nil { + t.Fatal(err) + } + if c.StateData.State != core.CallStateRinging { + t.Fatal("should be Ringing after offer_sent") + } + if err := c.ApplyTransition(Transition{Type: TransitionRemoteAccepted}); err != nil { + t.Fatal(err) + } + if err := c.ApplyTransition(Transition{Type: TransitionMediaConnected}); err != nil { + t.Fatal(err) + } + if !c.IsActive() { + t.Fatal("should be Active") + } + + if err := c.ApplyTransition(Transition{Type: TransitionOfferSent}); err == nil { + t.Fatal("expected InvalidTransition") + } else if _, ok := err.(*InvalidTransition); !ok { + t.Fatalf("expected *InvalidTransition, got %T", err) + } + + if err := c.ApplyTransition(Transition{Type: TransitionTerminated, Reason: core.EndCallReasonUserEnded}); err != nil { + t.Fatal(err) + } + if !c.IsEnded() { + t.Fatal("should be Ended") + } +} diff --git a/src/voip/call/helpers.go b/src/voip/call/helpers.go new file mode 100644 index 0000000..f3298c1 --- /dev/null +++ b/src/voip/call/helpers.go @@ -0,0 +1,81 @@ 
// ensureDeviceJid returns jid with an explicit device part.
//
// A JID that already has a ':' somewhere before its '@' is returned unchanged.
// Otherwise ":0" is inserted in front of the first '@'; a JID without any '@'
// therefore comes back as-is.
func ensureDeviceJid(jid string) string {
	colon := strings.IndexByte(jid, ':')
	at := strings.IndexByte(jid, '@')
	if colon >= 0 && at > colon {
		return jid
	}
	return strings.Replace(jid, "@", ":0@", 1)
}
// AudioEngineConfig groups the sizing and timing parameters of the audio
// engine.
// NOTE(review): the units of the size fields are not visible here; they look
// like sample counts at SampleRate — confirm against the audio engine.
type AudioEngineConfig struct {
	SampleRate         int
	CaptureChunkSize   int
	PlaybackOutputSize int
	MaxBufferSize      int
	IntervalMs         int
}

// DefaultAudioConfig is the default AudioEngineConfig: a sample rate of 16000
// and an interval of 20 ms.
var DefaultAudioConfig = AudioEngineConfig{
	SampleRate:         16000,
	CaptureChunkSize:   320,
	PlaybackOutputSize: 256,
	MaxBufferSize:      1600,
	IntervalMs:         20,
}
// Codec converts between float32 PCM samples and encoded audio frames.
type Codec interface {
	// Encode compresses a block of PCM samples into one encoded frame.
	Encode(pcm []float32) ([]byte, error)

	// Decode expands one encoded frame back into PCM samples.
	// NOTE(review): implementations appear to treat a nil frame as a lost
	// packet to be concealed — confirm per implementation.
	Decode(frame []byte) ([]float32, error)

	// FrameSize reports the codec's frame length.
	// NOTE(review): presumably in samples — confirm.
	FrameSize() int

	// SampleRate reports the sample rate the codec operates at.
	SampleRate() int

	// Close releases the resources held by the codec.
	Close()
}
// refHKDF is an independent HKDF-SHA256 implementation used as a reference in
// tests: HMAC-extract a pseudorandom key from ikm and salt, then HMAC-expand it
// with info until length bytes are available.
//
// A nil salt is replaced by sha256.Size zero bytes.
func refHKDF(ikm, salt []byte, info string, length int) []byte {
	if salt == nil {
		salt = make([]byte, sha256.Size)
	}

	// Extract step.
	extract := hmac.New(sha256.New, salt)
	extract.Write(ikm)
	prk := extract.Sum(nil)

	// Expand step: T(i) = HMAC(prk, T(i-1) || info || i).
	expand := hmac.New(sha256.New, prk)
	infoBytes := []byte(info)
	var out, prev []byte
	for counter := byte(1); len(out) < length; counter++ {
		expand.Reset()
		expand.Write(prev)
		expand.Write(infoBytes)
		expand.Write([]byte{counter})
		prev = expand.Sum(nil)
		out = append(out, prev...)
	}
	return out[:length]
}
t.Fatalf("header mismatch: %+v vs %+v", got, h) + } +} + +func TestRtpPacketRoundtrip(t *testing.T) { + sess := NewWhatsAppOpusSession(0x11223344) + payload := []byte{1, 2, 3, 4, 5, 6, 7, 8} + pkt := sess.CreatePacketWithDuration(payload, 960, true) + enc, err := pkt.Encode() + if err != nil { + t.Fatal(err) + } + dec, err := DecodeRtpPacket(enc) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(dec.Payload, payload) { + t.Fatalf("payload mismatch: %v", dec.Payload) + } + + pkt2 := sess.CreatePacketWithDuration(payload, 960, false) + if pkt2.Header.SequenceNumber != pkt.Header.SequenceNumber+1 { + t.Fatal("sequence number did not advance") + } + if pkt2.Header.Timestamp != pkt.Header.Timestamp+960 { + t.Fatal("timestamp did not advance by duration") + } +} + +func TestSrtpRoundtrip(t *testing.T) { + callKey := bytes.Repeat([]byte{0x11}, 32) + sendKM, _ := DerivePerJidSrtpKey(callKey, "self:0@lid") + recvKM, _ := DerivePerJidSrtpKey(callKey, "peer:0@lid") + + sender, err := NewSrtpSession(sendKM, recvKM, core.SRTPSendAuthTagLen, core.SRTPRecvAuthTagLen) + if err != nil { + t.Fatal(err) + } + receiver, err := NewSrtpSession(recvKM, sendKM, core.SRTPRecvAuthTagLen, core.SRTPSendAuthTagLen) + if err != nil { + t.Fatal(err) + } + + sess := NewWhatsAppOpusSession(0xAABBCCDD) + payload := bytes.Repeat([]byte{0x42}, 40) + pkt := sess.CreatePacketWithDuration(payload, 960, true) + + protected, err := sender.Protect(pkt) + if err != nil { + t.Fatal(err) + } + if len(protected) != pkt.Header.Size()+len(payload)+core.SRTPSendAuthTagLen { + t.Fatalf("unexpected protected length: %d", len(protected)) + } + + got, err := receiver.Unprotect(protected) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got.Payload, payload) { + t.Fatalf("srtp roundtrip payload mismatch: %v", got.Payload) + } +} + +func TestDeriveSrtpKeyReference(t *testing.T) { + masterKey := bytes.Repeat([]byte{0x01}, 16) + masterSalt := bytes.Repeat([]byte{0x02}, 14) + + got, err := 
deriveSrtpKey(masterKey, masterSalt, core.SRTPLabelEncryption, 16) + if err != nil { + t.Fatal(err) + } + + iv := make([]byte, 16) + copy(iv, masterSalt) + iv[7] ^= core.SRTPLabelEncryption + block, _ := aes.NewCipher(masterKey) + want := make([]byte, 16) + cipher.NewCTR(block, iv).XORKeyStream(want, make([]byte, 16)) + + if !bytes.Equal(got, want) { + t.Fatalf("deriveSrtpKey mismatch:\n got=%x\nwant=%x", got, want) + } +} diff --git a/src/voip/media/mlow_codec_real.go b/src/voip/media/mlow_codec_real.go new file mode 100644 index 0000000..250c21c --- /dev/null +++ b/src/voip/media/mlow_codec_real.go @@ -0,0 +1,238 @@ +//go:build mlow + +package media + +/* +#cgo CFLAGS: -I${SRCDIR}/../../../native +#cgo LDFLAGS: -L${SRCDIR}/../../../native -lopus_mlow + +#include +#include + +// Prototypes for the bundled libopus_mlow (WhatsApp's Opus variant). The +// real library exports the standard opus_* symbols. +extern void opus_global_create(void); +extern void* opus_decoder_create(int32_t fs, int channels, unsigned char* err); +extern int opus_decoder_ctl(void* dec, int request, ...); +extern int opus_decode(void* dec, const unsigned char* data, int32_t len, int16_t* pcm, int frame_size, int decode_fec); +extern void opus_decoder_destroy(void* dec); +extern void* opus_encoder_create(int32_t fs, int channels, int application, unsigned char* err); +extern int opus_encoder_ctl(void* enc, int request, ...); +extern int opus_encode(void* enc, const int16_t* pcm, int frame_size, unsigned char* data, int32_t max_data_bytes); +extern void opus_encoder_destroy(void* enc); +extern const char* opus_strerror(int error); + +// Non-variadic wrappers so cgo can call the *_ctl functions with one int arg. 
+static int mlow_dec_ctl(void* dec, int req, int val) { return opus_decoder_ctl(dec, req, val); } +static int mlow_enc_ctl(void* enc, int req, int val) { return opus_encoder_ctl(enc, req, val); } +*/ +import "C" + +import ( + "fmt" + "sync" + "unsafe" +) + +const ( + mlowChannels = 1 + opusApplicationVOIP = 2048 + + ctlSetBitrate = 4002 + ctlSetComplexity = 4010 + ctlSetSignal = 4024 + ctlSetInbandFEC = 4012 + ctlSetDTX = 4016 + ctlSetUsingSmpl = 4050 + ctlSignalVoice = 3001 + + mlowMaxOut = 5760 +) + +var globalInitOnce sync.Once + +type mlowCodec struct { + encoder unsafe.Pointer + decoder unsafe.Pointer +} + +func NewMLowCodec(opts CodecOptions) (Codec, error) { + if opts.Bitrate == 0 { + opts.Bitrate = DefaultCodecOptions.Bitrate + } + if opts.Complexity == 0 { + opts.Complexity = DefaultCodecOptions.Complexity + } + globalInitOnce.Do(func() { C.opus_global_create() }) + + c := &mlowCodec{} + + var errBuf [4]C.uchar + c.decoder = C.opus_decoder_create(C.int32_t(mlowSampleRate), C.int(mlowChannels), &errBuf[0]) + if c.decoder == nil { + return nil, fmt.Errorf("opus_decoder_create failed") + } + C.mlow_dec_ctl(c.decoder, C.int(ctlSetUsingSmpl), C.int(1)) + + c.encoder = C.opus_encoder_create(C.int32_t(mlowSampleRate), C.int(mlowChannels), C.int(opusApplicationVOIP), &errBuf[0]) + if c.encoder == nil { + C.opus_decoder_destroy(c.decoder) + return nil, fmt.Errorf("opus_encoder_create failed") + } + C.mlow_enc_ctl(c.encoder, C.int(ctlSetUsingSmpl), C.int(1)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetBitrate), C.int(opts.Bitrate)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetComplexity), C.int(opts.Complexity)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetSignal), C.int(ctlSignalVoice)) + fec := 0 + if opts.FEC { + fec = 1 + } + C.mlow_enc_ctl(c.encoder, C.int(ctlSetInbandFEC), C.int(fec)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetDTX), C.int(1)) + + return c, nil +} + +func (c *mlowCodec) Encode(pcm []float32) ([]byte, error) { + if len(pcm) == 0 { + return nil, nil + } + in := 
make([]C.int16_t, len(pcm)) + for i, s := range pcm { + if s > 1 { + s = 1 + } else if s < -1 { + s = -1 + } + in[i] = C.int16_t(int16(s * 32767)) + } + out := make([]C.uchar, 4000) + n := C.opus_encode(c.encoder, &in[0], C.int(len(pcm)), &out[0], C.int32_t(len(out))) + if n < 0 { + return nil, fmt.Errorf("encode failed: %s", C.GoString(C.opus_strerror(n))) + } + res := make([]byte, int(n)) + for i := 0; i < int(n); i++ { + res[i] = byte(out[i]) + } + return res, nil +} + +func (c *mlowCodec) Decode(frame []byte) ([]float32, error) { + out := make([]C.int16_t, mlowMaxOut) + var n C.int + if frame == nil { + n = C.opus_decode(c.decoder, nil, 0, &out[0], C.int(mlowFrameSize), 0) + } else { + cdata := (*C.uchar)(unsafe.Pointer(&frame[0])) + n = C.opus_decode(c.decoder, cdata, C.int32_t(len(frame)), &out[0], C.int(mlowMaxOut), 0) + } + if n <= 0 { + return make([]float32, mlowFrameSize), nil + } + res := make([]float32, int(n)) + for i := 0; i < int(n); i++ { + res[i] = float32(int16(out[i])) / 32768.0 + } + return res, nil +} + +func (c *mlowCodec) FrameSize() int { return mlowFrameSize } +func (c *mlowCodec) SampleRate() int { return mlowSampleRate } + +func (c *mlowCodec) Close() { + if c.decoder != nil { + C.opus_decoder_destroy(c.decoder) + c.decoder = nil + } + if c.encoder != nil { + C.opus_encoder_destroy(c.encoder) + c.encoder = nil + } +} + +type opusGeneric struct { + encoder unsafe.Pointer + decoder unsafe.Pointer + sampleRate int + frameSize int +} + +func NewOpusCodec(sampleRate, frameSize int) (Codec, error) { + globalInitOnce.Do(func() { C.opus_global_create() }) + c := &opusGeneric{sampleRate: sampleRate, frameSize: frameSize} + + var errBuf [4]C.uchar + c.decoder = C.opus_decoder_create(C.int32_t(sampleRate), C.int(1), &errBuf[0]) + if c.decoder == nil { + return nil, fmt.Errorf("opus_decoder_create(%d) failed", sampleRate) + } + c.encoder = C.opus_encoder_create(C.int32_t(sampleRate), C.int(1), C.int(opusApplicationVOIP), &errBuf[0]) + if c.encoder 
== nil { + C.opus_decoder_destroy(c.decoder) + return nil, fmt.Errorf("opus_encoder_create(%d) failed", sampleRate) + } + C.mlow_enc_ctl(c.encoder, C.int(ctlSetBitrate), C.int(24000)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetComplexity), C.int(5)) + C.mlow_enc_ctl(c.encoder, C.int(ctlSetSignal), C.int(ctlSignalVoice)) + return c, nil +} + +func (c *opusGeneric) Encode(pcm []float32) ([]byte, error) { + if len(pcm) == 0 { + return nil, nil + } + in := make([]C.int16_t, len(pcm)) + for i, s := range pcm { + if s > 1 { + s = 1 + } else if s < -1 { + s = -1 + } + in[i] = C.int16_t(int16(s * 32767)) + } + out := make([]C.uchar, 4000) + n := C.opus_encode(c.encoder, &in[0], C.int(len(pcm)), &out[0], C.int32_t(len(out))) + if n < 0 { + return nil, fmt.Errorf("opus encode failed: %s", C.GoString(C.opus_strerror(n))) + } + res := make([]byte, int(n)) + for i := 0; i < int(n); i++ { + res[i] = byte(out[i]) + } + return res, nil +} + +func (c *opusGeneric) Decode(frame []byte) ([]float32, error) { + maxOut := c.sampleRate / 1000 * 120 + out := make([]C.int16_t, maxOut) + var n C.int + if frame == nil { + n = C.opus_decode(c.decoder, nil, 0, &out[0], C.int(c.frameSize), 0) + } else { + cdata := (*C.uchar)(unsafe.Pointer(&frame[0])) + n = C.opus_decode(c.decoder, cdata, C.int32_t(len(frame)), &out[0], C.int(maxOut), 0) + } + if n <= 0 { + return make([]float32, c.frameSize), nil + } + res := make([]float32, int(n)) + for i := 0; i < int(n); i++ { + res[i] = float32(int16(out[i])) / 32768.0 + } + return res, nil +} + +func (c *opusGeneric) FrameSize() int { return c.frameSize } +func (c *opusGeneric) SampleRate() int { return c.sampleRate } + +func (c *opusGeneric) Close() { + if c.decoder != nil { + C.opus_decoder_destroy(c.decoder) + c.decoder = nil + } + if c.encoder != nil { + C.opus_encoder_destroy(c.encoder) + c.encoder = nil + } +} diff --git a/src/voip/media/mlow_codec_real_test.go b/src/voip/media/mlow_codec_real_test.go new file mode 100644 index 0000000..748465f --- 
/dev/null +++ b/src/voip/media/mlow_codec_real_test.go @@ -0,0 +1,51 @@ +//go:build mlow + +package media + +import ( + "math" + "testing" +) + +func TestMLowCodecRoundtrip(t *testing.T) { + codec, err := NewMLowCodec(DefaultCodecOptions) + if err != nil { + t.Fatalf("NewMLowCodec: %v", err) + } + defer codec.Close() + + if codec.FrameSize() != 960 || codec.SampleRate() != 16000 { + t.Fatalf("unexpected frame=%d rate=%d", codec.FrameSize(), codec.SampleRate()) + } + + frame := make([]float32, 960) + for i := range frame { + frame[i] = 0.3 * float32(math.Sin(2*math.Pi*440*float64(i)/16000)) + } + + encoded, err := codec.Encode(frame) + if err != nil { + t.Fatalf("Encode: %v", err) + } + if len(encoded) == 0 { + t.Fatal("encoded frame is empty") + } + t.Logf("encoded %d samples → %d bytes (MLow)", len(frame), len(encoded)) + + decoded, err := codec.Decode(encoded) + if err != nil { + t.Fatalf("Decode: %v", err) + } + if len(decoded) == 0 { + t.Fatal("decoded PCM is empty") + } + t.Logf("decoded → %d samples", len(decoded)) + + plc, err := codec.Decode(nil) + if err != nil { + t.Fatalf("Decode(nil) PLC: %v", err) + } + if len(plc) == 0 { + t.Fatal("PLC returned no samples") + } +} diff --git a/src/voip/media/mlow_codec_stub.go b/src/voip/media/mlow_codec_stub.go new file mode 100644 index 0000000..aecb517 --- /dev/null +++ b/src/voip/media/mlow_codec_stub.go @@ -0,0 +1,15 @@ +//go:build !mlow + +package media + +import "errors" + +var ErrCodecUnavailable = errors.New("MLow codec unavailable: rebuild with -tags mlow and CGO_ENABLED=1") + +func NewMLowCodec(opts CodecOptions) (Codec, error) { + return nil, ErrCodecUnavailable +} + +func NewOpusCodec(sampleRate, frameSize int) (Codec, error) { + return nil, ErrCodecUnavailable +} diff --git a/src/voip/media/resample.go b/src/voip/media/resample.go new file mode 100644 index 0000000..a476998 --- /dev/null +++ b/src/voip/media/resample.go @@ -0,0 +1,40 @@ +package media + +func Downsample48to16(in []float32) []float32 { + 
// Upsample16to48 triples the sample count of in by linear interpolation: each
// input sample is followed by two samples placed one third and two thirds of
// the way towards the next input sample. The last input sample has no
// successor and is simply repeated. Empty input yields nil.
func Upsample16to48(in []float32) []float32 {
	n := len(in)
	if n == 0 {
		return nil
	}
	out := make([]float32, 0, n*3)
	for i, cur := range in {
		next := cur
		if i+1 < n {
			next = in[i+1]
		}
		out = append(out, cur, cur+(next-cur)/3, cur+2*(next-cur)/3)
	}
	return out
}
binary.BigEndian.PutUint32(buf[4:], h.Timestamp) + binary.BigEndian.PutUint32(buf[8:], h.Ssrc) + + offset := 12 + for _, c := range h.Csrc { + binary.BigEndian.PutUint32(buf[offset:], c) + offset += 4 + } + + if h.Extension { + binary.BigEndian.PutUint16(buf[offset:], h.ExtensionProfile) + binary.BigEndian.PutUint16(buf[offset+2:], uint16(len(h.ExtensionData)/4)) + copy(buf[offset+4:], h.ExtensionData) + } + + return h.Size(), nil +} + +func DecodeRtpHeader(buf []byte) (*RtpHeader, error) { + if len(buf) < rtpMinHeaderSize { + return nil, errors.New("buffer too small for RTP header") + } + + version := (buf[0] >> 6) & 0x03 + if version != rtpVersion { + return nil, errors.New("invalid RTP version") + } + + h := &RtpHeader{ + Version: version, + Padding: (buf[0]>>5)&0x01 != 0, + Extension: (buf[0]>>4)&0x01 != 0, + CsrcCount: buf[0] & 0x0f, + Marker: (buf[1]>>7)&0x01 != 0, + PayloadType: buf[1] & 0x7f, + SequenceNumber: binary.BigEndian.Uint16(buf[2:]), + Timestamp: binary.BigEndian.Uint32(buf[4:]), + Ssrc: binary.BigEndian.Uint32(buf[8:]), + } + + headerSize := rtpMinHeaderSize + int(h.CsrcCount)*4 + if len(buf) < headerSize { + return nil, errors.New("buffer too small for CSRC list") + } + + offset := 12 + for i := 0; i < int(h.CsrcCount); i++ { + h.Csrc = append(h.Csrc, binary.BigEndian.Uint32(buf[offset:])) + offset += 4 + } + + if h.Extension && len(buf) >= offset+4 { + h.ExtensionProfile = binary.BigEndian.Uint16(buf[offset:]) + extWords := binary.BigEndian.Uint16(buf[offset+2:]) + extBytes := int(extWords) * 4 + offset += 4 + if len(buf) >= offset+extBytes { + h.ExtensionData = append([]byte(nil), buf[offset:offset+extBytes]...) 
+ } + } + + return h, nil +} + +type RtpPacket struct { + Header *RtpHeader + Payload []byte +} + +func (p *RtpPacket) Size() int { + return p.Header.Size() + len(p.Payload) +} + +func (p *RtpPacket) Encode() ([]byte, error) { + buf := make([]byte, p.Size()) + headerSize, err := p.Header.Encode(buf) + if err != nil { + return nil, err + } + copy(buf[headerSize:], p.Payload) + return buf, nil +} + +func DecodeRtpPacket(buf []byte) (*RtpPacket, error) { + header, err := DecodeRtpHeader(buf) + if err != nil { + return nil, err + } + payload := append([]byte(nil), buf[header.Size():]...) + return &RtpPacket{Header: header, Payload: payload}, nil +} + +type RtpSession struct { + ssrc uint32 + payloadType uint8 + sequenceNumber uint16 + sampleRate int + timestamp uint32 + samplesPerPacket int +} + +func NewRtpSession(ssrc uint32, payloadType uint8, sampleRate, samplesPerPacket int) *RtpSession { + return &RtpSession{ + ssrc: ssrc, + payloadType: payloadType, + sequenceNumber: uint16(randUint(65536)), + sampleRate: sampleRate, + timestamp: uint32(randUint(1 << 32)), + samplesPerPacket: samplesPerPacket, + } +} + +func NewWhatsAppOpusSession(ssrc uint32) *RtpSession { + return NewRtpSession(ssrc, core.PayloadTypeWhatsAppOpus, 16000, 960) +} + +func (s *RtpSession) CreatePacket(payload []byte, marker bool) *RtpPacket { + return s.CreatePacketWithDuration(payload, s.samplesPerPacket, marker) +} + +func (s *RtpSession) CreatePacketWithDuration(payload []byte, durationSamples int, marker bool) *RtpPacket { + header := NewRtpHeader(s.payloadType, s.sequenceNumber, s.timestamp, s.ssrc) + header.Marker = marker + + s.sequenceNumber++ + s.timestamp += uint32(durationSamples) + + return &RtpPacket{Header: header, Payload: payload} +} + +func boolBit(b bool) byte { + if b { + return 1 + } + return 0 +} + +func randUint(max int64) int64 { + n, err := rand.Int(rand.Reader, big.NewInt(max)) + if err != nil { + panic(err) + } + return n.Int64() +} diff --git a/src/voip/media/srtp.go 
b/src/voip/media/srtp.go new file mode 100644 index 0000000..e6c9425 --- /dev/null +++ b/src/voip/media/srtp.go @@ -0,0 +1,215 @@ +package media + +import ( + "crypto/aes" + "crypto/cipher" + "crypto/hmac" + "crypto/sha1" + "encoding/binary" + "fmt" + "github.com/devlikeapro/gows/voip/core" +) + +type SrtpErrorType string + +const ( + SrtpErrPacketTooShort SrtpErrorType = "packet_too_short" + SrtpErrAuthFailed SrtpErrorType = "auth_failed" + SrtpErrEncryption SrtpErrorType = "encryption" + SrtpErrDecryption SrtpErrorType = "decryption" +) + +type SrtpError struct { + Type SrtpErrorType + Msg string +} + +func (e *SrtpError) Error() string { return fmt.Sprintf("srtp %s: %s", e.Type, e.Msg) } + +type SrtpContext struct { + sessionKey []byte + sessionSalt []byte + authKey []byte + roc uint32 + lastSeq uint16 + initialized bool + authTagLen int +} + +func NewSrtpContext(keying core.SrtpKeyingMaterial, authTagLen int) (*SrtpContext, error) { + if authTagLen <= 0 { + authTagLen = core.SRTPAuthTagLen + } + sk, err := deriveSrtpKey(keying.MasterKey, keying.MasterSalt, core.SRTPLabelEncryption, 16) + if err != nil { + return nil, err + } + ak, err := deriveSrtpKey(keying.MasterKey, keying.MasterSalt, core.SRTPLabelAuth, 20) + if err != nil { + return nil, err + } + ss, err := deriveSrtpKey(keying.MasterKey, keying.MasterSalt, core.SRTPLabelSalt, 14) + if err != nil { + return nil, err + } + return &SrtpContext{ + sessionKey: sk, + sessionSalt: ss, + authKey: ak, + authTagLen: authTagLen, + }, nil +} + +func (c *SrtpContext) SetAuthKeying(keying core.SrtpKeyingMaterial) error { + ak, err := deriveSrtpKey(keying.MasterKey, keying.MasterSalt, core.SRTPLabelAuth, 20) + if err != nil { + return err + } + c.authKey = ak + return nil +} + +func (c *SrtpContext) Protect(packet *RtpPacket) ([]byte, error) { + c.updateRoc(packet.Header.SequenceNumber) + index := c.packetIndex(packet.Header.SequenceNumber) + + headerSize := packet.Header.Size() + output := make([]byte, 
headerSize+len(packet.Payload)+c.authTagLen) + + if _, err := packet.Header.Encode(output); err != nil { + return nil, &SrtpError{SrtpErrEncryption, err.Error()} + } + + iv := c.generateIV(packet.Header.Ssrc, index) + if err := aesCtrXor(c.sessionKey, iv, packet.Payload, output[headerSize:headerSize+len(packet.Payload)]); err != nil { + return nil, &SrtpError{SrtpErrEncryption, err.Error()} + } + + if c.authTagLen > 0 { + authData := output[:headerSize+len(packet.Payload)] + tag := c.computeAuthTag(authData, c.roc, c.authTagLen) + copy(output[headerSize+len(packet.Payload):], tag) + } + + return output, nil +} + +func (c *SrtpContext) Unprotect(data []byte) (*RtpPacket, error) { + if len(data) < 12 { + return nil, &SrtpError{SrtpErrPacketTooShort, fmt.Sprintf("packet too short: %d bytes", len(data))} + } + + header, err := DecodeRtpHeader(data) + if err != nil { + return nil, &SrtpError{SrtpErrDecryption, err.Error()} + } + headerSize := header.Size() + payloadLen := len(data) - headerSize - c.authTagLen + if payloadLen <= 0 { + return nil, &SrtpError{SrtpErrPacketTooShort, fmt.Sprintf("no payload: %dB total, %dB header, auth=%d", len(data), headerSize, c.authTagLen)} + } + + c.updateRoc(header.SequenceNumber) + index := c.packetIndex(header.SequenceNumber) + + iv := c.generateIV(header.Ssrc, index) + decrypted := make([]byte, payloadLen) + if err := aesCtrXor(c.sessionKey, iv, data[headerSize:headerSize+payloadLen], decrypted); err != nil { + return nil, &SrtpError{SrtpErrDecryption, err.Error()} + } + + return &RtpPacket{Header: header, Payload: decrypted}, nil +} + +func (c *SrtpContext) updateRoc(seq uint16) { + if !c.initialized { + c.lastSeq = seq + c.initialized = true + return + } + + diff := int32(seq) - int32(c.lastSeq) + if diff < -32768 { + c.roc++ + } + c.lastSeq = seq +} + +func (c *SrtpContext) packetIndex(seq uint16) uint64 { + return (uint64(c.roc) << 16) | uint64(seq) +} + +func (c *SrtpContext) generateIV(ssrc uint32, index uint64) []byte { + iv 
:= make([]byte, 16) + copy(iv, c.sessionSalt[:14]) + + var ssrcBuf [4]byte + binary.BigEndian.PutUint32(ssrcBuf[:], ssrc) + for i := 0; i < 4; i++ { + iv[4+i] ^= ssrcBuf[i] + } + + var idxBuf [8]byte + binary.BigEndian.PutUint64(idxBuf[:], index) + for i := 0; i < 6; i++ { + iv[8+i] ^= idxBuf[2+i] + } + + return iv +} + +func (c *SrtpContext) computeAuthTag(data []byte, roc uint32, tagLen int) []byte { + mac := hmac.New(sha1.New, c.authKey) + mac.Write(data) + var rocBuf [4]byte + binary.BigEndian.PutUint32(rocBuf[:], roc) + mac.Write(rocBuf[:]) + sum := mac.Sum(nil) + return sum[:tagLen] +} + +type SrtpSession struct { + sendCtx *SrtpContext + recvCtx *SrtpContext +} + +func NewSrtpSession(sendKey, recvKey core.SrtpKeyingMaterial, sendAuthLen, recvAuthLen int) (*SrtpSession, error) { + sc, err := NewSrtpContext(sendKey, sendAuthLen) + if err != nil { + return nil, err + } + rc, err := NewSrtpContext(recvKey, recvAuthLen) + if err != nil { + return nil, err + } + return &SrtpSession{sendCtx: sc, recvCtx: rc}, nil +} + +func (s *SrtpSession) Protect(packet *RtpPacket) ([]byte, error) { return s.sendCtx.Protect(packet) } + +func (s *SrtpSession) Unprotect(data []byte) (*RtpPacket, error) { return s.recvCtx.Unprotect(data) } + +func (s *SrtpSession) SetSendAuthKeying(keying core.SrtpKeyingMaterial) error { + return s.sendCtx.SetAuthKeying(keying) +} + +func deriveSrtpKey(masterKey, masterSalt []byte, label byte, length int) ([]byte, error) { + iv := make([]byte, 16) + copy(iv, masterSalt[:14]) + iv[7] ^= label + + out := make([]byte, length) + if err := aesCtrXor(masterKey, iv, make([]byte, length), out); err != nil { + return nil, err + } + return out, nil +} + +func aesCtrXor(key, iv, src, dst []byte) error { + block, err := aes.NewCipher(key) + if err != nil { + return err + } + cipher.NewCTR(block, iv).XORKeyStream(dst, src) + return nil +} diff --git a/src/voip/media/ssrc.go b/src/voip/media/ssrc.go new file mode 100644 index 0000000..a682f94 --- /dev/null +++ 
b/src/voip/media/ssrc.go @@ -0,0 +1,19 @@ +package media + +import ( + "crypto/hkdf" + "crypto/sha256" + "encoding/binary" +) + +func GenerateSecureSsrc(callID, selfJid string, counter uint32) uint32 { + salt := make([]byte, 4) + binary.LittleEndian.PutUint32(salt, counter) + + out, err := hkdf.Key(sha256.New, []byte(callID), salt, selfJid, 4) + if err != nil { + + panic(err) + } + return binary.LittleEndian.Uint32(out) +} diff --git a/src/voip/signaling/callkey.go b/src/voip/signaling/callkey.go new file mode 100644 index 0000000..c877e58 --- /dev/null +++ b/src/voip/signaling/callkey.go @@ -0,0 +1,79 @@ +package signaling + +import ( + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "strings" + "github.com/devlikeapro/gows/voip/core" + + waBinary "go.mau.fi/whatsmeow/binary" + "go.mau.fi/whatsmeow/proto/waE2E" + "go.mau.fi/whatsmeow/types" + "google.golang.org/protobuf/proto" +) + +var encryptedCallTags = map[string]bool{"preaccept": true, "accept": true} + +func NeedsDecryption(tag string) bool { return encryptedCallTags[tag] } + +func GenerateCallID() string { + b := make([]byte, 16) + rand.Read(b) + return strings.ToUpper(hex.EncodeToString(b)) +} + +func GenerateCallStanzaID() string { + b := make([]byte, 16) + rand.Read(b) + return strings.ToUpper(hex.EncodeToString(b)) +} + +func padRandomMax16(msg []byte) []byte { + var r [1]byte + rand.Read(r[:]) + padLen := int(r[0]&0x0f) + 1 + out := make([]byte, len(msg)+padLen) + copy(out, msg) + for i := len(msg); i < len(out); i++ { + out[i] = byte(padLen) + } + return out +} + +func unpadRandomMax16(b []byte) ([]byte, error) { + if len(b) == 0 { + return nil, fmt.Errorf("unpad given empty bytes") + } + pad := int(b[len(b)-1]) + if pad > len(b) { + return nil, fmt.Errorf("unpad given %d bytes, but pad is %d", len(b), pad) + } + return b[:len(b)-pad], nil +} + +func EncodeCallKeyMessage(callKey []byte) ([]byte, error) { + msg := &waE2E.Message{Call: &waE2E.Call{CallKey: callKey}} + return proto.Marshal(msg) +} + 
+func DecryptCallKeyInNode(ctx context.Context, sock core.VoipSocket, inner *waBinary.Node, peerJid types.JID) ([]byte, error) { + encNode := findEncNode(inner) + if encNode == nil { + return nil, nil + } + return sock.DecryptCallKey(ctx, peerJid, encNode) +} + +func DecodeCallKeyPlaintext(plaintext []byte) ([]byte, error) { + var msg waE2E.Message + if err := proto.Unmarshal(plaintext, &msg); err != nil { + return nil, err + } + key := msg.GetCall().GetCallKey() + if len(key) != 32 { + return nil, fmt.Errorf("invalid callKey: expected 32 bytes, got %d", len(key)) + } + return key, nil +} diff --git a/src/voip/signaling/relayack.go b/src/voip/signaling/relayack.go new file mode 100644 index 0000000..73029a1 --- /dev/null +++ b/src/voip/signaling/relayack.go @@ -0,0 +1,196 @@ +package signaling + +import ( + "encoding/base64" + "sort" + "strconv" + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/wanode" + + waBinary "go.mau.fi/whatsmeow/binary" +) + +type ParsedRelayAck struct { + Relays []core.RelayEndpoint + ParticipantJids []string + UUID string + SelfPid *int + PeerPid *int + HbhKey []byte +} + +func ParseRelayFromAck(ackNode *waBinary.Node) ParsedRelayAck { + res := ParsedRelayAck{} + participantSeen := map[string]bool{} + + addParticipant := func(jid string) { + if jid != "" && !participantSeen[jid] { + participantSeen[jid] = true + res.ParticipantJids = append(res.ParticipantJids, jid) + } + } + + for _, child := range wanode.NodeChildren(ackNode) { + child := child + + if child.Tag == "user" { + for _, deviceNode := range wanode.NodeChildren(&child) { + if deviceNode.Tag == "device" && wanode.HasAttr(deviceNode.Attrs, "jid") { + addParticipant(wanode.AttrString(deviceNode.Attrs, "jid")) + } + } + } + + if child.Tag != "relay" { + continue + } + + res.UUID = wanode.AttrString(child.Attrs, "uuid") + if wanode.HasAttr(child.Attrs, "self_pid") { + v := wanode.AttrInt(child.Attrs, "self_pid", 0) + res.SelfPid = &v + } + if 
wanode.HasAttr(child.Attrs, "peer_pid") { + v := wanode.AttrInt(child.Attrs, "peer_pid", 0) + res.PeerPid = &v + } + + relayContent := wanode.NodeChildren(&child) + + for _, rc := range relayContent { + if rc.Tag == "participant" && wanode.HasAttr(rc.Attrs, "jid") { + addParticipant(wanode.AttrString(rc.Attrs, "jid")) + } + } + + var relayKey string + for _, rc := range relayContent { + rc := rc + switch rc.Tag { + case "key": + if b := wanode.NodeBytes(&rc); b != nil { + relayKey = string(b) + } + case "hbh_key": + if b := wanode.NodeBytes(&rc); b != nil { + switch { + case len(b) == 30: + res.HbhKey = b + case len(b) > 30: + if decoded, err := base64.StdEncoding.DecodeString(string(b)); err == nil && len(decoded) == 30 { + res.HbhKey = decoded + } + } + } + } + } + + res.Relays = append(res.Relays, parseRelayTe2Endpoints(relayKey, relayContent)...) + } + + sortRelaysByRtt(res.Relays) + return res +} + +// parseRelayTe2Endpoints parses modern WhatsApp relay blocks (token/auth_token/te2). 
func parseRelayTe2Endpoints(relayKey string, relayContent []waBinary.Node) []core.RelayEndpoint {
	// Tokens are indexed by their "id" attribute ("0" when absent), both as
	// base64 text and as the raw bytes.
	tokens := map[string]string{}
	authTokens := map[string]string{}
	rawTokens := map[string][]byte{}
	rawAuthTokens := map[string][]byte{}

	// First pass: collect the relay key and every token / auth token.
	for _, rc := range relayContent {
		rc := rc
		switch rc.Tag {
		case "key":
			// A <key> child overrides the relayKey passed in by the caller.
			if b := wanode.NodeBytes(&rc); b != nil {
				relayKey = string(b)
			}
		case "token":
			if b := wanode.NodeBytes(&rc); b != nil {
				id := attrStringOr(rc.Attrs, "id", "0")
				tokens[id] = base64.StdEncoding.EncodeToString(b)
				rawTokens[id] = b
			}
		case "auth_token":
			if b := wanode.NodeBytes(&rc); b != nil {
				id := attrStringOr(rc.Attrs, "id", "0")
				authTokens[id] = base64.StdEncoding.EncodeToString(b)
				rawAuthTokens[id] = b
			}
		}
	}

	// Second pass: turn every <te2> child into an endpoint.
	var relays []core.RelayEndpoint
	for _, rc := range relayContent {
		rc := rc
		if rc.Tag != "te2" {
			continue
		}
		addrBytes := wanode.NodeBytes(&rc)
		if len(addrBytes) < 6 {
			continue
		}

		tokenID := attrStringOr(rc.Attrs, "token_id", "0")
		authTokenID := wanode.AttrString(rc.Attrs, "auth_token_id")
		relayName := wanode.AttrString(rc.Attrs, "relay_name")
		protocol := wanode.AttrInt(rc.Attrs, "protocol", 0)

		ep := core.RelayEndpoint{
			Token:     tokens[tokenID],
			RawToken:  rawTokens[tokenID],
			Key:       relayKey,
			RelayID:   wanode.AttrInt(rc.Attrs, "relay_id", 0),
			Protocol:  protocol,
			RelayName: relayName,
			// Copy so the endpoint does not alias the node's content.
			AddressBytes: append([]byte(nil), addrBytes...),
		}
		if authTokenID != "" {
			ep.AuthToken = authTokens[authTokenID]
			ep.RawAuthToken = rawAuthTokens[authTokenID]
			ep.AuthTokenID = authTokenID
		} else {
			// Without an auth token reference the token id doubles as the
			// auth token id.
			ep.AuthTokenID = tokenID
		}
		if wanode.HasAttr(rc.Attrs, "c2r_rtt") {
			v := wanode.AttrInt(rc.Attrs, "c2r_rtt", 0)
			ep.C2RRtt = &v
		}

		// Only 6-byte addresses (4 address bytes + big-endian port) are
		// emitted; longer addresses are dropped here.
		if len(addrBytes) == 6 {
			ep.IP = ipv4String(addrBytes[:4])
			ep.Port = int(addrBytes[4])<<8 | int(addrBytes[5])
			relays = append(relays, ep)
		}
	}
	return relays
}

// attrStringOr returns the string attribute key of attrs, or fallback when
// wanode.AttrString yields the empty string.
func attrStringOr(attrs waBinary.Attrs, key, fallback string) string {
	if s := wanode.AttrString(attrs, key); s != "" {
		return s
	}
	return fallback
}

// ipv4String formats the first four bytes of b as a dotted-decimal address.
// b must hold at least four bytes.
func ipv4String(b []byte) string {
	return strconv.Itoa(int(b[0])) + "." + strconv.Itoa(int(b[1])) + "." +
		strconv.Itoa(int(b[2])) + "." + strconv.Itoa(int(b[3]))
}

// sortRelaysByRtt stably sorts relays in place by ascending C2RRtt. Endpoints
// without an RTT (nil C2RRtt) sort after all endpoints that have one and keep
// their relative order.
func sortRelaysByRtt(relays []core.RelayEndpoint) {
	sort.SliceStable(relays, func(i, j int) bool {
		ri, rj := relays[i].C2RRtt, relays[j].C2RRtt
		switch {
		case ri == nil && rj == nil:
			return false
		case ri == nil:
			return false
		case rj == nil:
			return true
		default:
			return *ri < *rj
		}
	})
}

// ---- patch entry: src/voip/signaling/relayack_test.go (new file, package signaling) ----
// imports: testing, waBinary "go.mau.fi/whatsmeow/binary"

// TestParseRelayFromAck feeds ParseRelayFromAck an ack with two <user>
// devices and one <relay> holding a participant, key, token, auth token and a
// single 6-byte <te2>, then checks every parsed field.
func TestParseRelayFromAck(t *testing.T) {

	// 1.2.3.4 followed by the big-endian port 0x0d98 (3480).
	addr := []byte{1, 2, 3, 4, 0x0d, 0x98}
	rtt := 7

	ack := &waBinary.Node{
		Tag: "ack",
		Content: []waBinary.Node{
			{
				Tag: "user",
				Content: []waBinary.Node{
					{Tag: "device", Attrs: waBinary.Attrs{"jid": "111:0@lid"}},
					{Tag: "device", Attrs: waBinary.Attrs{"jid": "222:1@lid"}},
				},
			},
			{
				Tag:   "relay",
				Attrs: waBinary.Attrs{"uuid": "abc-uuid", "self_pid": "5", "peer_pid": "9"},
				Content: []waBinary.Node{
					{Tag: "participant", Attrs: waBinary.Attrs{"jid": "333:2@lid"}},
					{Tag: "key", Content: []byte("relaykey123")},
					{Tag: "token", Attrs: waBinary.Attrs{"id": "0"}, Content: []byte{0xAA, 0xBB}},
					{Tag: "auth_token", Attrs: waBinary.Attrs{"id": "1"}, Content: []byte{0xCC, 0xDD}},
					{
						Tag: "te2",
						Attrs: waBinary.Attrs{
							"token_id": "0", "auth_token_id": "1",
							"relay_name": "relay-A", "protocol": "0",
							"relay_id": "2", "c2r_rtt": "7",
						},
						Content: addr,
					},
				},
			},
		},
	}

	res := ParseRelayFromAck(ack)

	if res.UUID != "abc-uuid" {
		t.Errorf("uuid = %q", res.UUID)
	}
	if res.SelfPid == nil || *res.SelfPid != 5 {
		t.Errorf("self_pid = %v", res.SelfPid)
	}
	if res.PeerPid == nil || *res.PeerPid != 9 {
		t.Errorf("peer_pid = %v", res.PeerPid)
	}
	// User devices come first, then relay participants, in document order.
	want := []string{"111:0@lid", "222:1@lid", "333:2@lid"}
	if len(res.ParticipantJids) != len(want) {
		t.Fatalf("participants = %v", res.ParticipantJids)
	}
	for i := range want {
		if res.ParticipantJids[i] != want[i] {
			t.Errorf("participant[%d] = %q want %q", i, res.ParticipantJids[i], want[i])
		}
	}
	if len(res.Relays) != 1 {
		t.Fatalf("expected 1 relay, got %d", len(res.Relays))
	}
	ep := res.Relays[0]
	if ep.IP != "1.2.3.4" {
		t.Errorf("ip = %q", ep.IP)
	}
	if ep.Port != 3480 {
		t.Errorf("port = %d", ep.Port)
	}
	if ep.Key != "relaykey123" {
		t.Errorf("key = %q", ep.Key)
	}
	if ep.RelayName != "relay-A" {
		t.Errorf("relay_name = %q", ep.RelayName)
	}
	if ep.RelayID != 2 {
		t.Errorf("relay_id = %d", ep.RelayID)
	}
	if ep.C2RRtt == nil || *ep.C2RRtt != rtt {
		t.Errorf("c2r_rtt = %v", ep.C2RRtt)
	}
	if len(ep.RawToken) != 2 || ep.RawToken[0] != 0xAA {
		t.Errorf("raw token = %v", ep.RawToken)
	}
	if len(ep.RawAuthToken) != 2 || ep.RawAuthToken[0] != 0xCC {
		t.Errorf("raw auth token = %v", ep.RawAuthToken)
	}
}

// ---- patch entry: src/voip/signaling/signaling_build.go (new file, package signaling) ----
// imports: context, fmt,
//          github.com/devlikeapro/gows/voip/core, github.com/devlikeapro/gows/voip/wanode,
//          waBinary "go.mau.fi/whatsmeow/binary", go.mau.fi/whatsmeow/types

// Opaque <capability ver="1"> payloads sent in offer and preaccept stanzas.
// NOTE(review): the meaning of the individual bytes is not documented here;
// the two blobs differ only in the third byte.
var (
	capabilityOffer     = []byte{0x01, 0x05, 0xf7, 0x09, 0xe4, 0xbb, 0x07}
	capabilityPreaccept = []byte{0x01, 0x05, 0xff, 0x09, 0xe4, 0xbb, 0x07}
)

// BuildOfferStanza builds the <call><offer> stanza that starts a call to
// peerJid.
//
// It resolves the peer's devices through sock, makes sure sessions exist,
// and encrypts callKey for each device into the <destination> node. The
// call-creator is the local LID, falling back to the local PN when the LID
// is empty. A <video> child is added when isVideo is true, a <privacy> child
// when a TC token is available, and the account device identity when
// CreateParticipantNodes asks for it.
//
// Errors from device lookup, session assertion and participant-node creation
// are wrapped and returned; a TC token lookup failure is ignored.
func BuildOfferStanza(ctx context.Context, sock core.VoipSocket, callID string, callKey []byte, peerJid types.JID, isVideo bool) (waBinary.Node, error) {
	creator := sock.OwnLID()
	if creator.IsEmpty() {
		creator = sock.OwnPN()
	}

	rawDevices, err := sock.GetUSyncDevices(ctx, []types.JID{peerJid})
	if err != nil {
		return waBinary.Node{}, fmt.Errorf("usync devices: %w", err)
	}
	if err := sock.AssertSessions(ctx, rawDevices, false); err != nil {
		return waBinary.Node{}, fmt.Errorf("assert sessions: %w", err)
	}

	destinations, includeDeviceIdentity, err := sock.CreateParticipantNodes(ctx, rawDevices, callKey, waBinary.Attrs{"count": "0"})
	if err != nil {
		return waBinary.Node{}, fmt.Errorf("participant nodes: %w", err)
	}

	// Children are appended in a fixed order: privacy, audio, video, net,
	// capability, destination, encopt, device identity.
	var offerContent []waBinary.Node

	// The privacy token is best-effort: it is added only when the lookup
	// succeeds and returns a non-empty token.
	if token, err := sock.GetTCToken(ctx, wanode.MustJID(wanode.CleanJID(peerJid.String()))); err == nil && len(token) > 0 {
		offerContent = append(offerContent, waBinary.Node{Tag: "privacy", Content: token})
	}

	offerContent = append(offerContent,
		waBinary.Node{Tag: "audio", Attrs: waBinary.Attrs{"enc": "opus", "rate": "8000"}},
		waBinary.Node{Tag: "audio", Attrs: waBinary.Attrs{"enc": "opus", "rate": "16000"}},
	)
	if isVideo {
		offerContent = append(offerContent, waBinary.Node{Tag: "video", Attrs: waBinary.Attrs{
			"enc": "vp8", "dec": "vp8", "orientation": "0",
			"screen_width": "1920", "screen_height": "1080", "device_orientation": "0",
		}})
	}
	offerContent = append(offerContent,
		waBinary.Node{Tag: "net", Attrs: waBinary.Attrs{"medium": "3"}},
		waBinary.Node{Tag: "capability", Attrs: waBinary.Attrs{"ver": "1"}, Content: capabilityOffer},
		waBinary.Node{Tag: "destination", Content: destinations},
		waBinary.Node{Tag: "encopt", Attrs: waBinary.Attrs{"keygen": "2"}},
	)
	if includeDeviceIdentity {
		if di, ok := sock.AccountDeviceIdentityNode(); ok {
			offerContent = append(offerContent, di)
		}
	}

	return waBinary.Node{
		Tag:   "call",
		Attrs: waBinary.Attrs{"to": peerJid, "id": GenerateCallStanzaID()},
		Content: []waBinary.Node{{
			Tag:     "offer",
			Attrs:   waBinary.Attrs{"call-id": callID, "call-creator": creator},
			Content: offerContent,
		}},
	}, nil
}

// BuildAcceptStanza builds the <call><accept> stanza answering an incoming
// call.
//
// callKey is encrypted for callCreator only; the resulting <enc> node is
// placed directly inside <accept>. The stanza is addressed to the cleaned
// form of peerJid. An error is returned when the session cannot be asserted,
// when encryption fails, or when no <enc> node is produced.
func BuildAcceptStanza(ctx context.Context, sock core.VoipSocket, callID string, callKey []byte, peerJid, callCreator types.JID, isVideo bool) (waBinary.Node, error) {
	if err := sock.AssertSessions(ctx, []types.JID{callCreator}, true); err != nil {
		return waBinary.Node{}, fmt.Errorf("assert creator session: %w", err)
	}

	nodes, includeDeviceIdentity, err := sock.CreateParticipantNodes(ctx, []types.JID{callCreator}, callKey, waBinary.Attrs{"count": "0"})
	if err != nil {
		return waBinary.Node{}, fmt.Errorf("encrypt accept: %w", err)
	}

	encNode := extractEncFromParticipant(nodes)
	if encNode == nil {
		return waBinary.Node{}, fmt.Errorf("no enc node produced for accept")
	}

	// Child order: audio, net, enc, encopt, then device identity and video
	// when applicable.
	acceptContent := []waBinary.Node{
		{Tag: "audio", Attrs: waBinary.Attrs{"enc": "opus", "rate": "16000"}},
		{Tag: "net", Attrs: waBinary.Attrs{"medium": "3"}},
		*encNode,
		{Tag: "encopt", Attrs: waBinary.Attrs{"keygen": "2"}},
	}
	if includeDeviceIdentity {
		if di, ok := sock.AccountDeviceIdentityNode(); ok {
			acceptContent = append(acceptContent, di)
		}
	}
	if isVideo {
		acceptContent = append(acceptContent, waBinary.Node{Tag: "video", Attrs: waBinary.Attrs{"enc": "vp8"}})
	}

	return waBinary.Node{
		Tag:   "call",
		Attrs: waBinary.Attrs{"to": wanode.MustJID(wanode.CleanJID(peerJid.String())), "id": GenerateCallStanzaID()},
		Content: []waBinary.Node{{
			Tag:     "accept",
			Attrs:   waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
			Content: acceptContent,
		}},
	}, nil
}

// extractEncFromParticipant returns the first <enc> node found in nodes,
// looking at each node itself and then at its direct children. It returns nil
// when there is none. The result points at a copy, not into nodes.
func extractEncFromParticipant(nodes []waBinary.Node) *waBinary.Node {
	for _, n := range nodes {
		n := n
		if n.Tag == "enc" {
			return &n
		}
		for _, c := range wanode.NodeChildren(&n) {
			c := c
			if c.Tag == "enc" {
				return &c
			}
		}
	}
	return nil
}

// BuildTerminateStanza builds a <call><terminate> stanza for callID,
// addressed to peerJid.
func BuildTerminateStanza(peerJid types.JID, callID string, callCreator types.JID) waBinary.Node {
	return callWrap(peerJid, waBinary.Node{
		Tag:   "terminate",
		Attrs: waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
	})
}

// BuildRejectStanza builds a <call><reject> stanza for callID, addressed to
// peerJid.
func BuildRejectStanza(peerJid types.JID, callID string, callCreator types.JID) waBinary.Node {
	return callWrap(peerJid, waBinary.Node{
		Tag:   "reject",
		Attrs: waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
	})
}

// BuildPreacceptStanza builds a <call><preaccept> stanza for callID with the
// 16 kHz opus audio, encopt and preaccept capability children.
func BuildPreacceptStanza(peerJid types.JID, callID string, callCreator types.JID) waBinary.Node {
	return waBinary.Node{
		Tag:   "call",
		Attrs: waBinary.Attrs{"to": peerJid, "id": GenerateCallStanzaID()},
		Content: []waBinary.Node{{
			Tag:   "preaccept",
			Attrs: waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
			Content: []waBinary.Node{
				{Tag: "audio", Attrs: waBinary.Attrs{"enc": "opus", "rate": "16000"}},
				{Tag: "encopt", Attrs: waBinary.Attrs{"keygen": "2"}},
				{Tag: "capability", Attrs: waBinary.Attrs{"ver": "1"}, Content: capabilityPreaccept},
			},
		}},
	}
}

// CreateCallAck builds an <ack class="call"> for the stanza with id nodeID,
// addressed to peerJid, with the given type attribute.
func CreateCallAck(nodeID string, peerJid types.JID, typ string) waBinary.Node {
	return waBinary.Node{
		Tag:   "ack",
		Attrs: waBinary.Attrs{"id": nodeID, "to": peerJid, "class": "call", "type": typ},
	}
}

// RelayLatencyEntry describes one relay to report in a relaylatency stanza.
type RelayLatencyEntry struct {
	// RelayName identifies the relay; entries with an empty name are skipped.
	RelayName string
	// Latency is the measured latency. NOTE(review): unit not stated here —
	// presumably milliseconds; confirm against the caller.
	Latency int
	// AddressBytes, when non-empty, becomes the content of the <te> node.
	AddressBytes []byte
}

// BuildRelayLatencyStanza builds a <call><relaylatency> stanza with one <te>
// child per distinct, non-empty relay name (first occurrence wins). When
// destinationJids is non-empty, a <destination> child follows with one
// <to jid=…> per JID. The stanza is addressed to the cleaned form of peerJid.
func BuildRelayLatencyStanza(peerJid types.JID, callID string, callCreator types.JID, relays []RelayLatencyEntry, destinationJids []types.JID) waBinary.Node {
	seen := map[string]bool{}
	var teNodes []waBinary.Node
	for _, r := range relays {
		if r.RelayName == "" || seen[r.RelayName] {
			continue
		}
		seen[r.RelayName] = true
		// The latency attribute carries the value offset by 0x2000000.
		// NOTE(review): the purpose of the offset is not documented here.
		encodedLatency := 0x2000000 + r.Latency
		te := waBinary.Node{
			Tag:   "te",
			Attrs: waBinary.Attrs{"latency": fmt.Sprintf("%d", encodedLatency), "relay_name": r.RelayName},
		}
		if len(r.AddressBytes) > 0 {
			te.Content = r.AddressBytes
		}
		teNodes = append(teNodes, te)
	}

	// Copy so appending <destination> cannot write into teNodes' backing array.
	content := append([]waBinary.Node(nil), teNodes...)
	if len(destinationJids) > 0 {
		var dst []waBinary.Node
		for _, jid := range destinationJids {
			dst = append(dst, waBinary.Node{Tag: "to", Attrs: waBinary.Attrs{"jid": jid}})
		}
		content = append(content, waBinary.Node{Tag: "destination", Content: dst})
	}

	return callWrap(wanode.MustJID(wanode.CleanJID(peerJid.String())), waBinary.Node{
		Tag:     "relaylatency",
		Attrs:   waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
		Content: content,
	})
}

// BuildTransportStanza builds a <call><transport> stanza with fixed
// transport-message-type / p2p-cand-round attributes and a single <net>
// child, addressed to the cleaned form of peerJid.
func BuildTransportStanza(peerJid types.JID, callID string, callCreator types.JID) waBinary.Node {
	return callWrap(wanode.MustJID(wanode.CleanJID(peerJid.String())), waBinary.Node{
		Tag: "transport",
		Attrs: waBinary.Attrs{
			"call-id": callID, "call-creator": callCreator,
			"transport-message-type": "0", "p2p-cand-round": "0",
		},
		Content: []waBinary.Node{{Tag: "net", Attrs: waBinary.Attrs{"medium": "2", "protocol": "0"}}},
	})
}

// BuildMuteV2Stanza builds a <call><mute_v2> stanza addressed to
// peerDeviceJid; muteState is sent as the decimal "mute-state" attribute.
func BuildMuteV2Stanza(peerDeviceJid types.JID, callID string, callCreator types.JID, muteState int) waBinary.Node {
	return waBinary.Node{
		Tag:   "call",
		Attrs: waBinary.Attrs{"to": peerDeviceJid, "id": GenerateCallStanzaID()},
		Content: []waBinary.Node{{
			Tag: "mute_v2",
			Attrs: waBinary.Attrs{
				"call-id": callID, "call-creator": callCreator,
				"mute-state": fmt.Sprintf("%d", muteState),
			},
		}},
	}
}

// BuildAcceptReceiptStanza builds the <receipt> acknowledging an accept
// stanza: it reuses acceptMsgID as its id, is sent from ourJid to
// peerDeviceJid and carries an <accept> child naming the call.
func BuildAcceptReceiptStanza(peerDeviceJid types.JID, acceptMsgID, callID string, callCreator, ourJid types.JID) waBinary.Node {
	return waBinary.Node{
		Tag:   "receipt",
		Attrs: waBinary.Attrs{"to": peerDeviceJid, "id": acceptMsgID, "from": ourJid},
		Content: []waBinary.Node{{
			Tag:   "accept",
			Attrs: waBinary.Attrs{"call-id": callID, "call-creator": callCreator},
		}},
	}
}

// callWrap wraps inner in a <call> stanza addressed to the given JID with a
// freshly generated stanza id.
func callWrap(to types.JID, inner waBinary.Node) waBinary.Node {
	return waBinary.Node{
		Tag:     "call",
		Attrs:   waBinary.Attrs{"to": to, "id": GenerateCallStanzaID()},
		Content: []waBinary.Node{inner},
	}
}

// ---- patch entry: src/voip/signaling/signaling_parse.go ----
b/src/voip/signaling/signaling_parse.go new file mode 100644 index 0000000..232abdd --- /dev/null +++ b/src/voip/signaling/signaling_parse.go @@ -0,0 +1,113 @@ +package signaling + +import ( + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/wanode" + + waBinary "go.mau.fi/whatsmeow/binary" +) + +type NodeInfo struct { + Tag string + PeerJid string + CallID string + PeerPlatform string + PeerAppVersion string + EpochID string + Timestamp string + InnerNode *waBinary.Node +} + +func ExtractNodeInfo(node *waBinary.Node) *NodeInfo { + children := wanode.NodeChildren(node) + if len(children) == 0 { + return nil + } + inner := children[0] + return &NodeInfo{ + Tag: inner.Tag, + PeerJid: wanode.AttrString(node.Attrs, "from"), + CallID: wanode.AttrString(inner.Attrs, "call-id"), + PeerPlatform: wanode.AttrString(node.Attrs, "platform"), + PeerAppVersion: wanode.AttrString(node.Attrs, "version"), + EpochID: wanode.AttrString(inner.Attrs, "e"), + Timestamp: wanode.AttrString(inner.Attrs, "t"), + InnerNode: &inner, + } +} + +func ExtractRelayEndpoints(node *waBinary.Node) []core.RelayEndpoint { + var relays []core.RelayEndpoint + + parseRelay := func(n *waBinary.Node) { + ip := wanode.AttrString(n.Attrs, "ip") + token := wanode.AttrString(n.Attrs, "token") + if ip == "" || token == "" { + return + } + key := wanode.AttrString(n.Attrs, "relay-key") + if key == "" { + key = wanode.AttrString(n.Attrs, "key") + } + ep := core.RelayEndpoint{ + IP: ip, + Port: wanode.AttrInt(n.Attrs, "port", core.WARelayPort), + Token: token, + Key: key, + RelayID: wanode.AttrInt(n.Attrs, "relay-id", 0), + } + if wanode.HasAttr(n.Attrs, "c2r-rtt") { + v := wanode.AttrInt(n.Attrs, "c2r-rtt", 0) + ep.C2RRtt = &v + } + relays = append(relays, ep) + } + + for _, child := range wanode.NodeChildren(node) { + child := child + switch child.Tag { + case "relay": + if wanode.AttrString(child.Attrs, "ip") != "" && wanode.AttrString(child.Attrs, "token") != "" { + parseRelay(&child) 
+ } else { + relays = append(relays, parseRelayTe2Endpoints("", wanode.NodeChildren(&child))...) + } + case "relays": + for _, rn := range wanode.NodeChildren(&child) { + rn := rn + if rn.Tag == "relay" { + parseRelay(&rn) + } + } + } + } + + sortRelaysByRtt(relays) + return relays +} + +func findEncNode(inner *waBinary.Node) *waBinary.Node { + for _, c := range wanode.NodeChildren(inner) { + c := c + if c.Tag == "enc" && wanode.HasAttr(c.Attrs, "type") { + return &c + } + } + for _, c := range wanode.NodeChildren(inner) { + if c.Tag != "destination" { + continue + } + for _, toNode := range wanode.NodeChildren(&c) { + if toNode.Tag != "to" { + continue + } + for _, e := range wanode.NodeChildren(&toNode) { + e := e + if e.Tag == "enc" && wanode.HasAttr(e.Attrs, "type") { + return &e + } + } + } + } + return nil +} diff --git a/src/voip/signaling/signaling_parse_test.go b/src/voip/signaling/signaling_parse_test.go new file mode 100644 index 0000000..ff93543 --- /dev/null +++ b/src/voip/signaling/signaling_parse_test.go @@ -0,0 +1,65 @@ +package signaling + +import ( + "testing" + + waBinary "go.mau.fi/whatsmeow/binary" +) + +func TestExtractRelayEndpointsTe2Offer(t *testing.T) { + addr1 := []byte{10, 20, 30, 40, 0x0d, 0x98} + addr2 := []byte{11, 21, 31, 41, 0x0d, 0x99} + + offer := &waBinary.Node{ + Tag: "offer", + Attrs: waBinary.Attrs{ + "call-id": "abc", + "call-creator": "111@lid", + }, + Content: []waBinary.Node{ + { + Tag: "relay", + Attrs: waBinary.Attrs{ + "uuid": "relay-uuid", + "self_pid": "5", + "peer_pid": "1", + }, + Content: []waBinary.Node{ + {Tag: "key", Content: []byte("relaykey")}, + {Tag: "token", Attrs: waBinary.Attrs{"id": "0"}, Content: []byte{0xAA}}, + {Tag: "auth_token", Attrs: waBinary.Attrs{"id": "1"}, Content: []byte{0xBB}}, + { + Tag: "te2", + Attrs: waBinary.Attrs{ + "token_id": "0", "auth_token_id": "1", + "relay_name": "fmea2c01", "relay_id": "0", "c2r_rtt": "7", + }, + Content: addr1, + }, + { + Tag: "te2", + Attrs: waBinary.Attrs{ + 
"token_id": "0", "auth_token_id": "1", + "relay_name": "gig4c02", "relay_id": "1", "c2r_rtt": "10", + }, + Content: addr2, + }, + }, + }, + }, + } + + relays := ExtractRelayEndpoints(offer) + if len(relays) != 2 { + t.Fatalf("expected 2 relays, got %d", len(relays)) + } + if relays[0].RelayName != "fmea2c01" || relays[0].IP != "10.20.30.40" { + t.Errorf("relay[0] = %+v", relays[0]) + } + if relays[1].RelayName != "gig4c02" { + t.Errorf("relay[1].name = %q", relays[1].RelayName) + } + if relays[0].Key != "relaykey" { + t.Errorf("relay key = %q", relays[0].Key) + } +} diff --git a/src/voip/transport/foundation_test.go b/src/voip/transport/foundation_test.go new file mode 100644 index 0000000..bc61ca5 --- /dev/null +++ b/src/voip/transport/foundation_test.go @@ -0,0 +1,81 @@ +package transport + +import ( + "bytes" + "encoding/binary" + "testing" +) + +func TestVarintEncoding(t *testing.T) { + cases := []struct { + in uint64 + out []byte + }{ + {0, []byte{0x00}}, + {127, []byte{0x7f}}, + {128, []byte{0x80, 0x01}}, + {300, []byte{0xac, 0x02}}, + } + for _, c := range cases { + if got := encodeVarint(c.in); !bytes.Equal(got, c.out) { + t.Errorf("varint(%d)=%x want %x", c.in, got, c.out) + } + } +} + +func TestSenderSubscriptions(t *testing.T) { + + subs := BuildSenderSubscriptions(0x10) + + inner := []byte{0x18, 0x10, 0x28, 0x00, 0x30, 0x00} + + want := append([]byte{0x0a, byte(len(inner))}, inner...) 
+ if !bytes.Equal(subs, want) { + t.Fatalf("sender subscriptions mismatch:\n got=%x\nwant=%x", subs, want) + } +} + +func TestStunPacketDetection(t *testing.T) { + ping := BuildWhatsAppPing() + if !IsStunPacket(ping) { + t.Error("ping should be classified as STUN") + } + if IsRtpPacket(ping) { + t.Error("ping should not be RTP") + } + + rtp := []byte{0x80, 120, 0, 0} + if !IsRtpPacket(rtp) { + t.Error("0x80 first byte should be RTP") + } + if IsStunPacket(rtp) { + t.Error("RTP should not be STUN") + } +} + +func TestStunBindingFingerprint(t *testing.T) { + subs := BuildSenderSubscriptions(0x12345678) + msg := BuildBindingRequestWithSubs(nil, nil, subs, true, true) + + if binary.BigEndian.Uint32(msg[4:]) != stunMagicCookie { + t.Fatal("missing STUN magic cookie") + } + info := ParseStunResponse(msg) + if info == nil { + t.Fatal("could not parse the binding request we built") + } + if info.Method != "binding" { + t.Fatalf("expected method binding, got %s", info.Method) + } + + last := info.Attributes[len(info.Attributes)-1] + if last.TypeName != "FINGERPRINT" { + t.Fatalf("expected FINGERPRINT last, got %s", last.TypeName) + } + fpStart := len(msg) - 8 + want := crc32stun(msg[:fpStart]) ^ stunFingerprintXor + got := binary.BigEndian.Uint32(msg[len(msg)-4:]) + if got != want { + t.Fatalf("fingerprint mismatch: got %08x want %08x", got, want) + } +} diff --git a/src/voip/transport/sctprelay.go b/src/voip/transport/sctprelay.go new file mode 100644 index 0000000..a428955 --- /dev/null +++ b/src/voip/transport/sctprelay.go @@ -0,0 +1,435 @@ +package transport + +import ( + "fmt" + "log/slog" + "regexp" + "sync" + "time" + "github.com/devlikeapro/gows/voip/core" + + "github.com/pion/webrtc/v4" +) + +const ( + relayConnectionTimeout = 20 * time.Second + relayKeepaliveInterval = 1100 * time.Millisecond +) + +type relayConnState int + +const ( + relayStateConnecting relayConnState = iota + relayStateOpen + relayStateClosed + relayStateFailed +) + +type RelayConfig struct { + 
IP string + Port int + Token string + AuthToken string + RawAuthToken []byte + RawToken []byte + Key string + RelayID int + Name string + AuthTokenID string +} + +type relayConnection struct { + state relayConnState + pc *webrtc.PeerConnection + channel *webrtc.DataChannel + id string + info RelayConfig + localUfrag string + keepalive *time.Ticker + stopCh chan struct{} +} + +type SctpRelayManager struct { + mu sync.Mutex + connections map[string]*relayConnection + log *slog.Logger + + audioSsrc uint32 + subscriptionSsrc uint32 + + onConnected func(ip string, port int) + + onReceive func(data []byte) +} + +func NewSctpRelayManager(log *slog.Logger) *SctpRelayManager { + if log == nil { + log = slog.Default() + } + return &SctpRelayManager{ + connections: map[string]*relayConnection{}, + log: log, + } +} + +func (m *SctpRelayManager) SetSsrc(ssrc uint32) { m.audioSsrc = ssrc } + +func (m *SctpRelayManager) SetSubscriptionSsrc(ssrc uint32) { m.subscriptionSsrc = ssrc } + +func (m *SctpRelayManager) SetOnConnected(fn func(ip string, port int)) { m.onConnected = fn } + +func (m *SctpRelayManager) SetOnReceive(fn func(data []byte)) { m.onReceive = fn } + +func (m *SctpRelayManager) ResendSubscriptions() { + m.mu.Lock() + conns := make([]*relayConnection, 0, len(m.connections)) + for _, c := range m.connections { + conns = append(conns, c) + } + m.mu.Unlock() + for _, c := range conns { + if c.state == relayStateOpen && c.channel != nil { + m.sendStunRegistration(c) + } + } +} + +func connID(ip string, port int, authTokenID string) string { + base := fmt.Sprintf("%s:%d", ip, port) + if authTokenID != "" { + return base + "#" + authTokenID + } + return base +} + +func (m *SctpRelayManager) ConfigureRelays(relays []RelayConfig) { + var wg sync.WaitGroup + for _, r := range relays { + port := r.Port + if port == 0 { + port = core.WARelayPort + } + r.Port = port + id := connID(r.IP, port, r.AuthTokenID) + m.mu.Lock() + _, exists := m.connections[id] + m.mu.Unlock() + if 
exists {
			continue
		}
		wg.Add(1)
		go func(rc RelayConfig) {
			defer wg.Done()
			m.connectToRelay(rc)
		}(r)
	}
	wg.Wait()
}

// connectToRelay registers a new relayConnection for info, creates a
// PeerConnection with an unordered "wa-web-call" data channel, and applies a
// locally synthesised SDP answer (see modifySdpForRelay) as the remote
// description. A watchdog goroutine fails the connection if it is still in
// the connecting state after relayConnectionTimeout.
func (m *SctpRelayManager) connectToRelay(info RelayConfig) {
	id := connID(info.IP, info.Port, info.AuthTokenID)
	m.log.Info("relay connecting", "id", id, "name", info.Name)

	conn := &relayConnection{
		state:  relayStateConnecting,
		id:     id,
		info:   info,
		stopCh: make(chan struct{}),
	}
	m.mu.Lock()
	m.connections[id] = conn
	m.mu.Unlock()

	pc, err := webrtc.NewPeerConnection(webrtc.Configuration{})
	if err != nil {
		m.log.Error("relay peerconnection failed", "id", id, "err", err)
		m.failConnection(conn)
		return
	}
	conn.pc = pc

	pc.OnICEConnectionStateChange(func(s webrtc.ICEConnectionState) {
		m.log.Info("relay ice state", "id", id, "state", s.String())
		if s == webrtc.ICEConnectionStateFailed || s == webrtc.ICEConnectionStateDisconnected {
			m.failConnection(conn)
		}
	})

	ordered := false
	channel, err := pc.CreateDataChannel("wa-web-call", &webrtc.DataChannelInit{Ordered: &ordered})
	if err != nil {
		m.log.Error("relay datachannel failed", "id", id, "err", err)
		m.failConnection(conn)
		return
	}
	conn.channel = channel

	channel.OnOpen(func() {
		// Promote to open only while still connecting, and do it under the
		// lock: failConnection/closeConnection mutate conn.state under m.mu,
		// and a connection that already timed out must not be revived.
		m.mu.Lock()
		stillConnecting := conn.state == relayStateConnecting
		if stillConnecting {
			conn.state = relayStateOpen
		}
		m.mu.Unlock()
		if !stillConnecting {
			m.log.Debug("relay datachannel opened after teardown", "id", id)
			return
		}

		m.log.Info("relay datachannel open", "id", id)
		m.sendStunRegistration(conn)
		m.startKeepalive(conn)
		if m.onConnected != nil {
			m.onConnected(info.IP, info.Port)
		}
	})
	channel.OnClose(func() {
		// closeConnection looks the connection up by id. Only forward the
		// event when this connection is still the registered one, so a stale
		// channel cannot close a newer connection that reused the same id.
		m.mu.Lock()
		current := m.connections[id] == conn
		m.mu.Unlock()
		if current {
			m.closeConnection(id)
		}
	})
	channel.OnMessage(func(msg webrtc.DataChannelMessage) {
		if m.onReceive != nil {
			m.onReceive(msg.Data)
		}
	})

	offer, err := pc.CreateOffer(nil)
	if err != nil {
		m.log.Error("relay create offer failed", "id", id, "err", err)
		m.failConnection(conn)
		return
	}
	if err := pc.SetLocalDescription(offer); err != nil {
		m.log.Error("relay set local description failed", "id", id, "err", err)
		m.failConnection(conn)
		return
	}

	// The local ufrag is needed later to build the STUN USERNAME attribute.
	conn.localUfrag = extractFirst(reUfrag, offer.SDP)
	munged := m.modifySdpForRelay(offer.SDP, info)

	if err := pc.SetRemoteDescription(webrtc.SessionDescription{Type: webrtc.SDPTypeAnswer, SDP: munged}); err != nil {
		m.log.Error("relay set remote description failed", "id", id, "err", err)
		m.failConnection(conn)
		return
	}

	// Watchdog: give up if the data channel has not opened in time. It exits
	// early when the connection is torn down (stopCh closed).
	go func() {
		select {
		case <-time.After(relayConnectionTimeout):
			m.mu.Lock()
			timedOut := conn.state == relayStateConnecting
			m.mu.Unlock()
			if timedOut {
				m.log.Debug("relay connection timeout", "id", id)
				m.failConnection(conn)
			}
		case <-conn.stopCh:
		}
	}()
}

// Patterns used to rewrite the local SDP offer into the answer that is fed
// back to the PeerConnection, plus reUfrag which captures the ufrag value.
var (
	reSetup       = regexp.MustCompile(`a=setup:actpass`)
	reUfragLine   = regexp.MustCompile(`a=ice-ufrag:[^\r\n]+`)
	rePwdLine     = regexp.MustCompile(`a=ice-pwd:[^\r\n]+`)
	reFingerprint = regexp.MustCompile(`a=fingerprint:[^\r\n]+`)
	reMaxMsg      = regexp.MustCompile(`a=max-message-size:[^\r\n]+`)
	reIceOptions  = regexp.MustCompile(`a=ice-options:[^\r\n]+\r?\n`)
	reCandidate   = regexp.MustCompile(`a=candidate:[^\r\n]+\r?\n`)
	reEndCand     = regexp.MustCompile(`a=end-of-candidates\r?\n?`)
	reUfrag       = regexp.MustCompile(`a=ice-ufrag:([^\r\n]+)`)
)

// modifySdpForRelay turns the local offer SDP into an answer describing the
// relay: passive DTLS role, the relay's credentials (AuthToken, falling back
// to Token, as ufrag; Key as password), core.WADTLSFingerprint, a 1500-byte
// max message size, and a single host candidate pointing at info.IP:info.Port.
// All candidates and ice-options from the offer are removed.
func (m *SctpRelayManager) modifySdpForRelay(sdp string, info RelayConfig) string {
	out := reSetup.ReplaceAllString(sdp, "a=setup:passive")

	iceUfrag := info.AuthToken
	if iceUfrag == "" {
		iceUfrag = info.Token
	}
	out = reUfragLine.ReplaceAllString(out, "a=ice-ufrag:"+iceUfrag)
	out = rePwdLine.ReplaceAllString(out, "a=ice-pwd:"+info.Key)
	out = reFingerprint.ReplaceAllString(out, "a=fingerprint:"+core.WADTLSFingerprint)
	out = reMaxMsg.ReplaceAllString(out, "a=max-message-size:1500")
	out = reIceOptions.ReplaceAllString(out, "")

	out = reCandidate.ReplaceAllString(out, "")
	out = reEndCand.ReplaceAllString(out, "")
	candidate := fmt.Sprintf("a=candidate:2 1 udp 2122262783 %s %d typ host generation 0 network-cost 5", info.IP, info.Port)
	out += candidate + "\r\n" + "a=end-of-candidates" + "\r\n"
	return out
}

// extractFirst returns the first capture group of re in s, or "" when re does
// not match or has no capture group.
func extractFirst(re *regexp.Regexp, s string) string {
	if mm := re.FindStringSubmatch(s); len(mm) > 1 {
		return mm[1]
	}
	return ""
}

// sendStunRegistration sends the STUN binding/allocate burst that registers
// this client's SSRC subscriptions with the relay: once immediately and again
// after 50, 150, 500 and 3000 ms while the connection stays open. It is a
// no-op when the relay config carries neither AuthToken nor Token.
func (m *SctpRelayManager) sendStunRegistration(conn *relayConnection) {
	info := conn.info
	remoteUfrag := info.AuthToken
	if remoteUfrag == "" {
		remoteUfrag = info.Token
	}
	if remoteUfrag == "" {
		return
	}
	localUfrag := conn.localUfrag
	hmacKey := []byte(info.Key)

	send := func() {
		if conn.state != relayStateOpen || conn.channel == nil {
			return
		}
		// Prefer the subscription SSRC, fall back to our own audio SSRC.
		ssrc := m.subscriptionSsrc
		if ssrc == 0 {
			ssrc = m.audioSsrc
		}
		if ssrc == 0 {
			return
		}
		subs := BuildSenderSubscriptions(ssrc)

		if localUfrag != "" {
			username := []byte(remoteUfrag + ":" + localUfrag)
			m.sendRaw(conn, BuildBindingRequestWithSubs(username, hmacKey, subs, true, true))
		}
		// Second authenticated variant keyed on Token when it differs from
		// the ufrag used above.
		if info.Token != "" && info.Token != remoteUfrag && localUfrag != "" {
			username := []byte(info.Token + ":" + localUfrag)
			m.sendRaw(conn, BuildBindingRequestWithSubs(username, hmacKey, subs, true, true))
		}
		// Unauthenticated variant: no username, no integrity, no fingerprint.
		m.sendRaw(conn, BuildBindingRequestWithSubs(nil, nil, subs, false, false))

		if len(info.RawToken) > 0 {
			var peerSsrcs []uint32
			if m.subscriptionSsrc != 0 {
				peerSsrcs = []uint32{m.subscriptionSsrc}
			}
			ssrcList := BuildSSRCSubscriptionList([]uint32{m.audioSsrc}, peerSsrcs, 0, 0)
			m.sendRaw(conn, BuildAllocateForRelay(info.RawToken, ssrcList, hmacKey, info.IP, info.Port))
		}
	}

	send()
	for _, d := range []time.Duration{50, 150, 500, 3000} {
		delay := d * time.Millisecond
		go func() {
			select {
			case <-time.After(delay):
				m.mu.Lock()
				open := conn.state == relayStateOpen
				m.mu.Unlock()
				if open {
					send()
				}
			case <-conn.stopCh:
			}
		}()
	}
}

// startKeepalive sends one WhatsApp ping immediately and then one every
// relayKeepaliveInterval until the connection leaves the open state or
// stopCh is closed. The ticker is stored on conn so teardown can stop it.
func (m *SctpRelayManager) startKeepalive(conn *relayConnection) {
	m.sendRaw(conn, BuildWhatsAppPing())
	ticker := time.NewTicker(relayKeepaliveInterval)
	conn.keepalive = ticker
	go func() {
		for {
			select {
			case <-ticker.C:
				if conn.state != relayStateOpen || conn.channel == nil {
					return
				}
				m.sendRaw(conn, BuildWhatsAppPing())
			case <-conn.stopCh:
				ticker.Stop()
				return
			}
		}
	}()
}

// sendRaw writes data to the connection's data channel when it is open.
// Send errors are logged at debug level and otherwise ignored.
// NOTE(review): the state check here is an unlocked, best-effort read.
func (m *SctpRelayManager) sendRaw(conn *relayConnection, data []byte) {
	if conn.channel == nil || conn.state != relayStateOpen {
		return
	}
	if err := conn.channel.Send(data); err != nil {
		m.log.Debug("relay send error", "id", conn.id, "err", err)
	}
}

// Broadcast sends data on every registered relay connection. The connection
// list is snapshotted under the lock and the sends happen outside it.
func (m *SctpRelayManager) Broadcast(data []byte) {
	m.mu.Lock()
	conns := make([]*relayConnection, 0, len(m.connections))
	for _, c := range m.connections {
		conns = append(conns, c)
	}
	m.mu.Unlock()
	for _, c := range conns {
		m.sendRaw(c, data)
	}
}

// HasConnection reports whether at least one relay connection is open.
func (m *SctpRelayManager) HasConnection() bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	for _, c := range m.connections {
		if c.state == relayStateOpen {
			return true
		}
	}
	return false
}

// ConnectedCount returns the number of relay connections in the open state.
func (m *SctpRelayManager) ConnectedCount() int {
	m.mu.Lock()
	defer m.mu.Unlock()
	n := 0
	for _, c := range m.connections {
		if c.state == relayStateOpen {
			n++
		}
	}
	return n
}

// failConnection marks conn as failed, unregisters it and releases its
// resources. It is a no-op for a connection that is already failed or closed,
// so each connection is torn down through this path at most once.
func (m *SctpRelayManager) failConnection(conn *relayConnection) {
	m.mu.Lock()
	if conn.state == relayStateFailed || conn.state == relayStateClosed {
		m.mu.Unlock()
		return
	}
	conn.state = relayStateFailed
	// Only drop the map entry if it still refers to this connection; a newer
	// connection may have been registered under the same id.
	if m.connections[conn.id] == conn {
		delete(m.connections, conn.id)
	}
	m.mu.Unlock()
	m.teardown(conn)
}

// closeConnection marks the connection registered under id as closed,
// unregisters it and releases its resources. Unknown ids are ignored.
func (m *SctpRelayManager) closeConnection(id string) {
	m.mu.Lock()
	conn := m.connections[id]
	if conn == nil {
		m.mu.Unlock()
		return
	}
	conn.state = relayStateClosed
	delete(m.connections, id)
	m.mu.Unlock()
	m.teardown(conn)
}

// teardown signals the connection's goroutines to stop (by closing stopCh if
// it is not closed yet), stops the keepalive ticker and closes the data
// channel and PeerConnection. Close errors are deliberately ignored: the
// connection is being discarded either way.
func (m *SctpRelayManager) teardown(conn *relayConnection) {
	select {
	case <-conn.stopCh:
	default:
		close(conn.stopCh)
	}
	if conn.keepalive != nil {
		conn.keepalive.Stop()
	}
	if conn.channel != nil {
		_ = conn.channel.Close()
	}
	if conn.pc != nil {
		_ = conn.pc.Close()
	}
}

// Cleanup closes every relay connection and resets the SSRC state.
func (m *SctpRelayManager) Cleanup() {
	m.mu.Lock()
	conns := make([]*relayConnection, 0, len(m.connections))
	for _, c := range m.connections {
		// Mark as closed so callbacks that fire during teardown (ICE state
		// changes, late OnOpen) become no-ops instead of acting on it again.
		c.state = relayStateClosed
		conns = append(conns, c)
	}
	m.connections = map[string]*relayConnection{}
	m.audioSsrc = 0
	m.subscriptionSsrc = 0
	m.mu.Unlock()
	for _, c := range conns {
		m.teardown(c)
	}
}
diff --git
a/src/voip/transport/stun.go b/src/voip/transport/stun.go new file mode 100644 index 0000000..8cb952f --- /dev/null +++ b/src/voip/transport/stun.go @@ -0,0 +1,367 @@ +package transport + +import ( + "crypto/hmac" + "crypto/rand" + "crypto/sha1" + "encoding/binary" + "encoding/hex" + "fmt" + "hash/crc32" + "strings" +) + +const ( + stunMagicCookie = 0x2112a442 + stunFingerprintXor = 0x5354554e + stunBindingRequest = 0x0001 + stunAllocateRequest = 0x0003 + whatsappPing = 0x0801 + + attrUsername = 0x0006 + attrMessageIntegrity = 0x0008 + attrLifetime = 0x000d + attrXorRelayedAddress = 0x0016 + attrRequestedTransport = 0x0019 + attrPriority = 0x0024 + attrSenderSubscriptions = 0x4000 + attrSsrcList = 0x4024 + attrIceControlled = 0x8029 + attrIceControlling = 0x802a + attrFingerprint = 0x8028 + + defaultICEPriority = 16_777_215 +) + +func generateTransactionID() []byte { + id := make([]byte, 12) + if _, err := rand.Read(id); err != nil { + panic(err) + } + return id +} + +func encodeAttribute(attrType int, data []byte) []byte { + header := make([]byte, 4) + binary.BigEndian.PutUint16(header[0:], uint16(attrType)) + binary.BigEndian.PutUint16(header[2:], uint16(len(data))) + padding := (4 - (len(data) % 4)) % 4 + out := append(header, data...) + if padding > 0 { + out = append(out, make([]byte, padding)...) + } + return out +} + +func crc32stun(data []byte) uint32 { + return crc32.ChecksumIEEE(data) +} + +func buildStunMessage(msgType int, attrs, transactionID, integrityKey []byte, includeFingerprint bool) []byte { + attrsData := append([]byte(nil), attrs...) 
+ + if integrityKey != nil { + msgLenForHmac := len(attrsData) + 24 + hmacHeader := make([]byte, 20) + binary.BigEndian.PutUint16(hmacHeader[0:], uint16(msgType)) + binary.BigEndian.PutUint16(hmacHeader[2:], uint16(msgLenForHmac)) + binary.BigEndian.PutUint32(hmacHeader[4:], stunMagicCookie) + copy(hmacHeader[8:], transactionID) + + mac := hmac.New(sha1.New, integrityKey) + mac.Write(hmacHeader) + mac.Write(attrsData) + miAttr := encodeAttribute(attrMessageIntegrity, mac.Sum(nil)) + attrsData = append(attrsData, miAttr...) + } + + if includeFingerprint { + msgLenForCrc := len(attrsData) + 8 + crcHeader := make([]byte, 20) + binary.BigEndian.PutUint16(crcHeader[0:], uint16(msgType)) + binary.BigEndian.PutUint16(crcHeader[2:], uint16(msgLenForCrc)) + binary.BigEndian.PutUint32(crcHeader[4:], stunMagicCookie) + copy(crcHeader[8:], transactionID) + + crcInput := append(append([]byte(nil), crcHeader...), attrsData...) + fingerprint := crc32stun(crcInput) ^ stunFingerprintXor + fpBuf := make([]byte, 4) + binary.BigEndian.PutUint32(fpBuf, fingerprint) + fpAttr := encodeAttribute(attrFingerprint, fpBuf) + attrsData = append(attrsData, fpAttr...) + } + + header := make([]byte, 20) + binary.BigEndian.PutUint16(header[0:], uint16(msgType)) + binary.BigEndian.PutUint16(header[2:], uint16(len(attrsData))) + binary.BigEndian.PutUint32(header[4:], stunMagicCookie) + copy(header[8:], transactionID) + + return append(header, attrsData...) 
+} + +func encodeXorRelayedAddress(ip string, port int) []byte { + data := make([]byte, 8) + data[0] = 0x00 + data[1] = 0x01 + binary.BigEndian.PutUint16(data[2:], uint16(port)^uint16(stunMagicCookie>>16)) + var p0, p1, p2, p3 int + fmt.Sscanf(ip, "%d.%d.%d.%d", &p0, &p1, &p2, &p3) + ipNum := uint32(p0)<<24 | uint32(p1)<<16 | uint32(p2)<<8 | uint32(p3) + binary.BigEndian.PutUint32(data[4:], ipNum^stunMagicCookie) + return data +} + +func BuildAllocateForRelay(senderSubscriptions, ssrcList, hmacKey []byte, relayIP string, relayPort int) []byte { + txid := generateTransactionID() + var parts [][]byte + parts = append(parts, encodeAttribute(attrSenderSubscriptions, senderSubscriptions)) + parts = append(parts, encodeAttribute(attrSsrcList, ssrcList)) + if relayIP != "" && relayPort != 0 { + parts = append(parts, encodeAttribute(attrXorRelayedAddress, encodeXorRelayedAddress(relayIP, relayPort))) + } + return buildStunMessage(stunAllocateRequest, concat(parts...), txid, hmacKey, false) +} + +func BuildBindingRequestWithSubs(username, hmacKey, senderSubscriptions []byte, includeIceControlling, includeFingerprint bool) []byte { + txid := generateTransactionID() + var parts [][]byte + + if len(username) > 0 { + parts = append(parts, encodeAttribute(attrUsername, username)) + } + + priorityBuf := make([]byte, 4) + binary.BigEndian.PutUint32(priorityBuf, defaultICEPriority) + parts = append(parts, encodeAttribute(attrPriority, priorityBuf)) + + if includeIceControlling { + tieBreaker := make([]byte, 8) + rand.Read(tieBreaker) + parts = append(parts, encodeAttribute(attrIceControlling, tieBreaker)) + } + + if len(senderSubscriptions) > 0 { + parts = append(parts, encodeAttribute(attrSenderSubscriptions, senderSubscriptions)) + } + + var key []byte + if len(hmacKey) > 0 { + key = hmacKey + } + return buildStunMessage(stunBindingRequest, concat(parts...), txid, key, includeFingerprint) +} + +func BuildWhatsAppPing() []byte { + txid := generateTransactionID() + header := 
make([]byte, 20) + binary.BigEndian.PutUint16(header[0:], whatsappPing) + binary.BigEndian.PutUint16(header[2:], 0) + binary.BigEndian.PutUint32(header[4:], stunMagicCookie) + copy(header[8:], txid) + return header +} + +func IsStunPacket(data []byte) bool { + if len(data) < 2 { + return false + } + return data[0]&0xc0 == 0 +} + +func IsRtpPacket(data []byte) bool { + if len(data) < 2 { + return false + } + return data[0]&0xc0 == 0x80 +} + +type StunAttribute struct { + Type int + TypeName string + Length int + Data []byte +} + +type StunResponseInfo struct { + RawType int + Method string + StunClass string + IsSuccess bool + IsError bool + ErrorCode int + ErrorReason string + StableRoutingConnID uint64 + TransactionID string + Length int + Attributes []StunAttribute +} + +var stunAttrNames = map[int]string{ + 0x0001: "MAPPED-ADDRESS", 0x0006: "USERNAME", 0x0008: "MESSAGE-INTEGRITY", + 0x0009: "ERROR-CODE", 0x000a: "UNKNOWN-ATTRIBUTES", 0x0014: "REALM", + 0x0015: "NONCE", 0x0019: "REQUESTED-TRANSPORT", 0x0020: "XOR-MAPPED-ADDRESS", + 0x0024: "PRIORITY", 0x0025: "USE-CANDIDATE", 0x4000: "SENDER-SUBSCRIPTIONS", + 0x4001: "RECEIVER-SUBSCRIPTION", 0x4002: "SUBSCRIPTION-ACK", 0x8022: "SOFTWARE", + 0x8028: "FINGERPRINT", 0x8029: "ICE-CONTROLLED", 0x802a: "ICE-CONTROLLING", + 0x4033: "STABLE-ROUTING-CONN-ID", +} + +func ParseStunResponse(data []byte) *StunResponseInfo { + if len(data) < 20 { + return nil + } + + cookie := binary.BigEndian.Uint32(data[4:]) + if cookie != stunMagicCookie { + msgType := int(binary.BigEndian.Uint16(data[0:])) + if msgType == 0x0801 || msgType == 0x0802 { + method := "wa-ping" + if msgType == 0x0802 { + method = "wa-pong" + } + return &StunResponseInfo{ + RawType: msgType, + Method: method, + StunClass: "indication", + TransactionID: hex.EncodeToString(data[8:20]), + Length: len(data), + } + } + return nil + } + + rawType := int(binary.BigEndian.Uint16(data[0:])) + msgLength := int(binary.BigEndian.Uint16(data[2:])) + transactionID := 
hex.EncodeToString(data[8:20]) + + c0 := (rawType >> 4) & 0x1 + c1 := (rawType >> 8) & 0x1 + stunClassNum := (c1 << 1) | c0 + classes := []string{"request", "indication", "success", "error"} + stunClass := "unknown" + if stunClassNum < len(classes) { + stunClass = classes[stunClassNum] + } + + methodBits := ((rawType & 0x3e00) >> 2) | ((rawType & 0x00e0) >> 1) | (rawType & 0x000f) + method := "unknown" + switch methodBits { + case 0x001: + method = "binding" + case 0x003: + method = "allocate" + case 0x004: + method = "refresh" + case 0x006: + method = "send" + case 0x007: + method = "data" + case 0x008: + method = "create-permission" + case 0x009: + method = "channel-bind" + } + if rawType == 0x0801 { + method = "wa-ping" + } + if rawType == 0x0802 { + method = "wa-pong" + } + + info := &StunResponseInfo{ + RawType: rawType, + Method: method, + StunClass: stunClass, + IsSuccess: stunClass == "success", + IsError: stunClass == "error", + TransactionID: transactionID, + Length: len(data), + } + + offset := 20 + for offset+4 <= 20+msgLength && offset+4 <= len(data) { + attrType := int(binary.BigEndian.Uint16(data[offset:])) + attrLength := int(binary.BigEndian.Uint16(data[offset+2:])) + attrEnd := offset + 4 + attrLength + if attrEnd > len(data) { + break + } + attrData := data[offset+4 : attrEnd] + name := stunAttrNames[attrType] + if name == "" { + name = fmt.Sprintf("0x%04x", attrType) + } + info.Attributes = append(info.Attributes, StunAttribute{ + Type: attrType, TypeName: name, Length: attrLength, Data: attrData, + }) + + if attrType == 0x0009 && attrLength >= 4 { + errorClass := int(attrData[2] & 0x07) + errorNumber := int(attrData[3]) + info.ErrorCode = errorClass*100 + errorNumber + if attrLength > 4 { + info.ErrorReason = string(attrData[4:]) + } + } + if attrType == 0x4033 && stunClass == "success" && attrLength == 8 { + info.StableRoutingConnID = binary.BigEndian.Uint64(attrData) + } + + offset = attrEnd + ((4 - (attrLength % 4)) % 4) + } + + return info 
+} + +func FormatStunResponse(info *StunResponseInfo) string { + result := fmt.Sprintf("STUN %s %s (0x%04x, %dB)", info.Method, info.StunClass, info.RawType, info.Length) + if info.IsError && info.ErrorCode != 0 { + result += fmt.Sprintf(" ERROR %d", info.ErrorCode) + if info.ErrorReason != "" { + result += ": " + info.ErrorReason + } + } + if len(info.Attributes) > 0 { + names := make([]string, len(info.Attributes)) + for i, a := range info.Attributes { + names[i] = a.TypeName + } + result += " [" + strings.Join(names, ", ") + "]" + } + return result +} + +func ClassifyPacket(data []byte) string { + if len(data) < 2 { + return fmt.Sprintf("tiny(%dB)", len(data)) + } + twoBits := (data[0] & 0xc0) >> 6 + switch twoBits { + case 0: + if info := ParseStunResponse(data); info != nil { + return FormatStunResponse(info) + } + msgType := (int(data[0]) << 8) | int(data[1]) + return fmt.Sprintf("STUN? 0x%x (%dB)", msgType, len(data)) + case 2: + pt := data[1] & 0x7f + marker := (data[1] >> 7) & 1 + seq := 0 + if len(data) >= 4 { + seq = (int(data[2]) << 8) | int(data[3]) + } + return fmt.Sprintf("RTP/SRTP PT=%d M=%d seq=%d (%dB)", pt, marker, seq, len(data)) + case 1: + return fmt.Sprintf("DTLS? 0x%x (%dB)", data[0], len(data)) + } + return fmt.Sprintf("unknown 0x%x (%dB)", data[0], len(data)) +} + +func concat(parts ...[]byte) []byte { + var out []byte + for _, p := range parts { + out = append(out, p...) 
+ } + return out +} diff --git a/src/voip/transport/subscriptions.go b/src/voip/transport/subscriptions.go new file mode 100644 index 0000000..80272fa --- /dev/null +++ b/src/voip/transport/subscriptions.go @@ -0,0 +1,59 @@ +package transport + +func encodeVarint(value uint64) []byte { + var out []byte + v := value + for v > 0x7f { + out = append(out, byte((v&0x7f)|0x80)) + v >>= 7 + } + out = append(out, byte(v&0x7f)) + return out +} + +func encodeProtobufVarintField(fieldNumber int, value uint64) []byte { + tag := encodeVarint(uint64(fieldNumber << 3)) + return append(tag, encodeVarint(value)...) +} + +func encodeProtobufLengthDelimited(fieldNumber int, data []byte) []byte { + tag := encodeVarint(uint64((fieldNumber << 3) | 2)) + out := append(tag, encodeVarint(uint64(len(data)))...) + return append(out, data...) +} + +func BuildSenderSubscriptions(ssrc uint32) []byte { + inner := concat( + encodeProtobufVarintField(3, uint64(ssrc)), + encodeProtobufVarintField(5, 0), + encodeProtobufVarintField(6, 0), + ) + return encodeProtobufLengthDelimited(1, inner) +} + +func BuildSSRCSubscriptionList(selfSsrcs, peerSsrcs []uint32, selfPid, peerPid int) []byte { + var entries [][]byte + for _, ssrc := range selfSsrcs { + if ssrc == 0 { + continue + } + inner := concat( + encodeProtobufVarintField(1, uint64(selfPid)), + encodeProtobufVarintField(2, 1), + encodeProtobufVarintField(3, uint64(ssrc)), + ) + entries = append(entries, encodeProtobufLengthDelimited(1, inner)) + } + for _, ssrc := range peerSsrcs { + if ssrc == 0 { + continue + } + inner := concat( + encodeProtobufVarintField(1, uint64(peerPid)), + encodeProtobufVarintField(2, 1), + encodeProtobufVarintField(3, uint64(ssrc)), + ) + entries = append(entries, encodeProtobufLengthDelimited(1, inner)) + } + return concat(entries...) 
+} diff --git a/src/voip/wa/socket.go b/src/voip/wa/socket.go new file mode 100644 index 0000000..5e48600 --- /dev/null +++ b/src/voip/wa/socket.go @@ -0,0 +1,104 @@ +package wa + +import ( + "context" + "time" + + "github.com/devlikeapro/gows/voip/core" + "github.com/devlikeapro/gows/voip/signaling" + + "go.mau.fi/whatsmeow" + waBinary "go.mau.fi/whatsmeow/binary" + "go.mau.fi/whatsmeow/types" +) + +type Socket struct { + cli *whatsmeow.Client +} + +func NewSocket(cli *whatsmeow.Client) *Socket { return &Socket{cli: cli} } + +var _ core.VoipSocket = (*Socket)(nil) + +func (s *Socket) di() *whatsmeow.DangerousInternalClient { return s.cli.DangerousInternals() } + +func (s *Socket) OwnPN() types.JID { return s.di().GetOwnID() } + +func (s *Socket) OwnLID() types.JID { return s.di().GetOwnLID() } + +func (s *Socket) AccountDeviceIdentityNode() (waBinary.Node, bool) { + if s.cli.Store == nil || s.cli.Store.Account == nil { + return waBinary.Node{}, false + } + return s.di().MakeDeviceIdentityNode(), true +} + +func (s *Socket) SendNode(ctx context.Context, node waBinary.Node) error { + return s.di().SendNode(ctx, node) +} + +func (s *Socket) Query(ctx context.Context, node waBinary.Node) (*waBinary.Node, error) { + id, _ := node.Attrs["id"].(string) + if id == "" { + return nil, s.di().SendNode(ctx, node) + } + di := s.di() + ch := di.WaitResponse(id) + if err := di.SendNode(ctx, node); err != nil { + di.CancelResponse(id, ch) + return nil, err + } + select { + case resp := <-ch: + return resp, nil + case <-time.After(15 * time.Second): + di.CancelResponse(id, ch) + return nil, nil + case <-ctx.Done(): + di.CancelResponse(id, ch) + return nil, ctx.Err() + } +} + +func (s *Socket) GetUSyncDevices(ctx context.Context, jids []types.JID) ([]types.JID, error) { + return s.cli.GetUserDevices(ctx, jids) +} + +func (s *Socket) AssertSessions(ctx context.Context, jids []types.JID, force bool) error { + return nil +} + +func (s *Socket) CreateParticipantNodes(ctx 
context.Context, devices []types.JID, callKey []byte, encAttrs waBinary.Attrs) ([]waBinary.Node, bool, error) { + plaintext, err := signaling.EncodeCallKeyMessage(callKey) + if err != nil { + return nil, false, err + } + id := s.cli.GenerateMessageID() + return s.di().EncryptMessageForDevices(ctx, devices, id, plaintext, plaintext, encAttrs) +} + +func (s *Socket) DecryptCallKey(ctx context.Context, from types.JID, encChild *waBinary.Node) ([]byte, error) { + typ, _ := encChild.Attrs["type"].(string) + isPreKey := typ == "pkmsg" + plaintext, _, err := s.di().DecryptDM(ctx, encChild, from, isPreKey, time.Now()) + if err != nil { + return nil, err + } + return signaling.DecodeCallKeyPlaintext(plaintext) +} + +func (s *Socket) GetTCToken(ctx context.Context, jid types.JID) ([]byte, error) { + return nil, nil +} + +func (s *Socket) ResolveLIDForPN(ctx context.Context, pn types.JID) types.JID { + if pn.Server == types.HiddenUserServer { + return pn + } + if s.cli.Store != nil && s.cli.Store.LIDs != nil { + if lid, err := s.cli.Store.LIDs.GetLIDForPN(ctx, pn); err == nil && !lid.IsEmpty() { + return lid + } + } + return pn +} diff --git a/src/voip/wanode/jid.go b/src/voip/wanode/jid.go new file mode 100644 index 0000000..420584d --- /dev/null +++ b/src/voip/wanode/jid.go @@ -0,0 +1,24 @@ +package wanode + +import ( + "strings" + + "go.mau.fi/whatsmeow/types" +) + +func CleanJID(jid string) string { + if i := strings.Index(jid, ":"); i >= 0 { + if at := strings.Index(jid, "@"); at > i { + return jid[:i] + jid[at:] + } + } + return jid +} + +func MustJID(s string) types.JID { + j, err := types.ParseJID(s) + if err != nil { + return types.JID{} + } + return j +} diff --git a/src/voip/wanode/nodeutil.go b/src/voip/wanode/nodeutil.go new file mode 100644 index 0000000..5af2652 --- /dev/null +++ b/src/voip/wanode/nodeutil.go @@ -0,0 +1,68 @@ +package wanode + +import ( + "fmt" + "strconv" + + waBinary "go.mau.fi/whatsmeow/binary" +) + +func NodeChildren(n *waBinary.Node) 
[]waBinary.Node { + if n == nil { + return nil + } + if children, ok := n.Content.([]waBinary.Node); ok { + return children + } + return nil +} + +func NodeBytes(n *waBinary.Node) []byte { + if n == nil { + return nil + } + if b, ok := n.Content.([]byte); ok { + return b + } + return nil +} + +func AttrString(attrs waBinary.Attrs, key string) string { + v, ok := attrs[key] + if !ok || v == nil { + return "" + } + switch t := v.(type) { + case string: + return t + case fmt.Stringer: + return t.String() + case int64: + return strconv.FormatInt(t, 10) + case int: + return strconv.Itoa(t) + case uint64: + return strconv.FormatUint(t, 10) + case bool: + return strconv.FormatBool(t) + default: + return fmt.Sprintf("%v", t) + } +} + +func AttrInt(attrs waBinary.Attrs, key string, fallback int) int { + s := AttrString(attrs, key) + if s == "" { + return fallback + } + n, err := strconv.Atoi(s) + if err != nil { + return fallback + } + return n +} + +func HasAttr(attrs waBinary.Attrs, key string) bool { + v, ok := attrs[key] + return ok && v != nil && AttrString(attrs, key) != "" +} diff --git a/tools/call-test-client/index.html b/tools/call-test-client/index.html new file mode 100644 index 0000000..3adcf88 --- /dev/null +++ b/tools/call-test-client/index.html @@ -0,0 +1,132 @@ + + + + + WAHA / GOWS — teste WebRTC de chamada + + + +

Teste E2E — chamada VoIP via WAHA

+

Configure a URL base do WAHA e a sessão. Requer POST /calls e POST /calls/{id}/webrtc implementados no WAHA.

+ + + + + + + +
+ + + +
+ + +

+  
+
+  
+
+