diff --git a/packages/homebridge-ring/camera-source.ts b/packages/homebridge-ring/camera-source.ts index 66a0ec80..86687227 100644 --- a/packages/homebridge-ring/camera-source.ts +++ b/packages/homebridge-ring/camera-source.ts @@ -2,7 +2,6 @@ import type { RingCamera } from 'ring-client-api' import { hap } from './hap.ts' import type { SrtpOptions } from '@homebridge/camera-utils' import { - doesFfmpegSupportCodec, generateSrtpOptions, ReturnAudioTranscoder, RtpSplitter, @@ -137,86 +136,25 @@ class StreamingSessionWrapper { ) } - private listenForAudioPackets(startStreamRequest: StartStreamRequest) { + async activate(request: StartStreamRequest) { const { targetAddress, + video: { port: videoPort }, audio: { port: audioPort }, } = this.prepareStreamRequest, { - audio: { - codec: audioCodec, - sample_rate: audioSampleRate, - packet_time: audioPacketTime, - }, - } = startStreamRequest, - // Repacketize the audio stream after it's been transcoded + audio: { sample_rate: audioSampleRate, packet_time: audioPacketTime }, + } = request, + // use to encrypt Ring video to HomeKit + videoSrtpSession = new SrtpSession(getSessionConfig(this.videoSrtp)), + audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp)), opusRepacketizer = new OpusRepacketizer(audioPacketTime / 20), - audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20, - audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp)) + audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20 - let firstTimestamp: number, + let sentVideo = false, + firstAudioTimestamp: number, audioPacketCount = 0 - this.repacketizeAudioSplitter.addMessageHandler(({ message }) => { - let rtp: RtpPacket | undefined = RtpPacket.deSerialize(message) - - if (audioCodec === AudioStreamingCodecType.OPUS) { - // borrowed from scrypted - // Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/camera-streaming-srtp-sender.ts#L124-L143 - rtp = opusRepacketizer.repacketize(rtp) - - if (!rtp) { - return null - } - - if (!firstTimestamp) { - firstTimestamp = rtp.header.timestamp - } - - // from HAP spec: - // RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception - // that Opus audio RTP Timestamp shall be based on RFC 3550. - // RFC 3550 indicates that PCM audio based with a sample rate of 8k and a packet - // time of 20ms would have a monotonic interval of 8k / (1000 / 20) = 160. - // So 24k audio would have a monotonic interval of (24k / 8k) * 160 = 480. - // HAP spec also states that it may request packet times of 20, 30, 40, or 60. - // In practice, HAP has been seen to request 20 on LAN and 60 over LTE. - // So the RTP timestamp must scale accordingly. - // Further investigation indicates that HAP doesn't care about the actual sample rate at all, - // that's merely a suggestion. When encoding Opus, it can seemingly be an arbitrary sample rate, - // audio will work so long as the rtp timestamps are created properly: which is a construct of the sample rate - // HAP requests, and the packet time is respected, - // opus 48khz will work just fine. - rtp.header.timestamp = - (firstTimestamp + audioPacketCount * 160 * audioIntervalScale) % - 0xffffffff - audioPacketCount++ - } - - // encrypt the packet - const encryptedPacket = audioSrtpSession.encrypt(rtp.payload, rtp.header) - - // send the encrypted packet to HomeKit - this.audioSplitter - .send(encryptedPacket, { - port: audioPort, - address: targetAddress, - }) - .catch(logError) - - return null - }) - } - - async activate(request: StartStreamRequest) { - let sentVideo = false - const { - targetAddress, - video: { port: videoPort }, - } = this.prepareStreamRequest, - // use to encrypt Ring video to HomeKit - videoSrtpSession = new SrtpSession(getSessionConfig(this.videoSrtp)) - // Set up packet forwarding for video stream this.streamingSession.addSubscriptions( this.streamingSession.onVideoRtp.subscribe(({ header, payload }) => { @@ -243,43 +181,48 @@ class StreamingSessionWrapper { }), ) - const transcodingPromise = this.streamingSession.startTranscoding({ - input: ['-vn'], - audio: [ - '-map', - '0:a', - - // OPUS specific - it works, but audio is very choppy - '-acodec', - 'libopus', - '-frame_duration', - request.audio.packet_time, - '-application', - 'lowdelay', - - // Shared options - '-flags', - '+global_header', - '-ac', - `${request.audio.channel}`, - '-ar', - `${request.audio.sample_rate}k`, - '-b:a', - `${request.audio.max_bit_rate}k`, - '-bufsize', - `${request.audio.max_bit_rate * 4}k`, - '-payload_type', - request.audio.pt, - '-ssrc', - this.audioSsrc, - '-f', - 'rtp', - `rtp://127.0.0.1:${await this.repacketizeAudioSplitter - .portPromise}?pkt_size=376`, - ], - video: false, - output: [], - }) + // Set up packet forwarding for audio stream + this.streamingSession.addSubscriptions( + this.streamingSession.onAudioRtp.subscribe((rtp) => { + if (!firstAudioTimestamp) { + firstAudioTimestamp = rtp.header.timestamp + } + + // borrowed from scrypted + // Source reference: https://github.com/koush/scrypted/blob/main/plugins/homekit/src/types/camera/opus-repacketizer.ts + const packets = opusRepacketizer.repacketize(rtp) + + if (!packets) { + return + } + + for (rtp of packets) { + // RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception + // that Opus audio RTP Timestamp shall be based on RFC 3550. + rtp.header.timestamp = + (firstAudioTimestamp + + audioPacketCount * 160 * audioIntervalScale) % + 0xffffffff + audioPacketCount++ + + rtp.header.padding = false + rtp.header.ssrc = this.audioSsrc + rtp.header.payloadType = request.audio.pt + + const encryptedPacket = audioSrtpSession.encrypt( + rtp.payload, + rtp.header, + ) + + this.audioSplitter + .send(encryptedPacket, { + port: audioPort, + address: targetAddress, + }) + .catch(logError) + } + }), + ) let cameraSpeakerActive = false // used to send return audio from HomeKit to Ring @@ -337,14 +280,11 @@ class StreamingSessionWrapper { returnAudioTranscodedSplitter.close() }) - this.listenForAudioPackets(request) await returnAudioTranscoder.start() - await transcodingPromise } stop() { this.audioSplitter.close() - this.repacketizeAudioSplitter.close() this.videoSplitter.close() this.streamingSession.stop() } diff --git a/packages/homebridge-ring/opus-repacketizer.ts b/packages/homebridge-ring/opus-repacketizer.ts index d5cea034..f05cb7f4 100644 --- a/packages/homebridge-ring/opus-repacketizer.ts +++ b/packages/homebridge-ring/opus-repacketizer.ts @@ -1,5 +1,5 @@ -// OpusRepacketizer is borrowed from scrypted -// Original source: https://github.com/koush/scrypted/blob/3150a3033515a3886af1e6b35a0ba7432b63e02b/plugins/homekit/src/types/camera/opus-repacketizer.ts +// OpusRepacketizer is borrowed from Scrypted +// Source references: https://github.com/koush/scrypted/blob/main/plugins/homekit/src/types/camera/opus-repacketizer.ts import type { RtpPacket } from 'werift' @@ -64,17 +64,18 @@ import type { RtpPacket } from 'werift' export class OpusRepacketizer { depacketized: Buffer[] = [] + extraPackets = 0 constructor(public framesPerPacket: number) {} // repacketize a packet with a single frame into a packet with multiple frames. - repacketize(packet: RtpPacket): RtpPacket | undefined { + repacketize(packet: RtpPacket): RtpPacket[] | undefined { const code = packet.payload[0] & 0b00000011 let offset: number // see Frame Length Coding in RFC const decodeFrameLength = () => { - let frameLength = packet.payload.readUInt8(offset) + let frameLength = packet.payload.readUInt8(offset++) if (frameLength >= 252) { offset++ frameLength += packet.payload.readUInt8(offset) * 4 @@ -87,11 +88,15 @@ export class OpusRepacketizer { // code 3: cbr/vbr signaled, variable packets if (code === 0) { - if (this.framesPerPacket === 1 && !this.depacketized.length) return packet + if (this.framesPerPacket === 1 && !this.depacketized.length) { + return [packet] + } // depacketize by stripping off the config byte this.depacketized.push(packet.payload.subarray(1)) } else if (code === 1) { - if (this.framesPerPacket === 2 && !this.depacketized.length) return packet + if (this.framesPerPacket === 2 && !this.depacketized.length) { + return [packet] + } // depacketize by dividing the remaining payload into two equal sized frames const remaining = packet.payload.length - 1 if (remaining % 2) { @@ -101,7 +106,9 @@ export class OpusRepacketizer { this.depacketized.push(packet.payload.subarray(1, 1 + frameLength)) this.depacketized.push(packet.payload.subarray(1 + frameLength)) } else if (code === 2) { - if (this.framesPerPacket === 2 && !this.depacketized.length) return packet + if (this.framesPerPacket === 2 && !this.depacketized.length) { + return [packet] + } offset = 1 // depacketize by dividing the remaining payload into two inequal sized frames const frameLength = decodeFrameLength() @@ -119,7 +126,7 @@ export class OpusRepacketizer { this.framesPerPacket === packetFrameCount && !this.depacketized.length ) { - return packet + return [packet] } const paddingIndicator = frameCountByte & 0b01000000 offset = 2 @@ -146,38 +153,52 @@ export class OpusRepacketizer { } } else { const frameLengths: number[] = [] - for (let i = 0; i < packetFrameCount; i++) { + for (let i = 0; i < packetFrameCount - 1; i++) { const frameLength = decodeFrameLength() frameLengths.push(frameLength) } - for (let i = 0; i < packetFrameCount; i++) { + for (let i = 0; i < frameLengths.length; i++) { const frameLength = frameLengths[i], start = offset offset += frameLength this.depacketized.push(packet.payload.subarray(start, offset)) } + const lastFrameLength = packet.payload.length - padding - offset + this.depacketized.push( + packet.payload.subarray(offset, offset + lastFrameLength), + ) } } - if (this.depacketized.length < this.framesPerPacket) return + if (this.depacketized.length < this.framesPerPacket) return [] + + const ret: RtpPacket[] = [] - const depacketized = this.depacketized.slice(0, this.framesPerPacket) - this.depacketized = this.depacketized.slice(this.framesPerPacket) + // eslint-disable-next-line no-constant-condition + while (true) { + if (this.depacketized.length < this.framesPerPacket) return ret - // reuse the config and stereo indicator, but change the code to 3. - let toc = packet.payload[0] - toc |= 0b00000011 - // vbr | padding indicator | packet count - const frameCountByte = 0b10000000 | this.framesPerPacket, - newHeader: number[] = [toc, frameCountByte] + const depacketized = this.depacketized.slice(0, this.framesPerPacket) + this.depacketized = this.depacketized.slice(this.framesPerPacket) - // M-1 length bytes - newHeader.push(...depacketized.slice(0, -1).map((data) => data.length)) + // reuse the config and stereo indicator, but change the code to 3. + let toc = packet.payload[0] + toc |= 0b00000011 + // vbr | padding indicator | packet count + const frameCountByte = 0b10000000 | this.framesPerPacket, + newHeader: number[] = [toc, frameCountByte] - const headerBuffer = Buffer.from(newHeader), - payload = Buffer.concat([headerBuffer, ...depacketized]) + // M-1 length bytes + newHeader.push(...depacketized.slice(0, -1).map((data) => data.length)) - packet.payload = payload - return packet + const headerBuffer = Buffer.from(newHeader), + payload = Buffer.concat([headerBuffer, ...depacketized]), + newPacket = packet.clone() + if (ret.length) this.extraPackets++ + newPacket.header.sequenceNumber = + (packet.header.sequenceNumber + this.extraPackets + 0x10000) % 0x10000 + newPacket.payload = payload + ret.push(newPacket) + } } }