Skip to content

Commit 9a39b39

Browse files
committed
Moving Background Blur to GPU
This adds - `BackgroundBlurGPUProcessor` a file that handles the openGL logic, and acts as an abstraction over the inner workings of the image processing. OpenGL's logic is ... very unclear. I tried my best to document as much as possible, including any linear algebra that I had to perform. - `background_blur_vertex.glsl` the required vertex shader, needed to identify the position of each pixel on the screen, before the geometry, rasterization, and fragment shader steps of the openGL pipeline. - `gaussian_blur_frag_shader.glsl` the Gaussian Blur implementation, this is what actually performs the logic on the image pixels. The code is also somewhat unclear if you're not familiar with shaders or gaussian blur. It helps to read the technical article from Intel that I cited in the comments. - `seg_mask_frag_shader.glsl` the masking operation implementation, very simple, just checking whether the pixel should be blurred or retain its original unblurred value - `testing_frag_shader` a simple fragment shader used for quick testing - `libyuv_android` a lightweight helper library for converting between image formats without the overhead of OpenCV Significant changes - `BackgroundBlurFrameProcessor` now holds its own dedicated Thread and Handler for running OpenGL's GPU operations. It also takes in a reference to the `surfaceTextureHelper` from `CallActivity` to send the `VideoFrame` objects to the WebRTC `VideoSink` on the appropriate capture thread. Failing to do both of these causes a crash. Signed-off-by: rapterjet2004 <juliuslinus1@gmail.com>
1 parent 2934ffc commit 9a39b39

17 files changed

Lines changed: 1319 additions & 1 deletion

REUSE.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,9 @@ path = "app/src/main/res/raw/next_voice_message_doodle.ogg"
3434
precedence = "aggregate"
3535
SPDX-FileCopyrightText = "2024 Paciosoft"
3636
SPDX-License-Identifier = "CC-BY-4.0"
37+
38+
[[annotations]]
39+
path = "app/src/main/assets/selfie_segmenter.tflite"
40+
precedence = "aggregate"
41+
SPDX-FileCopyrightText = "Apache License Version 2.0, January 2004 http://www.apache.org/licenses/"
42+
SPDX-License-Identifier = "Apache-2.0"

app/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,10 @@ dependencies {
369369
testImplementation("com.squareup.okhttp3:mockwebserver:$okhttpVersion")
370370
testImplementation("com.google.dagger:hilt-android-testing:2.59.2")
371371
testImplementation("org.robolectric:robolectric:4.16.1")
372+
373+
// Computer Vision - for background effects during video calls
374+
implementation 'com.google.mediapipe:tasks-vision:0.10.26'
375+
implementation "io.github.crow-misia.libyuv:libyuv-android:0.43.2"
372376
}
373377

374378
tasks.register('installGitHooks', Copy) {
244 KB
Binary file not shown.

app/src/main/java/com/nextcloud/talk/activities/CallActivity.kt

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ import com.nextcloud.talk.call.ReactionAnimator
7373
import com.nextcloud.talk.call.components.ParticipantGrid
7474
import com.nextcloud.talk.call.components.SelfVideoView
7575
import com.nextcloud.talk.call.components.screenshare.ScreenShareComponent
76+
import com.nextcloud.talk.camera.BackgroundBlurFrameProcessor
77+
import com.nextcloud.talk.camera.BlurBackgroundViewModel
78+
import com.nextcloud.talk.camera.BlurBackgroundViewModel.BackgroundBlurOn
7679
import com.nextcloud.talk.chat.ChatActivity
7780
import com.nextcloud.talk.data.user.model.User
7881
import com.nextcloud.talk.databinding.CallActivityBinding
@@ -185,7 +188,6 @@ import java.util.Objects
185188
import java.util.concurrent.TimeUnit
186189
import java.util.concurrent.atomic.AtomicInteger
187190
import javax.inject.Inject
188-
import kotlin.String
189191
import kotlin.math.abs
190192

191193
@AutoInjector(NextcloudTalkApplication::class)
@@ -214,6 +216,7 @@ class CallActivity : CallBaseActivity() {
214216
var audioManager: WebRtcAudioManager? = null
215217
var callRecordingViewModel: CallRecordingViewModel? = null
216218
var raiseHandViewModel: RaiseHandViewModel? = null
219+
val blurBackgroundViewModel: BlurBackgroundViewModel = BlurBackgroundViewModel()
217220
private var mReceiver: BroadcastReceiver? = null
218221
private var peerConnectionFactory: PeerConnectionFactory? = null
219222
private var screenSharePeerConnectionFactory: PeerConnectionFactory? = null
@@ -539,6 +542,20 @@ class CallActivity : CallBaseActivity() {
539542
}
540543
}
541544

545+
private fun initBackgroundBlurViewModel(surfaceTextureHelper: SurfaceTextureHelper) {
546+
blurBackgroundViewModel.viewState.observe(this) { state ->
547+
val isOn = state == BackgroundBlurOn
548+
549+
val processor = if (isOn) {
550+
BackgroundBlurFrameProcessor(context, surfaceTextureHelper)
551+
} else {
552+
null
553+
}
554+
555+
videoSource?.setVideoProcessor(processor)
556+
}
557+
}
558+
542559
private fun processExtras(extras: Bundle) {
543560
roomId = extras.getString(KEY_ROOM_ID, "")
544561
roomToken = extras.getString(KEY_ROOM_TOKEN, "")
@@ -1116,6 +1133,7 @@ class CallActivity : CallBaseActivity() {
11161133
videoSource = peerConnectionFactory!!.createVideoSource(false)
11171134

11181135
videoCapturer!!.initialize(surfaceTextureHelper, applicationContext, videoSource!!.capturerObserver)
1136+
initBackgroundBlurViewModel(surfaceTextureHelper)
11191137
}
11201138
localVideoTrack = peerConnectionFactory!!.createVideoTrack("NCv0", videoSource)
11211139
localStream!!.addTrack(localVideoTrack)
@@ -1250,6 +1268,7 @@ class CallActivity : CallBaseActivity() {
12501268
binding!!.cameraButton.setImageResource(R.drawable.ic_videocam_white_24px)
12511269
} else {
12521270
binding!!.cameraButton.setImageResource(R.drawable.ic_videocam_off_white_24px)
1271+
blurBackgroundViewModel.turnOffBlur()
12531272
}
12541273
toggleMedia(videoOn, true)
12551274
} else if (shouldShowRequestPermissionRationale(Manifest.permission.CAMERA)) {
@@ -1326,6 +1345,10 @@ class CallActivity : CallBaseActivity() {
13261345
raiseHandViewModel!!.clickHandButton()
13271346
}
13281347

1348+
fun toggleBackgroundBlur() {
1349+
blurBackgroundViewModel.toggleBackgroundBlur()
1350+
}
1351+
13291352
public override fun onDestroy() {
13301353
if (signalingMessageReceiver != null) {
13311354
signalingMessageReceiver!!.removeListener(localParticipantMessageListener)
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
/*
2+
* Nextcloud Talk - Android Client
3+
*
4+
* SPDX-FileCopyrightText: 2025 Julius Linus <juliuslinus1@gmail.com>
5+
* SPDX-License-Identifier: GPL-3.0-or-later
6+
*/
7+
8+
package com.nextcloud.talk.camera
9+
10+
import android.content.Context
11+
import android.os.Handler
12+
import android.os.HandlerThread
13+
import android.util.Log
14+
import android.util.LruCache
15+
import io.github.crow_misia.libyuv.AbgrBuffer
16+
import io.github.crow_misia.libyuv.I420Buffer
17+
import io.github.crow_misia.libyuv.PlanePrimitive
18+
import org.webrtc.JavaI420Buffer
19+
import org.webrtc.SurfaceTextureHelper
20+
import org.webrtc.VideoFrame
21+
import org.webrtc.VideoProcessor
22+
import org.webrtc.VideoSink
23+
import org.webrtc.YuvHelper
24+
import java.nio.ByteBuffer
25+
26+
@Suppress("TooGenericExceptionCaught")
/**
 * A WebRTC [VideoProcessor] that blurs the background of captured camera frames.
 *
 * Pipeline per frame:
 *  1. [onFrameCaptured] converts the captured I420 frame to ABGR and hands it to MediaPipe
 *     ([ImageSegmenterHelper]) for person segmentation, stashing rotation and pixel data
 *     in LRU caches keyed by the frame timestamp.
 *  2. [onResults] (segmentation callback) posts the mask + original pixels to a dedicated
 *     GL thread where [BackgroundBlurGPUProcessor] blurs the background.
 *  3. The blurred ABGR output is converted back to I420 and delivered to the WebRTC
 *     [VideoSink] on the capture thread (via [surfaceTextureHelper]'s handler) — WebRTC
 *     requires frames to arrive on that thread.
 */
class BackgroundBlurFrameProcessor(val context: Context, val surfaceTextureHelper: SurfaceTextureHelper) :
    VideoProcessor,
    ImageSegmenterHelper.SegmenterListener {

    companion object {
        // BUG FIX: inside a companion object `this::class` resolves to the Companion
        // class, so the old `this::class.java.simpleName` produced "Companion" in logs.
        // Name the enclosing class explicitly instead.
        val TAG: String = BackgroundBlurFrameProcessor::class.java.simpleName
        const val GPU_THREAD: String = "BackgroundBlur"
        const val FLOAT_ROTATION = 180.0f

        // Bytes per ABGR pixel (4 channels, 1 byte each) — used as the row-stride factor.
        const val INT_4 = 4
        const val MAX_NUM_FRAMES = 10
    }

    private var sink: VideoSink? = null
    private var segmenterHelper: ImageSegmenterHelper? = null
    private var backgroundBlurGPUProcessor: BackgroundBlurGPUProcessor? = null

    /* Per-frame metadata shared between the MediaPipe callback thread and the GL render
       thread. LRU caches (capacity MAX_NUM_FRAMES) are used instead of plain maps so that
       if MediaPipe drops a frame and its entry is never consumed, the stale entry is
       eventually evicted rather than leaking memory for the lifetime of the call.
       NOTE(review): entries are keyed by `resultBundle.inferenceTime` on the read side but
       inserted with `videoFrame.timestampNs` — this only works if the segmenter echoes the
       timestamp back as "inferenceTime"; confirm against ImageSegmenterHelper. */
    private var rotationMap = LruCache<Long, Float>(MAX_NUM_FRAMES)
    private val frameBufferMap = LruCache<Long, ByteBuffer>(MAX_NUM_FRAMES)

    // Dedicated thread + handler for all OpenGL work; GL contexts are thread-affine.
    private var glThread: HandlerThread? = null
    private var glHandler: Handler? = null

    // SegmentationListener Interface

    override fun onError(error: String, errorCode: Int) {
        Log.e(TAG, "Error $errorCode: $error")
    }

    /**
     * Segmentation callback: looks up the stashed frame data, runs the GPU blur on the GL
     * thread, then forwards the re-encoded frame to the sink on the capture thread.
     */
    override fun onResults(resultBundle: ImageSegmenterHelper.ResultBundle) {
        val rotation = rotationMap[resultBundle.inferenceTime] ?: 0f
        val frameBuffer = frameBufferMap[resultBundle.inferenceTime]

        // Remove once used to prevent mem leaks
        rotationMap.synchronizedRemove(resultBundle.inferenceTime)
        frameBufferMap.synchronizedRemove(resultBundle.inferenceTime)

        if (frameBuffer == null) {
            Log.e(TAG, "Critical Error in onResults: FrameBufferMap[${resultBundle.inferenceTime}] was null")
            return
        }

        glHandler?.post {
            // This block runs safely on gpu thread
            backgroundBlurGPUProcessor?.let { scaler ->
                try {
                    val drawArray = scaler.process(
                        resultBundle.mask,
                        frameBuffer,
                        resultBundle.width,
                        resultBundle.height,
                        rotation
                    )

                    val webRTCBuffer = drawArray.convertToWebRTCBuffer(resultBundle.width, resultBundle.height)
                    val videoFrame = VideoFrame(webRTCBuffer, 0, resultBundle.inferenceTime)

                    // WebRTC expects frames on the capture thread, so hop via the
                    // SurfaceTextureHelper's handler before delivering to the sink.
                    surfaceTextureHelper.handler.post {
                        Log.d(TAG, "Sent VideoFrame to sink on :${Thread.currentThread().name}")
                        sink?.onFrame(videoFrame)

                        // Balance the ref-count we own on webRTCBuffer. The sink retains
                        // the frame itself if it needs it beyond this call.
                        videoFrame.release()
                    }
                } catch (e: Exception) {
                    Log.e(TAG, "Error processing frame on GL Thread", e)
                }
            }
        }
    }

    // Video Processor Interface

    /** Spins up the segmenter and the dedicated GL thread, then initializes GL state on it. */
    override fun onCapturerStarted(success: Boolean) {
        segmenterHelper = ImageSegmenterHelper(context = context, imageSegmenterListener = this)

        glThread = HandlerThread(GPU_THREAD).apply { start() }
        glHandler = Handler(glThread!!.looper)
        glHandler?.post {
            backgroundBlurGPUProcessor = BackgroundBlurGPUProcessor(context)
            backgroundBlurGPUProcessor?.init()
        }
    }

    /** Tears down the segmenter, releases GL resources on the GL thread, then quits it. */
    override fun onCapturerStopped() {
        segmenterHelper?.destroyImageSegmenter()
        glHandler?.post {
            backgroundBlurGPUProcessor?.release()
            backgroundBlurGPUProcessor = null

            // Quit thread after cleanup
            glThread?.quitSafely()
            glThread = null
            glHandler = null
        }
    }

    /**
     * Entry point for every captured frame: converts to ABGR, records rotation and pixels
     * keyed by timestamp, and submits the frame for asynchronous segmentation.
     */
    override fun onFrameCaptured(videoFrame: VideoFrame) {
        val i420WebRTCBuffer = videoFrame.buffer.toI420()
        val width = videoFrame.buffer.width
        val height = videoFrame.buffer.height
        val rotation = FLOAT_ROTATION - videoFrame.rotation
        val videoFrameBuffer = i420WebRTCBuffer?.convertToABGR()

        i420WebRTCBuffer?.release()

        if (videoFrameBuffer != null) {
            rotationMap.synchronizedPut(videoFrame.timestampNs, rotation)
            frameBufferMap.synchronizedPut(videoFrame.timestampNs, videoFrameBuffer)
            segmenterHelper?.segmentFrame(videoFrameBuffer, width, height, videoFrame.timestampNs)
        } else {
            // BUG FIX: the original fallback was `?: { ... }`, which constructs a lambda
            // literal and never invokes it — the error was never logged and the frame was
            // silently dropped. Forward the unprocessed frame so video keeps flowing.
            Log.e(TAG, "onFrameCaptured:: Video Frame was null!")
            sink?.onFrame(videoFrame)
        }
    }

    override fun setSink(sink: VideoSink?) {
        this.sink = sink
    }

    /**
     * Converts a WebRTC I420 buffer to a packed ABGR [ByteBuffer] via libyuv,
     * wrapping the existing planes without copying the source.
     */
    fun VideoFrame.I420Buffer.convertToABGR(): ByteBuffer {
        val dataYSize = dataY.limit() - dataY.position()
        val dataUSize = dataU.limit() - dataU.position()
        val dataVSize = dataV.limit() - dataV.position()

        val planeY = PlanePrimitive.create(strideY, dataY, dataYSize)
        val planeU = PlanePrimitive.create(strideU, dataU, dataUSize)
        val planeV = PlanePrimitive.create(strideV, dataV, dataVSize)

        val libYuvI420Buffer = I420Buffer.wrap(planeY, planeU, planeV, width, height)
        val libYuvABGRBuffer = AbgrBuffer.allocate(width, height)
        libYuvI420Buffer.convertTo(libYuvABGRBuffer)

        return libYuvABGRBuffer.asBuffer()
    }

    // Thread-safe put. NOTE(review): android.util.LruCache already synchronizes its own
    // methods internally, so this extra lock is belt-and-braces; the unsynchronized `get`
    // in onResults is safe for the same reason.
    inline fun <reified K, V> LruCache<K, V>.synchronizedPut(key: K, value: V) {
        synchronized(this) {
            this.put(key, value)
        }
    }

    // Thread-safe remove; see note on synchronizedPut.
    inline fun <reified K, V> LruCache<K, V>.synchronizedRemove(key: K) {
        synchronized(this) {
            this.remove(key)
        }
    }

    /**
     * Converts a packed ABGR byte array back into a WebRTC [JavaI420Buffer].
     * The returned buffer's planes are GC-managed direct buffers (release callback null).
     */
    fun ByteArray.convertToWebRTCBuffer(width: Int, height: Int): JavaI420Buffer {
        val src = ByteBuffer.allocateDirect(this.size)
        src.put(this)
        // Reset position to 0 after filling. JNI consumers generally use the buffer's base
        // address regardless of position, but rewinding keeps the buffer state well-defined.
        src.rewind()

        val srcStride = width * INT_4
        val yPlaneSize = width * height
        val uvPlaneSize = (width / 2) * (height / 2)

        val dstYStride = width
        val dstUStride = width / 2
        val dstVStride = width / 2

        val dstYBuffer = ByteBuffer.allocateDirect(yPlaneSize)
        val dstUBuffer = ByteBuffer.allocateDirect(uvPlaneSize)
        val dstVBuffer = ByteBuffer.allocateDirect(uvPlaneSize)

        YuvHelper.ABGRToI420(
            src,
            srcStride,
            dstYBuffer,
            dstYStride,
            dstUBuffer,
            dstUStride,
            dstVBuffer,
            dstVStride,
            width,
            height
        )

        return JavaI420Buffer.wrap(
            width,
            height,
            dstYBuffer,
            dstYStride,
            dstUBuffer,
            dstUStride,
            dstVBuffer,
            dstVStride,
            null
        )
    }
}

0 commit comments

Comments
 (0)