25 changes: 19 additions & 6 deletions app/background.cc
@@ -9,6 +9,8 @@
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include <lib/libbackscrub.h>

// Internal state of background processing
struct background_t {
int debug;
@@ -18,6 +20,7 @@ struct background_t {
int frame;
double fps;
cv::Mat raw;
int bg_stored;

Collaborator:
bool perhaps as it's used only for true/false?

Author:
You are right; I have changed it to bool. It is initialized with false and set with true.

std::mutex rawmux;
cv::Mat thumb;
std::mutex thumbmux;
@@ -130,6 +133,7 @@ std::shared_ptr<background_t> load_background(const std::string& path, int debug
pbkd->debug = debug;
pbkd->video = false;
pbkd->run = false;
pbkd->bg_stored = false;
pbkd->cap.open(path, cv::CAP_ANY); // explicitly ask for auto-detection of backend
if (!pbkd->cap.isOpened()) {
if (pbkd->debug) fprintf(stderr, "background: cap cannot open: %s\n", path.c_str());
@@ -143,7 +147,7 @@ std::shared_ptr<background_t> load_background(const std::string& path, int debug
// if: can read 2 video frames => it's a video
// else: is loaded as an image => it's an image
// else: it's not usable.
if (pbkd->cap.read(pbkd->raw) && pbkd->cap.read(pbkd->raw)) {
if (cnt > -1) {

Collaborator:
If I recall, I chose this current method as cnt could be > 1 for some image files (multiple resolutions?) but they would not play as a video... please test with all the variations in backgrounds folder 😄

Author:
I will perform the tests. Using the fps, which should be greater than 0.0 for a video, may be a better approach.

Author:
Unfortunately, my system does not work with animated.gif, so I can't check whether systems that do support it report a frame rate of 45 fps (the value obtained when converting the file to a stream via ffmpeg).

A JPEG file may contain a thumbnail image; in that case reading twice succeeds and the file is recognized as a video. Reading three times would give the expected result, but that does not seem like the right approach.

The picture/video test is now based on the fps. I think this should always work.
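
A minimal sketch of what such an fps-based check could look like (assuming the properties are queried with cap.get() right after opening the capture in load_background(); the 0.001 threshold is an assumption):

// Sketch only: still images typically report an fps of 0,
// so a plausible frame rate is used as the video indicator.
double fps = pbkd->cap.get(cv::CAP_PROP_FPS);
if (fps > 0.001) {
	// video: reset the position and start the reader thread
} else {
	// still image: read it once and cache the resized copy
}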

Collaborator:
I'll grab your PR and check what I find here in the next day or so - thanks.

Collaborator:
OK, I've checked the behaviour as the current PR has it (fps > 0), and it detects all the backgrounds as videos, but does not fail, since the video loop logic resets the position on each request... we may be able to simplify all this to assume video at all times?

Author:
I am surprised!
The backscrub binary from the cropping branch works as expected: a video is recognized as a video and pictures as pictures.

For total_landscaping.jpg I get as output:

background properties:
	vid: no
	fcc: 00000000 ()
	fps: 0.000000
	cnt: -1

What does your system report?

I use Fedora 36 XFCE with OpenCV 4.5.5. What do you use?
Maybe we should check for fps > 0.001 in order to have the right condition.

I have tried the following in my development environment:

// read two frames in sequence; for a real video both reads succeed
// and both frames have the same width
int w1 = 0, w2 = -1;
if (pbkd->cap.read(pbkd->raw)) {
	w1 = pbkd->raw.cols;
	if (pbkd->cap.read(pbkd->raw)) {
		w2 = pbkd->raw.cols;
	}
}
if (w2 == w1) {
	// treat as video
}

This is basically the same as the old condition, but it additionally requires that the widths of two consecutive frames are the same.
The final test passes if we have a video and fails if we have an image, even one with an embedded thumbnail (which must be smaller).

Detection was okay on my system.

Author:
According to the OpenCV documentation, cap.get(cv::CAP_PROP_FRAME_COUNT) returns the 'Number of frames in the video file'.
The original intention of using the count as a video/still-image flag was fine; asking for CAP_PROP_FPS should also work.

Working with images as if they were a video is not a great idea. If the image is large, repeatedly reading it takes more time than retrieving the thumb once, as now done within background.cc (your suggestion).

Author:
@phlash "If I recall, I chose this current method as cnt could be > 1 for some image files (multiple resolutions?) but they would not play as a video... please test with all the variations in backgrounds folder 😄"
That was your statement, but it should not be true given the definition of CAP_PROP_FRAME_COUNT.
For my image, which the old code recognizes as a video, I get -1 as the value; an image is not counted as a frame.

Collaborator (@phlash, Sep 14, 2022):
@jjsarton I'm running on Debian stable (11), OpenCV 4.5.1, here's what I get for each background type:

  • animated.gif => vid: yes, fps: 45.0.., cnt: 36
  • background_bauhaus.png => vid: yes, fps: 25.0.., cnt: -2147483648
  • rotating_earth.webm => vid: yes, fps: 30.0.., cnt: 916
  • total_landscaping.jpg => vid: yes, fps: 25.0.., cnt: 1

...so this looks like we are heavily dependent on unstable OpenCV behaviour 😞, which is the reason I chose to ignore both fps and cnt and instead attempt to load two frames in sequence.

// it's a video, try a reset and start reader thread..
if (pbkd->cap.set(cv::CAP_PROP_POS_FRAMES, 0))
pbkd->frame = 0;
@@ -183,13 +187,22 @@ int grab_background(std::shared_ptr<background_t> pbkd, int width, int height, c
if (pbkd->video) {
// grab frame & frame no. under mutex
std::unique_lock<std::mutex> hold(pbkd->rawmux);
cv::resize(pbkd->raw, out, cv::Size(width, height));
cv::Rect crop = bs_calc_cropping(pbkd->raw.cols, pbkd->raw.rows, width, height);
cv::resize(pbkd->raw(crop), out, cv::Size(width, height));
frm = pbkd->frame;
} else {
// resize still image as requested into out
cv::resize(pbkd->raw, out, cv::Size(width, height));
frm = 1;
}
if (!pbkd->bg_stored) {
// resize still image as requested into out
cv::Rect crop = bs_calc_cropping(pbkd->raw.cols, pbkd->raw.rows, width, height);
// Under some circumstances we must do the job in two steps!
// Otherwise this resize(pbkd->raw(crop), pbkd->raw, ...) may fail.
pbkd->raw(crop).copyTo(pbkd->raw);
cv::resize(pbkd->raw, pbkd->raw, cv::Size(width, height));
pbkd->bg_stored = true;
}
out = pbkd->raw ;
frm = 1;
}
return frm;
}

57 changes: 47 additions & 10 deletions app/deepseg.cc
@@ -84,7 +84,7 @@ std::optional<std::pair<size_t, size_t>> geometryFromString(const std::string& i
}

// OpenCV helper functions
cv::Mat convert_rgb_to_yuyv( cv::Mat input ) {
cv::Mat convert_rgb_to_yuyv(cv::Mat input) {
cv::Mat tmp;
cv::cvtColor(input, tmp, cv::COLOR_RGB2YUV);
std::vector<cv::Mat> yuv;
@@ -372,6 +372,7 @@ int main(int argc, char* argv[]) try {
bool flipVertical = false;
int fourcc = 0;
size_t blur_strength = 0;
cv::Rect crop_region(0, 0, 0, 0);

const char* modelname = "selfiesegmentation_mlkit-256x256-2021_01_19-v1215.f16.tflite";

@@ -568,6 +569,12 @@
if (expWidth != vidGeo.value().first) {
fprintf(stderr, "Warning: virtual camera aspect ratio does not match capture device.\n");
}
// calculate crop region, only if result always smaller
if (expWidth != vidGeo->first) {
crop_region = bs_calc_cropping(
capGeo->first, capGeo->second,
vidGeo->first, vidGeo->second);
}

// dump settings..
printf("debug: %d\n", debug);
@@ -600,7 +607,11 @@
}
}
// default green screen background (at capture true geometry)
cv::Mat bg = cv::Mat(capGeo.value().second, capGeo.value().first, CV_8UC3, cv::Scalar(0, 255, 0));
std::pair<size_t, size_t> bg_dim = *capGeo;
if (crop_region.height) {
bg_dim = {crop_region.width, crop_region.height};
}
cv::Mat bg(bg_dim.second, bg_dim.first, CV_8UC3, cv::Scalar(0, 255, 0));

// Virtual camera (at specified geometry)
int lbfd = loopback_init(s_vcam, vidGeo.value().first, vidGeo.value().second, debug);
@@ -613,11 +624,24 @@
loopback_free(lbfd);
});


// Processing components, all at capture true geometry
cv::Mat mask(capGeo.value().second, capGeo.value().first, CV_8U);
std::pair<size_t, size_t> mask_dim = *capGeo;
if (crop_region.height) {
mask_dim = {crop_region.width, crop_region.height};
}
cv::Mat mask(mask_dim.second, mask_dim.first, CV_8U);

cv::Mat raw;
CalcMask ai(s_model.value(), threads, capGeo.value().first, capGeo.value().second);
int aiw,aih;
if (!crop_region.width) {
aiw=capGeo->first;
aih=capGeo->second;
} else {
aiw=crop_region.width;
aih=crop_region.height;
}
CalcMask ai(*s_model, threads, aiw, aih);

ti.lastns = timestamp();
printf("Startup: %ldns\n", diffnanosecs(ti.lastns,ti.bootns));

@@ -631,22 +655,35 @@
// copy new frame to buffer
cap.retrieve(raw);
ti.retrns = timestamp();

if (raw.rows == 0 || raw.cols == 0) continue; // sanity check

if (crop_region.height) {
raw(crop_region).copyTo(raw);
}
ai.set_input_frame(raw);
ti.copyns = timestamp();

if (raw.rows == 0 || raw.cols == 0) continue; // sanity check
// do background detection magic
ai.get_output_mask(mask);
ti.copyns = timestamp();

if (filterActive) {
// do background detection magic
ai.get_output_mask(mask);

// get background frame:
// - specified source if set
// - copy of input video if blur_strength != 0
// - default green (initial value)
bool canBlur = false;
if (pbk) {
if (grab_background(pbk, capGeo.value().first, capGeo.value().second, bg)<0)
int tw,th;
if (crop_region.height) {
tw = crop_region.width;
th = crop_region.height;
} else {
tw = capGeo->first;
th = capGeo->second;
}
if (grab_background(pbk, tw, th, bg) < 0)
throw "Failed to read background frame";
canBlur = true;
} else if (blur_strength) {
27 changes: 26 additions & 1 deletion lib/libbackscrub.cc
@@ -365,7 +365,11 @@ bool bs_maskgen_process(void *context, cv::Mat &frame, cv::Mat &mask) {

// scale up into full-sized mask
cv::Mat tmpbuf;
cv::resize(ctx.ofinal(ctx.in_roidim),tmpbuf,ctx.mroi.size());
// with body-pix-float-050-8.tflite the size of ctx.ofinal is 33x33
// and the wanted roi may be greater than 33x33, so we can crash with
// cv::resize(ctx.ofinal(ctx.in_roidim),tmpbuf,ctx.mroi.size());
ctx.ofinal.copyTo(tmpbuf);
cv::resize(tmpbuf, tmpbuf, ctx.mroi.size());

Collaborator (@phlash, Sep 10, 2022):
Not sure this is correct? Selecting an ROI from the final output is done because we may have centred the frame into the model earlier (line 289), and this change removes that selection step. The calculations at line 237 onward should ensure that in_roidim cannot be larger than ofinal (unless the model itself has output dim < input dim, which I have never seen but I guess could occur, and which is indeed the case for the bodypix model). Looks like some assumptions in this code need reviewing, and an additional model output roidim should be calculated to use here.

After going away and thinking about this - this is a separate bug that you do not need to fix in this PR. Let's raise another issue for this.

Author:
I modified the original code because I had a crash within OpenCV; the model I used has an output of 33x33.
I have already performed a lot of tests and never saw an error here, but I will check this again.

Author:
I have just tested this again. With the old code, using backscrub/models/body-pix-float-050-8.tflite, I get a crash again; with my correction it works well.
The real size of ofinal is 33x33, while ctx.in_roidim gives a size of 256x256!

Collaborator:
Yep - it's because they have an output stride variable in the model, which is 8 for this one (per the filename), so the output is (input dim - 1)/stride + 1 == (257 - 1)/8 + 1 == 33.

I'm happy to leave the crash for now, you are officially "not making it worse" with this PR 😁, and we'll probably want to revisit the logic in multiple places to fix this properly.
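
As a small illustration of that relation (a sketch, using the usual BodyPix convention that output size = (input size - 1) / stride + 1; the helper name is made up):

// Hypothetical helper showing how the mask output size follows from input size and output stride.
int model_output_dim(int input_dim, int output_stride) {
	return (input_dim - 1) / output_stride + 1;
}
// model_output_dim(257, 8) == 33, matching the 33x33 ofinal discussed above.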

Collaborator:
This is now covered by issue #156, and thus can be fixed by a separate PR that properly deals with the fact that we do not account for models that have different input and output sizes at all.


// blur at full size for maximum smoothness
cv::blur(tmpbuf,ctx.mroi,ctx.blur);
@@ -375,3 +379,24 @@
return true;
}

cv::Rect bs_calc_cropping(int inWidth, int inHeight, int targetWidth, int targetHigh) {
// if the input and output aspect ratio are not the same
// we can crop the source image. For example if the
// input image has a 16:9 (1280x720) ratio and the output is 4:3 (960x720)
// we will return the cropRegion set as x=160, width=960, y=0, height=720
// which is the centered part of the original image
cv::Rect cropRegion = {0, 0, 0, 0};
float sc = (float)targetWidth / inWidth;
float st = (float)targetHigh / inHeight;
sc = st > sc ? st : sc;

int sx = (int)(targetWidth / sc) - inWidth;
cropRegion.x = (sx < 0 ? -sx : sx) / 2;

int sy = (int)(targetHigh / sc) - inHeight;
cropRegion.y = (sy < 0 ? -sy : sy) / 2;

cropRegion.width = inWidth - cropRegion.x * 2;
cropRegion.height = inHeight - cropRegion.y * 2;
return cropRegion;
}
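
For reference, a minimal usage sketch of the new bs_calc_cropping() helper, mirroring how grab_background() uses it above (the input file and the 960x720 target are assumptions taken from the discussion and the worked example in the comment):

#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <lib/libbackscrub.h>

int main() {
	// Load a 16:9 still (hypothetical path), centre-crop it to 4:3, then scale to 960x720.
	cv::Mat raw = cv::imread("backgrounds/total_landscaping.jpg");
	cv::Rect crop = bs_calc_cropping(raw.cols, raw.rows, 960, 720);
	cv::Mat out;
	cv::resize(raw(crop), out, cv::Size(960, 720));
	return 0;
}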
2 changes: 2 additions & 0 deletions lib/libbackscrub.h
@@ -38,4 +38,6 @@ extern void bs_maskgen_delete(void *context);
// Process a video frame into a mask
extern bool bs_maskgen_process(void *context, cv::Mat& frame, cv::Mat &mask);

extern cv::Rect bs_calc_cropping(int inWidth, int inHeight, int targetWidth, int targetHight);

#endif