diff --git a/gst/compositor/blend.c b/gst/compositor/blend.c index 41e20c033b..6eeba1c351 100644 --- a/gst/compositor/blend.c +++ b/gst/compositor/blend.c @@ -42,7 +42,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_compositor_blend_debug); #define BLEND_A32(name, method, LOOP) \ static void \ method##_ ##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ - gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \ + gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \ + gint dst_y_end, GstCompositorBlendMode mode) \ { \ guint s_alpha; \ gint src_stride, dest_stride; \ @@ -65,23 +66,26 @@ method##_ ##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ if (G_UNLIKELY (s_alpha == 0)) \ return; \ \ + if (dst_y_end > dest_height) { \ + dst_y_end = dest_height; \ + } \ /* adjust src pointers for negative sizes */ \ if (xpos < 0) { \ src += -xpos * 4; \ src_width -= -xpos; \ xpos = 0; \ } \ - if (ypos < 0) { \ - src += -ypos * src_stride; \ - src_height -= -ypos; \ - ypos = 0; \ + if (ypos < dst_y_start) { \ + src += (dst_y_start - ypos) * src_stride; \ + src_height -= dst_y_start - ypos; \ + ypos = dst_y_start; \ } \ /* adjust width/height if the src is bigger than dest */ \ if (xpos + src_width > dest_width) { \ src_width = dest_width - xpos; \ } \ - if (ypos + src_height > dest_height) { \ - src_height = dest_height - ypos; \ + if (ypos + src_height > dst_y_end) { \ + src_height = dst_y_end - ypos; \ } \ \ if (src_height > 0 && src_width > 0) { \ @@ -173,20 +177,21 @@ BLEND_A32 (bgra, overlay, _overlay_loop_argb); #define A32_CHECKER_C(name, RGB, A, C1, C2, C3) \ static void \ -fill_checker_##name##_c (GstVideoFrame * frame) \ +fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \ { \ gint i, j; \ gint val; \ static const gint tab[] = { 80, 160, 80, 160 }; \ - gint width, height; \ + gint width, stride; \ guint8 *dest; \ \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); 
\ - height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ \ + dest += y_start * stride; \ if (!RGB) { \ - for (i = 0; i < height; i++) { \ + for (i = y_start; i < y_end; i++) { \ for (j = 0; j < width; j++) { \ dest[A] = 0xff; \ dest[C1] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \ @@ -196,7 +201,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \ } \ } \ } else { \ - for (i = 0; i < height; i++) { \ + for (i = y_start; i < y_end; i++) { \ for (j = 0; j < width; j++) { \ val = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \ dest[A] = 0xFF; \ @@ -220,17 +225,17 @@ A32_CHECKER_C (vuya, FALSE, 3, 2, 1, 0); #define A32_COLOR(name, RGB, A, C1, C2, C3) \ static void \ -fill_color_##name (GstVideoFrame * frame, gint Y, gint U, gint V) \ +fill_color_##name (GstVideoFrame * frame, guint y_start, guint y_end, gint Y, gint U, gint V) \ { \ gint c1, c2, c3; \ guint32 val; \ - gint width, height; \ + gint stride; \ guint8 *dest; \ \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ - width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ - height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ \ + dest += y_start * stride; \ if (RGB) { \ c1 = YUV_TO_R (Y, U, V); \ c2 = YUV_TO_G (Y, U, V); \ @@ -242,7 +247,7 @@ fill_color_##name (GstVideoFrame * frame, gint Y, gint U, gint V) \ } \ val = GUINT32_FROM_BE ((0xff << A) | (c1 << C1) | (c2 << C2) | (c3 << C3)); \ \ - compositor_orc_splat_u32 ((guint32 *) dest, val, height * width); \ + compositor_orc_splat_u32 ((guint32 *) dest, val, (y_end - y_start) * (stride / 4)); \ } A32_COLOR (argb, TRUE, 24, 16, 8, 0); @@ -291,7 +296,8 @@ _blend_##format_name (const guint8 * src, guint8 * dest, \ \ static void \ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ - gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \ + gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \ + gint dst_y_end, 
GstCompositorBlendMode mode) \ { \ const guint8 *b_src; \ guint8 *b_dest; \ @@ -315,6 +321,9 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ dest_width = GST_VIDEO_FRAME_WIDTH (destframe); \ dest_height = GST_VIDEO_FRAME_HEIGHT (destframe); \ \ + if (dst_y_end > dest_height) { \ + dst_y_end = dest_height; \ + } \ xpos = x_round (xpos); \ ypos = y_round (ypos); \ \ @@ -327,10 +336,10 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ b_src_width -= -xpos; \ xpos = 0; \ } \ - if (ypos < 0) { \ - yoffset = -ypos; \ - b_src_height -= -ypos; \ - ypos = 0; \ + if (ypos < dst_y_start) { \ + yoffset = dst_y_start - ypos; \ + b_src_height -= dst_y_start - ypos; \ + ypos = dst_y_start; \ } \ /* If x or y offset are larger then the source it's outside of the picture */ \ if (xoffset >= src_width || yoffset >= src_height) { \ @@ -341,8 +350,8 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ if (xpos + b_src_width > dest_width) { \ b_src_width = dest_width - xpos; \ } \ - if (ypos + b_src_height > dest_height) { \ - b_src_height = dest_height - ypos; \ + if (ypos + b_src_height > dst_y_end) { \ + b_src_height = dst_y_end - ypos; \ } \ if (b_src_width <= 0 || b_src_height <= 0) { \ return; \ @@ -400,18 +409,22 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ #define PLANAR_YUV_FILL_CHECKER(format_name, format_enum, MEMSET) \ static void \ -fill_checker_##format_name (GstVideoFrame * frame) \ +fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \ { \ gint i, j; \ static const int tab[] = { 80, 160, 80, 160 }; \ guint8 *p; \ gint comp_width, comp_height; \ - gint rowstride; \ + gint rowstride, comp_yoffset; \ + const GstVideoFormatInfo *info; \ \ + info = frame->info.finfo; \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + comp_height = 
GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ for (j = 0; j < comp_width; j++) { \ @@ -422,8 +435,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \ \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, 0x80, comp_width); \ @@ -432,8 +447,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \ \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 2); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 2, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, 0x80, comp_width); \ @@ -444,17 +461,21 @@ fill_checker_##format_name (GstVideoFrame * frame) \ #define PLANAR_YUV_FILL_COLOR(format_name,format_enum,MEMSET) \ static void \ fill_color_##format_name (GstVideoFrame * frame, \ - gint colY, gint colU, gint colV) \ + guint y_start, guint y_end, gint colY, gint colU, gint colV) \ { \ guint8 *p; \ gint comp_width, comp_height; \ - gint rowstride; \ + gint rowstride, comp_yoffset; \ gint i; \ + const GstVideoFormatInfo *info; \ \ + info = frame->info.finfo; \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, colY, comp_width); \ @@ -463,8 +484,10 @@ fill_color_##format_name (GstVideoFrame * frame, \ \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, colU, comp_width); \ @@ -473,8 +496,10 @@ fill_color_##format_name (GstVideoFrame * frame, \ \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 2); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 2, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, colV, comp_width); \ @@ -541,7 +566,8 @@ _blend_##format_name (const guint8 * src, guint8 * dest, \ \ static void \ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ - gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \ + gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \ + gint dst_y_end, GstCompositorBlendMode mode) \ { \ const guint8 *b_src; \ guint8 *b_dest; \ @@ -565,6 +591,9 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ dest_width = GST_VIDEO_FRAME_WIDTH (destframe); \ dest_height = GST_VIDEO_FRAME_HEIGHT (destframe); \ \ + if (dst_y_end > dest_height) { \ + dst_y_end = dest_height; \ + } \ xpos = GST_ROUND_UP_2 (xpos); \ ypos = GST_ROUND_UP_2 (ypos); \ \ @@ -577,10 +606,10 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ b_src_width -= -xpos; \ xpos = 0; \ } \ - if (ypos < 0) { \ - yoffset += -ypos; \ - b_src_height -= -ypos; \ - ypos = 0; \ + if (ypos < dst_y_start) { \ + yoffset += dst_y_start - ypos; \ + b_src_height -= dst_y_start - ypos; \ + ypos = dst_y_start; \ } \ /* If x or y offset are larger then the source it's outside of the picture */ \ if (xoffset > src_width || yoffset > src_height) { \ @@ -591,8 +620,8 @@ blend_##format_name 
(GstVideoFrame * srcframe, gint xpos, gint ypos, \ if (xpos + src_width > dest_width) { \ b_src_width = dest_width - xpos; \ } \ - if (ypos + src_height > dest_height) { \ - b_src_height = dest_height - ypos; \ + if (ypos + src_height > dst_y_end) { \ + b_src_height = dst_y_end - ypos; \ } \ if (b_src_width < 0 || b_src_height < 0) { \ return; \ @@ -634,18 +663,22 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ #define NV_YUV_FILL_CHECKER(format_name, MEMSET) \ static void \ -fill_checker_##format_name (GstVideoFrame * frame) \ +fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \ { \ gint i, j; \ static const int tab[] = { 80, 160, 80, 160 }; \ guint8 *p; \ gint comp_width, comp_height; \ - gint rowstride; \ + gint rowstride, comp_yoffset; \ + const GstVideoFormatInfo *info; \ \ + info = frame->info.finfo; \ p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ for (j = 0; j < comp_width; j++) { \ @@ -656,8 +689,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \ \ p = GST_VIDEO_FRAME_PLANE_DATA (frame, 1); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \ + p += comp_yoffset * rowstride; \ \ for (i = 0; i < comp_height; i++) { \ MEMSET (p, 0x80, comp_width * 2); \ @@ -668,18 +703,22 @@ fill_checker_##format_name (GstVideoFrame * frame) \ #define NV_YUV_FILL_COLOR(format_name,MEMSET) \ static void \ fill_color_##format_name (GstVideoFrame * frame, \ - gint colY, gint colU, gint colV) \ + guint y_start, guint y_end, gint colY, gint colU, gint colV) \ { \ guint8 *y, *u, *v; \ gint comp_width, comp_height; \ - gint rowstride; \ + gint rowstride, comp_yoffset; \ gint i, j; \ + const GstVideoFormatInfo *info; \ \ + info = frame->info.finfo; \ y = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ \ + y += comp_yoffset * rowstride; \ for (i = 0; i < comp_height; i++) { \ MEMSET (y, colY, comp_width); \ y += rowstride; \ @@ -688,9 +727,12 @@ fill_color_##format_name (GstVideoFrame * frame, \ u = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \ v = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ - comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \ \ + u += comp_yoffset * rowstride; \ + v += comp_yoffset * rowstride; \ for (i = 0; i < comp_height; i++) { \ for (j = 0; j < comp_width; j++) { \ u[j*2] = colU; \ @@ -712,7 +754,8 @@ NV_YUV_FILL_CHECKER (nv21, memset); #define RGB_BLEND(name, bpp, MEMCPY, BLENDLOOP) \ static void \ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ - gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \ + gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \ + gint dst_y_end, GstCompositorBlendMode mode) \ { \ gint b_alpha; \ gint i; \ @@ -735,23 +778,26 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ \ b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255); \ \ + if (dst_y_end > dest_height) { \ + dst_y_end = dest_height; \ + } \ /* adjust src pointers for negative sizes */ \ if (xpos < 0) { \ src += -xpos * bpp; \ src_width -= -xpos; \ xpos = 0; \ } \ - if (ypos < 0) { \ - src += -ypos * src_stride; \ - src_height -= -ypos; \ - ypos = 0; \ + if (ypos < dst_y_start) { \ + src += (dst_y_start - ypos) * src_stride; \ + src_height -= dst_y_start - ypos; \ + ypos = dst_y_start; \ } \ /* adjust width/height if the src is bigger than dest */ \ if (xpos + src_width > dest_width) { \ src_width = dest_width - xpos; \ } \ - if (ypos + src_height > dest_height) { \ - src_height = dest_height - ypos; \ + if (ypos + src_height > dst_y_end) { \ + src_height = dst_y_end - ypos; \ } \ \ dest = dest + bpp * xpos + (ypos * dest_stride); \ @@ -783,7 +829,7 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ #define RGB_FILL_CHECKER_C(name, bpp, r, g, b) \ static void \ -fill_checker_##name##_c (GstVideoFrame * frame) \ +fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \ { \ gint i, j; \ static const int tab[] = { 80, 160, 80, 160 }; \ @@ -791,11 +837,12 @@ fill_checker_##name##_c (GstVideoFrame * frame) \ guint8 *dest; \ \ width = 
GST_VIDEO_FRAME_WIDTH (frame); \ - height = GST_VIDEO_FRAME_HEIGHT (frame); \ + height = y_end - y_start; \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ dest_add = stride - width * bpp; \ \ + dest += y_start * stride; \ for (i = 0; i < height; i++) { \ for (j = 0; j < width; j++) { \ dest[r] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* red */ \ @@ -810,7 +857,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \ #define RGB_FILL_COLOR(name, bpp, MEMSET_RGB) \ static void \ fill_color_##name (GstVideoFrame * frame, \ - gint colY, gint colU, gint colV) \ + guint y_start, guint y_end, gint colY, gint colU, gint colV) \ { \ gint red, green, blue; \ gint i; \ @@ -819,7 +866,7 @@ fill_color_##name (GstVideoFrame * frame, \ guint8 *dest; \ \ width = GST_VIDEO_FRAME_WIDTH (frame); \ - height = GST_VIDEO_FRAME_HEIGHT (frame); \ + height = y_end - y_start; \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ dest_stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ \ @@ -827,6 +874,7 @@ fill_color_##name (GstVideoFrame * frame, \ green = YUV_TO_G (colY, colU, colV); \ blue = YUV_TO_B (colY, colU, colV); \ \ + dest += y_start * dest_stride; \ for (i = 0; i < height; i++) { \ MEMSET_RGB (dest, red, green, blue, width); \ dest += dest_stride; \ @@ -885,7 +933,8 @@ RGB_FILL_COLOR (bgrx, 4, _memset_bgrx); #define PACKED_422_BLEND(name, MEMCPY, BLENDLOOP) \ static void \ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ - gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \ + gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \ + gint dst_y_end, GstCompositorBlendMode mode) \ { \ gint b_alpha; \ gint i; \ @@ -910,24 +959,27 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ \ xpos = GST_ROUND_UP_2 (xpos); \ \ + if (dst_y_end > dest_height) { \ + dst_y_end = dest_height; \ + } \ /* adjust src pointers for negative sizes */ \ if (xpos < 0) { \ src += -xpos * 2; \ 
src_width -= -xpos; \ xpos = 0; \ } \ - if (ypos < 0) { \ - src += -ypos * src_stride; \ - src_height -= -ypos; \ - ypos = 0; \ + if (ypos < dst_y_start) { /* clip source rows above this slice */ \ + src += (dst_y_start - ypos) * src_stride; \ + src_height -= dst_y_start - ypos; \ + ypos = dst_y_start; \ } \ \ /* adjust width/height if the src is bigger than dest */ \ if (xpos + src_width > dest_width) { \ src_width = dest_width - xpos; \ } \ - if (ypos + src_height > dest_height) { \ - src_height = dest_height - ypos; \ + if (ypos + src_height > dst_y_end) { \ + src_height = dst_y_end - ypos; \ } \ \ dest = dest + 2 * xpos + (ypos * dest_stride); \ @@ -959,7 +1011,7 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ #define PACKED_422_FILL_CHECKER_C(name, Y1, U, Y2, V) \ static void \ -fill_checker_##name##_c (GstVideoFrame * frame) \ +fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \ { \ gint i, j; \ static const int tab[] = { 80, 160, 80, 160 }; \ @@ -969,11 +1021,12 @@ fill_checker_##name##_c (GstVideoFrame * frame) \ \ width = GST_VIDEO_FRAME_WIDTH (frame); \ width = GST_ROUND_UP_2 (width); \ - height = GST_VIDEO_FRAME_HEIGHT (frame); \ + height = y_end - y_start; \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ dest_add = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0) - width * 2; \ width /= 2; \ \ + dest += GST_VIDEO_FRAME_COMP_STRIDE (frame, 0) * y_start; /* skip whole rows; dest_add is only the row padding */ \ for (i = 0; i < height; i++) { \ for (j = 0; j < width; j++) { \ dest[Y1] = tab[((i & 0x8) >> 3) + (((2 * j + 0) & 0x8) >> 3)]; \ @@ -989,7 +1042,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \ #define PACKED_422_FILL_COLOR(name, Y1, U, Y2, V) \ static void \ fill_color_##name (GstVideoFrame * frame, \ - gint colY, gint colU, gint colV) \ + guint y_start, guint y_end, gint colY, gint colU, gint colV) \ { \ gint i; \ gint dest_stride; \ @@ -999,13 +1052,14 @@ fill_color_##name (GstVideoFrame * frame, \ \ width = GST_VIDEO_FRAME_WIDTH (frame); \ width = GST_ROUND_UP_2 (width); \ - height = GST_VIDEO_FRAME_HEIGHT (frame); \ + height = y_end - y_start; \ dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \ dest_stride = 
GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ width /= 2; \ \ val = GUINT32_FROM_BE ((colY << Y1) | (colY << Y2) | (colU << U) | (colV << V)); \ \ + dest += dest_stride * y_start; \ for (i = 0; i < height; i++) { \ compositor_orc_splat_u32 ((guint32 *) dest, val, width); \ dest += dest_stride; \ diff --git a/gst/compositor/blend.h b/gst/compositor/blend.h index 92ec21abcc..2489d1755c 100644 --- a/gst/compositor/blend.h +++ b/gst/compositor/blend.h @@ -38,10 +38,19 @@ typedef enum COMPOSITOR_BLEND_MODE_ADD, } GstCompositorBlendMode; +/* + * @srcframe: source #GstVideoFrame + * @xpos: horizontal start position of @srcframe, leftmost pixel line. + * @ypos: vertical start position of @srcframe, topmost pixel line. + * @src_alpha: alpha factor applied to @srcframe + * @destframe: destination #GstVideoFrame + * @dst_y_start: start position of where to write into @destframe. Used for splitting work across multiple sequences. + * @dst_y_end: end position of where to write into @destframe. Used for splitting work across multiple sequences. 
+ */ typedef void (*BlendFunction) (GstVideoFrame *srcframe, gint xpos, gint ypos, gdouble src_alpha, GstVideoFrame * destframe, - GstCompositorBlendMode mode); -typedef void (*FillCheckerFunction) (GstVideoFrame * frame); -typedef void (*FillColorFunction) (GstVideoFrame * frame, gint c1, gint c2, gint c3); + gint dst_y_start, gint dst_y_end, GstCompositorBlendMode mode); +typedef void (*FillCheckerFunction) (GstVideoFrame * frame, guint y_start, guint y_end); +typedef void (*FillColorFunction) (GstVideoFrame * frame, guint y_start, guint y_end, gint c1, gint c2, gint c3); extern BlendFunction gst_compositor_blend_argb; extern BlendFunction gst_compositor_blend_bgra; diff --git a/gst/compositor/compositor.c b/gst/compositor/compositor.c index 665d8bc68a..2b5c9a2149 100644 --- a/gst/compositor/compositor.c +++ b/gst/compositor/compositor.c @@ -827,21 +827,189 @@ _fixate_caps (GstAggregator * agg, GstCaps * caps) return ret; } +static gpointer +gst_parallelized_task_thread_func (gpointer data) +{ + GstParallelizedTaskThread *self = data; + + g_mutex_lock (&self->runner->lock); + self->runner->n_done++; + if (self->runner->n_done == self->runner->n_threads - 1) + g_cond_signal (&self->runner->cond_done); + + do { + gint idx; + + while (self->runner->n_todo == -1 && !self->runner->quit) + g_cond_wait (&self->runner->cond_todo, &self->runner->lock); + + if (self->runner->quit) + break; + + idx = self->runner->n_todo--; + g_assert (self->runner->n_todo >= -1); + g_mutex_unlock (&self->runner->lock); + + g_assert (self->runner->func != NULL); + + self->runner->func (self->runner->task_data[idx]); + + g_mutex_lock (&self->runner->lock); + self->runner->n_done++; + if (self->runner->n_done == self->runner->n_threads - 1) + g_cond_signal (&self->runner->cond_done); + } while (TRUE); + + g_mutex_unlock (&self->runner->lock); + + return NULL; +} + +static void +gst_parallelized_task_runner_free (GstParallelizedTaskRunner * self) +{ + guint i; + + g_mutex_lock (&self->lock); + 
self->quit = TRUE; + g_cond_broadcast (&self->cond_todo); + g_mutex_unlock (&self->lock); + + for (i = 1; i < self->n_threads; i++) { + if (!self->threads[i].thread) + continue; + + g_thread_join (self->threads[i].thread); + } + + g_mutex_clear (&self->lock); + g_cond_clear (&self->cond_todo); + g_cond_clear (&self->cond_done); + g_free (self->threads); + g_free (self); +} + +static GstParallelizedTaskRunner * +gst_parallelized_task_runner_new (guint n_threads) +{ + GstParallelizedTaskRunner *self; + guint i; + GError *err = NULL; + + if (n_threads == 0) + n_threads = g_get_num_processors (); + + self = g_new0 (GstParallelizedTaskRunner, 1); + self->n_threads = n_threads; + self->threads = g_new0 (GstParallelizedTaskThread, n_threads); + + self->quit = FALSE; + self->n_todo = -1; + self->n_done = 0; + g_mutex_init (&self->lock); + g_cond_init (&self->cond_todo); + g_cond_init (&self->cond_done); + + /* Set when scheduling a job */ + self->func = NULL; + self->task_data = NULL; + + for (i = 0; i < n_threads; i++) { + self->threads[i].runner = self; + self->threads[i].idx = i; + + /* First thread is the one calling run() */ + if (i > 0) { + self->threads[i].thread = + g_thread_try_new ("compositor-blend", + gst_parallelized_task_thread_func, &self->threads[i], &err); + if (!self->threads[i].thread) + goto error; + } + } + + g_mutex_lock (&self->lock); + while (self->n_done < self->n_threads - 1) + g_cond_wait (&self->cond_done, &self->lock); + self->n_done = 0; + g_mutex_unlock (&self->lock); + + return self; + +error: + { + GST_ERROR ("Failed to start thread %u: %s", i, err->message); + g_clear_error (&err); + + gst_parallelized_task_runner_free (self); + return NULL; + } +} + +static void +gst_parallelized_task_runner_run (GstParallelizedTaskRunner * self, + GstParallelizedTaskFunc func, gpointer * task_data) +{ + guint n_threads = self->n_threads; + + self->func = func; + self->task_data = task_data; + + if (n_threads > 1) { + g_mutex_lock (&self->lock); + 
self->n_todo = self->n_threads - 2; + self->n_done = 0; + g_cond_broadcast (&self->cond_todo); + g_mutex_unlock (&self->lock); + } + + self->func (self->task_data[self->n_threads - 1]); + + if (n_threads > 1) { + g_mutex_lock (&self->lock); + while (self->n_done < self->n_threads - 1) + g_cond_wait (&self->cond_done, &self->lock); + self->n_done = 0; + g_mutex_unlock (&self->lock); + } + + self->func = NULL; + self->task_data = NULL; +} + static gboolean _negotiated_caps (GstAggregator * agg, GstCaps * caps) { + GstCompositor *compositor = GST_COMPOSITOR (agg); GstVideoInfo v_info; + guint n_threads; GST_DEBUG_OBJECT (agg, "Negotiated caps %" GST_PTR_FORMAT, caps); if (!gst_video_info_from_caps (&v_info, caps)) return FALSE; - if (!set_functions (GST_COMPOSITOR (agg), &v_info)) { + if (!set_functions (compositor, &v_info)) { GST_ERROR_OBJECT (agg, "Failed to setup vfuncs"); return FALSE; } + n_threads = g_get_num_processors (); + /* Magic number of 200 lines */ + if (GST_VIDEO_INFO_HEIGHT (&v_info) / n_threads < 200) + n_threads = (GST_VIDEO_INFO_HEIGHT (&v_info) + 199) / 200; + if (n_threads < 1) + n_threads = 1; + + /* XXX: implement better thread count change */ + if (compositor->blend_runner + && compositor->blend_runner->n_threads != n_threads) { + gst_parallelized_task_runner_free (compositor->blend_runner); + compositor->blend_runner = NULL; + } + if (!compositor->blend_runner) + compositor->blend_runner = gst_parallelized_task_runner_new (n_threads); + return GST_AGGREGATOR_CLASS (parent_class)->negotiated_src_caps (agg, caps); } @@ -869,58 +1037,6 @@ _should_draw_background (GstVideoAggregator * vagg) return draw; } -static gboolean -_draw_background (GstVideoAggregator * vagg, GstVideoFrame * outframe, - BlendFunction * composite) -{ - GstCompositor *comp = GST_COMPOSITOR (vagg); - - *composite = comp->blend; - /* If one of the frames to be composited completely obscures the background, - * don't bother drawing the background at all. 
We can also always use the - * 'blend' BlendFunction in that case because it only changes if we have to - * overlay on top of a transparent background. */ - if (!_should_draw_background (vagg)) - return FALSE; - - switch (comp->background) { - case COMPOSITOR_BACKGROUND_CHECKER: - comp->fill_checker (outframe); - break; - case COMPOSITOR_BACKGROUND_BLACK: - comp->fill_color (outframe, 16, 128, 128); - break; - case COMPOSITOR_BACKGROUND_WHITE: - comp->fill_color (outframe, 240, 128, 128); - break; - case COMPOSITOR_BACKGROUND_TRANSPARENT: - { - guint i, plane, num_planes, height; - - num_planes = GST_VIDEO_FRAME_N_PLANES (outframe); - for (plane = 0; plane < num_planes; ++plane) { - guint8 *pdata; - gsize rowsize, plane_stride; - - pdata = GST_VIDEO_FRAME_PLANE_DATA (outframe, plane); - plane_stride = GST_VIDEO_FRAME_PLANE_STRIDE (outframe, plane); - rowsize = GST_VIDEO_FRAME_COMP_WIDTH (outframe, plane) - * GST_VIDEO_FRAME_COMP_PSTRIDE (outframe, plane); - height = GST_VIDEO_FRAME_COMP_HEIGHT (outframe, plane); - for (i = 0; i < height; ++i) { - memset (pdata, 0, rowsize); - pdata += plane_stride; - } - } - /* use overlay to keep background transparent */ - *composite = comp->overlay; - break; - } - } - - return TRUE; -} - static gboolean frames_can_copy (const GstVideoFrame * frame1, const GstVideoFrame * frame2) { @@ -933,14 +1049,101 @@ frames_can_copy (const GstVideoFrame * frame1, const GstVideoFrame * frame2) return TRUE; } +struct CompositePadInfo +{ + GstVideoFrame *prepared_frame; + GstCompositorPad *pad; + GstCompositorBlendMode blend_mode; +}; + +struct CompositeTask +{ + GstCompositor *compositor; + GstVideoFrame *out_frame; + guint dst_line_start; + guint dst_line_end; + gboolean draw_background; + guint n_pads; + struct CompositePadInfo *pads_info; +}; + +static void +_draw_background (GstCompositor * comp, GstVideoFrame * outframe, + guint y_start, guint y_end, BlendFunction * composite) +{ + *composite = comp->blend; + + switch (comp->background) { 
+ case COMPOSITOR_BACKGROUND_CHECKER: + comp->fill_checker (outframe, y_start, y_end); + break; + case COMPOSITOR_BACKGROUND_BLACK: + comp->fill_color (outframe, y_start, y_end, 16, 128, 128); + break; + case COMPOSITOR_BACKGROUND_WHITE: + comp->fill_color (outframe, y_start, y_end, 240, 128, 128); + break; + case COMPOSITOR_BACKGROUND_TRANSPARENT: + { + guint i, plane, num_planes, height; + + num_planes = GST_VIDEO_FRAME_N_PLANES (outframe); + for (plane = 0; plane < num_planes; ++plane) { + const GstVideoFormatInfo *info; + guint8 *pdata; + gsize rowsize, plane_stride; + + info = outframe->info.finfo; + pdata = GST_VIDEO_FRAME_PLANE_DATA (outframe, plane); + plane_stride = GST_VIDEO_FRAME_PLANE_STRIDE (outframe, plane); + rowsize = GST_VIDEO_FRAME_COMP_WIDTH (outframe, plane) + * GST_VIDEO_FRAME_COMP_PSTRIDE (outframe, plane); + height = + GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, plane, y_end - y_start); + pdata += GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, plane, y_start) * plane_stride; /* y_start counts frame lines; scale it like height for subsampled planes */ + for (i = 0; i < height; ++i) { + memset (pdata, 0, rowsize); + pdata += plane_stride; + } + } + /* use overlay to keep background transparent */ + *composite = comp->overlay; + break; + } + } +} + +static void +blend_pads (struct CompositeTask *comp) +{ + BlendFunction composite; + guint i; + + composite = comp->compositor->blend; + + if (comp->draw_background) { + _draw_background (comp->compositor, comp->out_frame, comp->dst_line_start, + comp->dst_line_end, &composite); + } + + for (i = 0; i < comp->n_pads; i++) { + composite (comp->pads_info[i].prepared_frame, + comp->pads_info[i].pad->xpos, comp->pads_info[i].pad->ypos, + comp->pads_info[i].pad->alpha, comp->out_frame, comp->dst_line_start, + comp->dst_line_end, comp->pads_info[i].blend_mode); + } +} + static GstFlowReturn gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf) { + GstCompositor *compositor = GST_COMPOSITOR (vagg); GList *l; - BlendFunction composite; GstVideoFrame out_frame, *outframe; - gboolean drew_background; - guint 
drawn_pads = 0; + gboolean draw_background; + guint drawn_a_pad = FALSE; + struct CompositePadInfo *pads_info; + guint i, n_pads = 0; if (!gst_video_frame_map (&out_frame, &vagg->info, outbuf, GST_MAP_WRITE)) { GST_WARNING_OBJECT (vagg, "Could not map output buffer"); @@ -948,9 +1151,26 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf) } outframe = &out_frame; - drew_background = _draw_background (vagg, outframe, &composite); + + /* If one of the frames to be composited completely obscures the background, + * don't bother drawing the background at all. We can also always use the + * 'blend' BlendFunction in that case because it only changes if we have to + * overlay on top of a transparent background. */ + draw_background = _should_draw_background (vagg); GST_OBJECT_LOCK (vagg); + for (l = GST_ELEMENT (vagg)->sinkpads; l; l = l->next) { + GstVideoAggregatorPad *pad = l->data; + GstVideoFrame *prepared_frame = + gst_video_aggregator_pad_get_prepared_frame (pad); + + if (prepared_frame) + n_pads++; + } + + pads_info = g_newa (struct CompositePadInfo, n_pads); + n_pads = 0; + for (l = GST_ELEMENT (vagg)->sinkpads; l; l = l->next) { GstVideoAggregatorPad *pad = l->data; GstCompositorPad *compo_pad = GST_COMPOSITOR_PAD (pad); @@ -978,16 +1198,53 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf) * background, and @prepared_frame has the same format, height, and width * as @outframe, then we can just copy it as-is. Subsequent pads (if any) * will be composited on top of it. 
*/ - if (drawn_pads == 0 && !drew_background && - frames_can_copy (prepared_frame, outframe)) + if (!drawn_a_pad && !draw_background && + frames_can_copy (prepared_frame, outframe)) { gst_video_frame_copy (outframe, prepared_frame); - else - composite (prepared_frame, - compo_pad->xpos, - compo_pad->ypos, compo_pad->alpha, outframe, blend_mode); - drawn_pads++; + } else { + pads_info[n_pads].pad = compo_pad; + pads_info[n_pads].prepared_frame = prepared_frame; + pads_info[n_pads].blend_mode = blend_mode; + n_pads++; + } + drawn_a_pad = TRUE; } } + + { + guint n_threads, lines_per_thread; + guint out_height; + struct CompositeTask *tasks; + struct CompositeTask **tasks_p; + + n_threads = compositor->blend_runner->n_threads; + + tasks = g_newa (struct CompositeTask, n_threads); + tasks_p = g_newa (struct CompositeTask *, n_threads); + + out_height = GST_VIDEO_FRAME_HEIGHT (outframe); + lines_per_thread = (out_height + n_threads - 1) / n_threads; + + for (i = 0; i < n_threads; i++) { + tasks[i].compositor = compositor; + tasks[i].n_pads = n_pads; + tasks[i].pads_info = pads_info; + tasks[i].out_frame = outframe; + tasks[i].draw_background = draw_background; + /* This is a dumb split of the work by number of output lines. + * If there is a section of the output that reads from a lot of source + * pads, then that thread will consume more time. Maybe tracking and + * splitting on the source fill rate would produce better results. 
*/ + tasks[i].dst_line_start = i * lines_per_thread; + tasks[i].dst_line_end = MIN ((i + 1) * lines_per_thread, out_height); + + tasks_p[i] = &tasks[i]; + } + + gst_parallelized_task_runner_run (compositor->blend_runner, + (GstParallelizedTaskFunc) blend_pads, (gpointer *) tasks_p); + } + GST_OBJECT_UNLOCK (vagg); gst_video_frame_unmap (outframe); @@ -1077,6 +1334,18 @@ _sink_query (GstAggregator * agg, GstAggregatorPad * bpad, GstQuery * query) } } +static void +gst_compositor_finalize (GObject * object) +{ + GstCompositor *compositor = GST_COMPOSITOR (object); + + if (compositor->blend_runner) + gst_parallelized_task_runner_free (compositor->blend_runner); + compositor->blend_runner = NULL; + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + /* GObject boilerplate */ static void gst_compositor_class_init (GstCompositorClass * klass) @@ -1089,6 +1358,7 @@ gst_compositor_class_init (GstCompositorClass * klass) gobject_class->get_property = gst_compositor_get_property; gobject_class->set_property = gst_compositor_set_property; + gobject_class->finalize = gst_compositor_finalize; gstelement_class->request_new_pad = GST_DEBUG_FUNCPTR (gst_compositor_request_new_pad); diff --git a/gst/compositor/compositor.h b/gst/compositor/compositor.h index 18e5bb0924..37c609e11b 100644 --- a/gst/compositor/compositor.h +++ b/gst/compositor/compositor.h @@ -76,6 +76,34 @@ typedef enum COMPOSITOR_OPERATOR_ADD, } GstCompositorOperator; +/* copied from video-converter.c */ +typedef void (*GstParallelizedTaskFunc) (gpointer user_data); + +typedef struct _GstParallelizedTaskRunner GstParallelizedTaskRunner; +typedef struct _GstParallelizedTaskThread GstParallelizedTaskThread; + +struct _GstParallelizedTaskThread +{ + GstParallelizedTaskRunner *runner; + guint idx; + GThread *thread; +}; + +struct _GstParallelizedTaskRunner +{ + guint n_threads; + + GstParallelizedTaskThread *threads; + + GstParallelizedTaskFunc func; + gpointer *task_data; + + GMutex lock; + GCond cond_todo, 
cond_done; + gint n_todo, n_done; + gboolean quit; +}; + /** * GstCompositor: * @@ -92,6 +120,8 @@ struct _GstCompositor BlendFunction blend, overlay; FillCheckerFunction fill_checker; FillColorFunction fill_color; + + GstParallelizedTaskRunner *blend_runner; }; /**