From 7346e7c1e23af1d7e4f068c660789d0d68e38b1a Mon Sep 17 00:00:00 2001 From: Josep Torra Date: Tue, 4 Feb 2020 09:23:03 -0800 Subject: [PATCH] scenechange: use orc to compute score Add an orc implementation for SAD operation. Part-of: --- gst/videofilters/gstscenechange.c | 16 +- gst/videofilters/gstscenechangeorc-dist.c | 273 ++++++++++++++++++++++ gst/videofilters/gstscenechangeorc-dist.h | 87 +++++++ gst/videofilters/gstscenechangeorc.orc | 7 + gst/videofilters/meson.build | 22 +- 5 files changed, 392 insertions(+), 13 deletions(-) create mode 100644 gst/videofilters/gstscenechangeorc-dist.c create mode 100644 gst/videofilters/gstscenechangeorc-dist.h create mode 100644 gst/videofilters/gstscenechangeorc.orc diff --git a/gst/videofilters/gstscenechange.c b/gst/videofilters/gstscenechange.c index c4504c06ef..d44ab188ed 100644 --- a/gst/videofilters/gstscenechange.c +++ b/gst/videofilters/gstscenechange.c @@ -82,6 +82,7 @@ #include #include #include "gstscenechange.h" +#include "gstscenechangeorc.h" GST_DEBUG_CATEGORY_STATIC (gst_scene_change_debug_category); #define GST_CAT_DEFAULT gst_scene_change_debug_category @@ -143,23 +144,14 @@ gst_scene_change_init (GstSceneChange * scenechange) static double get_frame_score (GstVideoFrame * f1, GstVideoFrame * f2) { - int i; - int j; - int score = 0; + guint32 score = 0; int width, height; - guint8 *s1; - guint8 *s2; width = f1->info.width; height = f1->info.height; - for (j = 0; j < height; j++) { - s1 = (guint8 *) f1->data[0] + f1->info.stride[0] * j; - s2 = (guint8 *) f2->data[0] + f2->info.stride[0] * j; - for (i = 0; i < width; i++) { - score += ABS (s1[i] - s2[i]); - } - } + orc_sad_nxm_u8 (&score, f1->data[0], f1->info.stride[0], + f2->data[0], f2->info.stride[0], width, height); return ((double) score) / (width * height); } diff --git a/gst/videofilters/gstscenechangeorc-dist.c b/gst/videofilters/gstscenechangeorc-dist.c new file mode 100644 index 0000000000..f344e71c58 --- /dev/null +++ b/gst/videofilters/gstscenechangeorc-dist.c @@ -0,0 +1,273 @@ + +/* autogenerated from gstscenechangeorc.orc */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include + +#ifndef _ORC_INTEGER_TYPEDEFS_ +#define _ORC_INTEGER_TYPEDEFS_ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#include +typedef int8_t orc_int8; +typedef int16_t orc_int16; +typedef int32_t orc_int32; +typedef int64_t orc_int64; +typedef uint8_t orc_uint8; +typedef uint16_t orc_uint16; +typedef uint32_t orc_uint32; +typedef uint64_t orc_uint64; +#define ORC_UINT64_C(x) UINT64_C(x) +#elif defined(_MSC_VER) +typedef signed __int8 orc_int8; +typedef signed __int16 orc_int16; +typedef signed __int32 orc_int32; +typedef signed __int64 orc_int64; +typedef unsigned __int8 orc_uint8; +typedef unsigned __int16 orc_uint16; +typedef unsigned __int32 orc_uint32; +typedef unsigned __int64 orc_uint64; +#define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline +#else +#include +typedef signed char orc_int8; +typedef short orc_int16; +typedef int orc_int32; +typedef unsigned char orc_uint8; +typedef unsigned short orc_uint16; +typedef unsigned int orc_uint32; +#if INT_MAX == LONG_MAX +typedef long long orc_int64; +typedef unsigned long long orc_uint64; +#define ORC_UINT64_C(x) (x##ULL) +#else +typedef long orc_int64; +typedef unsigned long orc_uint64; +#define ORC_UINT64_C(x) (x##UL) +#endif +#endif +typedef union +{ + orc_int16 i; + orc_int8 x2[2]; +} orc_union16; +typedef union +{ + orc_int32 i; + float f; + orc_int16 x2[2]; + orc_int8 x4[4]; +} orc_union32; +typedef union +{ + orc_int64 i; + double f; + orc_int32 x2[2]; + float x2f[2]; + orc_int16 x4[4]; +} orc_union64; +#endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif + +#ifndef ORC_INTERNAL +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +#define ORC_INTERNAL __attribute__((visibility("hidden"))) +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) +#define ORC_INTERNAL __hidden +#elif defined (__GNUC__) +#define ORC_INTERNAL __attribute__((visibility("hidden"))) +#else +#define ORC_INTERNAL +#endif +#endif + + +#ifndef DISABLE_ORC +#include +#endif +void orc_sad_nxm_u8 (orc_uint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, int s1_stride, + const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m); + + +/* begin Orc C target preamble */ +#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x))) +#define ORC_ABS(a) ((a)<0 ? -(a) : (a)) +#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b)) +#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b)) +#define ORC_SB_MAX 127 +#define ORC_SB_MIN (-1-ORC_SB_MAX) +#define ORC_UB_MAX (orc_uint8) 255 +#define ORC_UB_MIN 0 +#define ORC_SW_MAX 32767 +#define ORC_SW_MIN (-1-ORC_SW_MAX) +#define ORC_UW_MAX (orc_uint16)65535 +#define ORC_UW_MIN 0 +#define ORC_SL_MAX 2147483647 +#define ORC_SL_MIN (-1-ORC_SL_MAX) +#define ORC_UL_MAX 4294967295U +#define ORC_UL_MIN 0 +#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX) +#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX) +#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX) +#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX) +#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX) +#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) +#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8)) +#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24)) +#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56)) +#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset))) +#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff)) +#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0)) +#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff))) +#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0)) +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif +/* end Orc C target preamble */ + + + +/* orc_sad_nxm_u8 */ +#ifdef DISABLE_ORC +void +orc_sad_nxm_u8 (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, + int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, + int m) +{ + int i; + int j; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + orc_union32 var12 = { 0 }; + orc_int8 var32; + orc_int8 var33; + + for (j = 0; j < m; j++) { + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + ptr5 = ORC_PTR_OFFSET (s2, s2_stride * j); + + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var32 = ptr4[i]; + /* 1: loadb */ + var33 = ptr5[i]; + /* 2: accsadubl */ + var12.i = + var12.i + ORC_ABS ((orc_int32) (orc_uint8) var32 - + (orc_int32) (orc_uint8) var33); + } + } + *a1 = var12.i; + +} + +#else +static void +_backup_orc_sad_nxm_u8 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + orc_union32 var12 = { 0 }; + orc_int8 var32; + orc_int8 var33; + + for (j = 0; j < m; j++) { + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + ptr5 = ORC_PTR_OFFSET (ex->arrays[5], ex->params[5] * j); + + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var32 = ptr4[i]; + /* 1: loadb */ + var33 = ptr5[i]; + /* 2: accsadubl */ + var12.i = + var12.i + ORC_ABS ((orc_int32) (orc_uint8) var32 - + (orc_int32) (orc_uint8) var33); + } + } + ex->accumulators[0] = var12.i; + +} + +void +orc_sad_nxm_u8 (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, + int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, + int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 14, 111, 114, 99, 95, 115, 97, 100, 95, 110, 120, 109, 95, + 117, 56, 12, 1, 1, 12, 1, 1, 13, 4, 182, 12, 4, 5, 2, 0, + + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_orc_sad_nxm_u8); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "orc_sad_nxm_u8"); + orc_program_set_backup_function (p, _backup_orc_sad_nxm_u8); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_accumulator (p, 4, "a1"); + + orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, + ORC_VAR_S2, ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->arrays[ORC_VAR_S2] = (void *) s2; + ex->params[ORC_VAR_S2] = s2_stride; + + func = c->exec; + func (ex); + *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); +} +#endif diff --git a/gst/videofilters/gstscenechangeorc-dist.h b/gst/videofilters/gstscenechangeorc-dist.h new file mode 100644 index 0000000000..86c22d0fad --- /dev/null +++ b/gst/videofilters/gstscenechangeorc-dist.h @@ -0,0 +1,87 @@ + +/* autogenerated from gstscenechangeorc.orc */ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + + +#ifndef _ORC_INTEGER_TYPEDEFS_ +#define _ORC_INTEGER_TYPEDEFS_ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#include +typedef int8_t orc_int8; +typedef int16_t orc_int16; +typedef int32_t orc_int32; +typedef int64_t orc_int64; +typedef uint8_t orc_uint8; +typedef uint16_t orc_uint16; +typedef uint32_t orc_uint32; +typedef uint64_t orc_uint64; +#define ORC_UINT64_C(x) UINT64_C(x) +#elif defined(_MSC_VER) +typedef signed __int8 orc_int8; +typedef signed __int16 orc_int16; +typedef signed __int32 orc_int32; +typedef signed __int64 orc_int64; +typedef unsigned __int8 orc_uint8; +typedef unsigned __int16 orc_uint16; +typedef unsigned __int32 orc_uint32; +typedef unsigned __int64 orc_uint64; +#define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline +#else +#include +typedef signed char orc_int8; +typedef short orc_int16; +typedef int orc_int32; +typedef unsigned char orc_uint8; +typedef unsigned short orc_uint16; +typedef unsigned int orc_uint32; +#if INT_MAX == LONG_MAX +typedef long long orc_int64; +typedef unsigned long long orc_uint64; +#define ORC_UINT64_C(x) (x##ULL) +#else +typedef long orc_int64; +typedef unsigned long orc_uint64; +#define ORC_UINT64_C(x) (x##UL) +#endif +#endif +typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16; +typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32; +typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64; +#endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif + +#ifndef ORC_INTERNAL +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +#define ORC_INTERNAL __attribute__((visibility("hidden"))) +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) +#define ORC_INTERNAL __hidden +#elif defined (__GNUC__) +#define ORC_INTERNAL __attribute__((visibility("hidden"))) +#else +#define ORC_INTERNAL +#endif +#endif + +void orc_sad_nxm_u8 (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m); + +#ifdef __cplusplus +} +#endif + diff --git a/gst/videofilters/gstscenechangeorc.orc b/gst/videofilters/gstscenechangeorc.orc new file mode 100644 index 0000000000..0720558a6e --- /dev/null +++ b/gst/videofilters/gstscenechangeorc.orc @@ -0,0 +1,7 @@ +.function orc_sad_nxm_u8 +.flags 2d +.accumulator 4 a1 orc_uint32 +.source 1 s1 orc_uint8 +.source 1 s2 orc_uint8 + +accsadubl a1, s1, s2 diff --git a/gst/videofilters/meson.build b/gst/videofilters/meson.build index 15dcc94a63..3522a42e71 100644 --- a/gst/videofilters/meson.build +++ b/gst/videofilters/meson.build @@ -5,8 +5,28 @@ vfilt_sources = [ 'gstvideofiltersbad.c', ] +orcsrc = 'gstscenechangeorc' +if have_orcc + orc_h = custom_target(orcsrc + '.h', + input : orcsrc + '.orc', + output : orcsrc + '.h', + command : orcc_args + ['--header', '-o', '@OUTPUT@', '@INPUT@']) + orc_c = custom_target(orcsrc + '.c', + input : orcsrc + '.orc', + output : orcsrc + '.c', + command : orcc_args + ['--implementation', '-o', '@OUTPUT@', '@INPUT@']) + orc_targets += {'name': orcsrc, 'orc-source': files(orcsrc + '.orc'), 'header': orc_h, 'source': orc_c} +else + orc_h = configure_file(input : orcsrc + '-dist.h', + output : orcsrc + '.h', + copy : true) + orc_c = configure_file(input : orcsrc + '-dist.c', + output : orcsrc + '.c', + copy : true) +endif + gstvideofiltersbad = library('gstvideofiltersbad', - vfilt_sources, + vfilt_sources, orc_c, orc_h, c_args : gst_plugins_bad_args, include_directories : [configinc], dependencies : [gstvideo_dep, gstbase_dep, orc_dep, libm],