From d2a31378c4300c116af6824a89d03f3f7fa52e63 Mon Sep 17 00:00:00 2001 From: hwren Date: Thu, 13 Sep 2018 15:21:41 +0800 Subject: [PATCH 01/22] x86: Change assembler from YASM to NASM --- build/linux/configure | 10 +++---- source/common/x86/quant8.asm | 6 ++-- source/common/x86/x86inc.asm | 55 ++++++++++++++++++++++-------------- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/build/linux/configure b/build/linux/configure index 4e0984f..ac90f74 100755 --- a/build/linux/configure +++ b/build/linux/configure @@ -655,7 +655,7 @@ stack_alignment=4 case $host_cpu in i*86) ARCH="X86" - AS="${AS-yasm}" + AS="${AS-nasm}" AS_EXT=".asm" CFLAGS="$CFLAGS -DARCH_X86_64=0" ASFLAGS="$ASFLAGS -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/" @@ -682,7 +682,7 @@ case $host_cpu in ;; x86_64) ARCH="X86_64" - AS="${AS-yasm}" + AS="${AS-nasm}" AS_EXT=".asm" CFLAGS="$CFLAGS -DARCH_X86_64=1" ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/" @@ -853,10 +853,10 @@ elif [ $compiler = ICC -a $ARCH = X86 ]; then fi if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then - if ! as_check "vpmovzxwd ymm0, xmm0" ; then - VER=`($AS --version || echo no assembler) 2>/dev/null | head -n 1` + if ! as_check "vmovdqa32 [eax]{k1}{z}, zmm0" ; then + VER="$( ($AS --version || echo no assembler) 2>/dev/null | head -n 1)" echo "Found $VER" - echo "Minimum version is yasm-1.2.0" + echo "Minimum version is nasm-2.13" echo "If you really want to compile without asm, configure with --disable-asm."
exit 1 fi diff --git a/source/common/x86/quant8.asm b/source/common/x86/quant8.asm index 470b10b..9cf2b52 100644 --- a/source/common/x86/quant8.asm +++ b/source/common/x86/quant8.asm @@ -45,13 +45,13 @@ INIT_XMM sse4 cglobal dequant, 2,2,7 ;{ mov r3, r3mp ; r3 <-- shift - movd m4, r2mp ; m4[0] = scale - movd m6, r3 ; m6[0] = shift + movq m4, r2mp ; m4[0] = scale + movq m6, r3 ; m6[0] = shift dec r3 ; r3d <-- shift - 1 xor r2, r2 ; r2 <-- 0 shr r1, 4 ; r1 = i_coef/16 bts r2, r3 ; r2 <-- add = 1 < (shift - 1) - movd m5, r2 ; m5[0] = add + movq m5, r2 ; m5[0] = add pshufd m4, m4, 0 ; m4[3210] = scale pshufd m5, m5, 0 ; m5[3210] = add ; diff --git a/source/common/x86/x86inc.asm b/source/common/x86/x86inc.asm index 0b24fc1..f530dd5 100644 --- a/source/common/x86/x86inc.asm +++ b/source/common/x86/x86inc.asm @@ -66,6 +66,15 @@ %endif %endif +%define FORMAT_ELF 0 +%ifidn __OUTPUT_FORMAT__,elf + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf32 + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf64 + %define FORMAT_ELF 1 +%endif + %ifdef PREFIX %define mangle(x) _ %+ x %else @@ -92,6 +101,10 @@ default rel %endif +%ifdef __NASM_VER__ + %use smartalign +%endif + ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. 
Luckily that @@ -677,7 +690,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %2:function %%VISIBILITY %else global %2 @@ -703,14 +716,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, ; like cextern, but without the prefix %macro cextern_naked 1 - %xdefine %1 mangle(%1) + %ifdef PREFIX + %xdefine %1 mangle(%1) + %endif CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro %macro const 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %1:data hidden %else global %1 @@ -721,8 +736,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, ; This is needed for ELF, otherwise the GNU linker assumes the stack is ; executable by default. -%ifidn __OUTPUT_FORMAT__,elf -SECTION .note.GNU-stack noalloc noexec nowrite progbits +%if FORMAT_ELF + SECTION .note.GNU-stack noalloc noexec nowrite progbits %endif ; cpuflags @@ -791,11 +806,20 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %endif %endif - %if ARCH_X86_64 || cpuflag(sse2) - CPU amdnop - %else - CPU basicnop - %endif + %if ARCH_X86_64 || cpuflag(sse2) + %ifdef __NASM_VER__ + ALIGNMODE p6 + %else + CPU amdnop + %endif + %else + %ifdef __NASM_VER__ + ALIGNMODE nop + %else + CPU basicnop + %endif + %endif + %endmacro ; Merge mmx and sse* @@ -1473,17 +1497,6 @@ FMA4_INSTR fnmsubps, fnmsub132ps, fnmsub213ps, fnmsub231ps FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss -; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug -%if ARCH_X86_64 == 0 -%macro vpbroadcastq 2 -%if sizeof%1 == 16 - movddup %1, %2 -%else - vbroadcastsd %1, %2 -%endif -%endmacro -%endif - ; workaround: vpbroadcastd with register, the yasm will generate wrong code %macro vpbroadcastd 2 %ifid %2 -- 
Gitee From 4c1721e2e95ec407049b5b2589999347f27d21ae Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:21:18 +0800 Subject: [PATCH 02/22] Rename with macro prefix(1): common/aec.h --- source/common/aec.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/source/common/aec.h b/source/common/aec.h index 187037c..3ca32b7 100644 --- a/source/common/aec.h +++ b/source/common/aec.h @@ -39,42 +39,60 @@ extern "C" { /* --------------------------------------------------------------------------- * global variables */ +#define saoclip FPFX(saoclip) extern const int saoclip[NUM_SAO_OFFSET][3]; +#define tab_intra_mode_scan_type FPFX(tab_intra_mode_scan_type) extern const int tab_intra_mode_scan_type[NUM_INTRA_MODE]; /* --------------------------------------------------------------------------- * aec basic operations */ +#define aec_init_contexts FPFX(aec_init_contexts) void aec_init_contexts (aec_t *p_aec); +#define aec_new_slice FPFX(aec_new_slice) void aec_new_slice (davs2_t *h); +#define aec_start_decoding FPFX(aec_start_decoding) int aec_start_decoding (aec_t *p_aec, uint8_t *p_start, int i_byte_pos, int i_bytes); +#define aec_bits_read FPFX(aec_bits_read) int aec_bits_read (aec_t *p_aec); +#define aec_startcode_follows FPFX(aec_startcode_follows) int aec_startcode_follows (aec_t *p_aec, int eos_bit); /* --------------------------------------------------------------------------- * ctu structure information */ +#define aec_read_split_flag FPFX(aec_read_split_flag) int aec_read_split_flag (aec_t *p_aec, int i_level); /* --------------------------------------------------------------------------- * cu type information */ +#define aec_read_cu_type FPFX(aec_read_cu_type) int aec_read_cu_type (aec_t *p_aec, cu_t *p_cu, int img_type, int b_amp, int b_mhp, int b_wsm, int num_references); +#define aec_read_cu_type_sframe FPFX(aec_read_cu_type_sframe) int aec_read_cu_type_sframe(aec_t *p_aec); +#define aec_read_intra_cu_type 
FPFX(aec_read_intra_cu_type) int aec_read_intra_cu_type (aec_t *p_aec, cu_t *p_cu, int b_sdip, davs2_t *h); /* --------------------------------------------------------------------------- * inter prediction information */ +#define aec_read_dmh_mode FPFX(aec_read_dmh_mode) int aec_read_dmh_mode (aec_t *p_aec, int i_cu_level); +#define aec_read_mvds FPFX(aec_read_mvds) void aec_read_mvds (aec_t *p_aec, mv_t *p_mvd); +#define aec_read_inter_pred_dir FPFX(aec_read_inter_pred_dir) void aec_read_inter_pred_dir(aec_t * p_aec, cu_t *p_cu, davs2_t *h); /* --------------------------------------------------------------------------- * intra prediction information */ +#define aec_read_intra_pmode FPFX(aec_read_intra_pmode) int aec_read_intra_pmode (aec_t *p_aec); +#define aec_read_intra_pmode_c FPFX(aec_read_intra_pmode_c) int aec_read_intra_pmode_c (aec_t *p_aec, davs2_t *h, int luma_mode); /* --------------------------------------------------------------------------- * transform unit (residual) information */ +#define cu_read_cbp FPFX(cu_read_cbp) int cu_read_cbp (davs2_t *h, aec_t *p_aec, cu_t *p_cu, int scu_x, int scu_y); +#define cu_get_block_coeffs FPFX(cu_get_block_coeffs) int8_t cu_get_block_coeffs (aec_t *p_aec, runlevel_t *runlevel, cu_t *p_cu, coeff_t *p_res, int w_tr, int h_tr, int i_tu_level, int b_luma, @@ -83,11 +101,16 @@ int8_t cu_get_block_coeffs (aec_t *p_aec, runlevel_t *runlevel, /* --------------------------------------------------------------------------- * loop filter information */ +#define aec_read_sao_mergeflag FPFX(aec_read_sao_mergeflag) int aec_read_sao_mergeflag (aec_t *p_aec, int mergeleft_avail, int mergeup_avail); +#define aec_read_sao_mode FPFX(aec_read_sao_mode) int aec_read_sao_mode (aec_t *p_aec); +#define aec_read_sao_offsets FPFX(aec_read_sao_offsets) void aec_read_sao_offsets (aec_t *p_aec, sao_param_t *p_sao_param, int *offset); +#define aec_read_sao_type FPFX(aec_read_sao_type) int aec_read_sao_type (aec_t *p_aec, sao_param_t 
*p_sao_param); +#define aec_read_alf_lcu_ctrl FPFX(aec_read_alf_lcu_ctrl) int aec_read_alf_lcu_ctrl (aec_t *p_aec); #ifndef AEC_RETURN_ON_ERROR -- Gitee From f8103942cd5d7bbb2e74221fc411ff9556c64980 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:21:28 +0800 Subject: [PATCH 03/22] Rename with macro prefix(2): common/alf.h --- source/common/alf.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/common/alf.h b/source/common/alf.h index 0db94da..9b5fe6c 100644 --- a/source/common/alf.h +++ b/source/common/alf.h @@ -36,13 +36,18 @@ extern "C" { #endif +#define alf_get_buffer_size FPFX(alf_get_buffer_size) size_t alf_get_buffer_size(davs2_t *h); +#define alf_init_buffer FPFX(alf_init_buffer) void alf_init_buffer (davs2_t *h); +#define alf_lcurow FPFX(alf_lcurow) void alf_lcurow(davs2_t *h, alf_param_t *p_alf_param, davs2_frame_t *p_tmp_frm, davs2_frame_t *p_dec_frm, int i_lcu_y); +#define alf_read_param FPFX(alf_read_param) void alf_read_param(davs2_t *h, davs2_bs_t *bs); +#define davs2_alf_init FPFX(alf_init) void davs2_alf_init(uint32_t cpuid, ao_funcs_t *fh); #ifdef __cplusplus -- Gitee From 3a0daa37ea1dd9e2442c300d389b00dc48f76aed Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:21:44 +0800 Subject: [PATCH 04/22] Rename with macro prefix(3): common/bitstream.h --- source/common/bitstream.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/common/bitstream.h b/source/common/bitstream.h index f56d0ca..1245164 100644 --- a/source/common/bitstream.h +++ b/source/common/bitstream.h @@ -38,13 +38,21 @@ extern "C" { #include "common.h" +#define bs_init FPFX(bs_init) void bs_init(davs2_bs_t *bs, uint8_t *p_data, int i_data); +#define bs_alain FPFX(bs_alain) void bs_alain(davs2_bs_t *bs); +#define bs_left_bytes FPFX(bs_left_bytes) int bs_left_bytes(davs2_bs_t *bs); +#define found_slice_header FPFX(found_slice_header) int found_slice_header(davs2_bs_t *bs); +#define bs_get_start_code FPFX(bs_get_start_code) int 
bs_get_start_code(davs2_bs_t *bs); +#define bs_dispose_pseudo_code FPFX(bs_dispose_pseudo_code) int bs_dispose_pseudo_code(uint8_t *dst, uint8_t *src, int i_src); +#define find_start_code FPFX(find_start_code) const uint8_t * find_start_code(const uint8_t *data, int len); +#define find_pic_start_code FPFX(find_pic_start_code) int32_t find_pic_start_code(uint8_t prevbyte3, uint8_t prevbyte2, uint8_t prevbyte1, const uint8_t *data, int32_t len); #ifdef __cplusplus -- Gitee From 993383a00dc6483c7a21b5ce2f6dc8031b5c3c47 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:22:00 +0800 Subject: [PATCH 05/22] Rename with macro prefix(4): common/block_info.h --- source/common/block_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/source/common/block_info.h b/source/common/block_info.h index d8fc9bb..f3cd4af 100644 --- a/source/common/block_info.h +++ b/source/common/block_info.h @@ -36,6 +36,7 @@ extern "C" { #endif +#define get_neighbor_cbp_y FPFX(get_neighbor_cbp_y) int get_neighbor_cbp_y(davs2_t *h, int xN, int yN, int scu_x, int scu_y, cu_t *p_cu); #ifdef __cplusplus -- Gitee From 4ad26fc93cf367039fe9857ac6ab6439fa1d5df5 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:22:16 +0800 Subject: [PATCH 06/22] Rename with macro prefix(5): common/common.h --- source/common/common.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/common/common.h b/source/common/common.h index 8f71fcd..d2bebd7 100644 --- a/source/common/common.h +++ b/source/common/common.h @@ -1421,10 +1421,15 @@ typedef union { /* --------------------------------------------------------------------------- * list */ +#define xl_init FPFX(xl_init) int xl_init (xlist_t *const xlist); +#define xl_destroy FPFX(xl_destroy) void xl_destroy (xlist_t *const xlist); +#define xl_append FPFX(xl_append) void xl_append (xlist_t *const xlist, void *node); +#define xl_remove_head FPFX(xl_remove_head) void *xl_remove_head (xlist_t *const xlist, const int wait); +#define 
xl_remove_head_ex FPFX(xl_remove_head_ex) void *xl_remove_head_ex(xlist_t *const xlist); #ifdef __cplusplus -- Gitee From cb3b403922b71c7d9f7ff86747ba1c48789724a0 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:22:28 +0800 Subject: [PATCH 07/22] Rename with macro prefix(6): common/cpu.h --- source/common/cpu.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/common/cpu.h b/source/common/cpu.h index 88355d0..2fef0df 100644 --- a/source/common/cpu.h +++ b/source/common/cpu.h @@ -38,16 +38,24 @@ extern "C" { #endif +#define davs2_cpu_detect FPFX(cpu_detect) uint32_t davs2_cpu_detect(void); +#define davs2_cpu_num_processors FPFX(cpu_num_processors) int davs2_cpu_num_processors(void); +#define avs_cpu_emms FPFX(avs_cpu_emms) void avs_cpu_emms(void); +#define avs_cpu_mask_misalign_sse FPFX(avs_cpu_mask_misalign_sse) void avs_cpu_mask_misalign_sse(void); +#define avs_cpu_sfence FPFX(avs_cpu_sfence) void avs_cpu_sfence(void); +#define davs2_get_simd_capabilities FPFX(get_simd_capabilities) char *davs2_get_simd_capabilities(char *buf, uint32_t cpuid); #if HAVE_MMX +#define davs2_cpu_cpuid FPFX(cpu_cpuid) uint32_t davs2_cpu_cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); +#define davs2_cpu_xgetbv FPFX(cpu_xgetbv) void davs2_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx); #define avs_emms() avs_cpu_emms() #else @@ -71,6 +79,7 @@ void davs2_cpu_xgetbv(uint32_t op, uint32_t *eax, uint32_t *edx); #define avs_stack_align(func,...) 
func(__VA_ARGS__) #endif +#define avs_cpu_restore FPFX(avs_cpu_restore) void avs_cpu_restore(uint32_t cpuid); #ifdef __cplusplus -- Gitee From f80ef966d90814ea54853d1a3ae2c7099a5038f2 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:22:36 +0800 Subject: [PATCH 08/22] Rename with macro prefix(7): common/cu.h --- source/common/cu.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/common/cu.h b/source/common/cu.h index b482d67..0d46e2f 100644 --- a/source/common/cu.h +++ b/source/common/cu.h @@ -43,8 +43,10 @@ extern "C" { * i_lcu_x : LCU position index * i_lcu_y : LCU position index */ +#define decode_lcu_init FPFX(decode_lcu_init) void decode_lcu_init (davs2_t *h, int i_lcu_x, int i_lcu_y); +#define rowrec_lcu_init FPFX(rowrec_lcu_init) void rowrec_lcu_init (davs2_t *h, davs2_row_rec_t *row_rec, int i_lcu_x, int i_lcu_y); /* --------------------------------------------------------------------------- @@ -55,6 +57,7 @@ void rowrec_lcu_init (davs2_t *h, davs2_row_rec_t *row_rec, int i_lcu_x, int i_l * pix_x : pixel position of the decoding CU in the frame in Luma component * pix_y : pixel position of the decoding CU in the frame in Luma component */ +#define decode_lcu_parse FPFX(decode_lcu_parse) int decode_lcu_parse(davs2_t *h, int i_level, int pix_x, int pix_y); /* --------------------------------------------------------------------------- @@ -65,9 +68,12 @@ int decode_lcu_parse(davs2_t *h, int i_level, int pix_x, int pix_y); * pix_x : pixel position of the decoding CU in the frame in Luma component * pix_y : pixel position of the decoding CU in the frame in Luma component */ +#define decode_lcu_recon FPFX(decode_lcu_recon) int decode_lcu_recon(davs2_t *h, davs2_row_rec_t *row_rec, int i_level, int pix_x, int pix_y); +#define decoder_wait_lcu_row FPFX(decoder_wait_lcu_row) void decoder_wait_lcu_row(davs2_t *h, davs2_frame_t *frame, int max_y_in_pic); +#define decoder_wait_row FPFX(decoder_wait_row) void decoder_wait_row(davs2_t *h, 
davs2_frame_t *frame, int max_y_in_pic); #ifdef __cplusplus -- Gitee From 6ad6ad2eba985fd6631b1fd0fb92375f96dd4a7b Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:22:52 +0800 Subject: [PATCH 09/22] Rename with macro prefix(8): common/deblock.h --- source/common/deblock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/common/deblock.h b/source/common/deblock.h index 01e44be..5b5b2c3 100644 --- a/source/common/deblock.h +++ b/source/common/deblock.h @@ -36,7 +36,9 @@ extern "C" { #endif +#define davs2_deblock_init FPFX(deblock_init) void davs2_deblock_init(uint32_t cpuid, ao_funcs_t* fh); +#define davs2_lcu_deblock FPFX(lcu_deblock) void davs2_lcu_deblock(davs2_t *h, davs2_frame_t *frm, int i_lcu_x, int i_lcu_y); #ifdef __cplusplus -- Gitee From fef40876d6fddfcc6f47b5d91216a86bbcdd0bd3 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:23:12 +0800 Subject: [PATCH 10/22] Rename with macro prefix(9): common/decoder.h --- source/common/decoder.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/common/decoder.h b/source/common/decoder.h index 2a760f9..86759c0 100644 --- a/source/common/decoder.h +++ b/source/common/decoder.h @@ -38,18 +38,29 @@ extern "C" { #include "common.h" +#define decoder_open FPFX(decoder_decoder_open) davs2_t *decoder_open(davs2_mgr_t *mgr, davs2_t *h, int idx_decoder); +#define decoder_decode_picture_data FPFX(decoder_decode_picture_data) void *decoder_decode_picture_data(void *arg1, int arg2); +#define decoder_close FPFX(decoder_decoder_close) void decoder_close(davs2_t *h); +#define create_freepictures FPFX(create_freepictures) int create_freepictures(davs2_mgr_t *mgr, int w, int h, int size); +#define destroy_freepictures FPFX(destroy_freepictures) void destroy_freepictures(davs2_mgr_t *mgr); +#define decoder_alloc_extra_buffer FPFX(decoder_alloc_extra_buffer) int decoder_alloc_extra_buffer(davs2_t *h); +#define decoder_free_extra_buffer FPFX(decoder_free_extra_buffer) void 
decoder_free_extra_buffer(davs2_t *h); +#define davs2_write_a_frame FPFX(write_a_frame) void davs2_write_a_frame(davs2_picture_t *pic, davs2_frame_t *frame); +#define task_get_references FPFX(task_get_references) int task_get_references(davs2_t *h, int64_t pts, int64_t dts); +#define task_unload_packet FPFX(task_unload_packet) void task_unload_packet(davs2_t *h, es_unit_t *es_unit); +#define decoder_get_output FPFX(decoder_get_output) int decoder_get_output(davs2_mgr_t *mgr, davs2_seq_info_t *headerset, davs2_picture_t *out_frame, int is_flush); #ifdef __cplusplus -- Gitee From 5810b1362154bffe93d2703a803977a289675095 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:23:27 +0800 Subject: [PATCH 11/22] Rename with macro prefix(10): common/frame.h --- source/common/frame.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/source/common/frame.h b/source/common/frame.h index 4c28510..c5db3f0 100644 --- a/source/common/frame.h +++ b/source/common/frame.h @@ -41,18 +41,27 @@ extern "C" { * function declares * =========================================================================== */ -size_t davs2_frame_get_size(int width, int height, int chroma_format, int b_extra); +#define davs2_frame_get_size FPFX(frame_get_size) +size_t davs2_frame_get_size(int width, int height, int chroma_format, int b_extra); +#define davs2_frame_new FPFX(frame_new) davs2_frame_t *davs2_frame_new(int width, int height, int chroma_format, uint8_t **mem_base, int b_extra); +#define davs2_frame_destroy FPFX(frame_destroy) void davs2_frame_destroy(davs2_frame_t *frame); +#define davs2_frame_copy_planes FPFX(frame_copy_planes) void davs2_frame_copy_planes(davs2_frame_t *p_dst, davs2_frame_t *p_src); +#define davs2_frame_copy_properties FPFX(frame_copy_properties) void davs2_frame_copy_properties(davs2_frame_t *p_dst, davs2_frame_t *p_src); +#define davs2_frame_copy_lcu FPFX(frame_copy_lcu) void davs2_frame_copy_lcu(davs2_t *h, davs2_frame_t *p_dst, 
davs2_frame_t *p_src, int i_lcu_x, int i_lcu_y, int pix_offset, int padding_size); +#define davs2_frame_copy_lcurow FPFX(frame_copy_lcurow) void davs2_frame_copy_lcurow(davs2_t *h, davs2_frame_t *p_dst, davs2_frame_t *p_src, int i_lcu_y, int pix_offset, int padding_size); +#define davs2_frame_expand_border FPFX(frame_expand_border) void davs2_frame_expand_border(davs2_frame_t *frame); +#define pad_line_lcu FPFX(pad_line_lcu) void pad_line_lcu(davs2_t *h, int lcu_y); #ifdef __cplusplus -- Gitee From 79274cd4706f6de3ec0738e27ee56188edd7037f Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:23:41 +0800 Subject: [PATCH 12/22] Rename with macro prefix(11): common/header.h --- source/common/header.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/common/header.h b/source/common/header.h index 1d046f2..cc3a0bc 100644 --- a/source/common/header.h +++ b/source/common/header.h @@ -36,15 +36,22 @@ extern "C" { #endif +#define parse_slice_header FPFX(parse_slice_header) void parse_slice_header(davs2_t *h, davs2_bs_t *bs); +#define parse_header FPFX(parse_header) int parse_header(davs2_t *h, davs2_bs_t *p_bs); +#define release_one_frame FPFX(release_one_frame) void release_one_frame(davs2_frame_t *frame); +#define task_release_frames FPFX(task_release_frames) void task_release_frames(davs2_t *h); +#define alloc_picture FPFX(alloc_picture) davs2_outpic_t *alloc_picture(int w, int h); +#define free_picture FPFX(free_picture) void free_picture(davs2_outpic_t *pic); +#define destroy_dpb FPFX(destroy_dpb) void destroy_dpb(davs2_mgr_t *mgr); #ifdef __cplusplus -- Gitee From 598996cb58cd35713a5cdfe2e9e4e6376743635c Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:23:52 +0800 Subject: [PATCH 13/22] Rename with macro prefix(12): common/intra.h --- source/common/intra.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/common/intra.h b/source/common/intra.h index 3659a61..e95151d 100644 --- a/source/common/intra.h +++ 
b/source/common/intra.h @@ -52,8 +52,11 @@ void intra_pred(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsy, int bs } } +#define davs2_intra_pred_init FPFX(intra_pred_init) void davs2_intra_pred_init(uint32_t cpuid, ao_funcs_t *pf); +#define davs2_get_intra_pred FPFX(get_intra_pred) void davs2_get_intra_pred(davs2_row_rec_t *row_rec, cu_t *p_cu, int predmode, int ctu_x, int ctu_y, int bsx, int bsy); +#define davs2_get_intra_pred_chroma FPFX(get_intra_pred_chroma) void davs2_get_intra_pred_chroma(davs2_row_rec_t *h, cu_t *p_cu, int ctu_c_x, int ctu_c_y); #ifdef __cplusplus -- Gitee From f9da1934ecc6ad00b32b2092c16bf01053e3d3f4 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:24:08 +0800 Subject: [PATCH 14/22] Rename with macro prefix(13): common/mc.h --- source/common/mc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/common/mc.h b/source/common/mc.h index ebb600a..dc65ade 100644 --- a/source/common/mc.h +++ b/source/common/mc.h @@ -36,7 +36,9 @@ extern "C" { #endif +#define mc_luma FPFX(mc_luma) void mc_luma (davs2_t *h, pel_t *dst, int i_dst, int posx, int posy, int width, int height, pel_t *p_fref, int i_fref); +#define mc_chroma FPFX(mc_chroma) void mc_chroma(davs2_t *h, pel_t *dst, int i_dst, int posx, int posy, int width, int height, pel_t *p_fref, int i_fref); #ifdef __cplusplus -- Gitee From ea39a71c9a468cdb719f6721ee7ef62be467fbae Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:24:25 +0800 Subject: [PATCH 15/22] Rename with macro prefix(14): common/predict.h --- source/common/predict.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/common/predict.h b/source/common/predict.h index af8c834..56df88f 100644 --- a/source/common/predict.h +++ b/source/common/predict.h @@ -301,9 +301,11 @@ int get_pu_type_for_mvp(int bsx, int bsy, int cu_pix_x, int cu_pix_y) return 0; // default } +#define get_mvp_default FPFX(get_mvp_default) void get_mvp_default(davs2_t *h, cu_t *p_cu, int pix_x, int pix_y, mv_t *pmv, 
int bwd_2nd, int ref_frame, int bsx, int pu_type_for_mvp); +#define fill_mv_and_ref_for_skip FPFX(fill_mv_and_ref_for_skip) void fill_mv_and_ref_for_skip(davs2_t *h, cu_t *p_cu, int pix_x, int pix_y, int size_in_scu); #ifdef __cplusplus -- Gitee From 5655a1e51d6ab7e4fe62a191172c7cd74c75c884 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:24:50 +0800 Subject: [PATCH 16/22] Rename with macro prefix(15): common/primitives.h --- source/common/primitives.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/common/primitives.h b/source/common/primitives.h index 02caac1..8df3239 100644 --- a/source/common/primitives.h +++ b/source/common/primitives.h @@ -170,14 +170,18 @@ extern ao_funcs_t gf_davs2; * interface function declares * =========================================================================== */ +#define init_all_primitives FPFX(init_all_primitives) void init_all_primitives(uint32_t cpuid); /* --------------------------------------------------------------------------- * extern functions */ +#define davs2_mc_init FPFX(mc_init) void davs2_mc_init (uint32_t cpuid, ao_funcs_t *pf); +#define davs2_pixel_init FPFX(pixel_init) void davs2_pixel_init (uint32_t cpuid, ao_funcs_t* pixf); +#define davs2_memory_init FPFX(memory_init) void davs2_memory_init(uint32_t cpuid, ao_funcs_t* pixf); #ifdef __cplusplus -- Gitee From c79aa1e0c28cecfea338ab8b6008131e28d3de86 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:25:11 +0800 Subject: [PATCH 17/22] Rename with macro prefix(16): common/quant.h --- source/common/quant.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/common/quant.h b/source/common/quant.h index bd82464..dc20e60 100644 --- a/source/common/quant.h +++ b/source/common/quant.h @@ -36,9 +36,13 @@ extern "C" { #endif +#define QP_SCALE_CR FPFX(QP_SCALE_CR) extern const uint8_t QP_SCALE_CR[]; +#define IQ_SHIFT FPFX(IQ_SHIFT) extern const int16_t IQ_SHIFT[]; +#define IQ_TAB FPFX(IQ_TAB) extern const uint16_t 
IQ_TAB[]; +#define wq_param_default FPFX(wq_param_default) extern const int16_t wq_param_default[2][6]; @@ -62,14 +66,19 @@ extern const int16_t wq_param_default[2][6]; #define WQ_MODE_U 1 #define WQ_MODE_D 2 +#define wq_get_default_matrix FPFX(wq_get_default_matrix) const int *wq_get_default_matrix(int sizeId); +#define wq_init_frame_quant_param FPFX(wq_init_frame_quant_param) void wq_init_frame_quant_param(davs2_t *h); +#define wq_update_frame_matrix FPFX(wq_update_frame_matrix) void wq_update_frame_matrix(davs2_t *h); /* dequant */ +#define dequant_coeffs FPFX(dequant_coeffs) void dequant_coeffs(davs2_t *h, coeff_t *p_coeff, int bsx, int bsy, int scale, int shift, int WQMSizeId); +#define davs2_quant_init FPFX(quant_init) void davs2_quant_init(uint32_t cpuid, ao_funcs_t *fh); -- Gitee From 44be2dce790aa149f9537ea66ebe93cbc51f2736 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:25:20 +0800 Subject: [PATCH 18/22] Rename with macro prefix(17): common/sao.h --- source/common/sao.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/common/sao.h b/source/common/sao.h index eb722a1..f97cff4 100644 --- a/source/common/sao.h +++ b/source/common/sao.h @@ -36,11 +36,15 @@ extern "C" { #endif +#define sao_read_lcu_param FPFX(sao_read_lcu_param) void sao_read_lcu_param(davs2_t *h, int lcu_xy, bool_t *slice_sao_on, sao_t *sao_param); +#define sao_lcu FPFX(sao_lcu) void sao_lcu(davs2_t *h, davs2_frame_t *p_tmp_frm, davs2_frame_t *p_dec_frm, int i_lcu_x, int i_lcu_y); +#define sao_lcurow FPFX(sao_lcurow) void sao_lcurow(davs2_t *h, davs2_frame_t *p_tmp_frm, davs2_frame_t *p_dec_frm, int i_lcu_y); +#define davs2_sao_init FPFX(sao_init) void davs2_sao_init(uint32_t cpuid, ao_funcs_t *fh); #ifdef __cplusplus -- Gitee From f67bb54218df9bd0e770f301fdd574ccf9ef59eb Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:25:35 +0800 Subject: [PATCH 19/22] Rename with macro prefix(18): common/threadpool.h --- source/common/threadpool.h | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/source/common/threadpool.h b/source/common/threadpool.h index c739fb0..458cf49 100644 --- a/source/common/threadpool.h +++ b/source/common/threadpool.h @@ -39,11 +39,16 @@ extern "C" { typedef struct davs2_threadpool_t davs2_threadpool_t; typedef void *(*davs2_threadpool_func_t)(void *arg1, int arg2); +#define davs2_threadpool_init FPFX(threadpool_init) int davs2_threadpool_init (davs2_threadpool_t **p_pool, int threads, davs2_threadpool_func_t init_func, void *init_arg1, int init_arg2); +#define davs2_threadpool_run FPFX(threadpool_run) void davs2_threadpool_run (davs2_threadpool_t *pool, davs2_threadpool_func_t func, void *arg1, int arg2, int wait_sign); +#define davs2_threadpool_is_free FPFX(threadpool_is_free) int davs2_threadpool_is_free(davs2_threadpool_t *pool); +#define davs2_threadpool_wait FPFX(threadpool_wait) void *davs2_threadpool_wait (davs2_threadpool_t *pool, void *arg1, int arg2); +#define davs2_threadpool_delete FPFX(threadpool_delete) void davs2_threadpool_delete(davs2_threadpool_t *pool); #ifdef __cplusplus -- Gitee From 69a4e14675e92c021f07fc3ded6525999c06a515 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:26:36 +0800 Subject: [PATCH 20/22] Rename with macro prefix(19): common/transform.h --- source/common/transform.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/common/transform.h b/source/common/transform.h index ef3a0af..c06e220 100644 --- a/source/common/transform.h +++ b/source/common/transform.h @@ -36,8 +36,10 @@ extern "C" { #endif +#define davs2_dct_init FPFX(dct_init) void davs2_dct_init(uint32_t cpuid, ao_funcs_t *fh); +#define davs2_get_recons FPFX(get_recons) void davs2_get_recons(davs2_row_rec_t *row_rec, cu_t *p_cu, int blockidx, cb_t *p_tu, int ctu_x, int ctu_y); #ifdef __cplusplus -- Gitee From 53e6b06a575be1614cf8bc921d7226850ae179e6 Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:26:54 +0800 Subject: [PATCH 21/22] Rename with macro prefix(20): 
common/vec/intrinsic.h --- source/common/vec/intrinsic.h | 201 ++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/source/common/vec/intrinsic.h b/source/common/vec/intrinsic.h index 51141f8..6a19fe5 100644 --- a/source/common/vec/intrinsic.h +++ b/source/common/vec/intrinsic.h @@ -44,25 +44,38 @@ extern "C" { /* --------------------------------------------------------------------------- * global variables */ +#define intrinsic_mask FPFX(intrinsic_mask) ALIGN32(extern const int8_t intrinsic_mask[15][16]); +#define intrinsic_mask_256_8bit FPFX(intrinsic_mask_256_8bit) ALIGN32(extern const int8_t intrinsic_mask_256_8bit[16][32]); +#define intrinsic_mask32 FPFX(intrinsic_mask32) ALIGN32(extern const int8_t intrinsic_mask32[32][32]); +#define intrinsic_mask_10bit FPFX(intrinsic_mask_10bit) ALIGN32(extern const int16_t intrinsic_mask_10bit[15][16]); +#define tab_log2 FPFX(tab_log2) ALIGN32(extern const int8_t tab_log2[65]); +#define tab_coeff_mode_7 FPFX(tab_coeff_mode_7) ALIGN16(extern const pel_t tab_coeff_mode_7[64][16]); +#define tab_idx_mode_7 FPFX(tab_idx_mode_7) ALIGN32(extern const uint8_t tab_idx_mode_7[64]); +#define tab_coeff_mode_7_avx FPFX(tab_coeff_mode_7_avx) ALIGN32(extern const pel_t tab_coeff_mode_7_avx[64][32]); #if HIGH_BIT_DEPTH +#define tab_coeff_mode_9 FPFX(tab_coeff_mode_9) ALIGN16(extern const int16_t tab_coeff_mode_9[64][16]); #else +#define tab_coeff_mode_9 FPFX(tab_coeff_mode_9) ALIGN16(extern const int8_t tab_coeff_mode_9[64][16]); #endif +#define tab_idx_mode_9 FPFX(tab_idx_mode_9) extern const uint8_t tab_idx_mode_9[64]; #if HIGH_BIT_DEPTH +#define tab_coeff_mode_11 FPFX(tab_coeff_mode_11) ALIGN16(extern const int16_t tab_coeff_mode_11[64][16]); #else +#define tab_coeff_mode_11 FPFX(tab_coeff_mode_11) ALIGN16(extern const int8_t tab_coeff_mode_11[64][16]); #endif @@ -100,171 +113,299 @@ ALIGN16(extern const int8_t tab_coeff_mode_11[64][16]); } while (0) #endif +#define davs2_memzero_aligned_c_sse2 
FPFX(memzero_aligned_c_sse2) void *davs2_memzero_aligned_c_sse2(void *dst, size_t n); +#define davs2_memzero_aligned_c_avx FPFX(memzero_aligned_c_avx) void *davs2_memzero_aligned_c_avx (void *dst, size_t n); +#define davs2_memcpy_aligned_c_sse2 FPFX(memcpy_aligned_c_sse2) void *davs2_memcpy_aligned_c_sse2 (void *dst, const void *src, size_t n); +#define davs2_memcpy_aligned_mmx FPFX(memcpy_aligned_mmx) void *davs2_memcpy_aligned_mmx(void *dst, const void *src, size_t n); +#define davs2_memcpy_aligned_sse FPFX(memcpy_aligned_sse) void *davs2_memcpy_aligned_sse(void *dst, const void *src, size_t n); +#define davs2_fast_memcpy_mmx FPFX(fast_memcpy_mmx) void *davs2_fast_memcpy_mmx(void *dst, const void *src, size_t n); +#define davs2_fast_memset_mmx FPFX(fast_memset_mmx) void *davs2_fast_memset_mmx(void *dst, int val, size_t n); +#define davs2_memzero_aligned_mmx FPFX(memzero_aligned_mmx) void *davs2_memzero_aligned_mmx (void *dst, size_t n); +#define davs2_memzero_aligned_sse FPFX(memzero_aligned_sse) void *davs2_memzero_aligned_sse (void *dst, size_t n); +#define davs2_memzero_aligned_avx FPFX(memzero_aligned_avx) void *davs2_memzero_aligned_avx (void *dst, size_t n); +#define davs2_fast_memzero_mmx FPFX(fast_memzero_mmx) void *davs2_fast_memzero_mmx (void *dst, size_t n); +#define plane_copy_c_sse2 FPFX(plane_copy_c_sse2) void plane_copy_c_sse2 (pel_t *dst, intptr_t i_dst, pel_t *src, intptr_t i_src, int w, int h); +#define intpl_copy_block_sse128 FPFX(intpl_copy_block_sse128) void intpl_copy_block_sse128 (pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height); +#define intpl_luma_block_hor_sse128 FPFX(intpl_luma_block_hor_sse128) void intpl_luma_block_hor_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver_sse128 FPFX(intpl_luma_block_ver_sse128) void intpl_luma_block_ver_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define 
intpl_luma_block_ver0_sse128 FPFX(intpl_luma_block_ver0_sse128) void intpl_luma_block_ver0_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver1_sse128 FPFX(intpl_luma_block_ver1_sse128) void intpl_luma_block_ver1_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver2_sse128 FPFX(intpl_luma_block_ver2_sse128) void intpl_luma_block_ver2_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ext_sse128 FPFX(intpl_luma_block_ext_sse128) void intpl_luma_block_ext_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff_h, const int8_t *coeff_v); +#define intpl_chroma_block_hor_sse128 FPFX(intpl_chroma_block_hor_sse128) void intpl_chroma_block_hor_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_chroma_block_ver_sse128 FPFX(intpl_chroma_block_ver_sse128) void intpl_chroma_block_ver_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_chroma_block_ext_sse128 FPFX(intpl_chroma_block_ext_sse128) void intpl_chroma_block_ext_sse128(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff_h, const int8_t *coeff_v); +#define intpl_luma_block_hor_avx2 FPFX(intpl_luma_block_hor_avx2) void intpl_luma_block_hor_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver_avx2 FPFX(intpl_luma_block_ver_avx2) void intpl_luma_block_ver_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver0_avx2 FPFX(intpl_luma_block_ver0_avx2) void intpl_luma_block_ver0_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t 
*coeff); +#define intpl_luma_block_ver1_avx2 FPFX(intpl_luma_block_ver1_avx2) void intpl_luma_block_ver1_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ver2_avx2 FPFX(intpl_luma_block_ver2_avx2) void intpl_luma_block_ver2_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_luma_block_ext_avx2 FPFX(intpl_luma_block_ext_avx2) void intpl_luma_block_ext_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff_h, const int8_t *coeff_v); /* --------------------------------------------------------------------------- */ +#define intpl_luma_hor_sse128 FPFX(intpl_luma_hor_sse128) void intpl_luma_hor_sse128(pel_t *dst, int i_dst, mct_t *tmp, int i_tmp, pel_t *src, int i_src, int width, int height, int8_t const *coeff); +#define intpl_luma_hor_x3_sse128 FPFX(intpl_luma_hor_x3_sse128) void intpl_luma_hor_x3_sse128(pel_t *const dst[3], int i_dst, mct_t *const tmp[3], int i_tmp, pel_t *src, int i_src, int width, int height, const int8_t **coeff); +#define intpl_luma_ver_x3_sse128 FPFX(intpl_luma_ver_x3_sse128) void intpl_luma_ver_x3_sse128(pel_t *const dst[3], int i_dst, pel_t *src, int i_src, int width, int height, int8_t const **coeff); +#define intpl_luma_ext_x3_sse128 FPFX(intpl_luma_ext_x3_sse128) void intpl_luma_ext_x3_sse128(pel_t *const dst[3], int i_dst, mct_t *tmp, int i_tmp, int width, int height, const int8_t **coeff); +#define intpl_luma_ext_sse128 FPFX(intpl_luma_ext_sse128) void intpl_luma_ext_sse128(pel_t *dst, int i_dst, mct_t *tmp, int i_tmp, int width, int height, const int8_t *coeff); +#define avs_pixel_average_sse128 FPFX(avs_pixel_average_sse128) void avs_pixel_average_sse128 (pel_t *dst, int i_dst, const pel_t *src0, int i_src0, const pel_t *src1, int i_src1, int width, int height); +#define davs2_pixel_average_avx FPFX(pixel_average_avx) void davs2_pixel_average_avx (pel_t *dst, int 
i_dst, const pel_t *src1, int i_src1, const pel_t *src2, int i_src2, int width, int height); +#define padding_rows_sse128 FPFX(padding_rows_sse128) void padding_rows_sse128 (pel_t *src, int i_src, int width, int height, int start, int rows, int pad); +#define padding_rows_lr_sse128 FPFX(padding_rows_lr_sse128) void padding_rows_lr_sse128 (pel_t *src, int i_src, int width, int height, int start, int rows, int pad); +#define intpl_chroma_block_hor_avx2 FPFX(intpl_chroma_block_hor_avx2) void intpl_chroma_block_hor_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_chroma_block_ver_avx2 FPFX(intpl_chroma_block_ver_avx2) void intpl_chroma_block_ver_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff); +#define intpl_chroma_block_ext_avx2 FPFX(intpl_chroma_block_ext_avx2) void intpl_chroma_block_ext_avx2(pel_t *dst, int i_dst, pel_t *src, int i_src, int width, int height, const int8_t *coeff_h, const int8_t *coeff_v); +#define deblock_edge_ver_sse128 FPFX(deblock_edge_ver_sse128) void deblock_edge_ver_sse128 (pel_t *SrcPtr, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define deblock_edge_hor_sse128 FPFX(deblock_edge_hor_sse128) void deblock_edge_hor_sse128 (pel_t *SrcPtr, int stride, int Alpha, int Beta, uint8_t *flt_flag); #if HDR_CHROMA_DELTA_QP +#define deblock_edge_ver_c_sse128 FPFX(deblock_edge_ver_c_sse128) void deblock_edge_ver_c_sse128(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int *Alpha, int *Beta, uint8_t *flt_flag); +#define deblock_edge_hor_c_sse128 FPFX(deblock_edge_hor_c_sse128) void deblock_edge_hor_c_sse128(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int *Alpha, int *Beta, uint8_t *flt_flag); #else +#define deblock_edge_ver_c_sse128 FPFX(deblock_edge_ver_c_sse128) void deblock_edge_ver_c_sse128(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define deblock_edge_hor_c_sse128 FPFX(deblock_edge_hor_c_sse128) 
void deblock_edge_hor_c_sse128(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int Alpha, int Beta, uint8_t *flt_flag); #endif //--------avx2-------- +#define deblock_edge_hor_avx2 FPFX(deblock_edge_hor_avx2) void deblock_edge_hor_avx2(pel_t *SrcPtr, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define deblock_edge_ver_avx2 FPFX(deblock_edge_ver_avx2) void deblock_edge_ver_avx2(pel_t *SrcPtr, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define deblock_edge_hor_c_avx2 FPFX(deblock_edge_hor_c_avx2) void deblock_edge_hor_c_avx2(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define deblock_edge_ver_c_avx2 FPFX(deblock_edge_ver_c_avx2) void deblock_edge_ver_c_avx2(pel_t *SrcPtrU, pel_t *SrcPtrV, int stride, int Alpha, int Beta, uint8_t *flt_flag); +#define davs2_dequant_sse4 FPFX(dequant_sse4) void davs2_dequant_sse4(coeff_t *coef, const int i_coef, const int scale, const int shift); +#define idct_4x4_sse128 FPFX(idct_4x4_sse128) void idct_4x4_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x8_sse128 FPFX(idct_8x8_sse128) void idct_8x8_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_sse128 FPFX(idct_16x16_sse128) void idct_16x16_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_sse128 FPFX(idct_32x32_sse128) void idct_32x32_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_sse128 FPFX(idct_64x64_sse128) void idct_64x64_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x4_sse128 FPFX(idct_16x4_sse128) void idct_16x4_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x8_sse128 FPFX(idct_32x8_sse128) void idct_32x8_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_sse128 FPFX(idct_64x16_sse128) void idct_64x16_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x16_sse128 FPFX(idct_4x16_sse128) void idct_4x16_sse128 (const coeff_t *src, coeff_t *dst, 
int i_dst); +#define idct_8x32_sse128 FPFX(idct_8x32_sse128) void idct_8x32_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_sse128 FPFX(idct_16x64_sse128) void idct_16x64_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define inv_transform_4x4_2nd_sse128 FPFX(inv_transform_4x4_2nd_sse128) void inv_transform_4x4_2nd_sse128(coeff_t *coeff, int i_coeff); +#define inv_transform_2nd_sse128 FPFX(inv_transform_2nd_sse128) void inv_transform_2nd_sse128 (coeff_t *coeff, int i_coeff, int i_mode, int b_top, int b_left); +#define inv_wavelet_64x16_sse128 FPFX(inv_wavelet_64x16_sse128) void inv_wavelet_64x16_sse128(coeff_t *coeff); +#define inv_wavelet_16x64_sse128 FPFX(inv_wavelet_16x64_sse128) void inv_wavelet_16x64_sse128(coeff_t *coeff); //futl add 2016.11.30 avx2 +#define idct_8x8_avx2 FPFX(vec_idct_8x8_avx2) void idct_8x8_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_avx2 FPFX(vec_idct_16x16_avx2) void idct_16x16_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_avx2 FPFX(vec_idct_32x32_avx2) void idct_32x32_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_avx2 FPFX(vec_idct_64x64_avx2) void idct_64x64_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_avx2 FPFX(vec_idct_64x16_avx2) void idct_64x16_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_avx2 FPFX(vec_idct_16x64_avx2) void idct_16x64_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define inv_wavelet_64x16_avx2 FPFX(inv_wavelet_64x16_avx2) void inv_wavelet_64x16_avx2(coeff_t *coeff); +#define inv_wavelet_16x64_avx2 FPFX(inv_wavelet_16x64_avx2) void inv_wavelet_16x64_avx2(coeff_t *coeff); +#define inv_wavelet_64x64_avx2 FPFX(inv_wavelet_64x64_avx2) void inv_wavelet_64x64_avx2(coeff_t *coeff); /* DCT half and quad */ +#define idct_4x4_half_sse128 FPFX(idct_4x4_half_sse128) void idct_4x4_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define 
idct_8x8_half_sse128 FPFX(idct_8x8_half_sse128) void idct_8x8_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_half_sse128 FPFX(idct_16x16_half_sse128) void idct_16x16_half_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_half_sse128 FPFX(idct_32x32_half_sse128) void idct_32x32_half_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_half_sse128 FPFX(idct_64x64_half_sse128) void idct_64x64_half_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x4_half_sse128 FPFX(idct_16x4_half_sse128) void idct_16x4_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x8_half_sse128 FPFX(idct_32x8_half_sse128) void idct_32x8_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x16_half_sse128 FPFX(idct_4x16_half_sse128) void idct_4x16_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x32_half_sse128 FPFX(idct_8x32_half_sse128) void idct_8x32_half_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_half_sse128 FPFX(idct_16x64_half_sse128) void idct_16x64_half_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_half_sse128 FPFX(idct_64x16_half_sse128) void idct_64x16_half_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x4_quad_sse128 FPFX(idct_4x4_quad_sse128) void idct_4x4_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x8_quad_sse128 FPFX(idct_8x8_quad_sse128) void idct_8x8_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_quad_sse128 FPFX(idct_16x16_quad_sse128) void idct_16x16_quad_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_quad_sse128 FPFX(idct_32x32_quad_sse128) void idct_32x32_quad_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_quad_sse128 FPFX(idct_64x64_quad_sse128) void idct_64x64_quad_sse128(const coeff_t *src, coeff_t *dst, int i_dst); 
+#define idct_16x4_quad_sse128 FPFX(idct_16x4_quad_sse128) void idct_16x4_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x8_quad_sse128 FPFX(idct_32x8_quad_sse128) void idct_32x8_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x16_quad_sse128 FPFX(idct_4x16_quad_sse128) void idct_4x16_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x32_quad_sse128 FPFX(idct_8x32_quad_sse128) void idct_8x32_quad_sse128 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_quad_sse128 FPFX(idct_16x64_quad_sse128) void idct_16x64_quad_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_quad_sse128 FPFX(idct_64x16_quad_sse128) void idct_64x16_quad_sse128(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x8_half_avx2 FPFX(idct_8x8_half_avx2) void idct_8x8_half_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_half_avx2 FPFX(idct_16x16_half_avx2) void idct_16x16_half_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_half_avx2 FPFX(idct_32x32_half_avx2) void idct_32x32_half_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_half_avx2 FPFX(idct_64x64_half_avx2) void idct_64x64_half_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x4_half_avx2 FPFX(idct_16x4_half_avx2) void idct_16x4_half_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x8_half_avx2 FPFX(idct_32x8_half_avx2) void idct_32x8_half_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x16_half_avx2 FPFX(idct_4x16_half_avx2) void idct_4x16_half_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x32_half_avx2 FPFX(idct_8x32_half_avx2) void idct_8x32_half_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_half_avx2 FPFX(idct_16x64_half_avx2) void idct_16x64_half_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_half_avx2 
FPFX(idct_64x16_half_avx2) void idct_64x16_half_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x8_quad_avx2 FPFX(idct_8x8_quad_avx2) void idct_8x8_quad_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x16_quad_avx2 FPFX(idct_16x16_quad_avx2) void idct_16x16_quad_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x32_quad_avx2 FPFX(idct_32x32_quad_avx2) void idct_32x32_quad_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x64_quad_avx2 FPFX(idct_64x64_quad_avx2) void idct_64x64_quad_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x4_quad_avx2 FPFX(idct_16x4_quad_avx2) void idct_16x4_quad_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_32x8_quad_avx2 FPFX(idct_32x8_quad_avx2) void idct_32x8_quad_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_4x16_quad_avx2 FPFX(idct_4x16_quad_avx2) void idct_4x16_quad_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_8x32_quad_avx2 FPFX(idct_8x32_quad_avx2) void idct_8x32_quad_avx2 (const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_16x64_quad_avx2 FPFX(idct_16x64_quad_avx2) void idct_16x64_quad_avx2(const coeff_t *src, coeff_t *dst, int i_dst); +#define idct_64x16_quad_avx2 FPFX(idct_64x16_quad_avx2) void idct_64x16_quad_avx2(const coeff_t *src, coeff_t *dst, int i_dst); /* --------------------------------------------------------------------------- * SAO */ +#define SAO_on_block_bo_sse128 FPFX(SAO_on_block_bo_sse128) void SAO_on_block_bo_sse128 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const sao_param_t *sao_param); +#define SAO_on_block_eo_0_sse128 FPFX(SAO_on_block_eo_0_sse128) void SAO_on_block_eo_0_sse128 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_45_sse128 FPFX(SAO_on_block_eo_45_sse128) 
void SAO_on_block_eo_45_sse128 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_90_sse128 FPFX(SAO_on_block_eo_90_sse128) void SAO_on_block_eo_90_sse128 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_135_sse128 FPFX(SAO_on_block_eo_135_sse128) void SAO_on_block_eo_135_sse128(pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_bo_avx2 FPFX(SAO_on_block_bo_avx2) void SAO_on_block_bo_avx2 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const sao_param_t *sao_param); +#define SAO_on_block_eo_0_avx2 FPFX(SAO_on_block_eo_0_avx2) void SAO_on_block_eo_0_avx2 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_45_avx2 FPFX(SAO_on_block_eo_45_avx2) void SAO_on_block_eo_45_avx2 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_90_avx2 FPFX(SAO_on_block_eo_90_avx2) void SAO_on_block_eo_90_avx2 (pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); +#define SAO_on_block_eo_135_avx2 FPFX(SAO_on_block_eo_135_avx2) void SAO_on_block_eo_135_avx2(pel_t *p_dst, int i_dst, const pel_t *p_src, int i_src, int i_block_w, int i_block_h, int bit_depth, const int *lcu_avail, const int *sao_offset); /* --------------------------------------------------------------------------- * ALF */ +#define alf_filter_block_sse128 FPFX(alf_filter_block_sse128) 
void alf_filter_block_sse128(pel_t *p_dst, const pel_t *p_src, int stride, int lcu_pix_x, int lcu_pix_y, int lcu_width, int lcu_height, int *alf_coef, int b_top_avail, int b_down_avail); @@ -273,73 +414,133 @@ void alf_filter_block_sse128(pel_t *p_dst, const pel_t *p_src, int stride, /* --------------------------------------------------------------------------- * Intra Prediction */ +#define fill_edge_samples_0_sse128 FPFX(fill_edge_samples_0_sse128) void fill_edge_samples_0_sse128 (const pel_t *pTL, int i_TL, const pel_t *pLcuEP, pel_t *EP, uint32_t i_avai, int bsx, int bsy); +#define fill_edge_samples_x_sse128 FPFX(fill_edge_samples_x_sse128) void fill_edge_samples_x_sse128 (const pel_t *pTL, int i_TL, const pel_t *pLcuEP, pel_t *EP, uint32_t i_avai, int bsx, int bsy); +#define fill_edge_samples_y_sse128 FPFX(fill_edge_samples_y_sse128) void fill_edge_samples_y_sse128 (const pel_t *pTL, int i_TL, const pel_t *pLcuEP, pel_t *EP, uint32_t i_avai, int bsx, int bsy); +#define fill_edge_samples_xy_sse128 FPFX(fill_edge_samples_xy_sse128) void fill_edge_samples_xy_sse128(const pel_t *pTL, int i_TL, const pel_t *pLcuEP, pel_t *EP, uint32_t i_avai, int bsx, int bsy); +#define intra_pred_dc_sse128 FPFX(intra_pred_dc_sse128) void intra_pred_dc_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_plane_sse128 FPFX(intra_pred_plane_sse128) void intra_pred_plane_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_bilinear_sse128 FPFX(intra_pred_bilinear_sse128) void intra_pred_bilinear_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_hor_sse128 FPFX(intra_pred_hor_sse128) void intra_pred_hor_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ver_sse128 FPFX(intra_pred_ver_sse128) void intra_pred_ver_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define 
intra_pred_ang_x_3_sse128 FPFX(intra_pred_ang_x_3_sse128) void intra_pred_ang_x_3_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_4_sse128 FPFX(intra_pred_ang_x_4_sse128) void intra_pred_ang_x_4_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_5_sse128 FPFX(intra_pred_ang_x_5_sse128) void intra_pred_ang_x_5_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_6_sse128 FPFX(intra_pred_ang_x_6_sse128) void intra_pred_ang_x_6_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_7_sse128 FPFX(intra_pred_ang_x_7_sse128) void intra_pred_ang_x_7_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_8_sse128 FPFX(intra_pred_ang_x_8_sse128) void intra_pred_ang_x_8_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_9_sse128 FPFX(intra_pred_ang_x_9_sse128) void intra_pred_ang_x_9_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_10_sse128 FPFX(intra_pred_ang_x_10_sse128) void intra_pred_ang_x_10_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_11_sse128 FPFX(intra_pred_ang_x_11_sse128) void intra_pred_ang_x_11_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_25_sse128 FPFX(intra_pred_ang_y_25_sse128) void intra_pred_ang_y_25_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_26_sse128 FPFX(intra_pred_ang_y_26_sse128) void intra_pred_ang_y_26_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_27_sse128 FPFX(intra_pred_ang_y_27_sse128) void intra_pred_ang_y_27_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, 
int bsy); +#define intra_pred_ang_y_28_sse128 FPFX(intra_pred_ang_y_28_sse128) void intra_pred_ang_y_28_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_29_sse128 FPFX(intra_pred_ang_y_29_sse128) void intra_pred_ang_y_29_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_30_sse128 FPFX(intra_pred_ang_y_30_sse128) void intra_pred_ang_y_30_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_31_sse128 FPFX(intra_pred_ang_y_31_sse128) void intra_pred_ang_y_31_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_32_sse128 FPFX(intra_pred_ang_y_32_sse128) void intra_pred_ang_y_32_sse128 (pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_13_sse128 FPFX(intra_pred_ang_xy_13_sse128) void intra_pred_ang_xy_13_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_14_sse128 FPFX(intra_pred_ang_xy_14_sse128) void intra_pred_ang_xy_14_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_16_sse128 FPFX(intra_pred_ang_xy_16_sse128) void intra_pred_ang_xy_16_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_18_sse128 FPFX(intra_pred_ang_xy_18_sse128) void intra_pred_ang_xy_18_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_20_sse128 FPFX(intra_pred_ang_xy_20_sse128) void intra_pred_ang_xy_20_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_22_sse128 FPFX(intra_pred_ang_xy_22_sse128) void intra_pred_ang_xy_22_sse128(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_23_sse128 FPFX(intra_pred_ang_xy_23_sse128) void intra_pred_ang_xy_23_sse128(pel_t 
*src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); //intra prediction avx functions +#define intra_pred_ver_avx FPFX(intra_pred_ver_avx) void intra_pred_ver_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_hor_avx FPFX(intra_pred_hor_avx) void intra_pred_hor_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_dc_avx FPFX(intra_pred_dc_avx) void intra_pred_dc_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_plane_avx FPFX(intra_pred_plane_avx) void intra_pred_plane_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_bilinear_avx FPFX(intra_pred_bilinear_avx) void intra_pred_bilinear_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_3_avx FPFX(intra_pred_ang_x_3_avx) void intra_pred_ang_x_3_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_4_avx FPFX(intra_pred_ang_x_4_avx) void intra_pred_ang_x_4_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_5_avx FPFX(intra_pred_ang_x_5_avx) void intra_pred_ang_x_5_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_6_avx FPFX(intra_pred_ang_x_6_avx) void intra_pred_ang_x_6_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_7_avx FPFX(intra_pred_ang_x_7_avx) void intra_pred_ang_x_7_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_8_avx FPFX(intra_pred_ang_x_8_avx) void intra_pred_ang_x_8_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_9_avx FPFX(intra_pred_ang_x_9_avx) void intra_pred_ang_x_9_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_10_avx FPFX(intra_pred_ang_x_10_avx) 
void intra_pred_ang_x_10_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_x_11_avx FPFX(intra_pred_ang_x_11_avx) void intra_pred_ang_x_11_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_13_avx FPFX(intra_pred_ang_xy_13_avx) void intra_pred_ang_xy_13_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_14_avx FPFX(intra_pred_ang_xy_14_avx) void intra_pred_ang_xy_14_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_16_avx FPFX(intra_pred_ang_xy_16_avx) void intra_pred_ang_xy_16_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_18_avx FPFX(intra_pred_ang_xy_18_avx) void intra_pred_ang_xy_18_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_20_avx FPFX(intra_pred_ang_xy_20_avx) void intra_pred_ang_xy_20_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_22_avx FPFX(intra_pred_ang_xy_22_avx) void intra_pred_ang_xy_22_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_xy_23_avx FPFX(intra_pred_ang_xy_23_avx) void intra_pred_ang_xy_23_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_25_avx FPFX(intra_pred_ang_y_25_avx) void intra_pred_ang_y_25_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_26_avx FPFX(intra_pred_ang_y_26_avx) void intra_pred_ang_y_26_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_28_avx FPFX(intra_pred_ang_y_28_avx) void intra_pred_ang_y_28_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_30_avx FPFX(intra_pred_ang_y_30_avx) void intra_pred_ang_y_30_avx(pel_t *src, pel_t *dst, int 
i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_31_avx FPFX(intra_pred_ang_y_31_avx) void intra_pred_ang_y_31_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); +#define intra_pred_ang_y_32_avx FPFX(intra_pred_ang_y_32_avx) void intra_pred_ang_y_32_avx(pel_t *src, pel_t *dst, int i_dst, int dir_mode, int bsx, int bsy); /* Function declaration defines */ -- Gitee From ea272c87bccc53f9f0c3b0e93e6552283a7219bf Mon Sep 17 00:00:00 2001 From: hwren Date: Mon, 17 Sep 2018 17:27:27 +0800 Subject: [PATCH 22/22] Rename with macro prefix(21): common/win32thread.h --- source/common/win32thread.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/source/common/win32thread.h b/source/common/win32thread.h index 6c8675e..1f6376c 100644 --- a/source/common/win32thread.h +++ b/source/common/win32thread.h @@ -68,27 +68,41 @@ typedef struct { } davs2_thread_cond_t; #define davs2_thread_condattr_t int +#define davs2_thread_create FPFX(thread_create) int davs2_thread_create(davs2_thread_t *thread, const davs2_thread_attr_t *attr, void *(*start_routine)(void *), void *arg); +#define davs2_thread_join FPFX(thread_join) int davs2_thread_join(davs2_thread_t thread, void **value_ptr); +#define davs2_thread_mutex_init FPFX(thread_mutex_init) int davs2_thread_mutex_init(davs2_thread_mutex_t *mutex, const davs2_thread_mutexattr_t *attr); +#define davs2_thread_mutex_destroy FPFX(thread_mutex_destroy) int davs2_thread_mutex_destroy(davs2_thread_mutex_t *mutex); +#define davs2_thread_mutex_lock FPFX(thread_mutex_lock) int davs2_thread_mutex_lock(davs2_thread_mutex_t *mutex); +#define davs2_thread_mutex_unlock FPFX(thread_mutex_unlock) int davs2_thread_mutex_unlock(davs2_thread_mutex_t *mutex); +#define davs2_thread_cond_init FPFX(thread_cond_init) int davs2_thread_cond_init(davs2_thread_cond_t *cond, const davs2_thread_condattr_t *attr); +#define davs2_thread_cond_destroy FPFX(thread_cond_destroy) int 
davs2_thread_cond_destroy(davs2_thread_cond_t *cond); +#define davs2_thread_cond_broadcast FPFX(thread_cond_broadcast) int davs2_thread_cond_broadcast(davs2_thread_cond_t *cond); +#define davs2_thread_cond_wait FPFX(thread_cond_wait) int davs2_thread_cond_wait(davs2_thread_cond_t *cond, davs2_thread_mutex_t *mutex); +#define davs2_thread_cond_signal FPFX(thread_cond_signal) int davs2_thread_cond_signal(davs2_thread_cond_t *cond); #define davs2_thread_attr_init(a) 0 #define davs2_thread_attr_destroy(a) 0 +#define davs2_win32_threading_init FPFX(win32_threading_init) int davs2_win32_threading_init(void); +#define davs2_win32_threading_destroy FPFX(win32_threading_destroy) void davs2_win32_threading_destroy(void); +#define davs2_thread_num_processors_np FPFX(thread_num_processors_np) int davs2_thread_num_processors_np(void); #ifdef __cplusplus -- Gitee