1 Star 0 Fork 95

huangxiaoquan / src-openEuler-gcc

forked from src-openEuler / gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch 6.38 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Tue, 24 Jan 2023 16:43:40 +0300
Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
ops too.
---
gcc/common.opt | 5 ++
gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
gcc/tree-ssa-math-opts.cc | 43 ++++++++--
3 files changed, 184 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
diff --git a/gcc/common.opt b/gcc/common.opt
index dac477c04..39c90604e 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3106,6 +3106,11 @@ freciprocal-math
Common Var(flag_reciprocal_math) SetByCombined Optimization
Same as -fassociative-math for expressions which include division.
+fuaddsub-overflow-match-all
+Common Var(flag_uaddsub_overflow_match_all)
+Match unsigned add/sub overflow even if the target does not support
+the corresponding instruction.
+
; Nonzero means that unsafe floating-point math optimizations are allowed
; for the sake of speed. IEEE compliance is not guaranteed, and operations
; are allowed to assume that their arguments and results are "normal"
diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
new file mode 100644
index 000000000..96c26d308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uaddsub.c
@@ -0,0 +1,143 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
+#include <stdint.h>
+
+typedef unsigned __int128 uint128_t; /* Unsigned 128-bit integer built on the __int128 type.  */
+typedef struct uint256_t /* Pair of 128-bit halves returned by add256 and sub256.  */
+{
+ uint128_t lo; /* Receives the wrapped sum or difference.  */
+ uint128_t hi; /* Receives the overflow indicator.  */
+} uint256_t;
+
+uint16_t add16 (uint8_t a, uint8_t b) /* 8-bit unsigned add; the carry is returned in bit 8 of the result.  */
+{
+ uint8_t tmp = a + b; /* Sum truncated to 8 bits.  */
+ uint8_t overflow = 0;
+ if (tmp < a) /* A truncated sum below an operand means the addition wrapped.  */
+ overflow = 1;
+
+ uint16_t res = overflow; /* Pack the carry above the 8-bit sum.  */
+ res <<= 8;
+ res += tmp;
+ return res;
+}
+
+uint32_t add32 (uint16_t a, uint16_t b) /* 16-bit unsigned add; the carry is returned in bit 16 of the result.  */
+{
+ uint16_t tmp = a + b; /* Sum truncated to 16 bits.  */
+ uint16_t overflow = 0;
+ if (tmp < a) /* A truncated sum below an operand means the addition wrapped.  */
+ overflow = 1;
+
+ uint32_t res = overflow; /* Pack the carry above the 16-bit sum.  */
+ res <<= 16;
+ res += tmp;
+ return res;
+}
+
+uint64_t add64 (uint32_t a, uint32_t b) /* 32-bit unsigned add; the carry is returned in bit 32 of the result.  */
+{
+ uint32_t tmp = a + b; /* Sum wrapped to 32 bits.  */
+ uint32_t overflow = 0;
+ if (tmp < a) /* A wrapped sum below an operand means the addition overflowed.  */
+ overflow = 1;
+
+ uint64_t res = overflow; /* Pack the carry above the 32-bit sum.  */
+ res <<= 32;
+ res += tmp;
+ return res;
+}
+
+uint128_t add128 (uint64_t a, uint64_t b) /* 64-bit unsigned add; the carry is returned in bit 64 of the result.  */
+{
+ uint64_t tmp = a + b; /* Sum wrapped to 64 bits.  */
+ uint64_t overflow = 0;
+ if (tmp < a) /* A wrapped sum below an operand means the addition overflowed.  */
+ overflow = 1;
+
+ uint128_t res = overflow; /* Pack the carry above the 64-bit sum.  */
+ res <<= 64;
+ res += tmp;
+ return res;
+}
+
+uint256_t add256 (uint128_t a, uint128_t b) /* 128-bit unsigned add; the carry is returned in the hi member.  */
+{
+ uint128_t tmp = a + b; /* Sum wrapped to 128 bits.  */
+ uint128_t overflow = 0;
+ if (tmp < a) /* A wrapped sum below an operand means the addition overflowed.  */
+ overflow = 1;
+
+ uint256_t res;
+ res.hi = overflow; /* Carry (0 or 1).  */
+ res.lo = tmp; /* Wrapped sum.  */
+ return res;
+}
+
+uint16_t sub16 (uint8_t a, uint8_t b) /* 8-bit unsigned subtract; the high byte of the result is all ones on borrow.  */
+{
+ uint8_t tmp = a - b; /* Difference truncated to 8 bits.  */
+ uint8_t overflow = 0;
+ if (tmp > a) /* A truncated difference above the minuend means the subtraction wrapped.  */
+ overflow = -1; /* All ones in the 8-bit type.  */
+
+ uint16_t res = overflow; /* Pack the borrow mask above the 8-bit difference.  */
+ res <<= 8;
+ res += tmp;
+ return res;
+}
+
+uint32_t sub32 (uint16_t a, uint16_t b) /* 16-bit unsigned subtract; the high half of the result is all ones on borrow.  */
+{
+ uint16_t tmp = a - b; /* Difference truncated to 16 bits.  */
+ uint16_t overflow = 0;
+ if (tmp > a) /* A truncated difference above the minuend means the subtraction wrapped.  */
+ overflow = -1; /* All ones in the 16-bit type.  */
+
+ uint32_t res = overflow; /* Pack the borrow mask above the 16-bit difference.  */
+ res <<= 16;
+ res += tmp;
+ return res;
+}
+
+uint64_t sub64 (uint32_t a, uint32_t b) /* 32-bit unsigned subtract; the high half of the result is all ones on borrow.  */
+{
+ uint32_t tmp = a - b; /* Difference wrapped to 32 bits.  */
+ uint32_t overflow = 0;
+ if (tmp > a) /* A wrapped difference above the minuend means the subtraction borrowed.  */
+ overflow = -1; /* All ones in the 32-bit type.  */
+
+ uint64_t res = overflow; /* Pack the borrow mask above the 32-bit difference.  */
+ res <<= 32;
+ res += tmp;
+ return res;
+}
+
+uint128_t sub128 (uint64_t a, uint64_t b) /* 64-bit unsigned subtract; the high half of the result is all ones on borrow.  */
+{
+ uint64_t tmp = a - b; /* Difference wrapped to 64 bits.  */
+ uint64_t overflow = 0;
+ if (tmp > a) /* A wrapped difference above the minuend means the subtraction borrowed.  */
+ overflow = -1; /* All ones in the 64-bit type.  */
+
+ uint128_t res = overflow; /* Pack the borrow mask above the 64-bit difference.  */
+ res <<= 64;
+ res += tmp;
+ return res;
+}
+
+uint256_t sub256 (uint128_t a, uint128_t b) /* 128-bit unsigned subtract; the hi member is all ones on borrow.  */
+{
+ uint128_t tmp = a - b; /* Difference wrapped to 128 bits.  */
+ uint128_t overflow = 0;
+ if (tmp > a) /* A wrapped difference above the minuend means the subtraction borrowed.  */
+ overflow = -1; /* All ones in the 128-bit type.  */
+
+ uint256_t res;
+ res.hi = overflow; /* Borrow mask (0 or all ones).  */
+ res.lo = tmp; /* Wrapped difference.  */
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 232e903b0..55d6ee8ae 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
}
}
+/* Return true if OP has a handler in some mode wider than MODE.  For smul_optab, a widening multiply chosen by UNSIGNEDP is accepted as well.  Return false if no wider mode qualifies.  */
+
+static bool
+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
+{
+ machine_mode wider_mode;
+ FOR_EACH_WIDER_MODE (wider_mode, mode) /* Visit the modes wider than MODE.  */
+ {
+ machine_mode next_mode;
+ if (optab_handler (op, wider_mode) != CODE_FOR_nothing /* OP itself has a handler in WIDER_MODE.  */
+ || (op == smul_optab /* NOTE(review): the caller visible in this patch passes only uaddv4_optab or usubv4_optab, so this arm is not reached from there.  */
+ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode) /* Needs a mode beyond WIDER_MODE; NEXT_MODE is set on success.  */
+ && (find_widening_optab_handler ((unsignedp /* Presumably looks for a widening multiply from MODE to NEXT_MODE -- confirm the argument order.  */
+ ? umul_widen_optab
+ : smul_widen_optab),
+ next_mode, mode))))
+ return true;
+ }
+
+ return false; /* No wider mode qualified.  */
+}
/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
a check for non-zero like:
@@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|| code == MINUS_EXPR
|| code == MULT_EXPR
|| code == BIT_NOT_EXPR);
+ int unsignedp = TYPE_UNSIGNED (type);
if (!INTEGRAL_TYPE_P (type)
- || !TYPE_UNSIGNED (type)
- || has_zero_uses (lhs)
- || (code != PLUS_EXPR
- && code != MULT_EXPR
- && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
- TYPE_MODE (type)) == CODE_FOR_nothing))
+ || !unsignedp
+ || has_zero_uses (lhs))
return false;
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
+ {
+ machine_mode mode = TYPE_MODE (type);
+ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
+ if (optab_handler (op, mode) == CODE_FOR_nothing
+ && (!flag_uaddsub_overflow_match_all
+ || !wider_optab_check_p (op, mode, unsignedp)))
+ return false;
+ }
+
tree rhs1 = gimple_assign_rhs1 (stmt);
tree rhs2 = gimple_assign_rhs2 (stmt);
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
@@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|| (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
|| (code == PLUS_EXPR
&& optab_handler (uaddv4_optab,
- TYPE_MODE (type)) == CODE_FOR_nothing)
+ TYPE_MODE (type)) == CODE_FOR_nothing
+ && !flag_uaddsub_overflow_match_all)
|| (code == MULT_EXPR
&& optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
TYPE_MODE (type)) == CODE_FOR_nothing))
--
2.33.0
1
https://gitee.com/huang-xiaoquan/src-openEuler-gcc.git
git@gitee.com:huang-xiaoquan/src-openEuler-gcc.git
huang-xiaoquan
src-openEuler-gcc
src-openEuler-gcc
master

搜索帮助