1 Star 0 Fork 128

cenhuilin/gcc

forked from src-openEuler/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch 17.75 KB
一键复制 编辑 原始数据 按行查看 历史
郑晨卉 提交于 2024-04-11 10:45 . [Sync] Sync patch from openeuler/gcc
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
From: vchernon <[email protected]>
Date: Wed, 28 Feb 2024 23:05:12 +0800
Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enchancements new
option: -fifcvt-allow-complicated-cmps: allows ifcvt to deal
with complicated cmps like
cmp reg1 (reg2 + reg3)
can increase compilation time
new param:
-param=ifcvt-allow-register-renaming=[0,1,2]
1 : allows ifcvt to rename registers in then and else bb
2 : allows to rename registers in condition and else/then bb
can increase compilation time and register pressure
---
gcc/common.opt | 4 +
gcc/ifcvt.cc | 291 +++++++++++++++---
gcc/params.opt | 4 +
.../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++
gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++
5 files changed, 311 insertions(+), 50 deletions(-)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
diff --git a/gcc/common.opt b/gcc/common.opt
index c7c6bc256..aa00fb7b0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3691,4 +3691,8 @@ fipa-ra
Common Var(flag_ipa_ra) Optimization
Use caller save register across calls if possible.
+fifcvt-allow-complicated-cmps
+Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
+
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 2c1eba312..584db7b55 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
}
/* Don't even try if the comparison operands or the mode of X are weird. */
- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
+ if (!flag_ifcvt_allow_complicated_cmps
+ && (cond_complex
+ || !SCALAR_INT_MODE_P (GET_MODE (x))))
return NULL_RTX;
return emit_store_flag (x, code, XEXP (cond, 0),
@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
/* Currently support only simple single sets in test_bb. */
if (!sset
|| !noce_operand_ok (SET_DEST (sset))
- || contains_ccmode_rtx_p (SET_DEST (sset))
+ || (!flag_ifcvt_allow_complicated_cmps
+ && contains_ccmode_rtx_p (SET_DEST (sset)))
|| !noce_operand_ok (SET_SRC (sset)))
return false;
@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
in this function. */
static bool
-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+bbs_ok_for_cmove_arith (basic_block bb_a,
+ basic_block bb_b,
+ rtx to_rename,
+ bitmap conflict_regs)
{
rtx_insn *a_insn;
bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
-
+ bitmap intersections = BITMAP_ALLOC (&reg_obstack);
df_ref def;
df_ref use;
+ rtx_insn *last_a = last_active_insn (bb_a, FALSE);
FOR_BB_INSNS (bb_a, a_insn)
{
@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
rtx sset_a = single_set (a_insn);
if (!sset_a)
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
/* Record all registers that BB_A sets. */
FOR_EACH_INSN_DEF (def, a_insn)
- if (!(to_rename && DF_REF_REG (def) == to_rename))
+ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
}
+ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
rtx_insn *b_insn;
-
FOR_BB_INSNS (bb_b, b_insn)
{
if (!active_insn_p (b_insn))
@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
rtx sset_b = single_set (b_insn);
if (!sset_b)
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
/* Make sure this is a REG and not some instance
of ZERO_EXTRACT or SUBREG or other dangerous stuff.
@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
if (MEM_P (SET_DEST (sset_b)))
gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
else if (!REG_P (SET_DEST (sset_b)))
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
- /* If the insn uses a reg set in BB_A return false. */
+ /* If the insn uses a reg set in BB_A return false
+ or try to collect register list for renaming. */
FOR_EACH_INSN_USE (use, b_insn)
{
- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
+ if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
{
- BITMAP_FREE (bba_sets);
- return false;
+ if (param_ifcvt_allow_register_renaming < 1)
+ goto end_cmove_arith_check_and_fail;
+
+ /* Those regs should be renamed. We can't rename CC reg, but
+ possibly we can provide combined comparison in the future. */
+ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
+ goto end_cmove_arith_check_and_fail;
+ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
}
}
-
}
BITMAP_FREE (bba_sets);
+ BITMAP_FREE (intersections);
return true;
+
+end_cmove_arith_check_and_fail:
+ BITMAP_FREE (bba_sets);
+ BITMAP_FREE (intersections);
+ return false;
}
/* Emit copies of all the active instructions in BB except the last.
@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
return true;
}
+/* This function tries to rename regs that intersect with considered bb
+ inside condition expression. Condition expression will be moved down
+ if the optimization will be applied, so it is essential to be sure that
+ all intersected registers will be renamed otherwise transformation
+ can't be applied. Function returns true if renaming was successful
+ and optimization can proceed futher. */
+
+static bool
+noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
+{
+ bool success = true;
+ if (bitmap_empty_p (cond_rename_regs))
+ return true;
+ if (param_ifcvt_allow_register_renaming < 2)
+ return false;
+ df_ref use;
+ rtx_insn *cmp_insn = if_info->cond_earliest;
+ /* Jump instruction as a condion currently unsupported. */
+ if (JUMP_P (cmp_insn))
+ return false;
+ rtx_insn *before_cmp = PREV_INSN (cmp_insn);
+ start_sequence ();
+ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
+ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
+ FOR_EACH_INSN_USE (use, cmp_insn)
+ {
+ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
+ {
+ rtx use_reg = DF_REF_REG (use);
+ rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
+ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
+ {
+ end_sequence ();
+ return false;
+ }
+ noce_emit_move_insn (tmp, use_reg);
+ }
+ }
+
+ emit_insn (PATTERN (copy_of_cmp));
+ rtx_insn *seq = get_insns ();
+ unshare_all_rtl_in_chain (seq);
+ end_sequence ();
+
+ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
+ delete_insn_and_edges (cmp_insn);
+ rtx_insn *insn;
+ FOR_BB_INSNS (cmp_block, insn)
+ df_insn_rescan (insn);
+
+ if_info->cond = noce_get_condition (if_info->jump,
+ &copy_of_cmp,
+ if_info->then_else_reversed);
+ if_info->cond_earliest = copy_of_cmp;
+ if_info->rev_cond = NULL_RTX;
+
+ return success;
+}
+
+/* This function tries to rename regs that intersect with considered bb.
+ return true if the renaming was successful and optimization can
+ proceed futher, false otherwise. */
+static bool
+noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
+{
+ if (bitmap_empty_p (rename_regs))
+ return true;
+ rtx_insn *insn;
+ rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
+ bool res = true;
+ start_sequence ();
+ FOR_BB_INSNS (test_bb, insn)
+ {
+ if (!active_insn_p (insn))
+ continue;
+ /* Only ssets are supported for now. */
+ rtx sset = single_set (insn);
+ gcc_assert (sset);
+ rtx x = SET_DEST (sset);
+ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
+ continue;
+ /* Do not need to rename dest in the last instruction
+ it will be renamed anyway. */
+ if (insn == last_insn)
+ continue;
+ machine_mode mode = GET_MODE (x);
+ rtx tmp = gen_reg_rtx (mode);
+ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
+ {
+ gcc_assert (insn != last_insn);
+ /* We can generate additional move for such case,
+ but it will increase register preasure.
+ For now just stop transformation. */
+ rtx result_rtx = SET_DEST (single_set (last_insn));
+ if (REG_P (result_rtx) && (x != result_rtx))
+ {
+ res = false;
+ break;
+ }
+ if (!validate_replace_rtx (x, tmp, insn))
+ gcc_unreachable ();
+ noce_emit_move_insn (tmp,x);
+ }
+ set_used_flags (insn);
+ rtx_insn *rename_candidate;
+ for (rename_candidate = NEXT_INSN (insn);
+ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
+ rename_candidate = NEXT_INSN (rename_candidate))
+ {
+ if (!reg_overlap_mentioned_p (x, rename_candidate))
+ continue;
+
+ int replace_res = TRUE;
+ if (rename_candidate == last_insn)
+ {
+ validate_replace_src_group (x, tmp, rename_candidate);
+ replace_res = apply_change_group ();
+ }
+ else
+ replace_res = validate_replace_rtx (x, tmp, rename_candidate);
+ gcc_assert (replace_res);
+ set_used_flags (rename_candidate);
+ }
+ set_used_flags (x);
+ set_used_flags (tmp);
+ }
+ rtx_insn *seq = get_insns ();
+ unshare_all_rtl_in_chain (seq);
+ end_sequence ();
+ emit_insn_before_setloc (seq, first_active_insn (test_bb),
+ INSN_LOCATION (first_active_insn (test_bb)));
+ FOR_BB_INSNS (test_bb, insn)
+ df_insn_rescan (insn);
+ return res;
+}
+
/* Try more complex cases involving conditional_move. */
static int
@@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
std::swap (then_bb, else_bb);
}
}
-
+ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
+ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
if (then_bb && else_bb
- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
- return FALSE;
+ && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
+ if_info->orig_x,
+ then_bb_rename_regs)
+ || !bbs_ok_for_cmove_arith (else_bb, then_bb,
+ if_info->orig_x,
+ else_bb_rename_regs)))
+ {
+ BITMAP_FREE (then_bb_rename_regs);
+ BITMAP_FREE (else_bb_rename_regs);
+ return FALSE;
+ }
+ bool prepass_renaming = noce_rename_regs_in_bb (then_bb,
+ then_bb_rename_regs)
+ && noce_rename_regs_in_bb (else_bb,
+ else_bb_rename_regs);
+
+ BITMAP_FREE (then_bb_rename_regs);
+ BITMAP_FREE (else_bb_rename_regs);
+
+ if (!prepass_renaming)
+ return FALSE;
start_sequence ();
@@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op)
static bool
bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
- unsigned int *cost, bool *simple_p)
+ unsigned int *cost, bool *simple_p,
+ bitmap cond_rename_regs)
{
if (!test_bb)
return false;
@@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
rtx_insn *prev_last_insn = PREV_INSN (last_insn);
gcc_assert (prev_last_insn);
- /* For now, disallow setting x multiple times in test_bb. */
- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn))
+ if (REG_P (x)
+ && reg_set_between_p (x, first_insn, prev_last_insn)
+ && param_ifcvt_allow_register_renaming < 1)
return false;
bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
@@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
rtx_insn *insn;
FOR_BB_INSNS (test_bb, insn)
{
- if (insn != last_insn)
- {
- if (!active_insn_p (insn))
- continue;
+ if (insn == last_insn)
+ continue;
+ if (!active_insn_p (insn))
+ continue;
- if (!insn_valid_noce_process_p (insn, cc))
- goto free_bitmap_and_fail;
+ if (!insn_valid_noce_process_p (insn, cc))
+ goto free_bitmap_and_fail;
- rtx sset = single_set (insn);
- gcc_assert (sset);
+ rtx sset = single_set (insn);
+ gcc_assert (sset);
- if (contains_mem_rtx_p (SET_SRC (sset))
- || !REG_P (SET_DEST (sset))
- || reg_overlap_mentioned_p (SET_DEST (sset), cond))
- goto free_bitmap_and_fail;
+ if (contains_mem_rtx_p (SET_SRC (sset))
+ || !REG_P (SET_DEST (sset)))
+ goto free_bitmap_and_fail;
- potential_cost += pattern_cost (sset, speed_p);
- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
+ if (reg_overlap_mentioned_p (SET_DEST (sset), cond))
+ {
+ if (param_ifcvt_allow_register_renaming < 1)
+ goto free_bitmap_and_fail;
+ rtx sset_dest = SET_DEST (sset);
+ if (REG_P (sset_dest)
+ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC))
+ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest));
+ else
+ goto free_bitmap_and_fail;
}
+ potential_cost += pattern_cost (sset, speed_p);
+ if (SET_DEST (sset) != SET_DEST (last_set))
+ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
}
/* If any of the intermediate results in test_bb are live after test_bb
@@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info)
bool speed_p = optimize_bb_for_speed_p (test_bb);
unsigned int then_cost = 0, else_cost = 0;
+ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack);
if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
- &if_info->then_simple))
- return false;
+ &if_info->then_simple, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
if (else_bb
&& !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
- &if_info->else_simple))
- return false;
+ &if_info->else_simple, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
+ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
+ BITMAP_FREE (cond_rename_regs);
+ cond = if_info->cond;
if (speed_p)
if_info->original_cost += average_cost (then_cost, else_cost,
find_edge (test_bb, then_bb));
@@ -5823,12 +6014,13 @@ if_convert (bool after_combine)
{
basic_block bb;
int pass;
-
if (optimize == 1)
{
df_live_add_problem ();
df_live_set_all_dirty ();
}
+ free_dominance_info (CDI_DOMINATORS);
+ cleanup_cfg (CLEANUP_EXPENSIVE);
/* Record whether we are after combine pass. */
ifcvt_after_combine = after_combine;
@@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void)
dump_reg_info (dump_file);
dump_flow_info (dump_file, dump_flags);
}
- cleanup_cfg (CLEANUP_EXPENSIVE);
if_convert (false);
if (num_updated_if_blocks)
/* Get rid of any dead CC-related instructions. */
diff --git a/gcc/params.opt b/gcc/params.opt
index d2196dc68..ba87f820b 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
+-param=ifcvt-allow-register-renaming=
+Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
+Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
+
-param=max-sched-extend-regions-iters=
Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization
The maximum number of iterations through CFG to extend regions.
diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
new file mode 100644
index 000000000..65c4d4140
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
@@ -0,0 +1,35 @@
+
+extern void abort(void);
+
+__attribute__ ((noinline))
+int foo (int x, int y, int z, int a, int b)
+{
+ if (a < 2) {
+ if (a == 0) {
+ if (x - y < 0)
+ x = x - y + z;
+ else
+ x = x - y;
+ }
+ else {
+ if (x + y >= z)
+ x = x + y - z;
+ else
+ x = x + y;
+ }
+ }
+ return x;
+}
+
+int main(void) {
+ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2
+ abort ();
+ if (foo (50,10,7,0,1) != 40) // x - y = 40
+ abort ();
+ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8
+ abort ();
+ if (foo (5,10,70,1,1) != 15) // x + y = 15
+ abort ();
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c
new file mode 100644
index 000000000..be9a67b3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ifcvt-6.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */
+
+typedef unsigned int uint16_t;
+
+uint16_t
+foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a,
+ uint16_t b, uint16_t c, uint16_t d) {
+ int i = 1;
+ int j = 1;
+ if (a > b) {
+ j = x;
+ if (b > c)
+ i = y;
+ else
+ i = z;
+ }
+ else {
+ j = y;
+ if (c > d)
+ i = z;
+ }
+ return i * j;
+}
+
+/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */
+
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/cenhuilin/gcc.git
[email protected]:cenhuilin/gcc.git
cenhuilin
gcc
gcc
master

搜索帮助