maple iv相关生成代码:
754 mov w0, #0
.L.36__2:
...
821 add x12, x29, #16 <====== "tmp base地址 冗余"
822 sxtw x13, w0
823 lsl x13, x13, #2 <===== "tmp[i] 地址 单循环内不变"
824 str w11, [x12,x13,LSL #2] <===== "tmp[i][0]"
825 sub w9, w9, w10
826 add x10, x29, #16 <====== "tmp base地址 冗余"
827 sxtw x11, w0
828 lsl x11, x11, #2 <===== "tmp[i] 地址 单循环内不变 冗余"
829 add x11, x11, #2
830 str w9, [x10,x11,LSL #2] <===== "tmp[i][2]"
...
847 add w0, w0, #1 <==== "w0是i"
848 add x3, x3, x1
849 add x4, x4, x2
850 cmp w0, #4
851 blt .L.36__2
gcc iv相关代码:
sxtw x1, w1
sxtw x3, w3
mov x6, sp
add x11, sp, #0x40 <===== "x11为tmp[4][4]的结束地址(tmp起始地址 + 64),用作循环终止条件"
mov x7, x6 <======== "x7为tmp起始地址"
18:
...
add x0, x0, x1
add x2, x2, x3
str w8, [x7] <===== tmp[i][0]
str w10, [x7, #8] <===== tmp[i][2]
add w8, w4, w5
sub w4, w4, w5
str w8, [x7, #4] <===== tmp[i][1]
str w4, [x7, #12] <===== tmp[i][3]
add x7, x7, #0x10 <====== " tmp[i]地址,每次+16,作为iv"
cmp x11, x7
b.ne 18
希望经过LFO或者strength reduction优化后,maple也能生成与上面gcc类似的代码。
另外,即使不考虑ivopt相关的优化,仅就当前pre优化的能力而言,
4次add x12, x29, #16 <====== "tmp base地址 冗余"
以及
后3次的lsl x11, x11, #2 <===== "tmp[i] 地址 单循环内不变 冗余"
都是冗余计算,理应可以被删除,但实际上并没有被删除。我们分析认为,原因是中端IR中的
array 1 a64 <* <[4] <[4] u32>>> (addrof u64 %tmp, regread i32 %82, constval i32 0)
被当作一个整体处理,应该在合适的时机将其打散成普通运算。
本地开启SR + LFTR + PR712后仍有正确性问题;不过从SPEC 525 dct.c中sub4x4_dct函数的汇编可以看出,循环内的冗余代码已有所缓解,但仍有较大的改进空间。
主要问题在于:IV数量过多、用于循环退出判断的IV选取不理想,以及循环外存在冗余计算(详见下面例子之后的分析)。
例如:
int16_t d[16];
int16_t tmp[16];
for( int i = 0; i < 4; i++ )
{
int s03 = d[i*4+0] + d[i*4+3];
int s12 = d[i*4+1] + d[i*4+2];
int d03 = d[i*4+0] - d[i*4+3];
int d12 = d[i*4+1] - d[i*4+2];
tmp[0*4+i] = s03 + s12;
tmp[1*4+i] = 2*d03 + d12;
tmp[2*4+i] = s03 - s12;
tmp[3*4+i] = d03 - 2*d12;
}
对应me.mpl
417 regassign i32 %105 (constval i32 0)
418 regassign i32 %45 (mul i32 (regread i32 %105, constval i32 4)) # 0
419 regassign i32 %46 (add i32 (regread i32 %45, constval i32 2)) # 2
420 regassign a64 %47 (cvt a64 i32 (regread i32 %46)) # 2
421 regassign a64 %48 (mul a64 (regread a64 %47, constval a64 2)) # 4
422 regassign a64 %49 (add a64 (addrof u64 %d, regread a64 %48)) # d + 4
423 regassign a64 %50 (add a64 (
424 cvt a64 u64 (addrof u64 %d),
425 constval a64 36))
426 regassign i32 %52 (add i32 (regread i32 %45, constval i32 1)) # 1
427 regassign a64 %53 (cvt a64 i32 (regread i32 %52)) # 1
428 regassign a64 %54 (mul a64 (regread a64 %53, constval a64 2)) # 2
429 regassign a64 %55 (add a64 (addrof u64 %d, regread a64 %54)) # d + 2
430 regassign i32 %60 (add i32 (regread i32 %45, constval i32 3)) # 3
431 regassign a64 %61 (cvt a64 i32 (regread i32 %60)) # 3
432 regassign a64 %62 (mul a64 (regread a64 %61, constval a64 2)) # 6
433 regassign a64 %63 (add a64 (addrof u64 %d, regread a64 %62)) # d + 6
434 regassign a64 %66 (cvt a64 i32 (regread i32 %45)) # 0
435 regassign a64 %67 (mul a64 (regread a64 %66, constval a64 2)) # 0
436 regassign a64 %68 (add a64 (addrof u64 %d, regread a64 %67)) # d + 0
437 regassign a64 %73 (cvt a64 i32 (regread i32 %105)) # 0
438 regassign a64 %74 (mul a64 (regread a64 %73, constval a64 2)) # 0
439 regassign a64 %75 (add a64 (addrof u64 %tmp, regread a64 %74)) # tmp + 0
440 regassign i32 %77 (add i32 (regread i32 %105, constval i32 4)) # 4
441 regassign a64 %78 (cvt a64 i32 (regread i32 %77)) # 4
442 regassign a64 %79 (mul a64 (regread a64 %78, constval a64 2)) # 8
443 regassign a64 %80 (add a64 (addrof u64 %tmp, regread a64 %79)) # tmp + 8
444 regassign i32 %82 (add i32 (regread i32 %105, constval i32 8)) # 8
445 regassign a64 %83 (cvt a64 i32 (regread i32 %82)) # 8
446 regassign a64 %84 (mul a64 (regread a64 %83, constval a64 2)) # 16
447 regassign a64 %85 (add a64 (addrof u64 %tmp, regread a64 %84)) # tmp + 16
448 regassign i32 %87 (add i32 (regread i32 %105, constval i32 12)) # 12
449 regassign a64 %88 (cvt a64 i32 (regread i32 %87)) # 12
450 regassign a64 %89 (mul a64 (regread a64 %88, constval a64 2)) # 24
451 regassign a64 %90 (add a64 (addrof u64 %tmp, regread a64 %89)) # tmp + 24
452 @@7 regassign i32 %51 (iread i32 <* i16> 0 (regread a64 %49))
453 regassign i32 %57 (iread i32 <* i16> 0 (regread a64 %55))
454 regassign i32 %59 (add i32 (regread i32 %57, regread i32 %51))
455 regassign i32 %65 (iread i32 <* i16> 0 (regread a64 %63))
456 regassign i32 %70 (iread i32 <* i16> 0 (regread a64 %68))
457 regassign i32 %72 (add i32 (regread i32 %70, regread i32 %65))
458 LOC 2 126
459 iassign <* i16> 0 (
460 regread a64 %75,
461 sext i32 16 (add i32 (regread i32 %72, regread i32 %59)))
462 regassign i32 %58 (sub i32 (regread i32 %57, regread i32 %51))
463 regassign i32 %71 (sub i32 (regread i32 %70, regread i32 %65))
464 LOC 2 127
465 iassign <* i16> 0 (
466 regread a64 %80,
467 sext i32 16 (add i32 (
468 regread i32 %58,
469 mul i32 (regread i32 %71, constval i32 2))))
470 LOC 2 128
471 iassign <* i16> 0 (
472 regread a64 %85,
473 sext i32 16 (sub i32 (regread i32 %72, regread i32 %59)))
474 LOC 2 129
475 iassign <* i16> 0 (
476 regread a64 %90,
477 sext i32 16 (sub i32 (
478 regread i32 %71,
479 mul i32 (regread i32 %58, constval i32 2))))
480 regassign a64 %90 (add a64 (regread a64 %90, constval a64 2))
481 regassign a64 %85 (add a64 (regread a64 %85, constval a64 2))
482 regassign a64 %80 (add a64 (regread a64 %80, constval a64 2))
483 regassign a64 %75 (add a64 (regread a64 %75, constval a64 2))
484 regassign a64 %68 (add a64 (regread a64 %68, constval a64 8))
485 regassign a64 %63 (add a64 (regread a64 %63, constval a64 8))
486 regassign a64 %55 (add a64 (regread a64 %55, constval a64 8))
487 regassign a64 %49 (add a64 (regread a64 %49, constval a64 8))
488 brtrue @@7 (lt i32 a64 (regread a64 %49, regread a64 %50))
可以看出:
1.我们有8个IV(%49,%55,%63,%68,%75,%80,%85,%90),数量过多,理论上只需2个即可:%49,%55,%63均可用%68 + const替换,同理%80,%85,%90均可用%75 + const替换
2.我们选取%49(addrof %d + 4)作为循环退出判断(488行brtrue)所用的IV,不够理想,应该选用%68(addrof %d + 0)
3.循环外417-451存在大量冗余(已在各行末尾用#注释出prop+folding后的结果)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
PR720 已解决以上问题。 现在的me 会输出:
func &foo () void {
funcid 1
LOC 2 10
regassign i32 %20 (constval i32 0)
regassign i32 %1 (mul i32 (regread i32 %20, constval i32 4))
regassign a64 %2 (cvt a64 i32 (regread i32 %1))
regassign a64 %3 (mul a64 (regread a64 %2, constval a64 2))
regassign a64 %17 (cvt a64 i32 (regread i32 %20))
regassign a64 %18 (mul a64 (regread a64 %17, constval a64 2))
regassign a64 %26 (addrof a64 $tmp)
regassign a64 %27 (addrof a64 $d)
@@2 regassign a64 %4 (add a64 (regread a64 %27, regread a64 %3))
regassign i32 %5 (iread i32 <* i16> 0 (add a64 (regread a64 %4, constval a64 4)))
regassign i32 %6 (cvt i32 i16 (regread i32 %5))
regassign i32 %7 (iread i32 <* i16> 0 (add a64 (regread a64 %4, constval a64 2)))
regassign i32 %8 (cvt i32 i16 (regread i32 %7))
regassign i32 %10 (add i32 (regread i32 %8, regread i32 %6))
regassign i32 %11 (iread i32 <* i16> 0 (add a64 (regread a64 %4, constval a64 6)))
regassign i32 %12 (cvt i32 i16 (regread i32 %11))
regassign i32 %13 (iread i32 <* i16> 0 (regread a64 %4))
regassign i32 %14 (cvt i32 i16 (regread i32 %13))
regassign i32 %16 (add i32 (regread i32 %14, regread i32 %12))
regassign a64 %19 (add a64 (regread a64 %26, regread a64 %18))
LOC 2 12
iassign <* i16> 0 (
regread a64 %19,
cvt i16 i32 (add i32 (regread i32 %16, regread i32 %10)))
regassign i32 %9 (sub i32 (regread i32 %8, regread i32 %6))
regassign i32 %15 (sub i32 (regread i32 %14, regread i32 %12))
LOC 2 13
iassign <* i16> 0 (
add a64 (regread a64 %19, constval a64 8),
cvt i16 i32 (add i32 (
mul i32 (regread i32 %15, constval i32 2),
regread i32 %9)))
LOC 2 14
iassign <* i16> 0 (
add a64 (regread a64 %19, constval a64 16),
cvt i16 i32 (sub i32 (regread i32 %16, regread i32 %10)))
LOC 2 15
iassign <* i16> 0 (
add a64 (regread a64 %19, constval a64 24),
cvt i16 i32 (sub i32 (
regread i32 %15,
mul i32 (regread i32 %9, constval i32 2))))
regassign a64 %18 (add a64 (regread a64 %18, constval a64 2))
regassign a64 %3 (add a64 (regread a64 %3, constval a64 8))
brtrue @@2 (lt i32 i32 (regread a64 %3, constval a64 32))
@@1 LOC 2 5
return ()
}
登录 后才可以发表评论