Secondly, I’d like to decompose the SAD operation into three SIMD instructions to share more information.
Subtraction code:
// Per-byte SIMD subtraction kernel.
//
// Each thread loads one 32-bit word from A and one from B at index
// threadIdx.x, applies __vsub4 (which treats each word as four packed
// bytes and subtracts them byte by byte), and stores the packed result
// into C at the same index.
//
// The index is threadIdx.x alone: there is no blockIdx term and no bounds
// guard, so every block touches the same elements [0, blockDim.x) and all
// three arrays must hold at least blockDim.x words.
__global__ void usad4Cmbn(unsigned int *A, unsigned int *B, unsigned int *C)
{
    const unsigned int idx = threadIdx.x;

    // Fetch both packed operands before combining them.
    const unsigned int minuend    = A[idx];
    const unsigned int subtrahend = B[idx];

    C[idx] = __vsub4(minuend, subtrahend);
}
Subtraction dump:
arch = sm_30
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_30
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM30 EF_CUDA_PTX_SM(EF_CUDA_SM30)"
/* 0x22c04282c2804307 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x2800400110005de4 */
/*0010*/ S2R R0, SR_TID.X; /* 0x2c00000084001c04 */
/*0018*/ MOV32I R7, 0x4; /* 0x180000001001dde2 */
/*0020*/ ISCADD R2.CC, R0, c[0x0][0x140], 0x2; /* 0x4001400500009c43 */
/*0028*/ IMAD.U32.U32.HI.X R3, R0, R7, c[0x0][0x144]; /* 0x208e80051000dc43 */
/*0030*/ ISCADD R4.CC, R0, c[0x0][0x148], 0x2; /* 0x4001400520011c43 */
/*0038*/ LD.E R2, [R2]; /* 0x8400000000209c85 */
/* 0x22f04283f2c04287 */
/*0048*/ IMAD.U32.U32.HI.X R5, R0, R7, c[0x0][0x14c]; /* 0x208e800530015c43 */
/*0050*/ LD.E R4, [R4]; /* 0x8400000000411c85 */
/*0058*/ ISCADD R6.CC, R0, c[0x0][0x150], 0x2; /* 0x4001400540019c43 */
/*0060*/ IMAD.U32.U32.HI.X R7, R0, R7, c[0x0][0x154]; /* 0x208e80055001dc43 */
/*0068*/ VADD4.UD.U8.U8 R0, R2, -R4, RZ; /* 0x81fe844c10201c84 */
/*0070*/ ST.E [R6], R0; /* 0x9400000000601c85 */
/*0078*/ EXIT; /* 0x8000000000001de7 */
/*0080*/ BRA 0x80; /* 0x4003ffffe0001de7 */
/*0088*/ NOP; /* 0x4000000000001de4 */
/*0090*/ NOP; /* 0x4000000000001de4 */
/*0098*/ NOP; /* 0x4000000000001de4 */
/*00a0*/ NOP; /* 0x4000000000001de4 */
/*00a8*/ NOP; /* 0x4000000000001de4 */
/*00b0*/ NOP; /* 0x4000000000001de4 */
/*00b8*/ NOP; /* 0x4000000000001de4 */
.............................
Fatbin elf code:
================
arch = sm_35
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_35
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
/* 0x08b010a0b0a010c0 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x64c03c00089c0006 */
/*0010*/ S2R R0, SR_TID.X; /* 0x86400000109c0002 */
/*0018*/ MOV32I R7, 0x4; /* 0x74000000021fc01e */
/*0020*/ ISCADD R2.CC, R0, c[0x0][0x140], 0x2; /* 0x60c40800281c000a */
/*0028*/ IMAD.U32.U32.HI.X R3, R0, R7, c[0x0][0x144]; /* 0x92101c00289c000e */
/*0030*/ ISCADD R4.CC, R0, c[0x0][0x148], 0x2; /* 0x60c40800291c0012 */
/*0038*/ LD.E R2, [R2]; /* 0xc4800000001c0808 */
/* 0x08bc10a0fcb010a0 */
/*0048*/ IMAD.U32.U32.HI.X R5, R0, R7, c[0x0][0x14c]; /* 0x92101c00299c0016 */
/*0050*/ LD.E R4, [R4]; /* 0xc4800000001c1010 */
/*0058*/ ISCADD R6.CC, R0, c[0x0][0x150], 0x2; /* 0x60c408002a1c001a */
/*0060*/ IMAD.U32.U32.HI.X R7, R0, R7, c[0x0][0x154]; /* 0x92101c002a9c001e */
/*0068*/ VADD4.UD.U8.U8 R0, R2, -R4, RZ; /* 0xd4c3fda2021c0801 */
/*0070*/ ST.E [R6], R0; /* 0xe4800000001c1800 */
/*0078*/ EXIT; /* 0x18000000001c003c */
/*0080*/ BRA 0x80; /* 0x12007ffffc1c003c */
/*0088*/ NOP; /* 0x85800000001c3c02 */
/*0090*/ NOP; /* 0x85800000001c3c02 */
/*0098*/ NOP; /* 0x85800000001c3c02 */
/*00a0*/ NOP; /* 0x85800000001c3c02 */
/*00a8*/ NOP; /* 0x85800000001c3c02 */
/*00b0*/ NOP; /* 0x85800000001c3c02 */
/*00b8*/ NOP; /* 0x85800000001c3c02 */
.............................
Fatbin elf code:
================
arch = sm_37
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_37
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM37 EF_CUDA_PTX_SM(EF_CUDA_SM37)"
/* 0x08b010a0b0a010c0 */
/*0008*/ MOV R1, c[0x0][0x44]; /* 0x64c03c00089c0006 */
/*0010*/ S2R R0, SR_TID.X; /* 0x86400000109c0002 */
/*0018*/ MOV32I R7, 0x4; /* 0x74000000021fc01e */
/*0020*/ ISCADD R2.CC, R0, c[0x0][0x140], 0x2; /* 0x60c40800281c000a */
/*0028*/ IMAD.U32.U32.HI.X R3, R0, R7, c[0x0][0x144]; /* 0x92101c00289c000e */
/*0030*/ ISCADD R4.CC, R0, c[0x0][0x148], 0x2; /* 0x60c40800291c0012 */
/*0038*/ LD.E R2, [R2]; /* 0xc4800000001c0808 */
/* 0x08bc10a0fcb010a0 */
/*0048*/ IMAD.U32.U32.HI.X R5, R0, R7, c[0x0][0x14c]; /* 0x92101c00299c0016 */
/*0050*/ LD.E R4, [R4]; /* 0xc4800000001c1010 */
/*0058*/ ISCADD R6.CC, R0, c[0x0][0x150], 0x2; /* 0x60c408002a1c001a */
/*0060*/ IMAD.U32.U32.HI.X R7, R0, R7, c[0x0][0x154]; /* 0x92101c002a9c001e */
/*0068*/ VADD4.UD.U8.U8 R0, R2, -R4, RZ; /* 0xd4c3fda2021c0801 */
/*0070*/ ST.E [R6], R0; /* 0xe4800000001c1800 */
/*0078*/ EXIT; /* 0x18000000001c003c */
/*0080*/ BRA 0x80; /* 0x12007ffffc1c003c */
/*0088*/ NOP; /* 0x85800000001c3c02 */
/*0090*/ NOP; /* 0x85800000001c3c02 */
/*0098*/ NOP; /* 0x85800000001c3c02 */
/*00a0*/ NOP; /* 0x85800000001c3c02 */
/*00a8*/ NOP; /* 0x85800000001c3c02 */
/*00b0*/ NOP; /* 0x85800000001c3c02 */
/*00b8*/ NOP; /* 0x85800000001c3c02 */
.............................
Fatbin elf code:
================
arch = sm_50
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_50
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM50 EF_CUDA_PTX_SM(EF_CUDA_SM50)"
/* 0x083fc400e3e007f6 */
/*0008*/ MOV R1, c[0x0][0x20] ; /* 0x4c98078000870001 */
/*0010*/ S2R R0, SR_TID.X ; /* 0xf0c8000002170000 */
/*0018*/ SHL R6, R0.reuse, 0x2 ; /* 0x3848000000270006 */
/* 0x081fc840fec007f5 */
/*0028*/ SHR.U32 R7, R0, 0x1e ; /* 0x3828000001e70007 */
/*0030*/ IADD R2.CC, R6.reuse, c[0x0][0x140] ; /* 0x4c10800005070602 */
/*0038*/ IADD.X R3, R7.reuse, c[0x0][0x144] ; /* 0x4c10080005170703 */
/* 0x001f8800f6c007f0 */
/*0048*/ { IADD R4.CC, R6, c[0x0][0x148] ; /* 0x4c10800005270604 */
/*0050*/ LDG.E R2, [R2] }
/* 0xeed4200000070202 */
/*0058*/ IADD.X R5, R7, c[0x0][0x14c] ; /* 0x4c10080005370705 */
/* 0x001fdc00fec007b1 */
/*0068*/ LDG.E R4, [R4] ; /* 0xeed4200000070404 */
/*0070*/ IADD R6.CC, R6, c[0x0][0x150] ; /* 0x4c10800005470606 */
/*0078*/ IADD.X R7, R7, c[0x0][0x154] ; /* 0x4c10080005570707 */
/* 0x001f9400fe2307f1 */
/*0088*/ LOP32I.OR R0, R2.reuse, 0x80808080 ; /* 0x0428080808070200 */
/*0090*/ LOP32I.AND R9, R4, 0x7f7f7f7f ; /* 0x0407f7f7f7f70409 */
/*0098*/ LOP3.LUT R8, R2, c[0x2][0x0], R4, 0x84 ; /* 0x0284020800070208 */
/* 0x001fc400fe4007f6 */
/*00a8*/ IADD R9, R0, -R9 ; /* 0x5c11000000970009 */
/*00b0*/ LOP.XOR R8, R9, R8 ; /* 0x5c47040000870908 */
/*00b8*/ STG.E [R6], R8 ; /* 0xeedc200000070608 */
/* 0x001f8000ffe007ff */
/*00c8*/ EXIT ; /* 0xe30000000007000f */
/*00d0*/ BRA 0xd0 ; /* 0xe2400fffff87000f */
/*00d8*/ NOP; /* 0x50b0000000070f00 */
/* 0x001f8000fc0007e0 */
/*00e8*/ NOP; /* 0x50b0000000070f00 */
/*00f0*/ NOP; /* 0x50b0000000070f00 */
/*00f8*/ NOP; /* 0x50b0000000070f00 */
.............................
Fatbin elf code:
================
arch = sm_52
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_52
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM52 EF_CUDA_PTX_SM(EF_CUDA_SM52)"
/* 0x083fc400e3e007f6 */
/*0008*/ MOV R1, c[0x0][0x20] ; /* 0x4c98078000870001 */
/*0010*/ S2R R0, SR_TID.X ; /* 0xf0c8000002170000 */
/*0018*/ SHL R6, R0.reuse, 0x2 ; /* 0x3848000000270006 */
/* 0x081fc840fec007f5 */
/*0028*/ SHR.U32 R7, R0, 0x1e ; /* 0x3828000001e70007 */
/*0030*/ IADD R2.CC, R6.reuse, c[0x0][0x140] ; /* 0x4c10800005070602 */
/*0038*/ IADD.X R3, R7.reuse, c[0x0][0x144] ; /* 0x4c10080005170703 */
/* 0x001f8800f6c007f0 */
/*0048*/ { IADD R4.CC, R6, c[0x0][0x148] ; /* 0x4c10800005270604 */
/*0050*/ LDG.E R2, [R2] }
/* 0xeed4200000070202 */
/*0058*/ IADD.X R5, R7, c[0x0][0x14c] ; /* 0x4c10080005370705 */
/* 0x001fdc00fec007b1 */
/*0068*/ LDG.E R4, [R4] ; /* 0xeed4200000070404 */
/*0070*/ IADD R6.CC, R6, c[0x0][0x150] ; /* 0x4c10800005470606 */
/*0078*/ IADD.X R7, R7, c[0x0][0x154] ; /* 0x4c10080005570707 */
/* 0x001f9400fe2307f1 */
/*0088*/ LOP32I.OR R0, R2.reuse, 0x80808080 ; /* 0x0428080808070200 */
/*0090*/ LOP32I.AND R9, R4, 0x7f7f7f7f ; /* 0x0407f7f7f7f70409 */
/*0098*/ LOP3.LUT R8, R2, c[0x2][0x0], R4, 0x84 ; /* 0x0284020800070208 */
/* 0x001fc400fe4007f6 */
/*00a8*/ IADD R9, R0, -R9 ; /* 0x5c11000000970009 */
/*00b0*/ LOP.XOR R8, R9, R8 ; /* 0x5c47040000870908 */
/*00b8*/ STG.E [R6], R8 ; /* 0xeedc200000070608 */
/* 0x001f8000ffe007ff */
/*00c8*/ EXIT ; /* 0xe30000000007000f */
/*00d0*/ BRA 0xd0 ; /* 0xe2400fffff87000f */
/*00d8*/ NOP; /* 0x50b0000000070f00 */
/* 0x001f8000fc0007e0 */
/*00e8*/ NOP; /* 0x50b0000000070f00 */
/*00f0*/ NOP; /* 0x50b0000000070f00 */
/*00f8*/ NOP; /* 0x50b0000000070f00 */
.............................
Fatbin elf code:
================
arch = sm_60
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_60
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM60 EF_CUDA_PTX_SM(EF_CUDA_SM60)"
/* 0x083fc400e3e007f6 */
/*0008*/ MOV R1, c[0x0][0x20] ; /* 0x4c98078000870001 */
/*0010*/ S2R R0, SR_TID.X ; /* 0xf0c8000002170000 */
/*0018*/ SHL R6, R0.reuse, 0x2 ; /* 0x3848000000270006 */
/* 0x081fc840fec007f5 */
/*0028*/ SHR.U32 R7, R0, 0x1e ; /* 0x3828000001e70007 */
/*0030*/ IADD R2.CC, R6.reuse, c[0x0][0x140] ; /* 0x4c10800005070602 */
/*0038*/ IADD.X R3, R7.reuse, c[0x0][0x144] ; /* 0x4c10080005170703 */
/* 0x001f8800f6c007f0 */
/*0048*/ { IADD R4.CC, R6, c[0x0][0x148] ; /* 0x4c10800005270604 */
/*0050*/ LDG.E R2, [R2] }
/* 0xeed4200000070202 */
/*0058*/ IADD.X R5, R7, c[0x0][0x14c] ; /* 0x4c10080005370705 */
/* 0x001fdc00fec007b1 */
/*0068*/ LDG.E R4, [R4] ; /* 0xeed4200000070404 */
/*0070*/ IADD R6.CC, R6, c[0x0][0x150] ; /* 0x4c10800005470606 */
/*0078*/ IADD.X R7, R7, c[0x0][0x154] ; /* 0x4c10080005570707 */
/* 0x001f9400fe2307f1 */
/*0088*/ LOP32I.OR R0, R2.reuse, 0x80808080 ; /* 0x0428080808070200 */
/*0090*/ LOP32I.AND R9, R4, 0x7f7f7f7f ; /* 0x0407f7f7f7f70409 */
/*0098*/ LOP3.LUT R8, R2, c[0x2][0x0], R4, 0x84 ; /* 0x0284020800070208 */
/* 0x001fc400fe4007f6 */
/*00a8*/ IADD R9, R0, -R9 ; /* 0x5c11000000970009 */
/*00b0*/ LOP.XOR R8, R9, R8 ; /* 0x5c47040000870908 */
/*00b8*/ STG.E [R6], R8 ; /* 0xeedc200000070608 */
/* 0x001f8000ffe007ff */
/*00c8*/ EXIT ; /* 0xe30000000007000f */
/*00d0*/ BRA 0xd0 ; /* 0xe2400fffff87000f */
/*00d8*/ NOP; /* 0x50b0000000070f00 */
/* 0x001f8000fc0007e0 */
/*00e8*/ NOP; /* 0x50b0000000070f00 */
/*00f0*/ NOP; /* 0x50b0000000070f00 */
/*00f8*/ NOP; /* 0x50b0000000070f00 */
.............................
Fatbin elf code:
================
arch = sm_61
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_61
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM61 EF_CUDA_PTX_SM(EF_CUDA_SM61)"
/* 0x083fc400e3e007f6 */
/*0008*/ MOV R1, c[0x0][0x20] ; /* 0x4c98078000870001 */
/*0010*/ S2R R0, SR_TID.X ; /* 0xf0c8000002170000 */
/*0018*/ SHL R6, R0.reuse, 0x2 ; /* 0x3848000000270006 */
/* 0x081fc840fec007f5 */
/*0028*/ SHR.U32 R7, R0, 0x1e ; /* 0x3828000001e70007 */
/*0030*/ IADD R2.CC, R6.reuse, c[0x0][0x140] ; /* 0x4c10800005070602 */
/*0038*/ IADD.X R3, R7.reuse, c[0x0][0x144] ; /* 0x4c10080005170703 */
/* 0x001f8800f6c007f0 */
/*0048*/ { IADD R4.CC, R6, c[0x0][0x148] ; /* 0x4c10800005270604 */
/*0050*/ LDG.E R2, [R2] }
/* 0xeed4200000070202 */
/*0058*/ IADD.X R5, R7, c[0x0][0x14c] ; /* 0x4c10080005370705 */
/* 0x001fdc00fec007b1 */
/*0068*/ LDG.E R4, [R4] ; /* 0xeed4200000070404 */
/*0070*/ IADD R6.CC, R6, c[0x0][0x150] ; /* 0x4c10800005470606 */
/*0078*/ IADD.X R7, R7, c[0x0][0x154] ; /* 0x4c10080005570707 */
/* 0x001f9400fe2307f1 */
/*0088*/ LOP32I.OR R0, R2.reuse, 0x80808080 ; /* 0x0428080808070200 */
/*0090*/ LOP32I.AND R9, R4, 0x7f7f7f7f ; /* 0x0407f7f7f7f70409 */
/*0098*/ LOP3.LUT R8, R2, c[0x2][0x0], R4, 0x84 ; /* 0x0284020800070208 */
/* 0x001fc400fe4007f6 */
/*00a8*/ IADD R9, R0, -R9 ; /* 0x5c11000000970009 */
/*00b0*/ LOP.XOR R8, R9, R8 ; /* 0x5c47040000870908 */
/*00b8*/ STG.E [R6], R8 ; /* 0xeedc200000070608 */
/* 0x001f8000ffe007ff */
/*00c8*/ EXIT ; /* 0xe30000000007000f */
/*00d0*/ BRA 0xd0 ; /* 0xe2400fffff87000f */
/*00d8*/ NOP; /* 0x50b0000000070f00 */
/* 0x001f8000fc0007e0 */
/*00e8*/ NOP; /* 0x50b0000000070f00 */
/*00f0*/ NOP; /* 0x50b0000000070f00 */
/*00f8*/ NOP; /* 0x50b0000000070f00 */
.............................
Fatbin elf code:
================
arch = sm_70
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_70
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM70 EF_CUDA_PTX_SM(EF_CUDA_SM70)"
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/* 0x000fd000078e00ff */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0020*/ S2R R7, SR_TID.X ; /* 0x0000000000077919 */
/* 0x000e220000002100 */
/*0030*/ MOV R10, 0x4 ; /* 0x00000004000a7802 */
/* 0x000fca0000000f00 */
/*0040*/ IMAD.WIDE.U32 R2, R7.reuse, R10.reuse, c[0x0][0x160] ; /* 0x0000580007027625 */
/* 0x0c1fe400078e000a */
/*0050*/ IMAD.WIDE.U32 R4, R7, R10, c[0x0][0x168] ; /* 0x00005a0007047625 */
/* 0x000fd000078e000a */
/*0060*/ LDG.E.SYS R2, [R2] ; /* 0x0000000002027381 */
/* 0x000ea800001ee900 */
/*0070*/ LDG.E.SYS R4, [R4] ; /* 0x0000000004047381 */
/* 0x000ee200001ee900 */
/*0080*/ LOP3.LUT R0, R2, 0x80808080, RZ, 0xfc, !PT ; /* 0x8080808002007812 */
/* 0x004fe400078efcff */
/*0090*/ LOP3.LUT R6, R4, 0x7f7f7f7f, RZ, 0xc0, !PT ; /* 0x7f7f7f7f04067812 */
/* 0x008fe400078ec0ff */
/*00a0*/ LOP3.LUT R8, R2, 0x80808080, R4, 0x84, !PT ; /* 0x8080808002087812 */
/* 0x000fc600078e8404 */
/*00b0*/ IMAD.IADD R9, R0, 0x1, -R6 ; /* 0x0000000100097824 */
/* 0x000fe400078e0a06 */
/*00c0*/ IMAD.WIDE.U32 R6, R7, R10, c[0x0][0x170] ; /* 0x00005c0007067625 */
/* 0x000fc600078e000a */
/*00d0*/ LOP3.LUT R8, R9, R8, RZ, 0x3c, !PT ; /* 0x0000000809087212 */
/* 0x000fd000078e3cff */
/*00e0*/ STG.E.SYS [R6], R8 ; /* 0x0000000806007386 */
/* 0x000fe2000010e900 */
/*00f0*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/*0100*/ BRA 0x100; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/*0110*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0120*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0130*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0140*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0150*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0160*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0170*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
.............................
Fatbin elf code:
================
arch = sm_72
code version = [1,7]
producer = cuda
host = linux
compile_size = 64bit
code for sm_72
Function : _Z9usad4CmbnPjS_S_
.headerflags @"EF_CUDA_SM72 EF_CUDA_PTX_SM(EF_CUDA_SM72)"
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/* 0x000fd000078e00ff */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/*0020*/ S2R R7, SR_TID.X ; /* 0x0000000000077919 */
/* 0x000e220000002100 */
/*0030*/ MOV R10, 0x4 ; /* 0x00000004000a7802 */
/* 0x000fca0000000f00 */
/*0040*/ IMAD.WIDE.U32 R2, R7.reuse, R10.reuse, c[0x0][0x160] ; /* 0x0000580007027625 */
/* 0x0c1fe400078e000a */
/*0050*/ IMAD.WIDE.U32 R4, R7, R10, c[0x0][0x168] ; /* 0x00005a0007047625 */
/* 0x000fd000078e000a */
/*0060*/ LDG.E.SYS R2, [R2] ; /* 0x0000000002027381 */
/* 0x000ea800001ee900 */
/*0070*/ LDG.E.SYS R4, [R4] ; /* 0x0000000004047381 */
/* 0x000ee200001ee900 */
/*0080*/ LOP3.LUT R0, R2, 0x80808080, RZ, 0xfc, !PT ; /* 0x8080808002007812 */
/* 0x004fe400078efcff */
/*0090*/ LOP3.LUT R6, R4, 0x7f7f7f7f, RZ, 0xc0, !PT ; /* 0x7f7f7f7f04067812 */
/* 0x008fe400078ec0ff */
/*00a0*/ LOP3.LUT R8, R2, 0x80808080, R4, 0x84, !PT ; /* 0x8080808002087812 */
/* 0x000fc600078e8404 */
/*00b0*/ IMAD.IADD R9, R0, 0x1, -R6 ; /* 0x0000000100097824 */
/* 0x000fe400078e0a06 */
/*00c0*/ IMAD.WIDE.U32 R6, R7, R10, c[0x0][0x170] ; /* 0x00005c0007067625 */
/* 0x000fc600078e000a */
/*00d0*/ LOP3.LUT R8, R9, R8, RZ, 0x3c, !PT ; /* 0x0000000809087212 */
/* 0x000fd000078e3cff */
/*00e0*/ STG.E.SYS [R6], R8 ; /* 0x0000000806007386 */
/* 0x000fe2000010e900 */
/*00f0*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/*0100*/ BRA 0x100; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/*0110*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0120*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0130*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0140*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0150*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0160*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/*0170*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
.............................