|
Lan
该用户已被删除
|
17#
大 中
小 发表于 2005-8-26 16:43 只看该作者
ibelieveicandie: 引用:随便找了一段ps (HLSL result)
****************************************
Target: GeForceFX 5800 Ultra (NV30) :: Unified Compiler: v66.93
Cycles: 31 :: # R Registers: 2
Pixel throughput (assuming 1 cycle texture lookup) 64.52 MP/s
=========================================
Shader performance using all FP16
Cycles: 31 :: # R Registers: 2
Pixel throughput (assuming 1 cycle texture lookup) 64.52 MP/s
=========================================
Shader performance using all FP32
Cycles: 31 :: # R Registers: 2
Pixel throughput (assuming 1 cycle texture lookup) 64.52 MP/s
****************************************
PS Instructions: 26
ps_2_0                                  // pixel shader model 2.0
// Overview (derived from the instructions below):
//   A = normalize(t1), B = normalize(t2), d = dot(A, B)
//   x = saturate(max(d, 0))
//   y = saturate((d + c3.x) / (1 + c3.x))
//   k = max(y*y*(3 - 2*y) - x*x*(3 - 2*x), 0)
//   oC0.xyz = texel.xyz * (k*c2.xyz + max(d, 0)*c1.xyz + c0.xyz)
//   oC0.w   = texel.w   * (1 + k*c2.w + c0.w)
// c0..c3 are read but not defined in this listing -- presumably supplied
// by the application; their meaning cannot be determined from here.
def c4, 0, 1, -2, 3                     // literal constants: c4 = (0, 1, -2, 3)
dcl_pp t0.xy                            // texture coordinate used for the s0 lookup
dcl_pp t1.xyz                           // first interpolated vector (normalized below)
dcl_pp t2.xyz                           // second interpolated vector (normalized below)
dcl_2d s0                               // 2D sampler
texld r1, t0, s0                        // r1 = texel sampled from s0 at t0
nrm_pp r0.xyz, t1                       // r0.xyz = A = normalize(t1)
mov r2.z, c4.y                          // r2.z = 1 (temporary; overwritten by the nrm below)
add r0.w, r2.z, c3.x                    // r0.w = 1 + c3.x
nrm_pp r2.xyz, t2                       // r2.xyz = B = normalize(t2)
rcp r3.w, r0.w                          // r3.w = 1 / (1 + c3.x)
dp3 r2.x, r0, r2                        // r2.x = d = dot(A, B)
add r2.w, r2.x, c3.x                    // r2.w = d + c3.x
max r0.w, r2.x, c4.x                    // r0.w = max(d, 0)
mul_sat r3.w, r3.w, r2.w                // r3.w = y = saturate((d + c3.x) / (1 + c3.x))
mad r2.w, r3.w, c4.z, c4.w              // r2.w = 3 - 2*y
mul r3.w, r3.w, r3.w                    // r3.w = y*y
mov_sat r5.w, r0.w                      // r5.w = x = saturate(max(d, 0))
mad r4.w, r5.w, c4.z, c4.w              // r4.w = 3 - 2*x
mul r5.w, r5.w, r5.w                    // r5.w = x*x
mul r4.w, r4.w, r5.w                    // r4.w = x*x*(3 - 2*x)
mad r3.w, r2.w, r3.w, -r4.w             // r3.w = y*y*(3 - 2*y) - x*x*(3 - 2*x)
max r2.w, r3.w, c4.x                    // r2.w = k = max(difference, 0)
mul r2, r2.w, c2                        // r2 = k * c2 (all four components)
mul r0.xyz, r0.w, c1                    // r0.xyz = max(d, 0) * c1.xyz
mov r0.w, r2.w                          // r0.w = k * c2.w (r2.w was rewritten by the mul above)
add r0, r0, c0                          // r0 += c0
mov r2.w, c4.y                          // r2.w = 1, so the next add contributes 1 to alpha
add r0, r2, r0                          // r0 = r2 + r0
mul r0, r1, r0                          // modulate the sum by the sampled texel
mov oC0, r0                             // write the final colour to render target 0
似乎1D指令比2D多不少...
|