Skip to content

Commit e0186ef

Browse files
committed
Unroll and align reference implementation.
1 parent e4c31ae commit e0186ef

File tree

1 file changed

+42
-6
lines changed

1 file changed

+42
-6
lines changed

nimcrypto/sha2/sha2_ref.nim

+42-6
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,49 @@ template ROUND512(a, b, c, d, e, f, g, h, z) =
2727
d = d + t0
2828
h = t0 + t1
2929

30+
# WMIX256(i): computes a single entry of the word array `W` for the 32-bit
# (sha256Compress) code path.
#
# `W` is not a parameter: it is resolved where the template is expanded, so the
# caller must have a `W` array in scope. `i` must be at least 16 because the
# expression reads W[i - 16]; entries i - 2, i - 7, i - 15 and i - 16 must
# already hold their final values before this step runs.
#
# SIG0 / SIG1 are defined outside this excerpt (presumably the SHA-256 small
# sigma functions -- confirm against their definitions).
template WMIX256(i) =
31+
W[i] = SIG1(W[i - 2]) + W[i - 7] + SIG0(W[i - 15]) + W[i - 16]
32+
33+
# WMIX512(i): computes a single entry of the word array `W` for the 64-bit
# (sha512Compress) code path.
#
# `W` is not a parameter: it is resolved where the template is expanded, so the
# caller must have a `W` array in scope. `i` must be at least 16 because the
# expression reads W[i - 16]; entries i - 2, i - 7, i - 15 and i - 16 must
# already hold their final values before this step runs.
#
# RHO0 / RHO1 are defined outside this excerpt (presumably the SHA-512 small
# sigma functions -- confirm against their definitions).
template WMIX512(i) =
34+
W[i] = RHO1(W[i - 2]) + W[i - 7] + RHO0(W[i - 15]) + W[i - 16]
35+
36+
# WLOOP256(): fully unrolled fill of W[16] .. W[63] for sha256Compress.
#
# Expands to 48 WMIX256 steps, one per index, and is equivalent to running
# WMIX256(i) for every i in 16 ..< 64. The steps must stay in ascending index
# order: each one reads entries written by earlier steps (W[i - 2], W[i - 7]).
#
# Expects W[0] .. W[15] to be filled by the caller before expansion, and a
# `W` with at least 64 elements to be in scope at the expansion site.
template WLOOP256() =
37+
WMIX256(16); WMIX256(17); WMIX256(18); WMIX256(19);
38+
WMIX256(20); WMIX256(21); WMIX256(22); WMIX256(23);
39+
WMIX256(24); WMIX256(25); WMIX256(26); WMIX256(27);
40+
WMIX256(28); WMIX256(29); WMIX256(30); WMIX256(31);
41+
WMIX256(32); WMIX256(33); WMIX256(34); WMIX256(35);
42+
WMIX256(36); WMIX256(37); WMIX256(38); WMIX256(39);
43+
WMIX256(40); WMIX256(41); WMIX256(42); WMIX256(43);
44+
WMIX256(44); WMIX256(45); WMIX256(46); WMIX256(47);
45+
WMIX256(48); WMIX256(49); WMIX256(50); WMIX256(51);
46+
WMIX256(52); WMIX256(53); WMIX256(54); WMIX256(55);
47+
WMIX256(56); WMIX256(57); WMIX256(58); WMIX256(59);
48+
WMIX256(60); WMIX256(61); WMIX256(62); WMIX256(63);
49+
50+
# WLOOP512(): fully unrolled fill of W[16] .. W[79] for sha512Compress.
#
# Expands to 64 WMIX512 steps, one per index, and is equivalent to running
# WMIX512(i) for every i in 16 ..< 80. The steps must stay in ascending index
# order: each one reads entries written by earlier steps (W[i - 2], W[i - 7]).
#
# Expects W[0] .. W[15] to be filled by the caller before expansion, and a
# `W` with at least 80 elements to be in scope at the expansion site.
template WLOOP512() =
51+
WMIX512(16); WMIX512(17); WMIX512(18); WMIX512(19);
52+
WMIX512(20); WMIX512(21); WMIX512(22); WMIX512(23);
53+
WMIX512(24); WMIX512(25); WMIX512(26); WMIX512(27);
54+
WMIX512(28); WMIX512(29); WMIX512(30); WMIX512(31);
55+
WMIX512(32); WMIX512(33); WMIX512(34); WMIX512(35);
56+
WMIX512(36); WMIX512(37); WMIX512(38); WMIX512(39);
57+
WMIX512(40); WMIX512(41); WMIX512(42); WMIX512(43);
58+
WMIX512(44); WMIX512(45); WMIX512(46); WMIX512(47);
59+
WMIX512(48); WMIX512(49); WMIX512(50); WMIX512(51);
60+
WMIX512(52); WMIX512(53); WMIX512(54); WMIX512(55);
61+
WMIX512(56); WMIX512(57); WMIX512(58); WMIX512(59);
62+
WMIX512(60); WMIX512(61); WMIX512(62); WMIX512(63);
63+
WMIX512(64); WMIX512(65); WMIX512(66); WMIX512(67);
64+
WMIX512(68); WMIX512(69); WMIX512(70); WMIX512(71);
65+
WMIX512(72); WMIX512(73); WMIX512(74); WMIX512(75);
66+
WMIX512(76); WMIX512(77); WMIX512(78); WMIX512(79);
67+
3068
proc sha256Compress*(state: var array[8, uint32], data: openArray[byte],
3169
blocks: int) {.noinit, inline.} =
3270
var
71+
W {.align(16), noinit.}: array[64, uint32]
3372
t0, t1: uint32
34-
W {.noinit.}: array[64, uint32]
3573
blocksCount = blocks
3674
offset = 0
3775

@@ -45,8 +83,7 @@ proc sha256Compress*(state: var array[8, uint32], data: openArray[byte],
4583
W[12] = beLoad32(data, offset + 48); W[13] = beLoad32(data, offset + 52)
4684
W[14] = beLoad32(data, offset + 56); W[15] = beLoad32(data, offset + 60)
4785

48-
for i in 16 ..< 64:
49-
W[i] = SIG1(W[i - 2]) + W[i - 7] + SIG0(W[i - 15]) + W[i - 16]
86+
WLOOP256()
5087

5188
var
5289
s0 = state[0]
@@ -141,8 +178,8 @@ proc sha256Compress*(state: var array[8, uint32], data: openArray[byte],
141178
proc sha512Compress*(state: var array[8, uint64], data: openArray[byte],
142179
blocks: int) {.noinit, inline.} =
143180
var
181+
W {.align(16), noinit.}: array[80, uint64]
144182
t0, t1: uint64
145-
W {.noinit.}: array[80, uint64]
146183
blocksCount = blocks
147184
offset = 0
148185

@@ -156,8 +193,7 @@ proc sha512Compress*(state: var array[8, uint64], data: openArray[byte],
156193
W[12] = beLoad64(data, offset + 96); W[13] = beLoad64(data, offset + 104)
157194
W[14] = beLoad64(data, offset + 112); W[15] = beLoad64(data, offset + 120)
158195

159-
for i in 16 ..< 80:
160-
W[i] = RHO1(W[i - 2]) + W[i - 7] + RHO0(W[i - 15]) + W[i - 16]
196+
WLOOP512()
161197

162198
var s0 = state[0]
163199
var s1 = state[1]

0 commit comments

Comments
 (0)