@@ -111,9 +111,8 @@ when defined(amd64):
111
111
w[7 ] = w[6 ]; w[6 ] = w[5 ]; w[5 ] = w[4 ]; w[4 ] = w[3 ]
112
112
w[3 ] = w[2 ]; w[2 ] = w[1 ]; w[1 ] = w[0 ]; w[0 ] = tmp
113
113
114
- proc sha256UpdateAvx (x: var array [4 , m128i], k256i: int ,
115
- loMask, hiMask: m128i): m128i {.inline , noinit .} =
116
- var t {.align (64 ).}: array [4 , m128i]
114
+ template sha256UpdateAvx (x, k256i, loMask, hiMask: untyped ): m128i =
115
+ var t {.align (32 ), noinit .}: array [4 , m128i]
117
116
118
117
t[0 ] = mm_alignr_epi8 (x[1 ], x[0 ], 4 )
119
118
t[3 ] = mm_alignr_epi8 (x[3 ], x[2 ], 4 )
@@ -150,9 +149,8 @@ when defined(amd64):
150
149
151
150
mm_add_epi32 (x[3 ], m128i.load (K0D , k256i))
152
151
153
- proc sha512UpdateAvx (x: var array [8 , m128i], k512i: int ): m128i {.
154
- inline , noinit .} =
155
- var t {.align (64 ).}: array [4 , m128i]
152
+ template sha512UpdateAvx (x, k512i: untyped ): m128i =
153
+ var t {.align (32 ), noinit .}: array [4 , m128i]
156
154
157
155
t[0 ] = mm_alignr_epi8 (x[1 ], x[0 ], 8 )
158
156
t[3 ] = mm_alignr_epi8 (x[5 ], x[4 ], 8 )
@@ -188,10 +186,10 @@ when defined(amd64):
188
186
189
187
mm_add_epi64 (x[7 ], m128i.load (K1D , k512i))
190
188
191
- proc loadData32 ( x: var array [4 , m128i],
192
- ms: var array [16 , uint32 ], data: openArray [ byte ]) {.
193
- inline , noinit .} =
194
- let shuffleMask =
189
+ # x: var array[4, m128i]
190
+ # ms: var array[16, uint32]
191
+ template loadData32 (x, ms: untyped , data: openArray [ byte ]) =
192
+ let shuffleMask {. align ( 32 ).} =
195
193
mm_setr_epi32 (0x 00010203 'u32 , 0x 04050607 'u32 ,
196
194
0x 08090a0b 'u32 , 0x 0c0d0e0f 'u32 )
197
195
x[0 ] = m128i.load (data, 0 )
@@ -210,10 +208,10 @@ when defined(amd64):
210
208
x[3 ] = mm_shuffle_epi8 (x[3 ], shuffleMask)
211
209
m128i.store (ms, 12 , mm_add_epi32 (x[3 ], m128i.load (K0D , 12 )))
212
210
213
- proc loadData64 ( x: var array [8 , m128i],
214
- ms: var array [16 , uint64 ], data: openArray [ byte ]) {.
215
- inline , noinit .} =
216
- let shuffleMask =
211
+ # x: var array[8, m128i]
212
+ # ms: var array[16, uint64]
213
+ template loadData64 (x, ms: untyped , data: openArray [ byte ]) =
214
+ let shuffleMask {. align ( 32 ).} =
217
215
mm_setr_epi32 (0x 04050607 'u32 , 0x 00010203 'u32 ,
218
216
0x 0c0d0e0f 'u32 , 0x 08090a0b 'u32 )
219
217
@@ -253,17 +251,17 @@ when defined(amd64):
253
251
data: openArray [byte ],
254
252
blocks: int ) {.inline , noinit .} =
255
253
let
256
- loMask =
254
+ loMask {. align ( 32 ).} =
257
255
mm_setr_epi32 (0x 03020100 'u32 , 0x 0b0a0908 'u32 , 0x ffffffff 'u32 ,
258
256
0x ffffffff 'u32 )
259
- hiMask =
257
+ hiMask {. align ( 32 ).} =
260
258
mm_setr_epi32 (0x ffffffff 'u32 , 0x ffffffff 'u32 , 0x 03020100 'u32 ,
261
259
0x 0b0a0908 'u32 )
262
260
263
261
var
264
- ms {.align (64 ) .}: array [16 , uint32 ]
265
- x {.align (64 ) .}: array [4 , m128i]
266
- cs {.align (64 ) .}: array [8 , uint32 ]
262
+ ms {.align (32 ), noinit .}: array [16 , uint32 ]
263
+ x {.align (32 ), noinit .}: array [4 , m128i]
264
+ cs {.align (32 ), noinit .}: array [8 , uint32 ]
267
265
blocksCount = blocks
268
266
offset = 0
269
267
@@ -275,86 +273,86 @@ when defined(amd64):
275
273
offset + sha256.sizeBlock () - 1 ))
276
274
277
275
block :
278
- let s0 = sha256UpdateAvx (x, 16 , loMask, hiMask)
276
+ let s0 {. align ( 32 ).} = sha256UpdateAvx (x, 16 , loMask, hiMask)
279
277
ROUND256 (cs, ms[0 ])
280
278
ROUND256 (cs, ms[1 ])
281
279
ROUND256 (cs, ms[2 ])
282
280
ROUND256 (cs, ms[3 ])
283
281
m128i.store (ms, 0 , s0)
284
282
285
- let s1 = sha256UpdateAvx (x, 20 , loMask, hiMask)
283
+ let s1 {. align ( 32 ).} = sha256UpdateAvx (x, 20 , loMask, hiMask)
286
284
ROUND256 (cs, ms[4 ])
287
285
ROUND256 (cs, ms[5 ])
288
286
ROUND256 (cs, ms[6 ])
289
287
ROUND256 (cs, ms[7 ])
290
288
m128i.store (ms, 4 , s1)
291
289
292
- let s2 = sha256UpdateAvx (x, 24 , loMask, hiMask)
290
+ let s2 {. align ( 32 ).} = sha256UpdateAvx (x, 24 , loMask, hiMask)
293
291
ROUND256 (cs, ms[8 ])
294
292
ROUND256 (cs, ms[9 ])
295
293
ROUND256 (cs, ms[10 ])
296
294
ROUND256 (cs, ms[11 ])
297
295
m128i.store (ms, 8 , s2)
298
296
299
- let s3 = sha256UpdateAvx (x, 28 , loMask, hiMask)
297
+ let s3 {. align ( 32 ).} = sha256UpdateAvx (x, 28 , loMask, hiMask)
300
298
ROUND256 (cs, ms[12 ])
301
299
ROUND256 (cs, ms[13 ])
302
300
ROUND256 (cs, ms[14 ])
303
301
ROUND256 (cs, ms[15 ])
304
302
m128i.store (ms, 12 , s3)
305
303
306
304
block :
307
- let s0 = sha256UpdateAvx (x, 32 , loMask, hiMask)
305
+ let s0 {. align ( 32 ).} = sha256UpdateAvx (x, 32 , loMask, hiMask)
308
306
ROUND256 (cs, ms[0 ])
309
307
ROUND256 (cs, ms[1 ])
310
308
ROUND256 (cs, ms[2 ])
311
309
ROUND256 (cs, ms[3 ])
312
310
m128i.store (ms, 0 , s0)
313
311
314
- let s1 = sha256UpdateAvx (x, 36 , loMask, hiMask)
312
+ let s1 {. align ( 32 ).} = sha256UpdateAvx (x, 36 , loMask, hiMask)
315
313
ROUND256 (cs, ms[4 ])
316
314
ROUND256 (cs, ms[5 ])
317
315
ROUND256 (cs, ms[6 ])
318
316
ROUND256 (cs, ms[7 ])
319
317
m128i.store (ms, 4 , s1)
320
318
321
- let s2 = sha256UpdateAvx (x, 40 , loMask, hiMask)
319
+ let s2 {. align ( 32 ).} = sha256UpdateAvx (x, 40 , loMask, hiMask)
322
320
ROUND256 (cs, ms[8 ])
323
321
ROUND256 (cs, ms[9 ])
324
322
ROUND256 (cs, ms[10 ])
325
323
ROUND256 (cs, ms[11 ])
326
324
m128i.store (ms, 8 , s2)
327
325
328
- let s3 = sha256UpdateAvx (x, 44 , loMask, hiMask)
326
+ let s3 {. align ( 32 ).} = sha256UpdateAvx (x, 44 , loMask, hiMask)
329
327
ROUND256 (cs, ms[12 ])
330
328
ROUND256 (cs, ms[13 ])
331
329
ROUND256 (cs, ms[14 ])
332
330
ROUND256 (cs, ms[15 ])
333
331
m128i.store (ms, 12 , s3)
334
332
335
333
block :
336
- let s0 = sha256UpdateAvx (x, 48 , loMask, hiMask)
334
+ let s0 {. align ( 32 ).} = sha256UpdateAvx (x, 48 , loMask, hiMask)
337
335
ROUND256 (cs, ms[0 ])
338
336
ROUND256 (cs, ms[1 ])
339
337
ROUND256 (cs, ms[2 ])
340
338
ROUND256 (cs, ms[3 ])
341
339
m128i.store (ms, 0 , s0)
342
340
343
- let s1 = sha256UpdateAvx (x, 52 , loMask, hiMask)
341
+ let s1 {. align ( 32 ).} = sha256UpdateAvx (x, 52 , loMask, hiMask)
344
342
ROUND256 (cs, ms[4 ])
345
343
ROUND256 (cs, ms[5 ])
346
344
ROUND256 (cs, ms[6 ])
347
345
ROUND256 (cs, ms[7 ])
348
346
m128i.store (ms, 4 , s1)
349
347
350
- let s2 = sha256UpdateAvx (x, 56 , loMask, hiMask)
348
+ let s2 {. align ( 32 ).} = sha256UpdateAvx (x, 56 , loMask, hiMask)
351
349
ROUND256 (cs, ms[8 ])
352
350
ROUND256 (cs, ms[9 ])
353
351
ROUND256 (cs, ms[10 ])
354
352
ROUND256 (cs, ms[11 ])
355
353
m128i.store (ms, 8 , s2)
356
354
357
- let s3 = sha256UpdateAvx (x, 60 , loMask, hiMask)
355
+ let s3 {. align ( 32 ).} = sha256UpdateAvx (x, 60 , loMask, hiMask)
358
356
ROUND256 (cs, ms[12 ])
359
357
ROUND256 (cs, ms[13 ])
360
358
ROUND256 (cs, ms[14 ])
@@ -388,9 +386,9 @@ when defined(amd64):
388
386
data: openArray [byte ],
389
387
blocks: int ) {.inline , noinit .} =
390
388
var
391
- ms {.align (64 ) .}: array [16 , uint64 ]
392
- x {.align (64 ) .}: array [8 , m128i]
393
- cs {.align (64 ) .}: array [8 , uint64 ]
389
+ ms {.align (32 ), noinit .}: array [16 , uint64 ]
390
+ x {.align (32 ), noinit .}: array [8 , m128i]
391
+ cs {.align (32 ), noinit .}: array [8 , uint64 ]
394
392
blocksCount = blocks
395
393
offset = 0
396
394
0 commit comments