Skip to content

Commit c312210

Browse files
authored
Rework towering (#148)
* naive removal of out-of-place mul by non residue * Use {.inline.} in a consistent manner across the codebase * Handle aliasing for quadratic multiplication * reorg optimization * Handle aliasing for quadratic squaring * handle aliasing in mul_sparse_complex_by_0y * Rework multiplication by nonresidue, assume tower and twist use same non-residue * continue rework * continue on non-residues * Remove "NonResidue *" calls * handle aliasing in Chung-Hasan SQR2 * Handle aliasing in Chung-Hasan SQR3 * Use one less temporary in Chung Hasan sqr2 * handle aliasing in cubic extensions * merge extension tower in the same file to reduce duplicate proc and allow better inlining * handle aliasing in cubic inversion * drop out-of-place proc from BigInt and finite fields as well * less copies in line_projective * remove a copy in fp12 by lines
1 parent 2c5e12d commit c312210

28 files changed

+1423
-1291
lines changed

constantine/arithmetic/bigints.nim

+2-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import
1010
../config/[common, type_bigint],
1111
../primitives,
1212
./limbs,
13-
./limbs_double_width,
13+
./limbs_extmul,
1414
./limbs_modular,
1515
./limbs_montgomery
1616

@@ -347,7 +347,7 @@ func bit0*(a: BigInt): Ct[uint8] =
347347
# Multiplication by small constants
348348
# ------------------------------------------------------------
349349

350-
func `*=`*(a: var BigInt, b: static int) {.inline.} =
350+
func `*=`*(a: var BigInt, b: static int) =
351351
## Multiplication by a small integer known at compile-time
352352
# Implementation:
353353
#
@@ -420,11 +420,6 @@ func `*=`*(a: var BigInt, b: static int) {.inline.} =
420420
else:
421421
{.error: "Multiplication by this small int not implemented".}
422422

423-
func `*`*(b: static int, a: BigInt): BigInt {.noinit, inline.} =
424-
## Multiplication by a small integer known at compile-time
425-
result = a
426-
result *= b
427-
428423
# Division by constants
429424
# ------------------------------------------------------------
430425

constantine/arithmetic/finite_fields.nim

+56-55
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,22 @@ export Fp, Fr, FF
4343

4444
# No exceptions allowed
4545
{.push raises: [].}
46+
{.push inline.}
4647

4748
# ############################################################
4849
#
4950
# Conversion
5051
#
5152
# ############################################################
5253

53-
func fromBig*(dst: var FF, src: BigInt) {.inline.}=
54+
func fromBig*(dst: var FF, src: BigInt) =
5455
## Convert a BigInt to its Montgomery form
5556
when nimvm:
5657
dst.mres.montyResidue_precompute(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord())
5758
else:
5859
dst.mres.montyResidue(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
5960

60-
func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit, inline.} =
61+
func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit.} =
6162
## Convert a BigInt to its Montgomery form
6263
result.fromBig(src)
6364

@@ -70,14 +71,14 @@ func toBig*(src: FF): auto {.noInit, inline.} =
7071
# Copy
7172
# ------------------------------------------------------------
7273

73-
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline, meter.} =
74+
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
7475
## Constant-time conditional copy
7576
## If ctl is true: b is copied into a
7677
## if ctl is false: b is not copied and a is unmodified
7778
## Time and memory accesses are the same whether a copy occurs or not
7879
ccopy(a.mres, b.mres, ctl)
7980

80-
func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
81+
func cswap*(a, b: var FF, ctl: CTBool) {.meter.} =
8182
## Swap ``a`` and ``b`` if ``ctl`` is true
8283
##
8384
## Constant-time:
@@ -105,34 +106,34 @@ func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
105106
# In practice I'm not aware of such a prime being used in elliptic curves.
106107
# 2^127 - 1 and 2^521 - 1 are used but 127 and 521 are not multiple of 32/64
107108

108-
func `==`*(a, b: FF): SecretBool {.inline.} =
109+
func `==`*(a, b: FF): SecretBool =
109110
## Constant-time equality check
110111
a.mres == b.mres
111112

112-
func isZero*(a: FF): SecretBool {.inline.} =
113+
func isZero*(a: FF): SecretBool =
113114
## Constant-time check if zero
114115
a.mres.isZero()
115116

116-
func isOne*(a: FF): SecretBool {.inline.} =
117+
func isOne*(a: FF): SecretBool =
117118
## Constant-time check if one
118119
a.mres == FF.getMontyOne()
119120

120-
func isMinusOne*(a: FF): SecretBool {.inline.} =
121+
func isMinusOne*(a: FF): SecretBool =
121122
## Constant-time check if -1 (mod p)
122123
a.mres == FF.getMontyPrimeMinus1()
123124

124-
func setZero*(a: var FF) {.inline.} =
125+
func setZero*(a: var FF) =
125126
## Set ``a`` to zero
126127
a.mres.setZero()
127128

128-
func setOne*(a: var FF) {.inline.} =
129+
func setOne*(a: var FF) =
129130
## Set ``a`` to one
130131
# Note: we need 1 in Montgomery residue form
131132
# TODO: Nim codegen is not optimal it uses a temporary
132133
# Check if the compiler optimizes it away
133134
a.mres = FF.getMontyOne()
134135

135-
func `+=`*(a: var FF, b: FF) {.inline, meter.} =
136+
func `+=`*(a: var FF, b: FF) {.meter.} =
136137
## In-place addition modulo p
137138
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
138139
addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@@ -141,15 +142,15 @@ func `+=`*(a: var FF, b: FF) {.inline, meter.} =
141142
overflowed = overflowed or not(a.mres < FF.fieldMod())
142143
discard csub(a.mres, FF.fieldMod(), overflowed)
143144

144-
func `-=`*(a: var FF, b: FF) {.inline, meter.} =
145+
func `-=`*(a: var FF, b: FF) {.meter.} =
145146
## In-place subtraction modulo p
146147
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
147148
submod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
148149
else:
149150
let underflowed = sub(a.mres, b.mres)
150151
discard cadd(a.mres, FF.fieldMod(), underflowed)
151152

152-
func double*(a: var FF) {.inline, meter.} =
153+
func double*(a: var FF) {.meter.} =
153154
## Double ``a`` modulo p
154155
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
155156
addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@@ -158,7 +159,7 @@ func double*(a: var FF) {.inline, meter.} =
158159
overflowed = overflowed or not(a.mres < FF.fieldMod())
159160
discard csub(a.mres, FF.fieldMod(), overflowed)
160161

161-
func sum*(r: var FF, a, b: FF) {.inline, meter.} =
162+
func sum*(r: var FF, a, b: FF) {.meter.} =
162163
## Sum ``a`` and ``b`` into ``r`` modulo p
163164
## r is initialized/overwritten
164165
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@@ -168,11 +169,11 @@ func sum*(r: var FF, a, b: FF) {.inline, meter.} =
168169
overflowed = overflowed or not(r.mres < FF.fieldMod())
169170
discard csub(r.mres, FF.fieldMod(), overflowed)
170171

171-
func sumNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
172+
func sumNoReduce*(r: var FF, a, b: FF) {.meter.} =
172173
## Sum ``a`` and ``b`` into ``r`` without reduction
173174
discard r.mres.sum(a.mres, b.mres)
174175

175-
func diff*(r: var FF, a, b: FF) {.inline, meter.} =
176+
func diff*(r: var FF, a, b: FF) {.meter.} =
176177
## Subtract `b` from `a` and store the result into `r`.
177178
## `r` is initialized/overwritten
178179
## Requires r != b
@@ -182,12 +183,12 @@ func diff*(r: var FF, a, b: FF) {.inline, meter.} =
182183
var underflowed = r.mres.diff(a.mres, b.mres)
183184
discard cadd(r.mres, FF.fieldMod(), underflowed)
184185

185-
func diffNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
186+
func diffNoReduce*(r: var FF, a, b: FF) {.meter.} =
186187
## Subtract `b` from `a` and store the result into `r`
187188
## without reduction
188189
discard r.mres.diff(a.mres, b.mres)
189190

190-
func double*(r: var FF, a: FF) {.inline, meter.} =
191+
func double*(r: var FF, a: FF) {.meter.} =
191192
## Double ``a`` into ``r``
192193
## `r` is initialized/overwritten
193194
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@@ -197,16 +198,16 @@ func double*(r: var FF, a: FF) {.inline, meter.} =
197198
overflowed = overflowed or not(r.mres < FF.fieldMod())
198199
discard csub(r.mres, FF.fieldMod(), overflowed)
199200

200-
func prod*(r: var FF, a, b: FF) {.inline, meter.} =
201+
func prod*(r: var FF, a, b: FF) {.meter.} =
201202
## Store the product of ``a`` by ``b`` modulo p into ``r``
202203
## ``r`` is initialized / overwritten
203204
r.mres.montyMul(a.mres, b.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
204205

205-
func square*(r: var FF, a: FF) {.inline, meter.} =
206+
func square*(r: var FF, a: FF) {.meter.} =
206207
## Squaring modulo p
207208
r.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
208209

209-
func neg*(r: var FF, a: FF) {.inline, meter.} =
210+
func neg*(r: var FF, a: FF) {.meter.} =
210211
## Negate modulo p
211212
when UseASM_X86_64:
212213
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@@ -221,11 +222,11 @@ func neg*(r: var FF, a: FF) {.inline, meter.} =
221222
t.mres.czero(isZero)
222223
r = t
223224

224-
func neg*(a: var FF) {.inline, meter.} =
225+
func neg*(a: var FF) {.meter.} =
225226
## Negate modulo p
226227
a.neg(a)
227228

228-
func div2*(a: var FF) {.inline, meter.} =
229+
func div2*(a: var FF) {.meter.} =
229230
## Modular division by 2
230231
a.mres.div2_modular(FF.getPrimePlus1div2())
231232

@@ -269,7 +270,7 @@ func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
269270
#
270271
# Internally those procedures will allocate extra scratchspace on the stack
271272

272-
func pow*(a: var FF, exponent: BigInt) {.inline.} =
273+
func pow*(a: var FF, exponent: BigInt) =
273274
## Exponentiation modulo p
274275
## ``a``: a field element to be exponentiated
275276
## ``exponent``: a big integer
@@ -282,7 +283,7 @@ func pow*(a: var FF, exponent: BigInt) {.inline.} =
282283
FF.canUseNoCarryMontySquare()
283284
)
284285

285-
func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
286+
func pow*(a: var FF, exponent: openarray[byte]) =
286287
## Exponentiation modulo p
287288
## ``a``: a field element to be exponentiated
288289
## ``exponent``: a big integer in canonical big endian representation
@@ -295,7 +296,7 @@ func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
295296
FF.canUseNoCarryMontySquare()
296297
)
297298

298-
func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
299+
func powUnsafeExponent*(a: var FF, exponent: BigInt) =
299300
## Exponentiation modulo p
300301
## ``a``: a field element to be exponentiated
301302
## ``exponent``: a big integer
@@ -315,7 +316,7 @@ func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
315316
FF.canUseNoCarryMontySquare()
316317
)
317318

318-
func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
319+
func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) =
319320
## Exponentiation modulo p
320321
## ``a``: a field element to be exponentiated
321322
## ``exponent``: a big integer in canonical big endian representation
@@ -342,47 +343,27 @@ func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
342343
# ############################################################
343344
#
344345
# This implements extra primitives for ergonomics.
345-
# The in-place ones should be preferred as they avoid copies on assignment
346-
# Two kinds:
347-
# - Those that return a field element
348-
# - Those that internally allocate a temporary field element
349346

350-
func `+`*(a, b: FF): FF {.noInit, inline, meter.} =
351-
## Addition modulo p
352-
result.sum(a, b)
353-
354-
func `-`*(a, b: FF): FF {.noInit, inline, meter.} =
355-
## Substraction modulo p
356-
result.diff(a, b)
357-
358-
func `*`*(a, b: FF): FF {.noInit, inline, meter.} =
359-
## Multiplication modulo p
360-
##
361-
## It is recommended to assign with {.noInit.}
362-
## as FF elements are usually large and this
363-
## routine will zero init internally the result.
364-
result.prod(a, b)
365-
366-
func `*=`*(a: var FF, b: FF) {.inline, meter.} =
347+
func `*=`*(a: var FF, b: FF) {.meter.} =
367348
## Multiplication modulo p
368349
a.prod(a, b)
369350

370-
func square*(a: var FF) {.inline, meter.} =
351+
func square*(a: var FF) {.meter.} =
371352
## Squaring modulo p
372353
a.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
373354

374-
func square_repeated*(r: var FF, num: int) {.inline, meter.} =
355+
func square_repeated*(r: var FF, num: int) {.meter.} =
375356
## Repeated squarings
376357
for _ in 0 ..< num:
377358
r.square()
378359

379-
func square_repeated*(r: var FF, a: FF, num: int) {.inline, meter.} =
360+
func square_repeated*(r: var FF, a: FF, num: int) {.meter.} =
380361
## Repeated squarings
381362
r.square(a)
382363
for _ in 1 ..< num:
383364
r.square()
384365

385-
func `*=`*(a: var FF, b: static int) {.inline.} =
366+
func `*=`*(a: var FF, b: static int) =
386367
## Multiplication by a small integer known at compile-time
387368
# Implementation:
388369
# We don't want to go convert the integer to the Montgomery domain (O(n²))
@@ -464,7 +445,27 @@ func `*=`*(a: var FF, b: static int) {.inline.} =
464445
else:
465446
{.error: "Multiplication by this small int not implemented".}
466447

467-
func `*`*(b: static int, a: FF): FF {.noinit, inline.} =
448+
func prod*(r: var FF, a: FF, b: static int) =
468449
## Multiplication by a small integer known at compile-time
469-
result = a
470-
result *= b
450+
const negate = b < 0
451+
const b = if negate: -b
452+
else: b
453+
when negate:
454+
r.neg(a)
455+
else:
456+
r = a
457+
r *= b
458+
459+
template mulCheckSparse*(a: var Fp, b: Fp) =
460+
## Multiplication with optimization for sparse inputs
461+
when b.isOne().bool:
462+
discard
463+
elif b.isZero().bool:
464+
a.setZero()
465+
elif b.isMinusOne().bool:
466+
a.neg()
467+
else:
468+
a *= b
469+
470+
{.pop.} # inline
471+
{.pop.} # raises no exceptions

constantine/arithmetic/finite_fields_double_width.nim

+14-7
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import
1212
./bigints,
1313
./finite_fields,
1414
./limbs,
15-
./limbs_double_width,
15+
./limbs_extmul,
1616
./limbs_montgomery
1717

1818
when UseASM_X86_64:
@@ -28,18 +28,22 @@ template doubleWidth*(T: typedesc[Fp]): typedesc =
2828
## Return the double-width type matching with Fp
2929
FpDbl[T.C]
3030

31-
func `==`*(a, b: FpDbl): SecretBool {.inline.} =
31+
# No exceptions allowed
32+
{.push raises: [].}
33+
{.push inline.}
34+
35+
func `==`*(a, b: FpDbl): SecretBool =
3236
a.limbs2x == b.limbs2x
3337

34-
func mulNoReduce*(r: var FpDbl, a, b: Fp) {.inline.} =
38+
func mulNoReduce*(r: var FpDbl, a, b: Fp) =
3539
## Store the product of ``a`` by ``b`` into ``r``
3640
r.limbs2x.prod(a.mres.limbs, b.mres.limbs)
3741

38-
func squareNoReduce*(r: var FpDbl, a: Fp) {.inline.} =
42+
func squareNoReduce*(r: var FpDbl, a: Fp) =
3943
## Store the square of ``a`` into ``r``
4044
r.limbs2x.square(a.mres.limbs)
4145

42-
func reduce*(r: var Fp, a: FpDbl) {.inline.} =
46+
func reduce*(r: var Fp, a: FpDbl) =
4347
## Reduce a double-width field element into r
4448
const N = r.mres.limbs.len
4549
montyRed(
@@ -54,7 +58,7 @@ func diffNoReduce*(r: var FpDbl, a, b: FpDbl) =
5458
## Double-width subtraction without reduction
5559
discard r.limbs2x.diff(a.limbs2x, b.limbs2x)
5660

57-
func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
61+
func diff*(r: var FpDbl, a, b: FpDbl) =
5862
## Double-width modular subtraction
5963
when UseASM_X86_64:
6064
sub2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
@@ -69,6 +73,9 @@ func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
6973
addC(carry, sum, r.limbs2x[i+N], M.limbs[i], carry)
7074
underflowed.ccopy(r.limbs2x[i+N], sum)
7175

72-
func `-=`*(a: var FpDbl, b: FpDbl) {.inline.}=
76+
func `-=`*(a: var FpDbl, b: FpDbl) =
7377
## Double-width modular substraction
7478
a.diff(a, b)
79+
80+
{.pop.} # inline
81+
{.pop.} # raises no exceptions

0 commit comments

Comments
 (0)