File tree 12 files changed +150
-48
lines changed
12 files changed +150
-48
lines changed Original file line number Diff line number Diff line change @@ -3075,11 +3075,19 @@ __bn_sqrx8x_internal:
3075
3075
3076
3076
.align 32
3077
3077
.Lsqrx8x_break:
3078
- subq 16 +8 (%rsp ),%r8
3078
+ xorq %rbp ,%rbp
3079
+ subq 16 +8 (%rsp ),%rbx
3080
+ adcxq %rbp ,%r8
3079
3081
movq 24 +8 (%rsp ),%rcx
3082
+ adcxq %rbp ,%r9
3080
3083
movq 0 (%rsi ),%rdx
3081
- xorl %ebp , %ebp
3084
+ adcq $0 , %r10
3082
3085
movq %r8 ,0 (%rdi )
3086
+ adcq $0 ,%r11
3087
+ adcq $0 ,%r12
3088
+ adcq $0 ,%r13
3089
+ adcq $0 ,%r14
3090
+ adcq $0 ,%r15
3083
3091
cmpq %rcx ,%rdi
3084
3092
je .Lsqrx8x_outer_loop
3085
3093
Original file line number Diff line number Diff line change @@ -1036,19 +1036,18 @@ __ecp_nistz256_sqr_montx:
1036
1036
adoxq %rbp ,%r13
1037
1037
.byte 0x67 ,0x67
1038
1038
mulxq %rdx ,%rcx ,%rax
1039
- movq %r8 ,%rdx
1039
+ movq .Lpoly+ 24 ( %rip ) ,%rdx
1040
1040
adoxq %rcx ,%r14
1041
1041
shlxq %rsi ,%r8 ,%rcx
1042
1042
adoxq %rax ,%r15
1043
1043
shrxq %rsi ,%r8 ,%rax
1044
- movq .Lpoly+ 24 ( %rip ) ,%rbp
1044
+ movq %rdx ,%rbp
1045
1045
1046
1046
1047
1047
addq %rcx ,%r9
1048
1048
adcq %rax ,%r10
1049
1049
1050
- mulxq %rbp ,%rcx ,%r8
1051
- movq %r9 ,%rdx
1050
+ mulxq %r8 ,%rcx ,%r8
1052
1051
adcq %rcx ,%r11
1053
1052
shlxq %rsi ,%r9 ,%rcx
1054
1053
adcq $0 ,%r8
@@ -1058,8 +1057,7 @@ __ecp_nistz256_sqr_montx:
1058
1057
addq %rcx ,%r10
1059
1058
adcq %rax ,%r11
1060
1059
1061
- mulxq %rbp ,%rcx ,%r9
1062
- movq %r10 ,%rdx
1060
+ mulxq %r9 ,%rcx ,%r9
1063
1061
adcq %rcx ,%r8
1064
1062
shlxq %rsi ,%r10 ,%rcx
1065
1063
adcq $0 ,%r9
@@ -1069,8 +1067,7 @@ __ecp_nistz256_sqr_montx:
1069
1067
addq %rcx ,%r11
1070
1068
adcq %rax ,%r8
1071
1069
1072
- mulxq %rbp ,%rcx ,%r10
1073
- movq %r11 ,%rdx
1070
+ mulxq %r10 ,%rcx ,%r10
1074
1071
adcq %rcx ,%r9
1075
1072
shlxq %rsi ,%r11 ,%rcx
1076
1073
adcq $0 ,%r10
@@ -1080,12 +1077,12 @@ __ecp_nistz256_sqr_montx:
1080
1077
addq %rcx ,%r8
1081
1078
adcq %rax ,%r9
1082
1079
1083
- mulxq %rbp ,%rcx ,%r11
1080
+ mulxq %r11 ,%rcx ,%r11
1084
1081
adcq %rcx ,%r10
1085
1082
adcq $0 ,%r11
1086
1083
1087
1084
xorq %rdx ,%rdx
1088
- adcq %r8 ,%r12
1085
+ addq %r8 ,%r12
1089
1086
movq .Lpoly+8 (%rip ),%rsi
1090
1087
adcq %r9 ,%r13
1091
1088
movq %r12 ,%r8
@@ -1094,8 +1091,7 @@ __ecp_nistz256_sqr_montx:
1094
1091
movq %r13 ,%r9
1095
1092
adcq $0 ,%rdx
1096
1093
1097
- xorl %eax ,%eax
1098
- sbbq $-1 ,%r12
1094
+ subq $-1 ,%r12
1099
1095
movq %r14 ,%r10
1100
1096
sbbq %rsi ,%r13
1101
1097
sbbq $0 ,%r14
Original file line number Diff line number Diff line change @@ -116,8 +116,19 @@ OPENSSL_ia32_cpuid:
116
116
orl $0x40000000 ,%edx
117
117
andb $15 ,%ah
118
118
cmpb $15 ,%ah
119
- jne .Lnotintel
119
+ jne .LnotP4
120
120
orl $0x00100000 ,%edx
121
+ .LnotP4:
122
+ cmpb $6 ,%ah
123
+ jne .Lnotintel
124
+ andl $0x0fff0ff0 ,%eax
125
+ cmpl $0x00050670 ,%eax
126
+ je .Lknights
127
+ cmpl $0x00080650 ,%eax
128
+ jne .Lnotintel
129
+ .Lknights:
130
+ andl $0xfbffffff ,%ecx
131
+
121
132
.Lnotintel:
122
133
btl $28 ,%edx
123
134
jnc .Lgeneric
@@ -142,6 +153,10 @@ OPENSSL_ia32_cpuid:
142
153
movl $7 ,%eax
143
154
xorl %ecx ,%ecx
144
155
cpuid
156
+ btl $26 ,%r9d
157
+ jc .Lnotknights
158
+ andl $0xfff7ffff ,%ebx
159
+ .Lnotknights:
145
160
movl %ebx ,8 (%rdi )
146
161
.Lno_extended_info:
147
162
Original file line number Diff line number Diff line change @@ -3075,11 +3075,19 @@ L$sqrx8x_loop:
3075
3075
3076
3076
.p2align 5
3077
3077
L$sqrx8x_break:
3078
- subq 16 +8 (%rsp ),%r8
3078
+ xorq %rbp ,%rbp
3079
+ subq 16 +8 (%rsp ),%rbx
3080
+ adcxq %rbp ,%r8
3079
3081
movq 24 +8 (%rsp ),%rcx
3082
+ adcxq %rbp ,%r9
3080
3083
movq 0 (%rsi ),%rdx
3081
- xorl %ebp , %ebp
3084
+ adcq $0 , %r10
3082
3085
movq %r8 ,0 (%rdi )
3086
+ adcq $0 ,%r11
3087
+ adcq $0 ,%r12
3088
+ adcq $0 ,%r13
3089
+ adcq $0 ,%r14
3090
+ adcq $0 ,%r15
3083
3091
cmpq %rcx ,%rdi
3084
3092
je L$sqrx8x_outer_loop
3085
3093
Original file line number Diff line number Diff line change @@ -1036,19 +1036,18 @@ __ecp_nistz256_sqr_montx:
1036
1036
adoxq %rbp ,%r13
1037
1037
.byte 0x67 ,0x67
1038
1038
mulxq %rdx ,%rcx ,%rax
1039
- movq %r8 ,%rdx
1039
+ movq L$poly+ 24 ( %rip ) ,%rdx
1040
1040
adoxq %rcx ,%r14
1041
1041
shlxq %rsi ,%r8 ,%rcx
1042
1042
adoxq %rax ,%r15
1043
1043
shrxq %rsi ,%r8 ,%rax
1044
- movq L$poly+ 24 ( %rip ) ,%rbp
1044
+ movq %rdx ,%rbp
1045
1045
1046
1046
1047
1047
addq %rcx ,%r9
1048
1048
adcq %rax ,%r10
1049
1049
1050
- mulxq %rbp ,%rcx ,%r8
1051
- movq %r9 ,%rdx
1050
+ mulxq %r8 ,%rcx ,%r8
1052
1051
adcq %rcx ,%r11
1053
1052
shlxq %rsi ,%r9 ,%rcx
1054
1053
adcq $0 ,%r8
@@ -1058,8 +1057,7 @@ __ecp_nistz256_sqr_montx:
1058
1057
addq %rcx ,%r10
1059
1058
adcq %rax ,%r11
1060
1059
1061
- mulxq %rbp ,%rcx ,%r9
1062
- movq %r10 ,%rdx
1060
+ mulxq %r9 ,%rcx ,%r9
1063
1061
adcq %rcx ,%r8
1064
1062
shlxq %rsi ,%r10 ,%rcx
1065
1063
adcq $0 ,%r9
@@ -1069,8 +1067,7 @@ __ecp_nistz256_sqr_montx:
1069
1067
addq %rcx ,%r11
1070
1068
adcq %rax ,%r8
1071
1069
1072
- mulxq %rbp ,%rcx ,%r10
1073
- movq %r11 ,%rdx
1070
+ mulxq %r10 ,%rcx ,%r10
1074
1071
adcq %rcx ,%r9
1075
1072
shlxq %rsi ,%r11 ,%rcx
1076
1073
adcq $0 ,%r10
@@ -1080,12 +1077,12 @@ __ecp_nistz256_sqr_montx:
1080
1077
addq %rcx ,%r8
1081
1078
adcq %rax ,%r9
1082
1079
1083
- mulxq %rbp ,%rcx ,%r11
1080
+ mulxq %r11 ,%rcx ,%r11
1084
1081
adcq %rcx ,%r10
1085
1082
adcq $0 ,%r11
1086
1083
1087
1084
xorq %rdx ,%rdx
1088
- adcq %r8 ,%r12
1085
+ addq %r8 ,%r12
1089
1086
movq L$poly+8 (%rip ),%rsi
1090
1087
adcq %r9 ,%r13
1091
1088
movq %r12 ,%r8
@@ -1094,8 +1091,7 @@ __ecp_nistz256_sqr_montx:
1094
1091
movq %r13 ,%r9
1095
1092
adcq $0 ,%rdx
1096
1093
1097
- xorl %eax ,%eax
1098
- sbbq $-1 ,%r12
1094
+ subq $-1 ,%r12
1099
1095
movq %r14 ,%r10
1100
1096
sbbq %rsi ,%r13
1101
1097
sbbq $0 ,%r14
Original file line number Diff line number Diff line change @@ -117,8 +117,19 @@ L$nocacheinfo:
117
117
orl $0x40000000 ,%edx
118
118
andb $15 ,%ah
119
119
cmpb $15 ,%ah
120
- jne L$notintel
120
+ jne L$notP4
121
121
orl $0x00100000 ,%edx
122
+ L$notP4:
123
+ cmpb $6 ,%ah
124
+ jne L$notintel
125
+ andl $0x0fff0ff0 ,%eax
126
+ cmpl $0x00050670 ,%eax
127
+ je L$knights
128
+ cmpl $0x00080650 ,%eax
129
+ jne L$notintel
130
+ L$knights:
131
+ andl $0xfbffffff ,%ecx
132
+
122
133
L$notintel:
123
134
btl $28 ,%edx
124
135
jnc L$generic
@@ -143,6 +154,10 @@ L$generic:
143
154
movl $7 ,%eax
144
155
xorl %ecx ,%ecx
145
156
cpuid
157
+ btl $26 ,%r9d
158
+ jc L$notknights
159
+ andl $0xfff7ffff ,%ebx
160
+ L$notknights:
146
161
movl %ebx ,8 (%rdi )
147
162
L$no_extended_info:
148
163
Original file line number Diff line number Diff line change @@ -3166,11 +3166,19 @@ DB 067h
3166
3166
3167
3167
ALIGN 32
3168
3168
$ L $ sqrx8x_break::
3169
- sub r8 , QWORD PTR [ (( 16 + 8 )) + rsp ]
3169
+ xor rbp , rbp
3170
+ sub rbx , QWORD PTR [ (( 16 + 8 )) + rsp ]
3171
+ adcx r8 , rbp
3170
3172
mov rcx , QWORD PTR [ (( 24 + 8 )) + rsp ]
3173
+ adcx r9 , rbp
3171
3174
mov rdx , QWORD PTR [ rsi ]
3172
- xor ebp , ebp
3175
+ adc r10 , 0
3173
3176
mov QWORD PTR [ rdi ], r8
3177
+ adc r11 , 0
3178
+ adc r12 , 0
3179
+ adc r13 , 0
3180
+ adc r14 , 0
3181
+ adc r15 , 0
3174
3182
cmp rdi , rcx
3175
3183
je $ L $ sqrx8x_outer_loop
3176
3184
Original file line number Diff line number Diff line change @@ -1137,19 +1137,18 @@ DB 067h
1137
1137
adox r13 , rbp
1138
1138
DB 067h , 067h
1139
1139
mulx rax , rcx , rdx
1140
- mov rdx , r8
1140
+ mov rdx , QWORD PTR [ (( $ L $ poly + 24 )) ]
1141
1141
adox r14 , rcx
1142
1142
shlx rcx , r8 , rsi
1143
1143
adox r15 , rax
1144
1144
shrx rax , r8 , rsi
1145
- mov rbp , QWORD PTR [ (( $ L $ poly + 24 )) ]
1145
+ mov rbp , rdx
1146
1146
1147
1147
1148
1148
add r9 , rcx
1149
1149
adc r10 , rax
1150
1150
1151
- mulx r8 , rcx , rbp
1152
- mov rdx , r9
1151
+ mulx r8 , rcx , r8
1153
1152
adc r11 , rcx
1154
1153
shlx rcx , r9 , rsi
1155
1154
adc r8 , 0
@@ -1159,8 +1158,7 @@ DB 067h,067h
1159
1158
add r10 , rcx
1160
1159
adc r11 , rax
1161
1160
1162
- mulx r9 , rcx , rbp
1163
- mov rdx , r10
1161
+ mulx r9 , rcx , r9
1164
1162
adc r8 , rcx
1165
1163
shlx rcx , r10 , rsi
1166
1164
adc r9 , 0
@@ -1170,8 +1168,7 @@ DB 067h,067h
1170
1168
add r11 , rcx
1171
1169
adc r8 , rax
1172
1170
1173
- mulx r10 , rcx , rbp
1174
- mov rdx , r11
1171
+ mulx r10 , rcx , r10
1175
1172
adc r9 , rcx
1176
1173
shlx rcx , r11 , rsi
1177
1174
adc r10 , 0
@@ -1181,12 +1178,12 @@ DB 067h,067h
1181
1178
add r8 , rcx
1182
1179
adc r9 , rax
1183
1180
1184
- mulx r11 , rcx , rbp
1181
+ mulx r11 , rcx , r11
1185
1182
adc r10 , rcx
1186
1183
adc r11 , 0
1187
1184
1188
1185
xor rdx , rdx
1189
- adc r12 , r8
1186
+ add r12 , r8
1190
1187
mov rsi , QWORD PTR [ (( $ L $ poly + 8 )) ]
1191
1188
adc r13 , r9
1192
1189
mov r8 , r12
@@ -1195,8 +1192,7 @@ DB 067h,067h
1195
1192
mov r9 , r13
1196
1193
adc rdx , 0
1197
1194
1198
- xor eax , eax
1199
- sbb r12 ,- 1
1195
+ sub r12 ,- 1
1200
1196
mov r10 , r14
1201
1197
sbb r13 , rsi
1202
1198
sbb r14 , 0
Original file line number Diff line number Diff line change @@ -127,8 +127,19 @@ $L$nocacheinfo::
127
127
or edx , 040000000h
128
128
and ah , 15
129
129
cmp ah , 15
130
- jne $ L $ notintel
130
+ jne $ L $ notP4
131
131
or edx , 000100000h
132
+ $ L $ notP4::
133
+ cmp ah , 6
134
+ jne $ L $ notintel
135
+ and eax , 00fff0ff0h
136
+ cmp eax , 000050670h
137
+ je $ L $ knights
138
+ cmp eax , 000080650h
139
+ jne $ L $ notintel
140
+ $ L $ knights::
141
+ and ecx , 0fbffffffh
142
+
132
143
$ L $ notintel::
133
144
bt edx , 28
134
145
jnc $ L $ generic
@@ -153,6 +164,10 @@ $L$generic::
153
164
mov eax , 7
154
165
xor ecx , ecx
155
166
cpuid
167
+ bt r9d , 26
168
+ jc $ L $ notknights
169
+ and ebx , 0fff7ffffh
170
+ $ L $ notknights::
156
171
mov DWORD PTR [ 8 + rdi ], ebx
157
172
$ L $ no_extended_info::
158
173
Original file line number Diff line number Diff line change @@ -116,8 +116,19 @@ OPENSSL_ia32_cpuid:
116
116
orl $0x40000000 ,%edx
117
117
andb $15 ,%ah
118
118
cmpb $15 ,%ah
119
- jne .Lnotintel
119
+ jne .LnotP4
120
120
orl $0x00100000 ,%edx
121
+ .LnotP4:
122
+ cmpb $6 ,%ah
123
+ jne .Lnotintel
124
+ andl $0x0fff0ff0 ,%eax
125
+ cmpl $0x00050670 ,%eax
126
+ je .Lknights
127
+ cmpl $0x00080650 ,%eax
128
+ jne .Lnotintel
129
+ .Lknights:
130
+ andl $0xfbffffff ,%ecx
131
+
121
132
.Lnotintel:
122
133
btl $28 ,%edx
123
134
jnc .Lgeneric
@@ -142,6 +153,10 @@ OPENSSL_ia32_cpuid:
142
153
movl $7 ,%eax
143
154
xorl %ecx ,%ecx
144
155
cpuid
156
+ btl $26 ,%r9d
157
+ jc .Lnotknights
158
+ andl $0xfff7ffff ,%ebx
159
+ .Lnotknights:
145
160
movl %ebx ,8 (%rdi )
146
161
.Lno_extended_info:
147
162
You can’t perform that action at this time.
0 commit comments