1
1
## conversions to floating-point ##
2
-
3
- convert (:: Type{Float32} , x:: Int128 ) = float32 (reinterpret (UInt128,abs (x)))* (1 - 2 (x< 0 ))
4
- convert (:: Type{Float32} , x:: UInt128 ) = float32 (uint64 (x& 0xffffffffffffffff )) + ldexp (float32 (uint64 (x>>> 64 )),64 )
5
- promote_rule (:: Type{Float32} , :: Type{Int128} ) = Float32
6
- promote_rule (:: Type{Float32} , :: Type{UInt128} ) = Float32
7
-
8
- convert (:: Type{Float64} , x:: Int128 ) = float64 (reinterpret (UInt128,abs (x)))* (1 - 2 (x< 0 ))
9
- convert (:: Type{Float64} , x:: UInt128 ) = float64 (uint64 (x& 0xffffffffffffffff )) + ldexp (float64 (uint64 (x>>> 64 )),64 )
10
- promote_rule (:: Type{Float64} , :: Type{Int128} ) = Float64
11
- promote_rule (:: Type{Float64} , :: Type{UInt128} ) = Float64
12
-
13
2
convert (:: Type{Float16} , x:: Integer ) = convert (Float16, convert (Float32,x))
14
3
for t in (Bool,Char,Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64)
15
4
@eval promote_rule (:: Type{Float16} , :: Type{$t} ) = Float32
16
5
end
17
6
18
7
for t1 in (Float32,Float64)
19
- for st in (Int8,Int16,Int32,Int64)
8
+ for st in (Int8,Int16,Int32,Int64,Int128 )
20
9
@eval begin
21
10
convert (:: Type{$t1} ,x:: ($st) ) = box ($ t1,sitofp ($ t1,unbox ($ st,x)))
22
11
promote_rule (:: Type{$t1} , :: Type{$st} ) = $ t1
23
12
end
24
13
end
25
- for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64)
14
+ for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64,UInt128 )
26
15
@eval begin
27
16
convert (:: Type{$t1} ,x:: ($ut) ) = box ($ t1,uitofp ($ t1,unbox ($ ut,x)))
28
17
promote_rule (:: Type{$t1} , :: Type{$ut} ) = $ t1
@@ -55,55 +44,39 @@ float32(x) = convert(Float32, x)
55
44
float64 (x) = convert (Float64, x)
56
45
float (x) = convert (FloatingPoint, x)
57
46
58
- ## conversions from floating-point ##
59
-
60
- # fallbacks using only convert, trunc, ceil, floor, round
61
- itrunc (x:: FloatingPoint ) = convert (Integer,trunc (x))
62
- iceil (x:: FloatingPoint ) = convert (Integer,ceil (x)) # TODO: fast primitive for iceil
63
- ifloor (x:: FloatingPoint ) = convert (Integer,floor (x)) # TODO: fast primitive for ifloor
64
- iround (x:: FloatingPoint ) = convert (Integer,round (x))
65
-
66
- itrunc {T<:Integer} (:: Type{T} , x:: FloatingPoint ) = convert (T,trunc (x))
67
- iceil {T<: Integer }(:: Type{T} , x:: FloatingPoint ) = convert (T,ceil (x))
68
- ifloor {T<:Integer} (:: Type{T} , x:: FloatingPoint ) = convert (T,floor (x))
69
- iround {T<:Integer} (:: Type{T} , x:: FloatingPoint ) = convert (T,round (x))
70
-
71
- ## fast specific type conversions ##
72
-
73
- iround (x:: Float32 ) = iround (Int, x)
74
- iround (x:: Float64 ) = iround (Int, x)
75
- itrunc (x:: Float32 ) = itrunc (Int, x)
76
- itrunc (x:: Float64 ) = itrunc (Int, x)
77
-
78
- for to in (Int8, Int16, Int32, Int64)
47
+ for Ti in (Int8, Int16, Int32, Int64, Int128)
79
48
@eval begin
80
- iround (:: Type{$to} , x:: Float32 ) = box ($ to,fpsiround ($ to,unbox (Float32,x)))
81
- iround (:: Type{$to} , x:: Float64 ) = box ($ to,fpsiround ($ to,unbox (Float64,x)))
82
- itrunc (:: Type{$to} , x:: Float32 ) = box ($ to,fptosi ($ to,unbox (Float32,x)))
83
- itrunc (:: Type{$to} , x:: Float64 ) = box ($ to,fptosi ($ to,unbox (Float64,x)))
49
+ unsafe_trunc (:: Type{$Ti} , x:: Float32 ) = box ($ Ti,fptosi ($ Ti,unbox (Float32,x)))
50
+ unsafe_trunc (:: Type{$Ti} , x:: Float64 ) = box ($ Ti,fptosi ($ Ti,unbox (Float64,x)))
84
51
end
85
52
end
86
-
87
- for to in (UInt8, UInt16, UInt32, UInt64)
53
+ for Ti in (UInt8, UInt16, UInt32, UInt64, UInt128)
88
54
@eval begin
89
- iround (:: Type{$to} , x:: Float32 ) = box ($ to,fpuiround ($ to,unbox (Float32,x)))
90
- iround (:: Type{$to} , x:: Float64 ) = box ($ to,fpuiround ($ to,unbox (Float64,x)))
91
- itrunc (:: Type{$to} , x:: Float32 ) = box ($ to,fptoui ($ to,unbox (Float32,x)))
92
- itrunc (:: Type{$to} , x:: Float64 ) = box ($ to,fptoui ($ to,unbox (Float64,x)))
55
+ unsafe_trunc (:: Type{$Ti} , x:: Float32 ) = box ($ Ti,fptoui ($ Ti,unbox (Float32,x)))
56
+ unsafe_trunc (:: Type{$Ti} , x:: Float64 ) = box ($ Ti,fptoui ($ Ti,unbox (Float64,x)))
93
57
end
94
58
end
95
59
96
- iround (:: Type{Int128} , x:: Float32 ) = convert (Int128,round (x))
97
- iround (:: Type{Int128} , x:: Float64 ) = convert (Int128,round (x))
98
- iround (:: Type{UInt128} , x:: Float32 ) = convert (UInt128,round (x))
99
- iround (:: Type{UInt128} , x:: Float64 ) = convert (UInt128,round (x))
60
+ # matches convert methods
61
+ # also determines floor, ceil, round: their Integer-typed fallbacks below are defined via trunc(T, ...)
62
+ trunc (:: Type{Signed} , x:: Float32 ) = trunc (Int,x)
63
+ trunc (:: Type{Signed} , x:: Float64 ) = trunc (Int,x)
64
+ trunc (:: Type{Unsigned} , x:: Float32 ) = trunc (UInt,x)
65
+ trunc (:: Type{Unsigned} , x:: Float64 ) = trunc (UInt,x)
66
+ trunc (:: Type{Integer} , x:: Float32 ) = trunc (Int,x)
67
+ trunc (:: Type{Integer} , x:: Float64 ) = trunc (Int,x)
68
+
69
+ # fallbacks
70
+ floor {T<:Integer} (:: Type{T} , x:: FloatingPoint ) = trunc (T,floor (x))
71
+ ceil {T<: Integer }(:: Type{T} , x:: FloatingPoint ) = trunc (T,ceil (x))
72
+ round {T<: Integer }(:: Type{T} , x:: FloatingPoint ) = trunc (T,round (x))
73
+
100
74
101
75
# this is needed very early because it is used by Range and colon
102
76
round (x:: Float64 ) = ccall ((:round , Base. libm_name), Float64, (Float64,), x)
103
77
floor (x:: Float64 ) = ccall ((:floor , Base. libm_name), Float64, (Float64,), x)
104
78
105
79
## floating point promotions ##
106
-
107
80
promote_rule (:: Type{Float32} , :: Type{Float16} ) = Float32
108
81
promote_rule (:: Type{Float64} , :: Type{Float16} ) = Float64
109
82
promote_rule (:: Type{Float64} , :: Type{Float32} ) = Float64
@@ -112,7 +85,6 @@ widen(::Type{Float16}) = Float32
112
85
widen (:: Type{Float32} ) = Float64
113
86
114
87
## floating point arithmetic ##
115
-
116
88
- (x:: Float32 ) = box (Float32,neg_float (unbox (Float32,x)))
117
89
- (x:: Float64 ) = box (Float64,neg_float (unbox (Float64,x)))
118
90
@@ -128,7 +100,6 @@ widen(::Type{Float32}) = Float64
128
100
# TODO: faster floating point div?
129
101
# TODO: faster floating point fld?
130
102
# TODO: faster floating point mod?
131
-
132
103
rem (x:: Float32 , y:: Float32 ) = box (Float32,rem_float (unbox (Float32,x),unbox (Float32,y)))
133
104
rem (x:: Float64 , y:: Float64 ) = box (Float64,rem_float (unbox (Float64,x),unbox (Float64,y)))
134
105
147
118
148
119
149
120
## floating point comparisons ##
150
-
151
121
== (x:: Float32 , y:: Float32 ) = eq_float (unbox (Float32,x),unbox (Float32,y))
152
122
== (x:: Float64 , y:: Float64 ) = eq_float (unbox (Float64,x),unbox (Float64,y))
153
123
!= (x:: Float32 , y:: Float32 ) = ne_float (unbox (Float32,x),unbox (Float32,y))
@@ -177,31 +147,31 @@ function cmp(x::FloatingPoint, y::Real)
177
147
ifelse (x< y, - 1 , ifelse (x> y, 1 , 0 ))
178
148
end
179
149
180
- for Ti in (Int64,UInt64)
150
+ for Ti in (Int64,UInt64,Int128,UInt128 )
181
151
for Tf in (Float32,Float64)
182
152
@eval begin
183
153
function == (x:: $Tf , y:: $Ti )
184
154
fy = ($ Tf)(y)
185
- (x == fy) & (y == itrunc ($ Ti,fy))
155
+ (x == fy) & (y == unsafe_trunc ($ Ti,fy))
186
156
end
187
157
== (y:: $Ti , x:: $Tf ) = x== y
188
158
189
159
function < (x:: $Ti , y:: $Tf )
190
160
fx = ($ Tf)(x)
191
- (fx < y) | ((fx == y) & ((fx == $ (Tf (typemax (Ti)))) | (x < itrunc ($ Ti,fx)) ))
161
+ (fx < y) | ((fx == y) & ((fx == $ (Tf (typemax (Ti)))) | (x < unsafe_trunc ($ Ti,fx)) ))
192
162
end
193
163
function <= (x:: $Ti , y:: $Tf )
194
164
fx = ($ Tf)(x)
195
- (fx < y) | ((fx == y) & ((fx == $ (Tf (typemax (Ti)))) | (x <= itrunc ($ Ti,fx)) ))
165
+ (fx < y) | ((fx == y) & ((fx == $ (Tf (typemax (Ti)))) | (x <= unsafe_trunc ($ Ti,fx)) ))
196
166
end
197
167
198
168
function < (x:: $Tf , y:: $Ti )
199
169
fy = ($ Tf)(y)
200
- (x < fy) | ((x == fy) & (fy < $ (Tf (typemax (Ti)))) & (itrunc ($ Ti,fy) < y))
170
+ (x < fy) | ((x == fy) & (fy < $ (Tf (typemax (Ti)))) & (unsafe_trunc ($ Ti,fy) < y))
201
171
end
202
172
function <= (x:: $Tf , y:: $Ti )
203
173
fy = ($ Tf)(y)
204
- (x < fy) | ((x == fy) & (fy < $ (Tf (typemax (Ti)))) & (itrunc ($ Ti,fy) <= y))
174
+ (x < fy) | ((x == fy) & (fy < $ (Tf (typemax (Ti)))) & (unsafe_trunc ($ Ti,fy) <= y))
205
175
end
206
176
end
207
177
end
@@ -263,6 +233,31 @@ nextfloat(x::Float64, i::Integer) =
263
233
nextfloat (x:: FloatingPoint ) = nextfloat (x,1 )
264
234
prevfloat (x:: FloatingPoint ) = nextfloat (x,- 1 )
265
235
236
+ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
237
+ for Tf in (Float32, Float64)
238
+ if sizeof (Ti) < sizeof (Tf) || Ti <: Unsigned # Tf(typemin(Ti))-1 is exact
239
+ @eval function trunc (:: Type{$Ti} ,x:: $Tf )
240
+ $ (Tf (typemin (Ti))- one (Tf)) < x < $ (Tf (typemax (Ti))+ one (Tf)) || throw (InexactError ())
241
+ unsafe_trunc ($ Ti,x)
242
+ end
243
+ else
244
+ @eval function trunc (:: Type{$Ti} ,x:: $Tf )
245
+ $ (Tf (typemin (Ti))) <= x < $ (Tf (typemax (Ti))) || throw (InexactError ())
246
+ unsafe_trunc ($ Ti,x)
247
+ end
248
+ end
249
+ end
250
+ end
251
+
252
+ # adding prevfloat(0.5) rather than 0.5 keeps two inputs from being rounded in the
253
+ # wrong direction by the RoundToNearest addition: x == prevfloat(0.5), and odd x with eps(x) == 1.0
254
+ for Tf in (Float32,Float64)
255
+ @eval function round {T<:Integer} (:: Type{T} , x:: $Tf )
256
+ trunc (T,x+ copysign ($ (prevfloat (Tf (0.5 ))),x))
257
+ end
258
+ end
259
+
260
+
266
261
@eval begin
267
262
issubnormal (x:: Float32 ) = (abs (x) < $ (box (Float32,unbox (UInt32,0x00800000 )))) & (x!= 0 )
268
263
issubnormal (x:: Float64 ) = (abs (x) < $ (box (Float64,unbox (UInt64,0x0010000000000000 )))) & (x!= 0 )
0 commit comments