Skip to content

Commit b5fcb59

Browse files
committed
Implement more bf16/fp16 compiler runtimes
Fixes #1259
1 parent 6b10f4d commit b5fcb59

File tree

11 files changed

+209
-178
lines changed

11 files changed

+209
-178
lines changed

libc/integral/c.inc

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,26 @@ typedef __UINT64_TYPE__ uint64_t;
6565
typedef __INTMAX_TYPE__ intmax_t;
6666
typedef __UINTMAX_TYPE__ uintmax_t;
6767

68+
/* TODO(jart): re-import compiler-rt once they have it */
69+
/*
 * On x86-64, when the compiler predefines __FLT128_MAX_10_EXP__, remove
 * the predefined __FLT128_* characteristic macros listed below.
 * NOTE(review): presumably this keeps code that probes these macros from
 * assuming binary128 support is available here; confirm the intent.
 */
#if defined(__x86_64__) && defined(__FLT128_MAX_10_EXP__)
70+
#undef __FLT128_MAX_10_EXP__
71+
#undef __FLT128_DENORM_MIN__
72+
#undef __FLT128_MIN_EXP__
73+
#undef __FLT128_MIN_10_EXP__
74+
#undef __FLT128_MANT_DIG__
75+
#undef __FLT128_HAS_INFINITY__
76+
#undef __FLT128_EPSILON__
77+
#undef __FLT128_MAX_EXP__
78+
#undef __FLT128_HAS_DENORM__
79+
#undef __FLT128_DIG__
80+
#undef __FLT128_MIN__
81+
#undef __FLT128_MAX__
82+
#undef __FLT128_NORM_MAX__
83+
#undef __FLT128_HAS_QUIET_NAN__
84+
#undef __FLT128_IS_IEC_60559__
85+
#undef __FLT128_DECIMAL_DIG__
86+
#endif
87+
6888
#define __DEFINED_max_align_t
6989
typedef long double max_align_t;
7090

libc/intrin/truncsfbf2.c renamed to libc/intrin/brain16.c

Lines changed: 62 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,53 @@
1717
│ PERFORMANCE OF THIS SOFTWARE. │
1818
╚─────────────────────────────────────────────────────────────────────────────*/
1919

20-
__bf16 __truncsfbf2(float f) {
20+
/**
21+
* @fileoverview bf16 compiler runtime
22+
*/
23+
24+
/**
 * Converts a bf16 value to _Float32.
 *
 * The operand's 16 bits become the upper 16 bits of the 32-bit result
 * and the lower 16 bits are zero. If the resulting pattern is a NaN
 * (magnitude bits above 0x7f800000), bit 22 is set so the returned NaN
 * is quiet.
 *
 * @param f is the bf16 operand
 * @return the operand reinterpreted as _Float32
 */
_Float32 __extendbfsf2(__bf16 f) {
25+
union {
26+
__bf16 f;
27+
uint16_t i;
28+
} ub = {f};
29+
30+
// convert brain16 to binary32
31+
uint32_t x = (uint32_t)ub.i << 16;
32+
33+
// force nan to quiet
34+
if ((x & 0x7fffffff) > 0x7f800000)
35+
x |= 0x00400000;
36+
37+
// pun to _Float32
38+
union {
39+
uint32_t i;
40+
_Float32 f;
41+
} uf = {x};
42+
return uf.f;
43+
}
44+
45+
/**
 * Converts bf16 to _Float64 by calling __extendbfsf2() and letting the
 * return statement widen its _Float32 result.
 */
_Float64 __extendbfdf2(__bf16 f) {
46+
return __extendbfsf2(f);
47+
}
48+
49+
#ifdef __x86_64__
50+
/**
 * Converts bf16 to __float80 (only built for x86-64) by widening the
 * _Float32 result of __extendbfsf2().
 */
__float80 __extendbfxf2(__bf16 f) {
51+
return __extendbfsf2(f);
52+
}
53+
#endif
54+
55+
#ifdef __aarch64__
56+
/**
 * Converts bf16 to _Float128 (only built for aarch64) by widening the
 * _Float32 result of __extendbfsf2().
 */
_Float128 __extendbftf2(__bf16 f) {
57+
return __extendbfsf2(f);
58+
}
59+
#endif
60+
61+
__bf16 __truncsfbf2(_Float32 f) {
2162
union {
22-
float f;
23-
unsigned i;
63+
_Float32 f;
64+
uint32_t i;
2465
} uf = {f};
25-
unsigned x = uf.i;
66+
uint32_t x = uf.i;
2667

2768
if ((x & 0x7fffffff) > 0x7f800000)
2869
// force nan to quiet
@@ -33,8 +74,24 @@ __bf16 __truncsfbf2(float f) {
3374

3475
// pun to bf16
3576
union {
36-
unsigned short i;
77+
uint16_t i;
3778
__bf16 f;
3879
} ub = {x};
3980
return ub.f;
4081
}
82+
83+
/**
 * Converts _Float64 to bf16 by passing the operand to __truncsfbf2(),
 * which takes a _Float32 parameter.
 *
 * NOTE(review): the value is narrowed twice (first to _Float32 at the
 * call, then to bf16), so it may be rounded twice and can differ from a
 * single direct conversion; confirm this is acceptable.
 */
__bf16 __truncdfbf2(_Float64 f) {
84+
return __truncsfbf2(f);
85+
}
86+
87+
#ifdef __x86_64__
88+
/**
 * Converts __float80 to bf16 (only built for x86-64) by passing the
 * operand to __truncsfbf2(), which takes a _Float32 parameter.
 *
 * NOTE(review): narrowing happens in two steps, so the value may be
 * rounded twice; confirm this is acceptable.
 */
__bf16 __truncxfbf2(__float80 f) {
89+
return __truncsfbf2(f);
90+
}
91+
#endif
92+
93+
#ifdef __aarch64__
94+
/**
 * Converts _Float128 to bf16 (only built for aarch64) by passing the
 * operand to __truncsfbf2(), which takes a _Float32 parameter.
 *
 * NOTE(review): narrowing happens in two steps, so the value may be
 * rounded twice; confirm this is acceptable.
 */
__bf16 __trunctfbf2(_Float128 f) {
95+
return __truncsfbf2(f);
96+
}
97+
#endif

libc/intrin/extendbfsf2.c

Lines changed: 0 additions & 39 deletions
This file was deleted.

third_party/compiler_rt/extendsftf2.c renamed to libc/intrin/extendsftf2.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88
//===----------------------------------------------------------------------===//
99
//
1010

11-
__static_yoink("huge_compiler_rt_license");
12-
1311
#define QUAD_PRECISION
1412
#include "third_party/compiler_rt/fp_lib.inc"
1513

@@ -19,7 +17,7 @@ __static_yoink("huge_compiler_rt_license");
1917
#include "third_party/compiler_rt/fp_extend_impl.inc"
2018

2119
// Converts a float to long double by delegating to __extendXfYf2__().
// NOTE(review): __extendXfYf2__ presumably comes from the
// fp_extend_impl.inc include above; confirm.
COMPILER_RT_ABI long double __extendsftf2(float a) {
22-
return __extendXfYf2__(a);
20+
return __extendXfYf2__(a);
2321
}
2422

2523
#endif

libc/intrin/float16.c

Lines changed: 125 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,22 +21,135 @@
2121
* @fileoverview fp16 compiler runtime
2222
*/
2323

24-
#define asint(x) ((union pun){x}).i
25-
#define isnan(x) (((x) & 0x7fff) > 0x7c00)
24+
// Nonzero when the low 15 bits of x (sign bit masked off) exceed 0x7c00,
// i.e. when a 16-bit float bit pattern has an all-ones exponent field
// and a nonzero mantissa. Expects the raw bits as an integer.
#define isnan16(x) (((x) & 0x7fff) > 0x7c00)
2625

27-
union pun {
28-
_Float16 f;
29-
unsigned short i;
30-
};
26+
/**
 * Reinterprets an integer bit pattern as a _Float16.
 *
 * The argument initializes the uint16_t member of the union, so only
 * its low 16 bits are used.
 */
static inline _Float16 tofloat16(int x) {
27+
union {
28+
uint16_t i;
29+
_Float16 f;
30+
} u = {x};
31+
return u.f;
32+
}
33+
34+
/**
 * Returns the bit pattern of a _Float16 as an int.
 *
 * The bits are read through a uint16_t, so the result is in 0..0xffff.
 */
static inline int fromfloat16(_Float16 x) {
35+
union {
36+
_Float16 f;
37+
uint16_t i;
38+
} u = {x};
39+
return u.i;
40+
}
41+
42+
/**
 * Reinterprets a 32-bit pattern as a _Float32 via a union.
 */
static inline _Float32 tofloat32(uint32_t w) {
43+
union {
44+
uint32_t as_bits;
45+
_Float32 as_value;
46+
} fp32;
47+
fp32.as_bits = w;
48+
return fp32.as_value;
49+
}
50+
51+
/**
 * Returns the 32-bit pattern of a _Float32 via a union.
 */
static inline uint32_t fromfloat32(_Float32 f) {
52+
union {
53+
_Float32 as_value;
54+
uint32_t as_bits;
55+
} fp32;
56+
fp32.as_value = f;
57+
return fp32.as_bits;
58+
}
59+
60+
/**
 * Returns x with its top (sign) bit cleared, computed on the bit
 * pattern rather than with floating-point arithmetic.
 */
static inline _Float32 fabs32(_Float32 x) {
61+
return tofloat32(fromfloat32(x) & 0x7fffffffu);
62+
}
3163

3264
/**
 * Compares two _Float16 operands by bit pattern.
 *
 * @return 1 when both operands have identical bits and neither is a
 *     NaN, otherwise 0
 *
 * NOTE(review): because raw bits are compared, +0.0 (0x0000) and -0.0
 * (0x8000) are reported as different; confirm this is intended.
 * NOTE(review): GCC's soft-float routine docs describe __eqXf2 as
 * returning zero when the operands are equal, which is the opposite of
 * what is returned here; confirm which convention callers expect.
 */
int __eqhf2(_Float16 fx, _Float16 fy) {
33-
int x = asint(fx);
34-
int y = asint(fy);
35-
return (x == y) & !isnan(x) & !isnan(y);
65+
int x = fromfloat16(fx);
66+
int y = fromfloat16(fy);
67+
return (x == y) & !isnan16(x) & !isnan16(y);
3668
}
3769

3870
/**
 * Compares two _Float16 operands by bit pattern.
 *
 * @return 1 when the operands' bits differ and neither is a NaN,
 *     otherwise 0
 *
 * NOTE(review): when either operand is a NaN this returns 0, although a
 * NaN compares unequal to everything under IEEE 754; confirm this is
 * the intended result for unordered operands.
 * NOTE(review): because raw bits are compared, +0.0 and -0.0 are
 * reported as different; confirm this is intended.
 */
int __nehf2(_Float16 fx, _Float16 fy) {
39-
int x = asint(fx);
40-
int y = asint(fy);
41-
return (x != y) & !isnan(x) & !isnan(y);
71+
int x = fromfloat16(fx);
72+
int y = fromfloat16(fy);
73+
return (x != y) & !isnan16(x) & !isnan16(y);
74+
}
75+
76+
/**
 * Converts a _Float16 to _Float32 using integer bit manipulation plus a
 * small amount of float arithmetic.
 *
 * The 16 input bits are placed in the upper half of a 32-bit word. Two
 * candidate results are then computed, `normalized_value` and
 * `denormalized_value`. The second is chosen when the sign-stripped
 * word (`two_w`) is below `denormalized_cutoff`, i.e. when the input's
 * exponent field is zero. The sign bit is OR'ed back in at the end.
 *
 * The #else branch spells the same scale constant as a bit pattern:
 * 0x7800000u is the binary32 encoding of 2^-112.
 *
 * @param f is the _Float16 operand
 * @return the operand converted to _Float32
 */
_Float32 __extendhfsf2(_Float16 f) {
77+
uint16_t h = fromfloat16(f);
78+
const uint32_t w = (uint32_t)h << 16;
79+
const uint32_t sign = w & 0x80000000u;
80+
// doubling shifts the sign bit out of the 32-bit word
const uint32_t two_w = w + w;
81+
const uint32_t exp_offset = 0xE0u << 23;
82+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
83+
defined(__GNUC__) && !defined(__STRICT_ANSI__)
84+
const _Float32 exp_scale = 0x1.0p-112f;
85+
#else
86+
const _Float32 exp_scale = tofloat32(0x7800000u);
87+
#endif
88+
const _Float32 normalized_value =
89+
tofloat32((two_w >> 4) + exp_offset) * exp_scale;
90+
const uint32_t magic_mask = 126u << 23;
91+
const _Float32 magic_bias = 0.5f;
92+
const _Float32 denormalized_value =
93+
tofloat32((two_w >> 17) | magic_mask) - magic_bias;
94+
const uint32_t denormalized_cutoff = 1u << 27;
95+
const uint32_t result =
96+
sign | (two_w < denormalized_cutoff ? fromfloat32(denormalized_value)
97+
: fromfloat32(normalized_value));
98+
return tofloat32(result);
99+
}
100+
101+
/**
 * Converts _Float16 to _Float64 by calling __extendhfsf2() and letting
 * the return statement widen its _Float32 result.
 */
_Float64 __extendhfdf2(_Float16 f) {
102+
return __extendhfsf2(f);
103+
}
104+
105+
#ifdef __x86_64__
106+
/**
 * Converts _Float16 to __float80 (only built for x86-64) by widening
 * the _Float32 result of __extendhfsf2().
 */
__float80 __extendhfxf2(_Float16 f) {
107+
return __extendhfsf2(f);
108+
}
109+
#endif
110+
111+
#ifdef __aarch64__
112+
/**
 * Converts _Float16 to _Float128 (only built for aarch64) by widening
 * the _Float32 result of __extendhfsf2().
 */
_Float128 __extendhftf2(_Float16 f) {
113+
return __extendhfsf2(f);
114+
}
115+
#endif
116+
117+
/**
 * Converts a _Float32 to _Float16.
 *
 * The magnitude is scaled by 2^112 and then by 2^-110, and a bias value
 * derived from the input's exponent field (clamped below at 0x71000000)
 * is added. The 16-bit exponent and mantissa fields are then extracted
 * from the bits of that sum. If the input is a NaN (doubled bit pattern
 * above 0xFF000000), the non-sign bits of the result are 0x7E00
 * instead. The input's sign bit is copied to bit 15 of the result.
 *
 * The #else branch spells the same scale constants as bit patterns:
 * 0x77800000u encodes 2^112 and 0x08800000u encodes 2^-110 in binary32.
 *
 * @param f is the _Float32 operand
 * @return the operand converted to _Float16
 */
_Float16 __truncsfhf2(_Float32 f) {
118+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
119+
defined(__GNUC__) && !defined(__STRICT_ANSI__)
120+
const _Float32 scale_to_inf = 0x1.0p+112f;
121+
const _Float32 scale_to_zero = 0x1.0p-110f;
122+
#else
123+
const _Float32 scale_to_inf = tofloat32(0x77800000u);
124+
const _Float32 scale_to_zero = tofloat32(0x08800000u);
125+
#endif
126+
_Float32 base = (fabs32(f) * scale_to_inf) * scale_to_zero;
127+
const uint32_t w = fromfloat32(f);
128+
// doubling shifts the sign bit out of the 32-bit word
const uint32_t shl1_w = w + w;
129+
const uint32_t sign = w & 0x80000000u;
130+
uint32_t bias = shl1_w & 0xFF000000u;
131+
if (bias < 0x71000000u)
132+
bias = 0x71000000u;
133+
base = tofloat32((bias >> 1) + 0x07800000u) + base;
134+
const uint32_t bits = fromfloat32(base);
135+
const uint32_t exp_bits = (bits >> 13) & 0x00007C00u;
136+
const uint32_t mantissa_bits = bits & 0x00000FFFu;
137+
const uint32_t nonsign = exp_bits + mantissa_bits;
138+
// a NaN input yields 0x7E00 in the non-sign bits
return tofloat16((sign >> 16) | (shl1_w > 0xFF000000u ? 0x7E00u : nonsign));
139+
}
140+
141+
/**
 * Converts _Float64 to _Float16 by passing the operand to
 * __truncsfhf2(), which takes a _Float32 parameter.
 *
 * NOTE(review): the value is narrowed twice (first to _Float32 at the
 * call, then to _Float16), so it may be rounded twice and can differ
 * from a single direct conversion; confirm this is acceptable.
 */
_Float16 __truncdfhf2(_Float64 f) {
142+
return __truncsfhf2(f);
143+
}
144+
145+
#ifdef __x86_64__
146+
/**
 * Converts __float80 to _Float16 (only built for x86-64) by passing the
 * operand to __truncsfhf2(), which takes a _Float32 parameter.
 *
 * NOTE(review): narrowing happens in two steps, so the value may be
 * rounded twice; confirm this is acceptable.
 */
_Float16 __truncxfhf2(__float80 f) {
147+
return __truncsfhf2(f);
148+
}
149+
#endif
150+
151+
#ifdef __aarch64__
152+
/**
 * Converts _Float128 to _Float16 (only built for aarch64) by passing
 * the operand to __truncsfhf2(), which takes a _Float32 parameter.
 *
 * NOTE(review): narrowing happens in two steps, so the value may be
 * rounded twice; confirm this is acceptable.
 */
_Float16 __trunctfhf2(_Float128 f) {
153+
return __truncsfhf2(f);
42154
}
155+
#endif

libc/intrin/truncdfbf2.c

Lines changed: 0 additions & 24 deletions
This file was deleted.

third_party/compiler_rt/trunctfsf2.c renamed to libc/intrin/trunctfsf2.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
//
88
//===----------------------------------------------------------------------===//
99

10-
__static_yoink("huge_compiler_rt_license");
11-
1210
#define QUAD_PRECISION
1311
#include "third_party/compiler_rt/fp_lib.inc"
1412

@@ -18,7 +16,7 @@ __static_yoink("huge_compiler_rt_license");
1816
#include "third_party/compiler_rt/fp_trunc_impl.inc"
1917

2018
// Converts a long double to float by delegating to __truncXfYf2__().
// NOTE(review): __truncXfYf2__ presumably comes from the
// fp_trunc_impl.inc include above; confirm.
COMPILER_RT_ABI float __trunctfsf2(long double a) {
21-
return __truncXfYf2__(a);
19+
return __truncXfYf2__(a);
2220
}
2321

2422
#endif

third_party/compiler_rt/extendhfdf2.c

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)