Skip to content

Commit c11bf59

Browse files
matthiaskrgrgitbot
authored and
gitbot
committed
Rollup merge of rust-lang#132975 - arichardson:ffi-c-char, r=tgross35
De-duplicate and improve definition of core::ffi::c_char Instead of having a list of unsigned char targets for each OS, follow the logic Clang uses and instead set the value based on architecture with a special case for Darwin and Windows operating systems. This makes it easier to support new operating systems targeting Arm/AArch64 without having to modify this config statement for each new OS. The new list does not quite match Clang since I noticed a few bugs in the Clang implementation (llvm/llvm-project#115957). Fixes rust-lang#129945 Closes rust-lang#131319
2 parents 6cddada + cd585e8 commit c11bf59

File tree

1 file changed

+80
-53
lines changed

1 file changed

+80
-53
lines changed

core/src/ffi/mod.rs

+80-53
Original file line numberDiff line numberDiff line change
@@ -91,59 +91,86 @@ pub type c_ssize_t = isize;
9191

9292
mod c_char_definition {
9393
cfg_if! {
94-
// These are the targets on which c_char is unsigned.
95-
if #[cfg(any(
96-
all(
97-
target_os = "linux",
98-
any(
99-
target_arch = "aarch64",
100-
target_arch = "arm",
101-
target_arch = "hexagon",
102-
target_arch = "powerpc",
103-
target_arch = "powerpc64",
104-
target_arch = "s390x",
105-
target_arch = "riscv64",
106-
target_arch = "riscv32",
107-
target_arch = "csky"
108-
)
109-
),
110-
all(target_os = "android", any(target_arch = "aarch64", target_arch = "arm")),
111-
all(target_os = "l4re", target_arch = "x86_64"),
112-
all(
113-
any(target_os = "freebsd", target_os = "openbsd", target_os = "rtems"),
114-
any(
115-
target_arch = "aarch64",
116-
target_arch = "arm",
117-
target_arch = "powerpc",
118-
target_arch = "powerpc64",
119-
target_arch = "riscv64"
120-
)
121-
),
122-
all(
123-
target_os = "netbsd",
124-
any(
125-
target_arch = "aarch64",
126-
target_arch = "arm",
127-
target_arch = "powerpc",
128-
target_arch = "riscv64"
129-
)
130-
),
131-
all(
132-
target_os = "vxworks",
133-
any(
134-
target_arch = "aarch64",
135-
target_arch = "arm",
136-
target_arch = "powerpc64",
137-
target_arch = "powerpc"
138-
)
139-
),
140-
all(
141-
target_os = "fuchsia",
142-
any(target_arch = "aarch64", target_arch = "riscv64")
143-
),
144-
all(target_os = "nto", target_arch = "aarch64"),
145-
target_os = "horizon",
146-
target_os = "aix",
94+
// These are the targets on which c_char is unsigned. Usually the
95+
// signedness is the same for all target_os values on a given architecture
96+
// but there are some exceptions (see isSignedCharDefault() in clang).
97+
//
98+
// aarch64:
99+
// Section 10 "Arm C and C++ language mappings" in Procedure Call Standard for the Arm®
100+
// 64-bit Architecture (AArch64) says C/C++ char is unsigned byte.
101+
// https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs64/aapcs64.rst#arm-c-and-c-language-mappings
102+
// arm:
103+
// Section 8 "Arm C and C++ Language Mappings" in Procedure Call Standard for the Arm®
104+
// Architecture says C/C++ char is unsigned byte.
105+
// https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs32/aapcs32.rst#arm-c-and-c-language-mappings
106+
// csky:
107+
// Section 2.1.2 "Primary Data Type" in C-SKY V2 CPU Applications Binary Interface
108+
// Standards Manual says ANSI C char is unsigned byte.
109+
// https://github.com/c-sky/csky-doc/blob/9f7121f7d40970ba5cc0f15716da033db2bb9d07/C-SKY_V2_CPU_Applications_Binary_Interface_Standards_Manual.pdf
110+
// Note: this doesn't seem to match Clang's default (https://github.com/rust-lang/rust/issues/129945).
111+
// hexagon:
112+
// Section 3.1 "Basic data type" in Qualcomm Hexagon™ Application
113+
// Binary Interface User Guide says "By default, the `char` data type is unsigned."
114+
// https://docs.qualcomm.com/bundle/publicresource/80-N2040-23_REV_K_Qualcomm_Hexagon_Application_Binary_Interface_User_Guide.pdf
115+
// msp430:
116+
// Section 2.1 "Basic Types" in MSP430 Embedded Application Binary
117+
// Interface says "The char type is unsigned by default".
118+
// https://www.ti.com/lit/an/slaa534a/slaa534a.pdf
119+
// Note: this doesn't seem to match Clang's default (https://github.com/rust-lang/rust/issues/129945).
120+
// powerpc/powerpc64:
121+
// - PPC32 SysV: "Table 3-1 Scalar Types" in System V Application Binary Interface PowerPC
122+
// Processor Supplement says ANSI C char is unsigned byte
123+
// https://refspecs.linuxfoundation.org/elf/elfspec_ppc.pdf
124+
// - PPC64 ELFv1: Section 3.1.4 "Fundamental Types" in 64-bit PowerPC ELF Application
125+
// Binary Interface Supplement 1.9 says ANSI C char is unsigned byte
126+
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUND-TYPE
127+
// - PPC64 ELFv2: Section 2.1.2.2 "Fundamental Types" in 64-Bit ELF V2 ABI Specification
128+
// says char is unsigned byte
129+
// https://openpowerfoundation.org/specifications/64bitelfabi/
130+
// - AIX: XL C for AIX Language Reference says "By default, char behaves like an unsigned char."
131+
// https://www.ibm.com/docs/en/xl-c-aix/13.1.3?topic=specifiers-character-types
132+
// riscv32/riscv64:
133+
// C/C++ type representations section in RISC-V Calling Conventions
134+
// page in RISC-V ELF psABI Document says "char is unsigned."
135+
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-cc.adoc#cc-type-representations
136+
// s390x:
137+
// - ELF: "Table 1.1.: Scalar types" in ELF Application Binary Interface s390x Supplement
138+
// Version 1.6.1 categorizes ISO C char as an unsigned integer
139+
// https://github.com/IBM/s390x-abi/releases/tag/v1.6.1
140+
// - z/OS: XL C/C++ Language Reference says: "By default, char behaves like an unsigned char."
141+
// https://www.ibm.com/docs/en/zos/3.1.0?topic=specifiers-character-types
142+
// Xtensa:
143+
// - "The char type is unsigned by default for Xtensa processors."
144+
//
145+
// On the following operating systems, c_char is signed by default, regardless of architecture.
146+
// Darwin (macOS, iOS, etc.):
147+
// Apple targets' c_char is signed by default even on arm
148+
// https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Handle-data-types-and-data-alignment-properly
149+
// Windows:
150+
// Windows MSVC C++ Language Reference says "Microsoft-specific: Variables of type char
151+
// are promoted to int as if from type signed char by default, unless the /J compilation
152+
// option is used."
153+
// https://learn.microsoft.com/en-us/cpp/cpp/fundamental-types-cpp?view=msvc-170#character-types
154+
// L4RE:
155+
// The kernel builds with -funsigned-char on all targets (but userspace follows the
156+
// architecture defaults). As we only have a target for userspace apps, there are no
157+
// special cases for L4RE below.
158+
if #[cfg(all(
159+
not(windows),
160+
not(target_vendor = "apple"),
161+
any(
162+
target_arch = "aarch64",
163+
target_arch = "arm",
164+
target_arch = "csky",
165+
target_arch = "hexagon",
166+
target_arch = "msp430",
167+
target_arch = "powerpc",
168+
target_arch = "powerpc64",
169+
target_arch = "riscv64",
170+
target_arch = "riscv32",
171+
target_arch = "s390x",
172+
target_arch = "xtensa",
173+
)
147174
))] {
148175
pub type c_char = u8;
149176
} else {

0 commit comments

Comments
 (0)