std_detect/detect/os/linux/
aarch64.rs

//! Run-time feature detection for AArch64 on Linux.
2
3use super::auxvec;
4use crate::detect::{Feature, bit, cache};
5
6/// Try to read the features from the auxiliary vector.
7pub(crate) fn detect_features() -> cache::Initializer {
8    #[cfg(target_os = "android")]
9    let is_exynos9810 = {
10        // Samsung Exynos 9810 has a bug that big and little cores have different
11        // ISAs. And on older Android (pre-9), the kernel incorrectly reports
12        // that features available only on some cores are available on all cores.
13        // https://reviews.llvm.org/D114523
14        let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize];
15        let len = unsafe {
16            libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char)
17        };
18        // On Exynos, ro.arch is not available on Android 12+, but it is fine
19        // because Android 9+ includes the fix.
20        len > 0 && arch.starts_with(b"exynos9810")
21    };
22    #[cfg(not(target_os = "android"))]
23    let is_exynos9810 = false;
24
25    if let Ok(auxv) = auxvec::auxv() {
26        let hwcap: AtHwcap = auxv.into();
27        return hwcap.cache(is_exynos9810);
28    }
29    cache::Initializer::default()
30}
31
/// These values are part of the platform-specific [asm/hwcap.h][hwcap].
///
/// The names match those used for cpuinfo.
///
/// Every field is one capability bit of `AT_HWCAP` or `AT_HWCAP2`. Entries
/// that are commented out name hwcap bits that this module does not read;
/// they are kept so that the list stays in the same order as the bits.
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct AtHwcap {
    // AT_HWCAP
    fp: bool,
    asimd: bool,
    // evtstrm: No LLVM support.
    aes: bool,
    pmull: bool,
    sha1: bool,
    sha2: bool,
    crc32: bool,
    atomics: bool,
    fphp: bool,
    asimdhp: bool,
    // cpuid: No LLVM support.
    asimdrdm: bool,
    jscvt: bool,
    fcma: bool,
    lrcpc: bool,
    dcpop: bool,
    sha3: bool,
    sm3: bool,
    sm4: bool,
    asimddp: bool,
    sha512: bool,
    sve: bool,
    fhm: bool,
    dit: bool,
    uscat: bool,
    ilrcpc: bool,
    flagm: bool,
    ssbs: bool,
    sb: bool,
    paca: bool,
    pacg: bool,

    // AT_HWCAP2
    dcpodp: bool,
    sve2: bool,
    sveaes: bool,
    svepmull: bool,
    svebitperm: bool,
    svesha3: bool,
    svesm4: bool,
    flagm2: bool,
    frint: bool,
    // svei8mm: See i8mm feature.
    svef32mm: bool,
    svef64mm: bool,
    // svebf16: See bf16 feature.
    i8mm: bool,
    bf16: bool,
    // dgh: No LLVM support.
    rng: bool,
    bti: bool,
    mte: bool,
    ecv: bool,
    // afp: bool,
    // rpres: bool,
    // mte3: bool,
    sme: bool,
    smei16i64: bool,
    smef64f64: bool,
    // smei8i32: bool,
    // smef16f32: bool,
    // smeb16f32: bool,
    // smef32f32: bool,
    smefa64: bool,
    wfxt: bool,
    // ebf16: bool,
    // sveebf16: bool,
    cssc: bool,
    // rprfm: bool,
    sve2p1: bool,
    sme2: bool,
    sme2p1: bool,
    // smei16i32: bool,
    // smebi32i32: bool,
    smeb16b16: bool,
    smef16f16: bool,
    mops: bool,
    hbc: bool,
    sveb16b16: bool,
    lrcpc3: bool,
    lse128: bool,
    fpmr: bool,
    lut: bool,
    faminmax: bool,
    f8cvt: bool,
    f8fma: bool,
    f8dp4: bool,
    f8dp2: bool,
    f8e4m3: bool,
    f8e5m2: bool,
    smelutv2: bool,
    smef8f16: bool,
    smef8f32: bool,
    smesf8fma: bool,
    smesf8dp4: bool,
    smesf8dp2: bool,
    // pauthlr: bool,
}
139
140impl From<auxvec::AuxVec> for AtHwcap {
141    /// Reads AtHwcap from the auxiliary vector.
142    fn from(auxv: auxvec::AuxVec) -> Self {
143        let mut cap = AtHwcap {
144            fp: bit::test(auxv.hwcap, 0),
145            asimd: bit::test(auxv.hwcap, 1),
146            // evtstrm: bit::test(auxv.hwcap, 2),
147            aes: bit::test(auxv.hwcap, 3),
148            pmull: bit::test(auxv.hwcap, 4),
149            sha1: bit::test(auxv.hwcap, 5),
150            sha2: bit::test(auxv.hwcap, 6),
151            crc32: bit::test(auxv.hwcap, 7),
152            atomics: bit::test(auxv.hwcap, 8),
153            fphp: bit::test(auxv.hwcap, 9),
154            asimdhp: bit::test(auxv.hwcap, 10),
155            // cpuid: bit::test(auxv.hwcap, 11),
156            asimdrdm: bit::test(auxv.hwcap, 12),
157            jscvt: bit::test(auxv.hwcap, 13),
158            fcma: bit::test(auxv.hwcap, 14),
159            lrcpc: bit::test(auxv.hwcap, 15),
160            dcpop: bit::test(auxv.hwcap, 16),
161            sha3: bit::test(auxv.hwcap, 17),
162            sm3: bit::test(auxv.hwcap, 18),
163            sm4: bit::test(auxv.hwcap, 19),
164            asimddp: bit::test(auxv.hwcap, 20),
165            sha512: bit::test(auxv.hwcap, 21),
166            sve: bit::test(auxv.hwcap, 22),
167            fhm: bit::test(auxv.hwcap, 23),
168            dit: bit::test(auxv.hwcap, 24),
169            uscat: bit::test(auxv.hwcap, 25),
170            ilrcpc: bit::test(auxv.hwcap, 26),
171            flagm: bit::test(auxv.hwcap, 27),
172            ssbs: bit::test(auxv.hwcap, 28),
173            sb: bit::test(auxv.hwcap, 29),
174            paca: bit::test(auxv.hwcap, 30),
175            pacg: bit::test(auxv.hwcap, 31),
176
177            // AT_HWCAP2
178            dcpodp: bit::test(auxv.hwcap2, 0),
179            sve2: bit::test(auxv.hwcap2, 1),
180            sveaes: bit::test(auxv.hwcap2, 2),
181            svepmull: bit::test(auxv.hwcap2, 3),
182            svebitperm: bit::test(auxv.hwcap2, 4),
183            svesha3: bit::test(auxv.hwcap2, 5),
184            svesm4: bit::test(auxv.hwcap2, 6),
185            flagm2: bit::test(auxv.hwcap2, 7),
186            frint: bit::test(auxv.hwcap2, 8),
187            // svei8mm: bit::test(auxv.hwcap2, 9),
188            svef32mm: bit::test(auxv.hwcap2, 10),
189            svef64mm: bit::test(auxv.hwcap2, 11),
190            // svebf16: bit::test(auxv.hwcap2, 12),
191            i8mm: bit::test(auxv.hwcap2, 13),
192            bf16: bit::test(auxv.hwcap2, 14),
193            // dgh: bit::test(auxv.hwcap2, 15),
194            rng: bit::test(auxv.hwcap2, 16),
195            bti: bit::test(auxv.hwcap2, 17),
196            mte: bit::test(auxv.hwcap2, 18),
197            ecv: bit::test(auxv.hwcap2, 19),
198            // afp: bit::test(auxv.hwcap2, 20),
199            // rpres: bit::test(auxv.hwcap2, 21),
200            // mte3: bit::test(auxv.hwcap2, 22),
201            sme: bit::test(auxv.hwcap2, 23),
202            smei16i64: bit::test(auxv.hwcap2, 24),
203            smef64f64: bit::test(auxv.hwcap2, 25),
204            // smei8i32: bit::test(auxv.hwcap2, 26),
205            // smef16f32: bit::test(auxv.hwcap2, 27),
206            // smeb16f32: bit::test(auxv.hwcap2, 28),
207            // smef32f32: bit::test(auxv.hwcap2, 29),
208            smefa64: bit::test(auxv.hwcap2, 30),
209            wfxt: bit::test(auxv.hwcap2, 31),
210            ..Default::default()
211        };
212
213        // Hardware capabilities from bits 32 to 63 should only
214        // be tested on LP64 targets with 64 bits `usize`.
215        // On ILP32 targets like `aarch64-unknown-linux-gnu_ilp32`,
216        // these hardware capabilities will default to `false`.
217        // https://github.com/rust-lang/rust/issues/146230
218        #[cfg(target_pointer_width = "64")]
219        {
220            // cap.ebf16: bit::test(auxv.hwcap2, 32);
221            // cap.sveebf16: bit::test(auxv.hwcap2, 33);
222            cap.cssc = bit::test(auxv.hwcap2, 34);
223            // cap.rprfm: bit::test(auxv.hwcap2, 35);
224            cap.sve2p1 = bit::test(auxv.hwcap2, 36);
225            cap.sme2 = bit::test(auxv.hwcap2, 37);
226            cap.sme2p1 = bit::test(auxv.hwcap2, 38);
227            // cap.smei16i32 = bit::test(auxv.hwcap2, 39);
228            // cap.smebi32i32 = bit::test(auxv.hwcap2, 40);
229            cap.smeb16b16 = bit::test(auxv.hwcap2, 41);
230            cap.smef16f16 = bit::test(auxv.hwcap2, 42);
231            cap.mops = bit::test(auxv.hwcap2, 43);
232            cap.hbc = bit::test(auxv.hwcap2, 44);
233            cap.sveb16b16 = bit::test(auxv.hwcap2, 45);
234            cap.lrcpc3 = bit::test(auxv.hwcap2, 46);
235            cap.lse128 = bit::test(auxv.hwcap2, 47);
236            cap.fpmr = bit::test(auxv.hwcap2, 48);
237            cap.lut = bit::test(auxv.hwcap2, 49);
238            cap.faminmax = bit::test(auxv.hwcap2, 50);
239            cap.f8cvt = bit::test(auxv.hwcap2, 51);
240            cap.f8fma = bit::test(auxv.hwcap2, 52);
241            cap.f8dp4 = bit::test(auxv.hwcap2, 53);
242            cap.f8dp2 = bit::test(auxv.hwcap2, 54);
243            cap.f8e4m3 = bit::test(auxv.hwcap2, 55);
244            cap.f8e5m2 = bit::test(auxv.hwcap2, 56);
245            cap.smelutv2 = bit::test(auxv.hwcap2, 57);
246            cap.smef8f16 = bit::test(auxv.hwcap2, 58);
247            cap.smef8f32 = bit::test(auxv.hwcap2, 59);
248            cap.smesf8fma = bit::test(auxv.hwcap2, 60);
249            cap.smesf8dp4 = bit::test(auxv.hwcap2, 61);
250            cap.smesf8dp2 = bit::test(auxv.hwcap2, 62);
251            // cap.pauthlr = bit::test(auxv.hwcap2, ??);
252        }
253        cap
254    }
255}
256
257impl AtHwcap {
258    /// Initializes the cache from the feature -bits.
259    ///
260    /// The feature dependencies here come directly from LLVM's feature definitions:
261    /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td
262    fn cache(self, is_exynos9810: bool) -> cache::Initializer {
263        let mut value = cache::Initializer::default();
264        {
265            let mut enable_feature = |f, enable| {
266                if enable {
267                    value.set(f as u32);
268                }
269            };
270
271            // Samsung Exynos 9810 has a bug that big and little cores have different
272            // ISAs. And on older Android (pre-9), the kernel incorrectly reports
273            // that features available only on some cores are available on all cores.
274            // So, only check features that are known to be available on exynos-m3:
275            // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature
276            // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342.
277            if is_exynos9810 {
278                enable_feature(Feature::fp, self.fp);
279                enable_feature(Feature::crc, self.crc32);
280                // ASIMD support requires float support - if half-floats are
281                // supported, it also requires half-float support:
282                let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
283                enable_feature(Feature::asimd, asimd);
284                // Cryptographic extensions require ASIMD
285                // AES also covers FEAT_PMULL
286                enable_feature(Feature::aes, self.aes && self.pmull && asimd);
287                enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
288                return value;
289            }
290
291            enable_feature(Feature::fp, self.fp);
292            // Half-float support requires float support
293            enable_feature(Feature::fp16, self.fp && self.fphp);
294            // FHM (fp16fml in LLVM) requires half float support
295            enable_feature(Feature::fhm, self.fphp && self.fhm);
296            enable_feature(Feature::pmull, self.pmull);
297            enable_feature(Feature::crc, self.crc32);
298            enable_feature(Feature::lse, self.atomics);
299            enable_feature(Feature::lse2, self.uscat);
300            enable_feature(Feature::lse128, self.lse128 && self.atomics);
301            enable_feature(Feature::rcpc, self.lrcpc);
302            // RCPC2 (rcpc-immo in LLVM) requires RCPC support
303            let rcpc2 = self.ilrcpc && self.lrcpc;
304            enable_feature(Feature::rcpc2, rcpc2);
305            enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2);
306            enable_feature(Feature::dit, self.dit);
307            enable_feature(Feature::flagm, self.flagm);
308            enable_feature(Feature::flagm2, self.flagm2);
309            enable_feature(Feature::ssbs, self.ssbs);
310            enable_feature(Feature::sb, self.sb);
311            enable_feature(Feature::paca, self.paca);
312            enable_feature(Feature::pacg, self.pacg);
313            // enable_feature(Feature::pauth_lr, self.pauthlr);
314            enable_feature(Feature::dpb, self.dcpop);
315            enable_feature(Feature::dpb2, self.dcpodp);
316            enable_feature(Feature::rand, self.rng);
317            enable_feature(Feature::bti, self.bti);
318            enable_feature(Feature::mte, self.mte);
319            // jsconv requires float support
320            enable_feature(Feature::jsconv, self.jscvt && self.fp);
321            enable_feature(Feature::rdm, self.asimdrdm);
322            enable_feature(Feature::dotprod, self.asimddp);
323            enable_feature(Feature::frintts, self.frint);
324
325            // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes
326            // separately. We ignore that distinction here.
327            enable_feature(Feature::i8mm, self.i8mm);
328            enable_feature(Feature::bf16, self.bf16);
329
330            // ASIMD support requires float support - if half-floats are
331            // supported, it also requires half-float support:
332            let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
333            enable_feature(Feature::asimd, asimd);
334            // ASIMD extensions require ASIMD support:
335            enable_feature(Feature::fcma, self.fcma && asimd);
336            enable_feature(Feature::sve, self.sve && asimd);
337
338            // SVE extensions require SVE & ASIMD
339            enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd);
340            enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd);
341
342            // Cryptographic extensions require ASIMD
343            enable_feature(Feature::aes, self.aes && asimd);
344            enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
345            // SHA512/SHA3 require SHA1 & SHA256
346            enable_feature(
347                Feature::sha3,
348                self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd,
349            );
350            enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd);
351
352            // SVE2 requires SVE
353            let sve2 = self.sve2 && self.sve && asimd;
354            enable_feature(Feature::sve2, sve2);
355            enable_feature(Feature::sve2p1, self.sve2p1 && sve2);
356            // SVE2 extensions require SVE2 and crypto features
357            enable_feature(Feature::sve2_aes, self.sveaes && self.svepmull && sve2 && self.aes);
358            enable_feature(Feature::sve2_sm4, self.svesm4 && sve2 && self.sm3 && self.sm4);
359            enable_feature(
360                Feature::sve2_sha3,
361                self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2,
362            );
363            enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2);
364            enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16);
365            enable_feature(Feature::hbc, self.hbc);
366            enable_feature(Feature::mops, self.mops);
367            enable_feature(Feature::ecv, self.ecv);
368            enable_feature(Feature::lut, self.lut);
369            enable_feature(Feature::cssc, self.cssc);
370            enable_feature(Feature::fpmr, self.fpmr);
371            enable_feature(Feature::faminmax, self.faminmax);
372            let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16;
373            enable_feature(Feature::fp8, fp8);
374            let fp8fma = self.f8fma && fp8;
375            enable_feature(Feature::fp8fma, fp8fma);
376            let fp8dot4 = self.f8dp4 && fp8fma;
377            enable_feature(Feature::fp8dot4, fp8dot4);
378            enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4);
379            enable_feature(Feature::wfxt, self.wfxt);
380            let sme = self.sme && self.bf16;
381            enable_feature(Feature::sme, sme);
382            enable_feature(Feature::sme_i16i64, self.smei16i64 && sme);
383            enable_feature(Feature::sme_f64f64, self.smef64f64 && sme);
384            enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2);
385            let sme2 = self.sme2 && sme;
386            enable_feature(Feature::sme2, sme2);
387            enable_feature(Feature::sme2p1, self.sme2p1 && sme2);
388            enable_feature(
389                Feature::sme_b16b16,
390                sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16,
391            );
392            enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2);
393            enable_feature(Feature::sme_lutv2, self.smelutv2);
394            let sme_f8f32 = self.smef8f32 && sme2 && fp8;
395            enable_feature(Feature::sme_f8f32, sme_f8f32);
396            enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32);
397            let ssve_fp8fma = self.smesf8fma && sme2 && fp8;
398            enable_feature(Feature::ssve_fp8fma, ssve_fp8fma);
399            let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma;
400            enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4);
401            enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4);
402        }
403        value
404    }
405}
406
407#[cfg(target_endian = "little")]
408#[cfg(test)]
409mod tests;