std_detect/detect/os/linux/
aarch64.rs

1//! Run-time feature detection for Aarch64 on Linux.
2
3use super::auxvec;
4use crate::detect::{Feature, bit, cache};
5
6/// Try to read the features from the auxiliary vector.
7pub(crate) fn detect_features() -> cache::Initializer {
8    #[cfg(target_os = "android")]
9    let is_exynos9810 = {
10        // Samsung Exynos 9810 has a bug that big and little cores have different
11        // ISAs. And on older Android (pre-9), the kernel incorrectly reports
12        // that features available only on some cores are available on all cores.
13        // https://reviews.llvm.org/D114523
14        let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize];
15        let len = unsafe {
16            libc::__system_property_get(c"ro.arch".as_ptr(), arch.as_mut_ptr() as *mut libc::c_char)
17        };
18        // On Exynos, ro.arch is not available on Android 12+, but it is fine
19        // because Android 9+ includes the fix.
20        len > 0 && arch.starts_with(b"exynos9810")
21    };
22    #[cfg(not(target_os = "android"))]
23    let is_exynos9810 = false;
24
25    if let Ok(auxv) = auxvec::auxv() {
26        let hwcap: AtHwcap = auxv.into();
27        return hwcap.cache(is_exynos9810);
28    }
29    cache::Initializer::default()
30}
31
/// These values are part of the platform-specific [asm/hwcap.h][hwcap].
///
/// The names match those used for cpuinfo.
///
/// Each field records the state of a single capability bit. Entries that are
/// commented out are kept only as placeholders and are not stored.
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
#[derive(Debug, Default, PartialEq)]
struct AtHwcap {
    // AT_HWCAP
    fp: bool,
    asimd: bool,
    // evtstrm: No LLVM support.
    aes: bool,
    pmull: bool,
    sha1: bool,
    sha2: bool,
    crc32: bool,
    atomics: bool,
    fphp: bool,
    asimdhp: bool,
    // cpuid: No LLVM support.
    asimdrdm: bool,
    jscvt: bool,
    fcma: bool,
    lrcpc: bool,
    dcpop: bool,
    sha3: bool,
    sm3: bool,
    sm4: bool,
    asimddp: bool,
    sha512: bool,
    sve: bool,
    fhm: bool,
    dit: bool,
    uscat: bool,
    ilrcpc: bool,
    flagm: bool,
    ssbs: bool,
    sb: bool,
    paca: bool,
    pacg: bool,

    // AT_HWCAP2
    dcpodp: bool,
    sve2: bool,
    sveaes: bool,
    svepmull: bool,
    svebitperm: bool,
    svesha3: bool,
    svesm4: bool,
    flagm2: bool,
    frint: bool,
    // svei8mm: See i8mm feature.
    svef32mm: bool,
    svef64mm: bool,
    // svebf16: See bf16 feature.
    i8mm: bool,
    bf16: bool,
    // dgh: No LLVM support.
    rng: bool,
    bti: bool,
    mte: bool,
    ecv: bool,
    // afp: bool,
    // rpres: bool,
    // mte3: bool,
    sme: bool,
    smei16i64: bool,
    smef64f64: bool,
    // smei8i32: bool,
    // smef16f32: bool,
    // smeb16f32: bool,
    // smef32f32: bool,
    smefa64: bool,
    wfxt: bool,
    // ebf16: bool,
    // sveebf16: bool,
    cssc: bool,
    // rprfm: bool,
    sve2p1: bool,
    sme2: bool,
    sme2p1: bool,
    // smei16i32: bool,
    // smebi32i32: bool,
    smeb16b16: bool,
    smef16f16: bool,
    mops: bool,
    hbc: bool,
    sveb16b16: bool,
    lrcpc3: bool,
    lse128: bool,
    fpmr: bool,
    lut: bool,
    faminmax: bool,
    f8cvt: bool,
    f8fma: bool,
    f8dp4: bool,
    f8dp2: bool,
    f8e4m3: bool,
    f8e5m2: bool,
    smelutv2: bool,
    smef8f16: bool,
    smef8f32: bool,
    smesf8fma: bool,
    smesf8dp4: bool,
    smesf8dp2: bool,
    // pauthlr: bool,
}
139
140impl From<auxvec::AuxVec> for AtHwcap {
141    /// Reads AtHwcap from the auxiliary vector.
142    fn from(auxv: auxvec::AuxVec) -> Self {
143        AtHwcap {
144            fp: bit::test(auxv.hwcap, 0),
145            asimd: bit::test(auxv.hwcap, 1),
146            // evtstrm: bit::test(auxv.hwcap, 2),
147            aes: bit::test(auxv.hwcap, 3),
148            pmull: bit::test(auxv.hwcap, 4),
149            sha1: bit::test(auxv.hwcap, 5),
150            sha2: bit::test(auxv.hwcap, 6),
151            crc32: bit::test(auxv.hwcap, 7),
152            atomics: bit::test(auxv.hwcap, 8),
153            fphp: bit::test(auxv.hwcap, 9),
154            asimdhp: bit::test(auxv.hwcap, 10),
155            // cpuid: bit::test(auxv.hwcap, 11),
156            asimdrdm: bit::test(auxv.hwcap, 12),
157            jscvt: bit::test(auxv.hwcap, 13),
158            fcma: bit::test(auxv.hwcap, 14),
159            lrcpc: bit::test(auxv.hwcap, 15),
160            dcpop: bit::test(auxv.hwcap, 16),
161            sha3: bit::test(auxv.hwcap, 17),
162            sm3: bit::test(auxv.hwcap, 18),
163            sm4: bit::test(auxv.hwcap, 19),
164            asimddp: bit::test(auxv.hwcap, 20),
165            sha512: bit::test(auxv.hwcap, 21),
166            sve: bit::test(auxv.hwcap, 22),
167            fhm: bit::test(auxv.hwcap, 23),
168            dit: bit::test(auxv.hwcap, 24),
169            uscat: bit::test(auxv.hwcap, 25),
170            ilrcpc: bit::test(auxv.hwcap, 26),
171            flagm: bit::test(auxv.hwcap, 27),
172            ssbs: bit::test(auxv.hwcap, 28),
173            sb: bit::test(auxv.hwcap, 29),
174            paca: bit::test(auxv.hwcap, 30),
175            pacg: bit::test(auxv.hwcap, 31),
176
177            // AT_HWCAP2
178            dcpodp: bit::test(auxv.hwcap2, 0),
179            sve2: bit::test(auxv.hwcap2, 1),
180            sveaes: bit::test(auxv.hwcap2, 2),
181            svepmull: bit::test(auxv.hwcap2, 3),
182            svebitperm: bit::test(auxv.hwcap2, 4),
183            svesha3: bit::test(auxv.hwcap2, 5),
184            svesm4: bit::test(auxv.hwcap2, 6),
185            flagm2: bit::test(auxv.hwcap2, 7),
186            frint: bit::test(auxv.hwcap2, 8),
187            // svei8mm: bit::test(auxv.hwcap2, 9),
188            svef32mm: bit::test(auxv.hwcap2, 10),
189            svef64mm: bit::test(auxv.hwcap2, 11),
190            // svebf16: bit::test(auxv.hwcap2, 12),
191            i8mm: bit::test(auxv.hwcap2, 13),
192            bf16: bit::test(auxv.hwcap2, 14),
193            // dgh: bit::test(auxv.hwcap2, 15),
194            rng: bit::test(auxv.hwcap2, 16),
195            bti: bit::test(auxv.hwcap2, 17),
196            mte: bit::test(auxv.hwcap2, 18),
197            ecv: bit::test(auxv.hwcap2, 19),
198            // afp: bit::test(auxv.hwcap2, 20),
199            // rpres: bit::test(auxv.hwcap2, 21),
200            // mte3: bit::test(auxv.hwcap2, 22),
201            sme: bit::test(auxv.hwcap2, 23),
202            smei16i64: bit::test(auxv.hwcap2, 24),
203            smef64f64: bit::test(auxv.hwcap2, 25),
204            // smei8i32: bit::test(auxv.hwcap2, 26),
205            // smef16f32: bit::test(auxv.hwcap2, 27),
206            // smeb16f32: bit::test(auxv.hwcap2, 28),
207            // smef32f32: bit::test(auxv.hwcap2, 29),
208            smefa64: bit::test(auxv.hwcap2, 30),
209            wfxt: bit::test(auxv.hwcap2, 31),
210            // ebf16: bit::test(auxv.hwcap2, 32),
211            // sveebf16: bit::test(auxv.hwcap2, 33),
212            cssc: bit::test(auxv.hwcap2, 34),
213            // rprfm: bit::test(auxv.hwcap2, 35),
214            sve2p1: bit::test(auxv.hwcap2, 36),
215            sme2: bit::test(auxv.hwcap2, 37),
216            sme2p1: bit::test(auxv.hwcap2, 38),
217            // smei16i32: bit::test(auxv.hwcap2, 39),
218            // smebi32i32: bit::test(auxv.hwcap2, 40),
219            smeb16b16: bit::test(auxv.hwcap2, 41),
220            smef16f16: bit::test(auxv.hwcap2, 42),
221            mops: bit::test(auxv.hwcap2, 43),
222            hbc: bit::test(auxv.hwcap2, 44),
223            sveb16b16: bit::test(auxv.hwcap2, 45),
224            lrcpc3: bit::test(auxv.hwcap2, 46),
225            lse128: bit::test(auxv.hwcap2, 47),
226            fpmr: bit::test(auxv.hwcap2, 48),
227            lut: bit::test(auxv.hwcap2, 49),
228            faminmax: bit::test(auxv.hwcap2, 50),
229            f8cvt: bit::test(auxv.hwcap2, 51),
230            f8fma: bit::test(auxv.hwcap2, 52),
231            f8dp4: bit::test(auxv.hwcap2, 53),
232            f8dp2: bit::test(auxv.hwcap2, 54),
233            f8e4m3: bit::test(auxv.hwcap2, 55),
234            f8e5m2: bit::test(auxv.hwcap2, 56),
235            smelutv2: bit::test(auxv.hwcap2, 57),
236            smef8f16: bit::test(auxv.hwcap2, 58),
237            smef8f32: bit::test(auxv.hwcap2, 59),
238            smesf8fma: bit::test(auxv.hwcap2, 60),
239            smesf8dp4: bit::test(auxv.hwcap2, 61),
240            smesf8dp2: bit::test(auxv.hwcap2, 62),
241            // pauthlr: bit::test(auxv.hwcap2, ??),
242        }
243    }
244}
245
246impl AtHwcap {
247    /// Initializes the cache from the feature -bits.
248    ///
249    /// The feature dependencies here come directly from LLVM's feature definitions:
250    /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td
251    fn cache(self, is_exynos9810: bool) -> cache::Initializer {
252        let mut value = cache::Initializer::default();
253        {
254            let mut enable_feature = |f, enable| {
255                if enable {
256                    value.set(f as u32);
257                }
258            };
259
260            // Samsung Exynos 9810 has a bug that big and little cores have different
261            // ISAs. And on older Android (pre-9), the kernel incorrectly reports
262            // that features available only on some cores are available on all cores.
263            // So, only check features that are known to be available on exynos-m3:
264            // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature
265            // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342.
266            if is_exynos9810 {
267                enable_feature(Feature::fp, self.fp);
268                enable_feature(Feature::crc, self.crc32);
269                // ASIMD support requires float support - if half-floats are
270                // supported, it also requires half-float support:
271                let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
272                enable_feature(Feature::asimd, asimd);
273                // Cryptographic extensions require ASIMD
274                // AES also covers FEAT_PMULL
275                enable_feature(Feature::aes, self.aes && self.pmull && asimd);
276                enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
277                return value;
278            }
279
280            enable_feature(Feature::fp, self.fp);
281            // Half-float support requires float support
282            enable_feature(Feature::fp16, self.fp && self.fphp);
283            // FHM (fp16fml in LLVM) requires half float support
284            enable_feature(Feature::fhm, self.fphp && self.fhm);
285            enable_feature(Feature::pmull, self.pmull);
286            enable_feature(Feature::crc, self.crc32);
287            enable_feature(Feature::lse, self.atomics);
288            enable_feature(Feature::lse2, self.uscat);
289            enable_feature(Feature::lse128, self.lse128 && self.atomics);
290            enable_feature(Feature::rcpc, self.lrcpc);
291            // RCPC2 (rcpc-immo in LLVM) requires RCPC support
292            let rcpc2 = self.ilrcpc && self.lrcpc;
293            enable_feature(Feature::rcpc2, rcpc2);
294            enable_feature(Feature::rcpc3, self.lrcpc3 && rcpc2);
295            enable_feature(Feature::dit, self.dit);
296            enable_feature(Feature::flagm, self.flagm);
297            enable_feature(Feature::flagm2, self.flagm2);
298            enable_feature(Feature::ssbs, self.ssbs);
299            enable_feature(Feature::sb, self.sb);
300            enable_feature(Feature::paca, self.paca);
301            enable_feature(Feature::pacg, self.pacg);
302            // enable_feature(Feature::pauth_lr, self.pauthlr);
303            enable_feature(Feature::dpb, self.dcpop);
304            enable_feature(Feature::dpb2, self.dcpodp);
305            enable_feature(Feature::rand, self.rng);
306            enable_feature(Feature::bti, self.bti);
307            enable_feature(Feature::mte, self.mte);
308            // jsconv requires float support
309            enable_feature(Feature::jsconv, self.jscvt && self.fp);
310            enable_feature(Feature::rdm, self.asimdrdm);
311            enable_feature(Feature::dotprod, self.asimddp);
312            enable_feature(Feature::frintts, self.frint);
313
314            // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes
315            // separately. We ignore that distinction here.
316            enable_feature(Feature::i8mm, self.i8mm);
317            enable_feature(Feature::bf16, self.bf16);
318
319            // ASIMD support requires float support - if half-floats are
320            // supported, it also requires half-float support:
321            let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp);
322            enable_feature(Feature::asimd, asimd);
323            // ASIMD extensions require ASIMD support:
324            enable_feature(Feature::fcma, self.fcma && asimd);
325            enable_feature(Feature::sve, self.sve && asimd);
326
327            // SVE extensions require SVE & ASIMD
328            enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd);
329            enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd);
330
331            // Cryptographic extensions require ASIMD
332            enable_feature(Feature::aes, self.aes && asimd);
333            enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd);
334            // SHA512/SHA3 require SHA1 & SHA256
335            enable_feature(
336                Feature::sha3,
337                self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd,
338            );
339            enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd);
340
341            // SVE2 requires SVE
342            let sve2 = self.sve2 && self.sve && asimd;
343            enable_feature(Feature::sve2, sve2);
344            enable_feature(Feature::sve2p1, self.sve2p1 && sve2);
345            // SVE2 extensions require SVE2 and crypto features
346            enable_feature(Feature::sve2_aes, self.sveaes && self.svepmull && sve2 && self.aes);
347            enable_feature(Feature::sve2_sm4, self.svesm4 && sve2 && self.sm3 && self.sm4);
348            enable_feature(
349                Feature::sve2_sha3,
350                self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2,
351            );
352            enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2);
353            enable_feature(Feature::sve_b16b16, self.bf16 && self.sveb16b16);
354            enable_feature(Feature::hbc, self.hbc);
355            enable_feature(Feature::mops, self.mops);
356            enable_feature(Feature::ecv, self.ecv);
357            enable_feature(Feature::lut, self.lut);
358            enable_feature(Feature::cssc, self.cssc);
359            enable_feature(Feature::fpmr, self.fpmr);
360            enable_feature(Feature::faminmax, self.faminmax);
361            let fp8 = self.f8cvt && self.faminmax && self.lut && self.bf16;
362            enable_feature(Feature::fp8, fp8);
363            let fp8fma = self.f8fma && fp8;
364            enable_feature(Feature::fp8fma, fp8fma);
365            let fp8dot4 = self.f8dp4 && fp8fma;
366            enable_feature(Feature::fp8dot4, fp8dot4);
367            enable_feature(Feature::fp8dot2, self.f8dp2 && fp8dot4);
368            enable_feature(Feature::wfxt, self.wfxt);
369            let sme = self.sme && self.bf16;
370            enable_feature(Feature::sme, sme);
371            enable_feature(Feature::sme_i16i64, self.smei16i64 && sme);
372            enable_feature(Feature::sme_f64f64, self.smef64f64 && sme);
373            enable_feature(Feature::sme_fa64, self.smefa64 && sme && sve2);
374            let sme2 = self.sme2 && sme;
375            enable_feature(Feature::sme2, sme2);
376            enable_feature(Feature::sme2p1, self.sme2p1 && sme2);
377            enable_feature(
378                Feature::sme_b16b16,
379                sme2 && self.bf16 && self.sveb16b16 && self.smeb16b16,
380            );
381            enable_feature(Feature::sme_f16f16, self.smef16f16 && sme2);
382            enable_feature(Feature::sme_lutv2, self.smelutv2);
383            let sme_f8f32 = self.smef8f32 && sme2 && fp8;
384            enable_feature(Feature::sme_f8f32, sme_f8f32);
385            enable_feature(Feature::sme_f8f16, self.smef8f16 && sme_f8f32);
386            let ssve_fp8fma = self.smesf8fma && sme2 && fp8;
387            enable_feature(Feature::ssve_fp8fma, ssve_fp8fma);
388            let ssve_fp8dot4 = self.smesf8dp4 && ssve_fp8fma;
389            enable_feature(Feature::ssve_fp8dot4, ssve_fp8dot4);
390            enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && ssve_fp8dot4);
391        }
392        value
393    }
394}
395
396#[cfg(target_endian = "little")]
397#[cfg(test)]
398mod tests;