const std = @import("std");
const Target = std.Target;
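/// Fields decoded from the MIDR_EL1 (Main ID) register.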
pub const CoreInfo = struct {
architecture: u8 = 0,
implementer: u8 = 0,
variant: u8 = 0,
part: u16 = 0,
};
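/// Tables mapping MIDR implementer and part numbers to known CPU models.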
pub const cpu_models = struct {
const A32 = Target.arm.cpu;
const A64 = Target.aarch64.cpu;
const E = struct {
part: u16,
variant: ?u8 = null, // null matches any variant of this part
m32: ?*const Target.Cpu.Model = null, // AArch32 model (Target.arm.cpu)
m64: ?*const Target.Cpu.Model = null, // AArch64 model (Target.aarch64.cpu)
};
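// ARM Ltd. (implementer 0x41)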
const ARM = [_]E{
E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null },
E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null },
E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null },
E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null },
E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null },
E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null },
E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null },
E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null },
E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null },
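// 0xc0d is the Cortex-A12 part; it is mapped to the Cortex-A17 model.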
E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null },
E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null },
E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null },
E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null },
E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null },
E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null },
E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null },
E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null },
E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null },
E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null },
E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null },
E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null },
E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null },
E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null },
E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null },
E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null },
E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c },
E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
};
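// Broadcom (implementer 0x42)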
const Broadcom = [_]E{
E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
};
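// Cavium (implementer 0x43)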
const Cavium = [_]E{
E{ .part = 0x0a0, .m64 = &A64.thunderx },
E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
};
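// Fujitsu (implementer 0x46)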
const Fujitsu = [_]E{
E{ .part = 0x001, .m64 = &A64.a64fx },
};
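// HiSilicon (implementer 0x48)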
const HiSilicon = [_]E{
E{ .part = 0xd01, .m64 = &A64.tsv110 },
};
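// Nvidia (implementer 0x4e)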
const Nvidia = [_]E{
E{ .part = 0x004, .m64 = &A64.carmel },
};
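// Ampere (implementer 0x50)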
const Ampere = [_]E{
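// part 0x000: eMAG when variant == 3, otherwise X-Gene 1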
E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
E{ .part = 0x000, .m64 = &A64.xgene1 },
};
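// Qualcomm (implementer 0x51)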
const Qualcomm = [_]E{
E{ .part = 0x06f, .m32 = &A32.krait },
E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A32.kryo },
E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A32.kryo },
E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A32.kryo },
E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A32.cortex_a73 },
E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A32.cortex_a73 },
E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A32.cortex_a75 },
E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A32.cortex_a75 },
E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A32.cortex_a76 },
E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A32.cortex_a76 },
E{ .part = 0xc00, .m64 = &A64.falkor },
E{ .part = 0xc01, .m64 = &A64.saphira },
};
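/// Returns the CPU model for the given core, or null if it is not in the tables.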
pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
const models = switch (core.implementer) {
0x41 => &ARM,
0x42 => &Broadcom,
0x43 => &Cavium,
0x46 => &Fujitsu,
0x48 => &HiSilicon,
0x4e => &Nvidia,
0x50 => &Ampere,
0x51 => &Qualcomm,
else => return null,
};
for (models) |model| {
if (model.part == core.part and
(model.variant == null or model.variant.? == core.variant))
return if (is_64bit) model.m64 else model.m32;
}
return null;
}
};
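/// Native AArch64 CPU and feature detection based on the AArch64 ID registers.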
pub const aarch64 = struct {
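/// Enables or disables a single feature in the CPU feature set.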
fn setFeature(cpu: *Target.Cpu, feature: Target.aarch64.Feature, enabled: bool) void {
const idx = @as(Target.Cpu.Feature.Set.Index, @enumToInt(feature));
if (enabled) cpu.features.addFeature(idx) else cpu.features.removeFeature(idx);
}
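/// Reads the 4-bit ID register field at the given bit offset.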
inline fn bitField(input: u64, offset: u6) u4 {
return @truncate(u4, input >> offset);
}
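/// Expects registers[0] to hold MIDR_EL1 and registers[1..12] to hold the
/// ID_AA64* registers read by detectNativeCpuFeatures (see the comments there).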
pub fn detectNativeCpuAndFeatures(arch: Target.Cpu.Arch, registers: [12]u64) ?Target.Cpu {
const info = detectNativeCoreInfo(registers[0]);
const model = cpu_models.isKnown(info, true) orelse return null;
var cpu = Target.Cpu{
.arch = arch,
.model = model,
.features = Target.Cpu.Feature.Set.empty,
};
detectNativeCpuFeatures(&cpu, registers[1..12]);
addInstructionFusions(&cpu, info);
return cpu;
}
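/// Decodes the implementer, part, variant, and architecture fields of MIDR_EL1.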
fn detectNativeCoreInfo(midr: u64) CoreInfo {
var info = CoreInfo{
.implementer = @truncate(u8, midr >> 24),
.part = @truncate(u12, midr >> 4),
};
blk: {
if (info.implementer == 0x41) {
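// ARM Ltd. parts 0x0xx and 0x7xx use an older MIDR encoding for the
// variant and architecture fields, so keep the zero-initialized defaults.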
const special_bits = @truncate(u4, info.part >> 8);
if (special_bits == 0x0 or special_bits == 0x7) {
break :blk;
}
}
info.variant |= @intCast(u8, @truncate(u4, midr >> 20)) << 4;
info.variant |= @truncate(u4, midr);
info.architecture = @truncate(u4, midr >> 16);
}
return info;
}
fn detectNativeCpuFeatures(cpu: *Target.Cpu, registers: *const [11]u64) void {
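// ID_AA64PFR0_EL1 (registers[0])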
setFeature(cpu, .dit, bitField(registers[0], 48) >= 1);
setFeature(cpu, .am, bitField(registers[0], 44) >= 1);
setFeature(cpu, .amvs, bitField(registers[0], 44) >= 2);
setFeature(cpu, .mpam, bitField(registers[0], 40) >= 1);
setFeature(cpu, .sel2, bitField(registers[0], 36) >= 1);
setFeature(cpu, .sve, bitField(registers[0], 32) >= 1);
setFeature(cpu, .el3, bitField(registers[0], 12) >= 1);
setFeature(cpu, .ras, bitField(registers[0], 28) >= 1);
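// FP (bits 19:16) and AdvSIMD (bits 23:20) fields; 0xF means not implemented.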
if (bitField(registers[0], 20) < 0xF) blk: {
if (bitField(registers[0], 16) != bitField(registers[0], 20)) break :blk;
setFeature(cpu, .neon, true);
setFeature(cpu, .fp_armv8, true);
setFeature(cpu, .fullfp16, bitField(registers[0], 20) > 0);
}
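// ID_AA64PFR1_EL1 (registers[1])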
setFeature(cpu, .mpam, bitField(registers[0], 40) >= 1 or bitField(registers[1], 16) >= 1); // MPAM or MPAM_frac
setFeature(cpu, .mte, bitField(registers[1], 8) >= 1);
setFeature(cpu, .ssbs, bitField(registers[1], 4) >= 1);
setFeature(cpu, .bti, bitField(registers[1], 0) >= 1);
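// ID_AA64DFR0_EL1 (registers[2])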
setFeature(cpu, .tracev8_4, bitField(registers[2], 40) >= 1);
setFeature(cpu, .spe, bitField(registers[2], 32) >= 1);
setFeature(cpu, .perfmon, bitField(registers[2], 8) >= 1 and bitField(registers[2], 8) < 0xF);
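// ID_AA64ISAR0_EL1 (registers[6])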
setFeature(cpu, .rand, bitField(registers[6], 60) >= 1);
setFeature(cpu, .tlb_rmi, bitField(registers[6], 56) >= 1);
setFeature(cpu, .flagm, bitField(registers[6], 52) >= 1);
setFeature(cpu, .fp16fml, bitField(registers[6], 48) >= 1);
setFeature(cpu, .dotprod, bitField(registers[6], 44) >= 1);
setFeature(cpu, .sm4, bitField(registers[6], 40) >= 1 and bitField(registers[6], 36) >= 1);
setFeature(cpu, .sha3, bitField(registers[6], 32) >= 1 and bitField(registers[6], 12) >= 2);
setFeature(cpu, .rdm, bitField(registers[6], 28) >= 1);
setFeature(cpu, .lse, bitField(registers[6], 20) >= 1);
setFeature(cpu, .crc, bitField(registers[6], 16) >= 1);
setFeature(cpu, .sha2, bitField(registers[6], 12) >= 1 and bitField(registers[6], 8) >= 1);
setFeature(cpu, .aes, bitField(registers[6], 4) >= 1);
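// ID_AA64ISAR1_EL1 (registers[7])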
setFeature(cpu, .i8mm, bitField(registers[7], 52) >= 1);
setFeature(cpu, .bf16, bitField(registers[7], 44) >= 1);
setFeature(cpu, .predres, bitField(registers[7], 40) >= 1);
setFeature(cpu, .sb, bitField(registers[7], 36) >= 1);
setFeature(cpu, .fptoint, bitField(registers[7], 32) >= 1);
setFeature(cpu, .rcpc, bitField(registers[7], 20) >= 1);
setFeature(cpu, .rcpc_immo, bitField(registers[7], 20) >= 2);
setFeature(cpu, .complxnum, bitField(registers[7], 16) >= 1);
setFeature(cpu, .jsconv, bitField(registers[7], 12) >= 1);
setFeature(cpu, .pauth, bitField(registers[7], 8) >= 1 or bitField(registers[7], 4) >= 1);
setFeature(cpu, .ccpp, bitField(registers[7], 0) >= 1);
setFeature(cpu, .ccdp, bitField(registers[7], 0) >= 2);
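// ID_AA64MMFR0_EL1 (registers[8])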
setFeature(cpu, .ecv, bitField(registers[8], 60) >= 1);
setFeature(cpu, .fgt, bitField(registers[8], 56) >= 1);
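// ID_AA64MMFR1_EL1 (registers[9])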
setFeature(cpu, .pan, bitField(registers[9], 20) >= 1);
setFeature(cpu, .pan_rwv, bitField(registers[9], 20) >= 2);
setFeature(cpu, .lor, bitField(registers[9], 16) >= 1);
setFeature(cpu, .vh, bitField(registers[9], 8) >= 1);
setFeature(cpu, .contextidr_el2, bitField(registers[9], 8) >= 1);
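// ID_AA64MMFR2_EL1 (registers[10])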
setFeature(cpu, .nv, bitField(registers[10], 24) >= 1);
setFeature(cpu, .ccidx, bitField(registers[10], 20) >= 1);
setFeature(cpu, .uaops, bitField(registers[10], 4) >= 1);
}
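/// Enables instruction-fusion features for cores known to support them.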
fn addInstructionFusions(cpu: *Target.Cpu, info: CoreInfo) void {
switch (info.implementer) {
0x41 => switch (info.part) {
0xd4b, 0xd4c => {
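// Cortex-A78C (0xd4b) and Cortex-X1C (0xd4c)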
setFeature(cpu, .cmp_bcc_fusion, true);
setFeature(cpu, .fuse_aes, true);
},
else => {},
},
else => {},
}
}
};