| use std::fmt; |
| |
| use rustc_span::Symbol; |
| |
| use super::{InlineAsmArch, InlineAsmType, ModifierInfo}; |
| |
| // Types are listed as SGPR_*/VGPR_* in llvm/lib/Target/AMDGPU/SIRegisterInfo.td |
| |
| /// Amdgpu register classes |
| /// |
| /// The number is the size of the register class in bits. |
| #[derive( |
| Copy, |
| Clone, |
| rustc_macros::Encodable, |
| rustc_macros::Decodable, |
| Debug, |
| Eq, |
| PartialEq, |
| PartialOrd, |
| Hash, |
| rustc_macros::StableHash |
| )] |
| #[allow(non_camel_case_types)] |
| pub enum AmdgpuInlineAsmRegClass { |
| Sgpr(u16), |
| Vgpr(u16), |
| } |
| |
| pub(super) fn regclass_map() -> rustc_data_structures::fx::FxHashMap< |
| super::InlineAsmRegClass, |
| rustc_data_structures::fx::FxIndexSet<super::InlineAsmReg>, |
| > { |
| use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; |
| |
| use super::InlineAsmRegClass; |
| let mut map = FxHashMap::default(); |
| |
| // SGPR and VGPR sizes |
| for i in [32, 64, 96, 128, 256, 512] { |
| map.insert( |
| InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Sgpr(i)), |
| FxIndexSet::default(), |
| ); |
| map.insert( |
| InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(i)), |
| FxIndexSet::default(), |
| ); |
| } |
| |
| // VGPR-only sizes |
| for i in [16, 160, 192, 224, 288, 320, 352, 384, 1024] { |
| map.insert( |
| InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(i)), |
| FxIndexSet::default(), |
| ); |
| } |
| |
| map |
| } |
| |
| // See https://llvm.org/docs/AMDGPUOperandSyntax.html |
| impl AmdgpuInlineAsmRegClass { |
| /// Prefix when printed and register constraint in LLVM |
| fn prefix(self) -> &'static str { |
| match self { |
| Self::Sgpr(_) => "s", |
| Self::Vgpr(_) => "v", |
| } |
| } |
| |
| /// Return size of the register class in bits |
| fn bits(self) -> u16 { |
| let (Self::Sgpr(i) | Self::Vgpr(i)) = self; |
| i |
| } |
| |
| /// Return size of the register class in bytes |
| fn bytes(self) -> u16 { |
| self.bits() / 8 |
| } |
| |
| /// Returns the name or `None` if this is not a valid register class |
| fn try_get_name(self) -> Option<rustc_span::Symbol> { |
| let s = match self { |
| Self::Sgpr(32) => rustc_span::sym::sgpr32, |
| Self::Sgpr(64) => rustc_span::sym::sgpr64, |
| Self::Sgpr(96) => rustc_span::sym::sgpr96, |
| Self::Sgpr(128) => rustc_span::sym::sgpr128, |
| Self::Sgpr(256) => rustc_span::sym::sgpr256, |
| Self::Sgpr(512) => rustc_span::sym::sgpr512, |
| Self::Vgpr(16) => rustc_span::sym::vgpr16, |
| Self::Vgpr(32) => rustc_span::sym::vgpr32, |
| Self::Vgpr(64) => rustc_span::sym::vgpr64, |
| Self::Vgpr(96) => rustc_span::sym::vgpr96, |
| Self::Vgpr(128) => rustc_span::sym::vgpr128, |
| Self::Vgpr(160) => rustc_span::sym::vgpr160, |
| Self::Vgpr(192) => rustc_span::sym::vgpr192, |
| Self::Vgpr(224) => rustc_span::sym::vgpr224, |
| Self::Vgpr(256) => rustc_span::sym::vgpr256, |
| Self::Vgpr(288) => rustc_span::sym::vgpr288, |
| Self::Vgpr(320) => rustc_span::sym::vgpr320, |
| Self::Vgpr(352) => rustc_span::sym::vgpr352, |
| Self::Vgpr(384) => rustc_span::sym::vgpr384, |
| Self::Vgpr(512) => rustc_span::sym::vgpr512, |
| Self::Vgpr(1024) => rustc_span::sym::vgpr1024, |
| _ => return None, |
| }; |
| Some(s) |
| } |
| |
| pub fn name(self) -> rustc_span::Symbol { |
| self.try_get_name().expect("Invalid amdgpu register class") |
| } |
| |
| pub fn parse(name: rustc_span::Symbol) -> Result<Self, &'static [rustc_span::Symbol]> { |
| match name { |
| rustc_span::sym::sgpr32 => Ok(Self::Sgpr(32)), |
| rustc_span::sym::sgpr64 => Ok(Self::Sgpr(64)), |
| rustc_span::sym::sgpr96 => Ok(Self::Sgpr(96)), |
| rustc_span::sym::sgpr128 => Ok(Self::Sgpr(128)), |
| rustc_span::sym::sgpr256 => Ok(Self::Sgpr(256)), |
| rustc_span::sym::sgpr512 => Ok(Self::Sgpr(512)), |
| rustc_span::sym::vgpr16 => Ok(Self::Vgpr(16)), |
| rustc_span::sym::vgpr32 => Ok(Self::Vgpr(32)), |
| rustc_span::sym::vgpr64 => Ok(Self::Vgpr(64)), |
| rustc_span::sym::vgpr96 => Ok(Self::Vgpr(96)), |
| rustc_span::sym::vgpr128 => Ok(Self::Vgpr(128)), |
| rustc_span::sym::vgpr160 => Ok(Self::Vgpr(160)), |
| rustc_span::sym::vgpr192 => Ok(Self::Vgpr(192)), |
| rustc_span::sym::vgpr224 => Ok(Self::Vgpr(224)), |
| rustc_span::sym::vgpr256 => Ok(Self::Vgpr(256)), |
| rustc_span::sym::vgpr288 => Ok(Self::Vgpr(288)), |
| rustc_span::sym::vgpr320 => Ok(Self::Vgpr(320)), |
| rustc_span::sym::vgpr352 => Ok(Self::Vgpr(352)), |
| rustc_span::sym::vgpr384 => Ok(Self::Vgpr(384)), |
| rustc_span::sym::vgpr512 => Ok(Self::Vgpr(512)), |
| rustc_span::sym::vgpr1024 => Ok(Self::Vgpr(1024)), |
| _ => Err(&[ |
| rustc_span::sym::sgpr32, |
| rustc_span::sym::sgpr64, |
| rustc_span::sym::sgpr96, |
| rustc_span::sym::sgpr128, |
| rustc_span::sym::sgpr256, |
| rustc_span::sym::sgpr512, |
| rustc_span::sym::vgpr16, |
| rustc_span::sym::vgpr32, |
| rustc_span::sym::vgpr64, |
| rustc_span::sym::vgpr96, |
| rustc_span::sym::vgpr128, |
| rustc_span::sym::vgpr160, |
| rustc_span::sym::vgpr192, |
| rustc_span::sym::vgpr224, |
| rustc_span::sym::vgpr256, |
| rustc_span::sym::vgpr288, |
| rustc_span::sym::vgpr320, |
| rustc_span::sym::vgpr352, |
| rustc_span::sym::vgpr384, |
| rustc_span::sym::vgpr512, |
| rustc_span::sym::vgpr1024, |
| ]), |
| } |
| } |
| |
| pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] { |
| &[] |
| } |
| |
| pub fn suggest_class(self, _arch: InlineAsmArch, ty: InlineAsmType) -> Option<Self> { |
| // 8-bit types and f128 are not supported |
| if matches!( |
| ty, |
| InlineAsmType::I8 |
| | InlineAsmType::VecI8(_) |
| | InlineAsmType::F128 |
| | InlineAsmType::VecF128(_) |
| ) { |
| return None; |
| } |
| |
| Some(Self::Vgpr(ty.size().bits().try_into().ok()?)) |
| } |
| |
| pub fn suggest_modifier( |
| self, |
| _arch: InlineAsmArch, |
| _ty: InlineAsmType, |
| ) -> Option<ModifierInfo> { |
| None |
| } |
| |
| pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<ModifierInfo> { |
| None |
| } |
| |
| pub fn supported_types(self, _arch: InlineAsmArch) -> Vec<(InlineAsmType, Option<Symbol>)> { |
| use InlineAsmType::*; |
| let mut types = Vec::new(); |
| let mut add_types = |ts: &[_]| { |
| for t in ts { |
| types.push((*t, None)) |
| } |
| }; |
| let bits = self.bits() as u64; |
| |
| // Primitive types |
| match bits { |
| 16 => add_types(&[I16, F16]), |
| // Many 16-bit instructions take 32-bit registers, so allow 16-bit values |
| 32 => add_types(&[I16, F16, I32, F32]), |
| 64 => add_types(&[I64, F64]), |
| 128 => add_types(&[I128]), |
| _ => {} |
| } |
| |
| // Vector types |
| if bits == 1024 { |
| add_types(&[VecF32(1024 / 32)]); |
| } else { |
| if bits > 16 && bits.is_power_of_two() { |
| // 32, 64, 128, 256, 512 |
| add_types(&[VecI16(bits / 16), VecF16(bits / 16)]); |
| } |
| if bits > 32 { |
| // 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512 |
| add_types(&[VecI32(bits / 32), VecF32(bits / 32)]); |
| } |
| } |
| |
| // The LLVM backend supports more vector types, but these are rather uncommon |
| // and not systematic, so we only list common types here. |
| |
| types |
| } |
| |
| /// The number of supported registers in this class. |
| /// The returned number is the length, so supported register |
| /// indices are 0 to max_num()-1. |
| fn max_num(self) -> u16 { |
| if self == Self::Vgpr(16) { |
| return 512; |
| } |
| let size = self.bytes(); |
| match self { |
| Self::Sgpr(_) => 106 - (size / 4 - 1), |
| Self::Vgpr(_) => 256 - (size / 4 - 1), |
| } |
| } |
| |
| /// Get register class from prefix and size. |
| fn parse_with_prefix(prefix: char, bits: u16) -> Result<Self, &'static str> { |
| let res = match prefix { |
| 's' => Self::Sgpr(bits), |
| 'v' => Self::Vgpr(bits), |
| _ => return Err("unknown register prefix"), |
| }; |
| |
| // Check that the size is valid by converting it to a symbol |
| if res.try_get_name().is_none() { |
| return Err("invalid register size for this class"); |
| } |
| |
| Ok(res) |
| } |
| } |
| |
| /// Start index of a register. |
| /// |
| /// Together with the register size this gives the range occupied by a register. |
| #[derive( |
| Copy, |
| Clone, |
| rustc_macros::Encodable, |
| rustc_macros::Decodable, |
| Debug, |
| Eq, |
| PartialEq, |
| PartialOrd, |
| Hash, |
| rustc_macros::StableHash |
| )] |
| enum AmdgpuRegStart { |
| /// Low 16-bit of the register at this index |
| Low(u16), |
| /// High 16-bit of the register at this index |
| High(u16), |
| /// One or more 32-bit registers, starting at this index |
| Full(u16), |
| } |
| |
| #[derive( |
| Copy, |
| Clone, |
| rustc_macros::Encodable, |
| rustc_macros::Decodable, |
| Debug, |
| Eq, |
| PartialEq, |
| PartialOrd, |
| Hash, |
| rustc_macros::StableHash |
| )] |
| #[allow(non_camel_case_types)] |
| pub struct AmdgpuInlineAsmReg { |
| class: AmdgpuInlineAsmRegClass, |
| range: AmdgpuRegStart, |
| } |
| |
| impl AmdgpuInlineAsmReg { |
| pub fn name(self) -> String { |
| let c = self.class.prefix(); |
| match self.range { |
| AmdgpuRegStart::Low(n) => format!("{c}{n}.l"), |
| AmdgpuRegStart::High(n) => format!("{c}{n}.h"), |
| AmdgpuRegStart::Full(n) if self.class.bytes() == 4 => format!("{c}{n}"), |
| AmdgpuRegStart::Full(n) => format!("{c}[{n}:{}]", n + self.class.bytes() / 4 - 1), |
| } |
| } |
| |
| pub fn reg_class(self) -> AmdgpuInlineAsmRegClass { |
| self.class |
| } |
| |
| pub fn parse(name: &str) -> Result<Self, &'static str> { |
| if name.is_empty() { |
| return Err("invalid empty register"); |
| } |
| // s or v |
| let prefix = name.chars().next().unwrap(); |
| // Form with range, e.g. s[2:3] |
| let res; |
| if name[1..].starts_with('[') { |
| if !name.ends_with(']') { |
| return Err("invalid register, missing closing bracket"); |
| } |
| if let Some((start, end)) = name[2..name.len() - 1].split_once(':') { |
| let Ok(start) = start.parse() else { |
| return Err("invalid register range start"); |
| }; |
| let Ok(end) = end.parse() else { |
| return Err("invalid register range end"); |
| }; |
| |
| // Check range |
| if start > end { |
| return Err("invalid reversed register range"); |
| } |
| |
| let class = |
| AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, ((end - start) + 1) * 32)?; |
| if end >= class.max_num() { |
| return Err("too large register for this class"); |
| } |
| res = Self { class, range: AmdgpuRegStart::Full(start) }; |
| } else { |
| return Err("invalid register range"); |
| } |
| } else { |
| let parse_num = |core: &str| { |
| let Ok(start) = core.parse() else { |
| return Err("invalid register number"); |
| }; |
| |
| let class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 32)?; |
| if start >= class.max_num() { |
| return Err("too large register for this class"); |
| } |
| |
| Ok(start) |
| }; |
| |
| let name = &name[1..]; |
| let class; |
| let range = if let Some(name) = name.strip_suffix(".l") { |
| class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 16)?; |
| if matches!(class, AmdgpuInlineAsmRegClass::Sgpr(_)) { |
| return Err("invalid 16-bit SGPR register"); |
| } |
| AmdgpuRegStart::Low(parse_num(name)?) |
| } else if let Some(name) = name.strip_suffix(".h") { |
| class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 16)?; |
| if matches!(class, AmdgpuInlineAsmRegClass::Sgpr(_)) { |
| return Err("invalid 16-bit SGPR register"); |
| } |
| AmdgpuRegStart::High(parse_num(name)?) |
| } else { |
| class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 32)?; |
| let start = parse_num(name)?; |
| AmdgpuRegStart::Full(start) |
| }; |
| res = Self { class, range }; |
| } |
| Ok(res) |
| } |
| |
| pub fn validate( |
| self, |
| _arch: super::InlineAsmArch, |
| _reloc_model: crate::spec::RelocModel, |
| _target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>, |
| _target: &crate::spec::Target, |
| _is_clobber: bool, |
| ) -> Result<(), &'static str> { |
| Ok(()) |
| } |
| } |
| |
| pub(super) fn fill_reg_map( |
| _arch: super::InlineAsmArch, |
| _reloc_model: crate::spec::RelocModel, |
| _target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>, |
| _target: &crate::spec::Target, |
| map: &mut rustc_data_structures::fx::FxHashMap< |
| super::InlineAsmRegClass, |
| rustc_data_structures::fx::FxIndexSet<super::InlineAsmReg>, |
| >, |
| ) { |
| use super::{InlineAsmReg, InlineAsmRegClass}; |
| |
| #[allow(rustc::potential_query_instability)] |
| for class in regclass_map().keys() { |
| let InlineAsmRegClass::Amdgpu(class) = *class else { unreachable!("Must be amdgpu class") }; |
| if let Some(set) = map.get_mut(&InlineAsmRegClass::Amdgpu(class)) { |
| if class == AmdgpuInlineAsmRegClass::Vgpr(16) { |
| for i in 0..(class.max_num() / 2) { |
| set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { |
| class, |
| range: AmdgpuRegStart::Low(i), |
| })); |
| set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { |
| class, |
| range: AmdgpuRegStart::High(i), |
| })); |
| } |
| } else { |
| for i in 0..class.max_num() { |
| set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { |
| class, |
| range: AmdgpuRegStart::Full(i), |
| })); |
| } |
| } |
| } |
| } |
| } |
| |
| impl AmdgpuInlineAsmReg { |
| pub fn emit( |
| self, |
| out: &mut dyn fmt::Write, |
| _arch: InlineAsmArch, |
| _modifier: Option<char>, |
| ) -> fmt::Result { |
| out.write_str(&self.name()) |
| } |
| |
| pub fn overlapping_regs(self, mut cb: impl FnMut(AmdgpuInlineAsmReg)) { |
| if matches!(self.class, AmdgpuInlineAsmRegClass::Vgpr(_)) { |
| // Overlapping 16-bit registers (not supported for sgprs) |
| if let AmdgpuRegStart::Full(start) = self.range { |
| for i in start..(start + self.class.bytes().div_ceil(4) - 1) { |
| cb(AmdgpuInlineAsmReg { |
| class: AmdgpuInlineAsmRegClass::Vgpr(16), |
| range: AmdgpuRegStart::Low(i), |
| }); |
| cb(AmdgpuInlineAsmReg { |
| class: AmdgpuInlineAsmRegClass::Vgpr(16), |
| range: AmdgpuRegStart::High(i), |
| }); |
| } |
| } |
| } |
| |
| // Overlapping 32-bit registers, up to size 32 |
| for size in 1..=32 { |
| let (AmdgpuRegStart::Low(start) |
| | AmdgpuRegStart::High(start) |
| | AmdgpuRegStart::Full(start)) = self.range; |
| |
| let size_range = size - 1; |
| for overlap_start in |
| start.saturating_sub(size_range)..=(start + self.class.bytes().div_ceil(4) - 1) |
| { |
| let class = match self.class { |
| AmdgpuInlineAsmRegClass::Sgpr(_) => AmdgpuInlineAsmRegClass::Sgpr(size * 32), |
| AmdgpuInlineAsmRegClass::Vgpr(_) => AmdgpuInlineAsmRegClass::Vgpr(size * 32), |
| }; |
| cb(AmdgpuInlineAsmReg { class, range: AmdgpuRegStart::Full(overlap_start) }); |
| } |
| } |
| } |
| } |