blob: b9dfe754485ab34e86a0c78840e708888a5c8a19 [file] [log] [blame]
//! Parse Windows prefixes, for both Windows and Cygwin.
use super::{is_sep_byte, is_verbatim_sep};
use crate::ffi::OsStr;
use crate::path::Prefix;
struct PrefixParser<'a, const LEN: usize> {
path: &'a OsStr,
prefix: [u8; LEN],
}
impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
#[inline]
fn get_prefix(path: &OsStr) -> [u8; LEN] {
let mut prefix = [0; LEN];
// SAFETY: Only ASCII characters are modified.
for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
prefix[i] = if ch == b'/' { b'\\' } else { ch };
}
prefix
}
fn new(path: &'a OsStr) -> Self {
Self { path, prefix: Self::get_prefix(path) }
}
fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
PrefixParserSlice {
path: self.path,
prefix: &self.prefix[..LEN.min(self.path.len())],
index: 0,
}
}
}
struct PrefixParserSlice<'a, 'b> {
path: &'a OsStr,
prefix: &'b [u8],
index: usize,
}
impl<'a> PrefixParserSlice<'a, '_> {
fn strip_prefix(&self, prefix: &str) -> Option<Self> {
self.prefix[self.index..]
.starts_with(prefix.as_bytes())
.then_some(Self { index: self.index + prefix.len(), ..*self })
}
fn prefix_bytes(&self) -> &'a [u8] {
&self.path.as_encoded_bytes()[..self.index]
}
fn finish(self) -> &'a OsStr {
// SAFETY: The unsafety here stems from converting between &OsStr and
// &[u8] and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced only
// from ASCII-bounded slices of existing &OsStr values.
unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) }
}
}
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
let parser = PrefixParser::<8>::new(path);
let parser = parser.as_slice();
if let Some(parser) = parser.strip_prefix(r"\\") {
// \\
// It's a POSIX path.
if cfg!(target_os = "cygwin") && !path.as_encoded_bytes().iter().any(|&x| x == b'\\') {
return None;
}
// The meaning of verbatim paths can change when they use a different
// separator.
if let Some(parser) = parser.strip_prefix(r"?\")
// Cygwin allows `/` in verbatim paths.
&& (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/'))
{
// \\?\
if let Some(parser) = parser.strip_prefix(r"UNC\") {
// \\?\UNC\server\share
let path = parser.finish();
let (server, path) = parse_next_component(path, true);
let (share, _) = parse_next_component(path, true);
Some(VerbatimUNC(server, share))
} else {
let path = parser.finish();
// in verbatim paths only recognize an exact drive prefix
if let Some(drive) = parse_drive_exact(path) {
// \\?\C:
Some(VerbatimDisk(drive))
} else {
// \\?\prefix
let (prefix, _) = parse_next_component(path, true);
Some(Verbatim(prefix))
}
}
} else if let Some(parser) = parser.strip_prefix(r".\") {
// \\.\COM42
let path = parser.finish();
let (prefix, _) = parse_next_component(path, false);
Some(DeviceNS(prefix))
} else {
let path = parser.finish();
let (server, path) = parse_next_component(path, false);
let (share, _) = parse_next_component(path, false);
if !server.is_empty() && !share.is_empty() {
// \\server\share
Some(UNC(server, share))
} else {
// no valid prefix beginning with "\\" recognized
None
}
}
} else {
// If it has a drive like `C:` then it's a disk.
// Otherwise there is no prefix.
Some(Disk(parse_drive(path)?))
}
}
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
fn parse_drive(path: &OsStr) -> Option<u8> {
// In most DOS systems, it is not possible to have more than 26 drive letters.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
fn is_valid_drive_letter(drive: &u8) -> bool {
drive.is_ascii_alphabetic()
}
match path.as_encoded_bytes() {
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
_ => None,
}
}
// Parses a drive prefix exactly, e.g. "C:"
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
// only parse two bytes: the drive letter and the drive separator
if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
parse_drive(path)
} else {
None
}
}
// Parse the next path component.
//
// Returns the next component and the rest of the path excluding the component and separator.
// Does not recognize `/` as a separator character on Windows if `verbatim` is true.
pub(crate) fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
Some(separator_start) => {
let separator_end = separator_start + 1;
let component = &path.as_encoded_bytes()[..separator_start];
// Panic safe
// The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
let path = &path.as_encoded_bytes()[separator_end..];
// SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
// is encoded in a single byte, therefore `bytes[separator_start]` and
// `bytes[separator_end]` must be code point boundaries and thus
// `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
unsafe {
(
OsStr::from_encoded_bytes_unchecked(component),
OsStr::from_encoded_bytes_unchecked(path),
)
}
}
None => (path, OsStr::new("")),
}
}