Patchwork D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"

login
register
mail settings
Submitter phabricator
Date Jan. 16, 2020, 10:21 a.m.
Message ID <ac76e4bc1811b3dfeccb84312c116dd4@localhost.localdomain>
Download mbox | patch
Permalink /patch/44422/
State Not Applicable
Headers show

Comments

phabricator - Jan. 16, 2020, 10:21 a.m.
Alphare updated this revision to Diff 19350.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7864?vs=19316&id=19350

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7864/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7864

AFFECTED FILES
  rust/hg-core/src/utils/hg_path.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/src/utils/hg_path.rs b/rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/utils/hg_path.rs
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -138,6 +138,79 @@ 
             None
         }
     }
+
+    #[cfg(windows)]
+    /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
+    ///
+    /// Split a pathname into drive/UNC sharepoint and relative path specifiers.
+    /// Returns a 2-tuple (drive_or_unc, path); either part may be empty.
+    ///
+    /// If you assign
+    ///  result = split_drive(p)
+    /// It is always true that:
+    ///  result[0] + result[1] == p
+    ///
+    /// If the path contained a drive letter, drive_or_unc will contain everything
+    /// up to and including the colon.
+    /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
+    ///
+    /// If the path contained a UNC path, the drive_or_unc will contain the host
+    /// name and share up to but not including the fourth directory separator
+    /// character.
+    /// e.g. split_drive("//host/computer/dir") returns ("//host/computer", "/dir")
+    ///
+    /// Paths cannot contain both a drive letter and a UNC path.
+    pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
+        let bytes = self.as_bytes();
+        let is_sep = |b| std::path::is_separator(b as char);
+
+        if self.len() < 2 {
+            (HgPath::new(b""), &self)
+        } else if is_sep(bytes[0])
+            && is_sep(bytes[1])
+            && (self.len() == 2 || !is_sep(bytes[2]))
+        {
+            // Is a UNC path:
+            // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
+            // \\machine\mountpoint\directory\etc\...
+            //           directory ^^^^^^^^^^^^^^^
+
+            let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
+            let mountpoint_start_index = if let Some(i) = machine_end_index {
+                i + 2
+            } else {
+                return (HgPath::new(b""), &self);
+            };
+
+            match bytes[mountpoint_start_index + 1..]
+                .iter()
+                .position(|b| is_sep(*b))
+            {
+                // A UNC path can't have two slashes in a row
+                // (after the initial two)
+                Some(0) => (HgPath::new(b""), &self),
+                Some(i) => {
+                    let (a, b) =
+                        bytes.split_at(mountpoint_start_index + 1 + i);
+                    (HgPath::new(a), HgPath::new(b))
+                }
+                None => (&self, HgPath::new(b"")),
+            }
+        } else if bytes[1] == b':' {
+            // Drive path c:\directory
+            let (a, b) = bytes.split_at(2);
+            (HgPath::new(a), HgPath::new(b))
+        } else {
+            (HgPath::new(b""), &self)
+        }
+    }
+
+    #[cfg(unix)]
+    /// Split a pathname into drive and path. On Posix, drive is always empty.
+    pub fn split_drive(&self) -> (&HgPath, &HgPath) {
+        (HgPath::new(b""), &self)
+    }
+
     /// Checks for errors in the path, short-circuiting at the first one.
     /// This generates fine-grained errors useful for debugging.
     /// To simply check if the path is valid during tests, use `is_valid`.
@@ -473,4 +546,101 @@ 
         let base = HgPath::new(b"ends/");
         assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
     }
+
+    #[test]
+    #[cfg(unix)]
+    fn test_split_drive() {
+        // Taken from the Python stdlib's tests
+        assert_eq!(
+            HgPath::new(br"/foo/bar").split_drive(),
+            (HgPath::new(b""), HgPath::new(br"/foo/bar"))
+        );
+        assert_eq!(
+            HgPath::new(br"foo:bar").split_drive(),
+            (HgPath::new(b""), HgPath::new(br"foo:bar"))
+        );
+        assert_eq!(
+            HgPath::new(br":foo:bar").split_drive(),
+            (HgPath::new(b""), HgPath::new(br":foo:bar"))
+        );
+        // Also try NT paths; should not split them
+        assert_eq!(
+            HgPath::new(br"c:\foo\bar").split_drive(),
+            (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
+        );
+        assert_eq!(
+            HgPath::new(b"c:/foo/bar").split_drive(),
+            (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
+        );
+        assert_eq!(
+            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
+            (
+                HgPath::new(b""),
+                HgPath::new(br"\\conky\mountpoint\foo\bar")
+            )
+        );
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_split_drive() {
+        assert_eq!(
+            HgPath::new(br"c:\foo\bar").split_drive(),
+            (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
+        );
+        assert_eq!(
+            HgPath::new(b"c:/foo/bar").split_drive(),
+            (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
+        );
+        assert_eq!(
+            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
+            (
+                HgPath::new(br"\\conky\mountpoint"),
+                HgPath::new(br"\foo\bar")
+            )
+        );
+        assert_eq!(
+            HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
+            (
+                HgPath::new(br"//conky/mountpoint"),
+                HgPath::new(br"/foo/bar")
+            )
+        );
+        assert_eq!(
+            HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
+            (
+                HgPath::new(br""),
+                HgPath::new(br"\\\conky\mountpoint\foo\bar")
+            )
+        );
+        assert_eq!(
+            HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
+            (
+                HgPath::new(br""),
+                HgPath::new(br"///conky/mountpoint/foo/bar")
+            )
+        );
+        assert_eq!(
+            HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
+            (
+                HgPath::new(br""),
+                HgPath::new(br"\\conky\\mountpoint\foo\bar")
+            )
+        );
+        assert_eq!(
+            HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
+            (
+                HgPath::new(br""),
+                HgPath::new(br"//conky//mountpoint/foo/bar")
+            )
+        );
+        // UNC part containing U+0130
+        assert_eq!(
+            HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
+            (
+                HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
+                HgPath::new(br"/foo/bar")
+            )
+        );
+    }
 }