Patchwork D9596: rust: introduce Repo and Vfs types for filesystem abstraction

login
register
mail settings
Submitter phabricator
Date Dec. 14, 2020, 4:28 p.m.
Message ID <differential-rev-PHID-DREV-pt2fejot45wortzox4xj-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/47894/
State Superseded
Headers show

Comments

phabricator - Dec. 14, 2020, 4:28 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This is similar to the corresponding Python classes.
  
  Repo represents a repository and knows the path to the `.hg` directory,
  the `store` directory, and the working directory.
  Separating these will enable supporting the share extension.
  
  A Vfs is created from a Repo for one of these three directories.
  It has filesystem access APIs that take a relative std::path::Path
  as a parameter.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9596

AFFECTED FILES
  rust/hg-core/src/lib.rs
  rust/hg-core/src/operations/cat.rs
  rust/hg-core/src/operations/debugdata.rs
  rust/hg-core/src/operations/list_tracked_files.rs
  rust/hg-core/src/repo.rs
  rust/hg-core/src/requirements.rs
  rust/hg-core/src/revlog/changelog.rs
  rust/hg-core/src/revlog/manifest.rs
  rust/hg-core/src/revlog/revlog.rs
  rust/rhg/src/commands/cat.rs
  rust/rhg/src/commands/debugdata.rs
  rust/rhg/src/commands/debugrequirements.rs
  rust/rhg/src/commands/files.rs
  rust/rhg/src/commands/root.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/rhg/src/commands/root.rs b/rust/rhg/src/commands/root.rs
--- a/rust/rhg/src/commands/root.rs
+++ b/rust/rhg/src/commands/root.rs
@@ -2,7 +2,7 @@ 
 use crate::error::CommandError;
 use crate::ui::Ui;
 use format_bytes::format_bytes;
-use hg::operations::find_root;
+use hg::repo::Repo;
 use hg::utils::files::get_bytes_from_path;
 
 pub const HELP_TEXT: &str = "
@@ -21,12 +21,9 @@ 
 
 impl Command for RootCommand {
     fn run(&self, ui: &Ui) -> Result<(), CommandError> {
-        let path_buf = find_root()?;
-
-        let bytes = get_bytes_from_path(path_buf);
-
+        let repo = Repo::find()?;
+        let bytes = get_bytes_from_path(repo.working_directory_path());
         ui.write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
-
         Ok(())
     }
 }
diff --git a/rust/rhg/src/commands/files.rs b/rust/rhg/src/commands/files.rs
--- a/rust/rhg/src/commands/files.rs
+++ b/rust/rhg/src/commands/files.rs
@@ -2,7 +2,6 @@ 
 use crate::error::{CommandError, CommandErrorKind};
 use crate::ui::utf8_to_local;
 use crate::ui::Ui;
-use hg::operations::find_root;
 use hg::operations::{
     list_rev_tracked_files, ListRevTrackedFilesError,
     ListRevTrackedFilesErrorKind,
@@ -10,10 +9,9 @@ 
 use hg::operations::{
     Dirstate, ListDirstateTrackedFilesError, ListDirstateTrackedFilesErrorKind,
 };
-use hg::requirements;
+use hg::repo::Repo;
 use hg::utils::files::{get_bytes_from_path, relativize_path};
 use hg::utils::hg_path::{HgPath, HgPathBuf};
-use std::path::Path;
 
 pub const HELP_TEXT: &str = "
 List tracked files.
@@ -33,13 +31,13 @@ 
     fn display_files(
         &self,
         ui: &Ui,
-        root: &Path,
+        repo: &Repo,
         files: impl IntoIterator<Item = &'a HgPath>,
     ) -> Result<(), CommandError> {
         let cwd = std::env::current_dir()
             .or_else(|e| Err(CommandErrorKind::CurrentDirNotFound(e)))?;
         let rooted_cwd = cwd
-            .strip_prefix(root)
+            .strip_prefix(repo.working_directory_path())
             .expect("cwd was already checked within the repository");
         let rooted_cwd = HgPathBuf::from(get_bytes_from_path(rooted_cwd));
 
@@ -56,16 +54,16 @@ 
 
 impl<'a> Command for FilesCommand<'a> {
     fn run(&self, ui: &Ui) -> Result<(), CommandError> {
-        let root = find_root()?;
-        requirements::check(&root)?;
+        let repo = Repo::find()?;
+        repo.check_requirements()?;
         if let Some(rev) = self.rev {
-            let files = list_rev_tracked_files(&root, rev)
+            let files = list_rev_tracked_files(&repo, rev)
                 .map_err(|e| map_rev_error(rev, e))?;
-            self.display_files(ui, &root, files.iter())
+            self.display_files(ui, &repo, files.iter())
         } else {
-            let distate = Dirstate::new(&root).map_err(map_dirstate_error)?;
+            let distate = Dirstate::new(&repo).map_err(map_dirstate_error)?;
             let files = distate.tracked_files().map_err(map_dirstate_error)?;
-            self.display_files(ui, &root, files)
+            self.display_files(ui, &repo, files)
         }
     }
 }
diff --git a/rust/rhg/src/commands/debugrequirements.rs b/rust/rhg/src/commands/debugrequirements.rs
--- a/rust/rhg/src/commands/debugrequirements.rs
+++ b/rust/rhg/src/commands/debugrequirements.rs
@@ -1,7 +1,7 @@ 
 use crate::commands::Command;
 use crate::error::CommandError;
 use crate::ui::Ui;
-use hg::operations::find_root;
+use hg::repo::Repo;
 use hg::requirements;
 
 pub const HELP_TEXT: &str = "
@@ -18,9 +18,9 @@ 
 
 impl Command for DebugRequirementsCommand {
     fn run(&self, ui: &Ui) -> Result<(), CommandError> {
-        let root = find_root()?;
+        let repo = Repo::find()?;
         let mut output = String::new();
-        for req in requirements::load(&root)? {
+        for req in requirements::load(&repo)? {
             output.push_str(&req);
             output.push('\n');
         }
diff --git a/rust/rhg/src/commands/debugdata.rs b/rust/rhg/src/commands/debugdata.rs
--- a/rust/rhg/src/commands/debugdata.rs
+++ b/rust/rhg/src/commands/debugdata.rs
@@ -2,10 +2,10 @@ 
 use crate::error::{CommandError, CommandErrorKind};
 use crate::ui::utf8_to_local;
 use crate::ui::Ui;
-use hg::operations::find_root;
 use hg::operations::{
     debug_data, DebugDataError, DebugDataErrorKind, DebugDataKind,
 };
+use hg::repo::Repo;
 use micro_timer::timed;
 
 pub const HELP_TEXT: &str = "
@@ -26,8 +26,8 @@ 
 impl<'a> Command for DebugDataCommand<'a> {
     #[timed]
     fn run(&self, ui: &Ui) -> Result<(), CommandError> {
-        let root = find_root()?;
-        let data = debug_data(&root, self.rev, self.kind)
+        let repo = Repo::find()?;
+        let data = debug_data(&repo, self.rev, self.kind)
             .map_err(|e| to_command_error(self.rev, e))?;
 
         let mut stdout = ui.stdout_buffer();
diff --git a/rust/rhg/src/commands/cat.rs b/rust/rhg/src/commands/cat.rs
--- a/rust/rhg/src/commands/cat.rs
+++ b/rust/rhg/src/commands/cat.rs
@@ -2,9 +2,8 @@ 
 use crate::error::{CommandError, CommandErrorKind};
 use crate::ui::utf8_to_local;
 use crate::ui::Ui;
-use hg::operations::find_root;
 use hg::operations::{cat, CatRevError, CatRevErrorKind};
-use hg::requirements;
+use hg::repo::Repo;
 use hg::utils::hg_path::HgPathBuf;
 use micro_timer::timed;
 use std::convert::TryFrom;
@@ -32,8 +31,8 @@ 
 impl<'a> Command for CatCommand<'a> {
     #[timed]
     fn run(&self, ui: &Ui) -> Result<(), CommandError> {
-        let root = find_root()?;
-        requirements::check(&root)?;
+        let repo = Repo::find()?;
+        repo.check_requirements()?;
         let cwd = std::env::current_dir()
             .or_else(|e| Err(CommandErrorKind::CurrentDirNotFound(e)))?;
 
@@ -41,7 +40,7 @@ 
         for file in self.files.iter() {
             let normalized = cwd.join(&file);
             let stripped = normalized
-                .strip_prefix(&root)
+                .strip_prefix(&repo.working_directory_path())
                 .or(Err(CommandErrorKind::Abort(None)))?;
             let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
                 .or(Err(CommandErrorKind::Abort(None)))?;
@@ -50,7 +49,7 @@ 
 
         match self.rev {
             Some(rev) => {
-                let data = cat(&root, rev, &files)
+                let data = cat(&repo, rev, &files)
                     .map_err(|e| map_rev_error(rev, e))?;
                 self.display(ui, &data)
             }
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -1,5 +1,4 @@ 
 use std::borrow::Cow;
-use std::fs::File;
 use std::io::Read;
 use std::ops::Deref;
 use std::path::Path;
@@ -8,13 +7,13 @@ 
 use crypto::digest::Digest;
 use crypto::sha1::Sha1;
 use flate2::read::ZlibDecoder;
-use memmap::{Mmap, MmapOptions};
 use micro_timer::timed;
 use zstd;
 
 use super::index::Index;
 use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
 use super::patch;
+use crate::repo::Repo;
 use crate::revlog::Revision;
 
 pub enum RevlogError {
@@ -27,12 +26,6 @@ 
     UnknowDataFormat(u8),
 }
 
-fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
-    let file = File::open(path)?;
-    let mmap = unsafe { MmapOptions::new().map(&file) }?;
-    Ok(mmap)
-}
-
 /// Read only implementation of revlog.
 pub struct Revlog {
     /// When index and data are not interleaved: bytes of the revlog index.
@@ -50,11 +43,15 @@ 
     /// interleaved.
     #[timed]
     pub fn open(
-        index_path: &Path,
+        repo: &Repo,
+        index_path: impl AsRef<Path>,
         data_path: Option<&Path>,
     ) -> Result<Self, RevlogError> {
-        let index_mmap =
-            mmap_open(&index_path).map_err(RevlogError::IoError)?;
+        let index_path = index_path.as_ref();
+        let index_mmap = repo
+            .store_vfs()
+            .mmap_open(&index_path)
+            .map_err(RevlogError::IoError)?;
 
         let version = get_version(&index_mmap);
         if version != 1 {
@@ -72,8 +69,10 @@ 
                 None
             } else {
                 let data_path = data_path.unwrap_or(&default_data_path);
-                let data_mmap =
-                    mmap_open(data_path).map_err(RevlogError::IoError)?;
+                let data_mmap = repo
+                    .store_vfs()
+                    .mmap_open(data_path)
+                    .map_err(RevlogError::IoError)?;
                 Some(Box::new(data_mmap))
             };
 
diff --git a/rust/hg-core/src/revlog/manifest.rs b/rust/hg-core/src/revlog/manifest.rs
--- a/rust/hg-core/src/revlog/manifest.rs
+++ b/rust/hg-core/src/revlog/manifest.rs
@@ -1,8 +1,8 @@ 
+use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefixRef;
 use crate::revlog::Revision;
 use crate::utils::hg_path::HgPath;
-use std::path::Path;
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifest {
@@ -12,9 +12,8 @@ 
 
 impl Manifest {
     /// Open the `manifest` of a repository given by its root.
-    pub fn open(root: &Path) -> Result<Self, RevlogError> {
-        let index_file = root.join(".hg/store/00manifest.i");
-        let revlog = Revlog::open(&index_file, None)?;
+    pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
+        let revlog = Revlog::open(repo, "00manifest.i", None)?;
         Ok(Self { revlog })
     }
 
diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -1,7 +1,7 @@ 
+use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefixRef;
 use crate::revlog::Revision;
-use std::path::Path;
 
 /// A specialized `Revlog` to work with `changelog` data format.
 pub struct Changelog {
@@ -11,9 +11,8 @@ 
 
 impl Changelog {
     /// Open the `changelog` of a repository given by its root.
-    pub fn open(root: &Path) -> Result<Self, RevlogError> {
-        let index_file = root.join(".hg/store/00changelog.i");
-        let revlog = Revlog::open(&index_file, None)?;
+    pub fn open(repo: &Repo) -> Result<Self, RevlogError> {
+        let revlog = Revlog::open(repo, "00changelog.i", None)?;
         Ok(Self { revlog })
     }
 
diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs
--- a/rust/hg-core/src/requirements.rs
+++ b/rust/hg-core/src/requirements.rs
@@ -1,5 +1,5 @@ 
+use crate::repo::Repo;
 use std::io;
-use std::path::Path;
 
 #[derive(Debug)]
 pub enum RequirementsError {
@@ -33,8 +33,8 @@ 
         .collect()
 }
 
-pub fn load(repo_root: &Path) -> Result<Vec<String>, RequirementsError> {
-    match std::fs::read(repo_root.join(".hg").join("requires")) {
+pub fn load(repo: &Repo) -> Result<Vec<String>, RequirementsError> {
+    match repo.hg_vfs().read("requires") {
         Ok(bytes) => parse(&bytes).map_err(|()| RequirementsError::Corrupted),
 
         // Treat a missing file the same as an empty file.
@@ -52,8 +52,8 @@ 
     }
 }
 
-pub fn check(repo_root: &Path) -> Result<(), RequirementsError> {
-    for feature in load(repo_root)? {
+pub fn check(repo: &Repo) -> Result<(), RequirementsError> {
+    for feature in load(repo)? {
         if !SUPPORTED.contains(&&*feature) {
             return Err(RequirementsError::Unsupported { feature });
         }
diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/repo.rs
@@ -0,0 +1,91 @@ 
+use crate::operations::{find_root, FindRootError};
+use crate::requirements;
+use memmap::{Mmap, MmapOptions};
+use std::path::{Path, PathBuf};
+
+/// A repository on disk
+pub struct Repo {
+    working_directory: PathBuf,
+    dot_hg: PathBuf,
+    store: PathBuf,
+}
+
+/// Filesystem access abstraction for the contents of a given "base" directory
+#[derive(Clone, Copy)]
+pub(crate) struct Vfs<'a> {
+    base: &'a Path,
+}
+
+impl Repo {
+    /// Builds a `Repo` rooted at the given working directory path, without
+    /// checking that it actually contains a `.hg` sub-directory.
+    pub fn for_path(root: impl Into<PathBuf>) -> Self {
+        let working_directory = root.into();
+        let dot_hg = working_directory.join(".hg");
+        Self {
+            store: dot_hg.join("store"),
+            dot_hg,
+            working_directory,
+        }
+    }
+
+    pub fn find() -> Result<Self, FindRootError> {
+        find_root().map(Self::for_path)
+    }
+
+    pub fn check_requirements(
+        &self,
+    ) -> Result<(), requirements::RequirementsError> {
+        requirements::check(self)
+    }
+
+    pub fn working_directory_path(&self) -> &Path {
+        &self.working_directory
+    }
+
+    /// For accessing repository files (in `.hg`), except for the store
+    /// (`.hg/store`).
+    pub(crate) fn hg_vfs(&self) -> Vfs<'_> {
+        Vfs { base: &self.dot_hg }
+    }
+
+    /// For accessing repository store files (in `.hg/store`)
+    pub(crate) fn store_vfs(&self) -> Vfs<'_> {
+        Vfs { base: &self.store }
+    }
+
+    /// For accessing the working copy
+
+    // The underscore prefix silences the "never used" warning. Remove before using.
+    pub(crate) fn _working_directory_vfs(&self) -> Vfs<'_> {
+        Vfs {
+            base: &self.working_directory,
+        }
+    }
+}
+
+impl Vfs<'_> {
+    pub(crate) fn read(
+        &self,
+        relative_path: impl AsRef<Path>,
+    ) -> std::io::Result<Vec<u8>> {
+        std::fs::read(self.base.join(relative_path))
+    }
+
+    pub(crate) fn open(
+        &self,
+        relative_path: impl AsRef<Path>,
+    ) -> std::io::Result<std::fs::File> {
+        std::fs::File::open(self.base.join(relative_path))
+    }
+
+    pub(crate) fn mmap_open(
+        &self,
+        relative_path: impl AsRef<Path>,
+    ) -> std::io::Result<Mmap> {
+        let file = self.open(relative_path)?;
+        // TODO: what are the safety requirements here?
+        let mmap = unsafe { MmapOptions::new().map(&file) }?;
+        Ok(mmap)
+    }
+}
diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs
--- a/rust/hg-core/src/operations/list_tracked_files.rs
+++ b/rust/hg-core/src/operations/list_tracked_files.rs
@@ -6,6 +6,7 @@ 
 // GNU General Public License version 2 or any later version.
 
 use crate::dirstate::parsers::parse_dirstate;
+use crate::repo::Repo;
 use crate::revlog::changelog::Changelog;
 use crate::revlog::manifest::{Manifest, ManifestEntry};
 use crate::revlog::node::{Node, NodePrefix};
@@ -15,8 +16,6 @@ 
 use crate::{DirstateParseError, EntryState};
 use rayon::prelude::*;
 use std::convert::From;
-use std::fs;
-use std::path::Path;
 
 /// Kind of error encountered by `ListDirstateTrackedFiles`
 #[derive(Debug)]
@@ -57,9 +56,8 @@ 
 }
 
 impl Dirstate {
-    pub fn new(root: &Path) -> Result<Self, ListDirstateTrackedFilesError> {
-        let dirstate = root.join(".hg/dirstate");
-        let content = fs::read(&dirstate)?;
+    pub fn new(repo: &Repo) -> Result<Self, ListDirstateTrackedFilesError> {
+        let content = repo.hg_vfs().read("dirstate")?;
         Ok(Self { content })
     }
 
@@ -138,11 +136,11 @@ 
 
 /// List files under Mercurial control at a given revision.
 pub fn list_rev_tracked_files(
-    root: &Path,
+    repo: &Repo,
     rev: &str,
 ) -> Result<FilesForRev, ListRevTrackedFilesError> {
-    let changelog = Changelog::open(root)?;
-    let manifest = Manifest::open(root)?;
+    let changelog = Changelog::open(repo)?;
+    let manifest = Manifest::open(repo)?;
 
     let changelog_entry = match rev.parse::<Revision>() {
         Ok(rev) => changelog.get_rev(rev)?,
diff --git a/rust/hg-core/src/operations/debugdata.rs b/rust/hg-core/src/operations/debugdata.rs
--- a/rust/hg-core/src/operations/debugdata.rs
+++ b/rust/hg-core/src/operations/debugdata.rs
@@ -5,8 +5,7 @@ 
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
-use std::path::Path;
-
+use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
@@ -79,15 +78,15 @@ 
 
 /// Dump the contents data of a revision.
 pub fn debug_data(
-    root: &Path,
+    repo: &Repo,
     rev: &str,
     kind: DebugDataKind,
 ) -> Result<Vec<u8>, DebugDataError> {
     let index_file = match kind {
-        DebugDataKind::Changelog => root.join(".hg/store/00changelog.i"),
-        DebugDataKind::Manifest => root.join(".hg/store/00manifest.i"),
+        DebugDataKind::Changelog => "00changelog.i",
+        DebugDataKind::Manifest => "00manifest.i",
     };
-    let revlog = Revlog::open(&index_file, None)?;
+    let revlog = Revlog::open(repo, index_file, None)?;
 
     let data = match rev.parse::<Revision>() {
         Ok(rev) => revlog.get_rev_data(rev)?,
diff --git a/rust/hg-core/src/operations/cat.rs b/rust/hg-core/src/operations/cat.rs
--- a/rust/hg-core/src/operations/cat.rs
+++ b/rust/hg-core/src/operations/cat.rs
@@ -6,8 +6,9 @@ 
 // GNU General Public License version 2 or any later version.
 
 use std::convert::From;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 
+use crate::repo::Repo;
 use crate::revlog::changelog::Changelog;
 use crate::revlog::manifest::Manifest;
 use crate::revlog::path_encode::path_encode;
@@ -75,12 +76,12 @@ 
 /// * `rev`: The revision to cat the files from.
 /// * `files`: The files to output.
 pub fn cat(
-    root: &Path,
+    repo: &Repo,
     rev: &str,
     files: &[HgPathBuf],
 ) -> Result<Vec<u8>, CatRevError> {
-    let changelog = Changelog::open(&root)?;
-    let manifest = Manifest::open(&root)?;
+    let changelog = Changelog::open(repo)?;
+    let manifest = Manifest::open(repo)?;
 
     let changelog_entry = match rev.parse::<Revision>() {
         Ok(rev) => changelog.get_rev(rev)?,
@@ -99,10 +100,11 @@ 
     for (manifest_file, node_bytes) in manifest_entry.files_with_nodes() {
         for cat_file in files.iter() {
             if cat_file.as_bytes() == manifest_file.as_bytes() {
-                let index_path = store_path(root, manifest_file, b".i");
-                let data_path = store_path(root, manifest_file, b".d");
+                let index_path = store_path(manifest_file, b".i");
+                let data_path = store_path(manifest_file, b".d");
 
-                let file_log = Revlog::open(&index_path, Some(&data_path))?;
+                let file_log =
+                    Revlog::open(repo, &index_path, Some(&data_path))?;
                 let file_node = Node::from_hex(node_bytes)
                     .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
                 let file_rev = file_log.get_node_rev((&file_node).into())?;
@@ -126,14 +128,8 @@ 
     Ok(bytes)
 }
 
-fn store_path(root: &Path, hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
+fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
     let encoded_bytes =
         path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
-    [
-        root,
-        &Path::new(".hg/store/"),
-        get_path_from_bytes(&encoded_bytes),
-    ]
-    .iter()
-    .collect()
+    get_path_from_bytes(&encoded_bytes).into()
 }
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -23,6 +23,7 @@ 
 pub mod copy_tracing;
 mod filepatterns;
 pub mod matchers;
+pub mod repo;
 pub mod revlog;
 pub use revlog::*;
 pub mod operations;