Patchwork D9520: rhg: use persistent nodemap when available

login
register
mail settings
Submitter phabricator
Date Dec. 4, 2020, 5:28 p.m.
Message ID <differential-rev-PHID-DREV-fpeajtbur534mdvbfdoc-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/47803/
State Superseded
Headers show

Comments

Patch

diff --git a/tests/test-rhg.t b/tests/test-rhg.t
--- a/tests/test-rhg.t
+++ b/tests/test-rhg.t
@@ -197,4 +197,6 @@ 
   .hg/store/00changelog.i
   .hg/store/00changelog.n
   $ rhg files -r c3ae8dec9fad
-  [252]
+  of
+  $ rhg cat -r c3ae8dec9fad of
+  r5000
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -14,6 +14,9 @@ 
 
 use super::index::Index;
 use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
+use super::nodemap;
+use super::nodemap::NodeMap;
+use super::nodemap_docket::NodeMapDocket;
 use super::patch;
 use crate::revlog::Revision;
 
@@ -27,7 +30,7 @@ 
     UnknowDataFormat(u8),
 }
 
-fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
+pub(super) fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> {
     let file = File::open(path)?;
     let mmap = unsafe { MmapOptions::new().map(&file) }?;
     Ok(mmap)
@@ -41,6 +44,7 @@ 
     index: Index,
     /// When index and data are not interleaved: bytes of the revlog data
     data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
+    nodemap: Option<nodemap::NodeTree>,
 }
 
 impl Revlog {
@@ -77,7 +81,20 @@ 
                 Some(Box::new(data_mmap))
             };
 
-        Ok(Revlog { index, data_bytes })
+        let nodemap = NodeMapDocket::read_from_file(index_path)?.map(
+            |(docket, data)| {
+                nodemap::NodeTree::load_bytes(
+                    Box::new(data),
+                    docket.data_length,
+                )
+            },
+        );
+
+        Ok(Revlog {
+            index,
+            data_bytes,
+            nodemap,
+        })
     }
 
     /// Return number of entries of the `Revlog`.
@@ -96,6 +113,13 @@ 
         &self,
         node: NodePrefixRef,
     ) -> Result<Revision, RevlogError> {
+        if let Some(nodemap) = &self.nodemap {
+            return nodemap
+                .find_bin(&self.index, node)
+                // TODO: propagate details of this error:
+                .map_err(|_| RevlogError::Corrupted)?
+                .ok_or(RevlogError::InvalidRevision);
+        }
         // This is brute force. But it is fast enough for now.
         // Optimization will come later.
         let mut found_by_prefix = None;
diff --git a/rust/hg-core/src/revlog/nodemap_docket.rs b/rust/hg-core/src/revlog/nodemap_docket.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/revlog/nodemap_docket.rs
@@ -0,0 +1,112 @@ 
+use memmap::Mmap;
+use std::convert::TryInto;
+use std::path::{Path, PathBuf};
+
+use super::revlog::{mmap_open, RevlogError};
+
+const ONDISK_VERSION: u8 = 1;
+
+pub(super) struct NodeMapDocket {
+    pub data_length: usize,
+    // TODO: keep more of the data from `parse()` here when we need it
+}
+
+impl NodeMapDocket {
+    pub fn read_from_file(
+        index_path: &Path,
+    ) -> Result<Option<(Self, Mmap)>, RevlogError> {
+        let docket_path = index_path.with_extension("n");
+        let docket_bytes = match std::fs::read(&docket_path) {
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                return Ok(None)
+            }
+            Err(e) => return Err(RevlogError::IoError(e)),
+            Ok(bytes) => bytes,
+        };
+
+        let mut input = if let Some((&ONDISK_VERSION, rest)) =
+            docket_bytes.split_first()
+        {
+            rest
+        } else {
+            return Ok(None);
+        };
+        let input = &mut input;
+
+        let uid_size = read_u8(input)? as usize;
+        let _tip_rev = read_be_u64(input)?;
+        // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
+        // systems?
+        let data_length = read_be_u64(input)? as usize;
+        let _data_unused = read_be_u64(input)?;
+        let tip_node_size = read_be_u64(input)? as usize;
+        let uid = read_bytes(input, uid_size)?;
+        let _tip_node = read_bytes(input, tip_node_size)?;
+
+        let uid =
+            std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
+        let docket = NodeMapDocket { data_length };
+
+        // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
+        // config is false?
+        let mmap = mmap_open(&rawdata_path(&docket_path, uid))
+            // TODO: do we want to silently ignore a file-not-found error here
+            // and return `None`? The equivalent Python code has a branch
+            // that might have been intended to do that, but it looks like
+            // it raises a NameError exception instead.
+            .map_err(RevlogError::IoError)?;
+        if mmap.len() < data_length {
+            return Ok(None);
+        }
+
+        Ok(Some((docket, mmap)))
+    }
+}
+
+fn read_bytes<'a>(
+    input: &mut &'a [u8],
+    count: usize,
+) -> Result<&'a [u8], RevlogError> {
+    if let Some(start) = input.get(..count) {
+        *input = &input[count..];
+        Ok(start)
+    } else {
+        Err(RevlogError::Corrupted)
+    }
+}
+
+fn read_u8<'a>(input: &mut &[u8]) -> Result<u8, RevlogError> {
+    Ok(read_bytes(input, 1)?[0])
+}
+
+fn read_be_u64<'a>(input: &mut &[u8]) -> Result<u64, RevlogError> {
+    let array = read_bytes(input, std::mem::size_of::<u64>())?
+        .try_into()
+        .unwrap();
+    Ok(u64::from_be_bytes(array))
+}
+
+fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
+    let docket_name = docket_path
+        .file_name()
+        .expect("expected a base name")
+        .to_str()
+        .expect("expected an ASCII file name in the store");
+    let prefix = strip_suffix(docket_name, ".n.a")
+        .or_else(|| strip_suffix(docket_name, ".n"))
+        .expect("expected docket path in .n or .n.a");
+    let name = format!("{}-{}.nd", prefix, uid);
+    docket_path
+        .parent()
+        .expect("expected a non-root path")
+        .join(name)
+}
+
+// TODO: use the str method when we require Rust 1.45
+fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
+    if s.ends_with(suffix) {
+        Some(&s[..s.len() - suffix.len()])
+    } else {
+        None
+    }
+}
diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs
+++ b/rust/hg-core/src/revlog/index.rs
@@ -132,6 +132,16 @@ 
     }
 }
 
+impl super::RevlogIndex for Index {
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn node(&self, rev: Revision) -> Option<&Node> {
+        self.get_entry(rev).map(|entry| entry.hash())
+    }
+}
+
 #[derive(Debug)]
 pub struct IndexEntry<'a> {
     bytes: &'a [u8],
@@ -190,7 +200,7 @@ 
     ///
     /// Currently, SHA-1 is used and only the first 20 bytes of this field
     /// are used.
-    pub fn hash(&self) -> &Node {
+    pub fn hash(&self) -> &'a Node {
         (&self.bytes[32..52]).try_into().unwrap()
     }
 }
diff --git a/rust/hg-core/src/revlog.rs b/rust/hg-core/src/revlog.rs
--- a/rust/hg-core/src/revlog.rs
+++ b/rust/hg-core/src/revlog.rs
@@ -7,6 +7,7 @@ 
 
 pub mod node;
 pub mod nodemap;
+mod nodemap_docket;
 pub mod path_encode;
 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
 pub mod changelog;
diff --git a/rust/hg-core/src/requirements.rs b/rust/hg-core/src/requirements.rs
--- a/rust/hg-core/src/requirements.rs
+++ b/rust/hg-core/src/requirements.rs
@@ -69,4 +69,8 @@ 
     "revlogv1",
     "sparserevlog",
     "store",
+    // As of this writing everything rhg does is read-only.
+    // When it starts writing to the repository, it’ll need to either keep the
+    // persistent nodemap up to date or remove this entry:
+    "persistent-nodemap",
 ];