Patchwork D11881: rhg: centralize index header parsing

login
register
mail settings
Submitter phabricator
Date Dec. 7, 2021, 6:58 p.m.
Message ID <differential-rev-PHID-DREV-gkej6viwrsl34m5324tz-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50207/
State New
Headers show

Comments

phabricator - Dec. 7, 2021, 6:58 p.m.
aalekseyev created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Centralize index header parsing, parse the generaldelta flag,
  and leave breadcrumbs to relate the code to python.

REPOSITORY
  rHG Mercurial

BRANCH
  stable

REVISION DETAIL
  https://phab.mercurial-scm.org/D11881

AFFECTED FILES
  rust/hg-core/src/revlog/index.rs
  rust/hg-core/src/revlog/revlog.rs

CHANGE DETAILS




To: aalekseyev, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -3,7 +3,6 @@ 
 use std::ops::Deref;
 use std::path::Path;
 
-use byteorder::{BigEndian, ByteOrder};
 use flate2::read::ZlibDecoder;
 use micro_timer::timed;
 use sha1::{Digest, Sha1};
@@ -74,13 +73,6 @@ 
             match repo.store_vfs().mmap_open_opt(&index_path)? {
                 None => Index::new(Box::new(vec![])),
                 Some(index_mmap) => {
-                    let version = get_version(&index_mmap)?;
-                    if version != 1 {
-                        // A proper new version should have had a repo/store
-                        // requirement.
-                        return Err(HgError::corrupted("corrupted revlog"));
-                    }
-
                     let index = Index::new(Box::new(index_mmap))?;
                     Ok(index)
                 }
@@ -387,19 +379,6 @@ 
     }
 }
 
-/// Format version of the revlog.
-pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
-    if index_bytes.len() == 0 {
-        return Ok(1);
-    };
-    if index_bytes.len() < 4 {
-        return Err(HgError::corrupted(
-            "corrupted revlog: can't read the index format header",
-        ));
-    };
-    Ok(BigEndian::read_u16(&index_bytes[2..=3]))
-}
-
 /// Calculate the hash of a revision given its data and its parents.
 fn hash(
     data: &[u8],
diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs
+++ b/rust/hg-core/src/revlog/index.rs
@@ -9,6 +9,76 @@ 
 
 pub const INDEX_ENTRY_SIZE: usize = 64;
 
+pub struct IndexHeader {
+    header_bytes: [u8; 4],
+}
+
+#[derive(Copy, Clone)]
+pub struct IndexHeaderFlags {
+    flags: u16,
+}
+
+// Corresponds to the high bits of `_format_flags` in python
+impl IndexHeaderFlags {
+    // Corresponds to FLAG_INLINE_DATA in python
+    pub fn is_inline(self) -> bool {
+        return self.flags & 1 != 0;
+    }
+    // Corresponds to FLAG_GENERALDELTA in python
+    pub fn uses_generaldelta(self) -> bool {
+        return self.flags & 2 != 0;
+    }
+}
+
+// Corresponds to the INDEX_HEADER structure,
+// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
+impl IndexHeader {
+    fn format_flags(&self) -> IndexHeaderFlags {
+        // No "unknown flags" check here, unlike in python. Maybe there should
+        // be.
+        return IndexHeaderFlags {
+            flags: BigEndian::read_u16(&self.header_bytes[0..2]),
+        };
+    }
+
+    // The only revlog version currently supported by rhg.
+    const REVLOGV1: u16 = 1;
+
+    // Corresponds to `_format_version` in Python.
+    // The only currently supported version is REVLOGV1.
+    fn format_version(&self) -> u16 {
+        return BigEndian::read_u16(&self.header_bytes[2..4]);
+    }
+
+    const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
+        // We treat an empty file as a valid index with no entries.
+        // Here we make an arbitrary choice of what we assume the format of the
+        // index to be (V1, inline, using generaldelta).
+        // This doesn't matter too much, since we're only doing read-only
+        // access, but the value corresponds to the `new_header` variable in
+        // `revlog.py`, `_loadindex`
+        header_bytes: [0, 3, 0, 1],
+    };
+
+    fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
+        if index_bytes.len() == 0 {
+            return Ok(IndexHeader::EMPTY_INDEX_HEADER);
+        }
+        if index_bytes.len() < 4 {
+            return Err(HgError::corrupted(
+                "corrupted revlog: can't read the index format header",
+            ));
+        }
+        return Ok(IndexHeader {
+            header_bytes: {
+                let bytes: [u8; 4] =
+                    index_bytes[0..4].try_into().expect("impossible");
+                bytes
+            },
+        });
+    }
+}
+
 /// A Revlog index
 pub struct Index {
     bytes: Box<dyn Deref<Target = [u8]> + Send>,
@@ -23,7 +93,15 @@ 
     pub fn new(
         bytes: Box<dyn Deref<Target = [u8]> + Send>,
     ) -> Result<Self, HgError> {
-        if is_inline(&bytes) {
+        let header = IndexHeader::parse(bytes.as_ref())?;
+
+        if header.format_version() != IndexHeader::REVLOGV1 {
+            // A proper new version should have had a repo/store
+            // requirement.
+            return Err(HgError::corrupted("unsupported revlog version"));
+        }
+
+        if header.format_flags().is_inline() {
             let mut offset: usize = 0;
             let mut offsets = Vec::new();
 
@@ -206,17 +284,6 @@ 
     }
 }
 
-/// Value of the inline flag.
-pub fn is_inline(index_bytes: &[u8]) -> bool {
-    if index_bytes.len() < 4 {
-        return true;
-    }
-    match &index_bytes[0..=1] {
-        [0, 0] | [0, 2] => false,
-        _ => true,
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;