Patchwork D11961: rhg: Expose FilelogEntry that wraps RevlogEntry

login
register
mail settings
Submitter phabricator
Date Jan. 6, 2022, 6:57 p.m.
Message ID <differential-rev-PHID-DREV-mher3modgvvc3a3cch5e-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50286/
State New
Headers show

Comments

phabricator - Jan. 6, 2022, 6:57 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This can later be extended to access metadata such as `uncompressed_len` without
  necessarily resolving deltas.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11961

AFFECTED FILES
  rust/hg-core/src/revlog/filelog.rs
  rust/hg-core/src/revlog/revlog.rs
  rust/rhg/src/commands/status.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -512,17 +512,18 @@ 
     }
     let filelog = repo.filelog(hg_path)?;
     let fs_len = fs_metadata.len();
+    let filelog_entry =
+        filelog.entry_for_node(entry.node_id()?).map_err(|_| {
+            HgError::corrupted("filelog missing node from manifest")
+        })?;
     // TODO: check `fs_len` here like below, but based on
     // `RevlogEntry::uncompressed_len` without decompressing the full filelog
     // contents where possible. This is only valid if the revlog data does not
     // contain metadata. See how Python’s `revlog.rawsize` calls
     // `storageutil.filerevisioncopied`.
     // (Maybe also check for content-modifying flags? See `revlog.size`.)
-    let filelog_entry =
-        filelog.data_for_node(entry.node_id()?).map_err(|_| {
-            HgError::corrupted("filelog missing node from manifest")
-        })?;
-    let contents_in_p1 = filelog_entry.file_data()?;
+    let filelog_data = filelog_entry.data()?;
+    let contents_in_p1 = filelog_data.file_data()?;
     if contents_in_p1.len() as u64 != fs_len {
         // No need to read the file contents:
         // it cannot be equal if it has a different length.
diff --git a/rust/hg-core/src/revlog/revlog.rs b/rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/revlog.rs
+++ b/rust/hg-core/src/revlog/revlog.rs
@@ -39,9 +39,13 @@ 
     }
 }
 
+fn corrupted() -> HgError {
+    HgError::corrupted("corrupted revlog")
+}
+
 impl RevlogError {
     fn corrupted() -> Self {
-        RevlogError::Other(HgError::corrupted("corrupted revlog"))
+        RevlogError::Other(corrupted())
     }
 }
 
@@ -191,7 +195,7 @@ 
         if rev == NULL_REVISION {
             return Ok(Cow::Borrowed(&[]));
         };
-        self.get_entry(rev)?.data()
+        Ok(self.get_entry(rev)?.data()?)
     }
 
     /// Check the hash of some given data against the recorded hash.
@@ -222,13 +226,13 @@ 
     fn build_data_from_deltas(
         snapshot: RevlogEntry,
         deltas: &[RevlogEntry],
-    ) -> Result<Vec<u8>, RevlogError> {
+    ) -> Result<Vec<u8>, HgError> {
         let snapshot = snapshot.data_chunk()?;
         let deltas = deltas
             .iter()
             .rev()
             .map(RevlogEntry::data_chunk)
-            .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
+            .collect::<Result<Vec<_>, _>>()?;
         let patches: Vec<_> =
             deltas.iter().map(|d| patch::PatchList::new(d)).collect();
         let patch = patch::fold_patch_lists(&patches);
@@ -246,7 +250,10 @@ 
     }
 
     /// Get an entry of the revlog.
-    fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
+    pub fn get_entry(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
         let index_entry = self
             .index
             .get_entry(rev)
@@ -281,8 +288,8 @@ 
     fn get_entry_internal(
         &self,
         rev: Revision,
-    ) -> Result<RevlogEntry, RevlogError> {
-        return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+    ) -> Result<RevlogEntry, HgError> {
+        return self.get_entry(rev).map_err(|_| corrupted());
     }
 }
 
@@ -304,7 +311,7 @@ 
     }
 
     /// The data for this entry, after resolving deltas if any.
-    pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+    pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
         let mut entry = self.clone();
         let mut delta_chain = vec![];
 
@@ -328,7 +335,7 @@ 
             .revlog
             .index
             .get_entry(self.rev)
-            .ok_or(RevlogError::InvalidRevision)?;
+            .ok_or_else(corrupted)?;
 
         let data = if delta_chain.is_empty() {
             entry.data_chunk()?
@@ -344,13 +351,13 @@ 
         ) {
             Ok(data)
         } else {
-            Err(RevlogError::corrupted())
+            Err(corrupted())
         }
     }
 
     /// Extract the data contained in the entry.
     /// This may be a delta. (See `is_delta`.)
-    fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+    fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
         if self.bytes.is_empty() {
             return Ok(Cow::Borrowed(&[]));
         }
@@ -365,39 +372,35 @@ 
             // zstd data.
             b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
             // A proper new format should have had a repo/store requirement.
-            _format_type => Err(RevlogError::corrupted()),
+            _format_type => Err(corrupted()),
         }
     }
 
-    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
+    fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
         let mut decoder = ZlibDecoder::new(self.bytes);
         if self.is_delta() {
             let mut buf = Vec::with_capacity(self.compressed_len);
-            decoder
-                .read_to_end(&mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+            decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
-            decoder
-                .read_exact(&mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+            decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
             Ok(buf)
         }
     }
 
-    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
+    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
         if self.is_delta() {
             let mut buf = Vec::with_capacity(self.compressed_len);
             zstd::stream::copy_decode(self.bytes, &mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+                .map_err(|_| corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
             let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
-                .map_err(|_| RevlogError::corrupted())?;
+                .map_err(|_| corrupted())?;
             if len != self.uncompressed_len {
-                Err(RevlogError::corrupted())
+                Err(corrupted())
             } else {
                 Ok(buf)
             }
diff --git a/rust/hg-core/src/revlog/filelog.rs b/rust/hg-core/src/revlog/filelog.rs
--- a/rust/hg-core/src/revlog/filelog.rs
+++ b/rust/hg-core/src/revlog/filelog.rs
@@ -1,6 +1,7 @@ 
 use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::path_encode::path_encode;
+use crate::revlog::revlog::RevlogEntry;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
@@ -23,7 +24,7 @@ 
         Ok(Self { revlog })
     }
 
-    /// The given node ID is that of the file as found in a manifest, not of a
+    /// The given node ID is that of the file as found in a filelog, not of a
     /// changeset.
     pub fn data_for_node(
         &self,
@@ -33,7 +34,7 @@ 
         self.data_for_rev(file_rev)
     }
 
-    /// The given revision is that of the file as found in a manifest, not of a
+    /// The given revision is that of the file as found in a filelog, not of a
     /// changeset.
     pub fn data_for_rev(
         &self,
@@ -42,6 +43,25 @@ 
         let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
         Ok(FilelogRevisionData(data.into()))
     }
+
+    /// The given node ID is that of the file as found in a filelog, not of a
+    /// changeset.
+    pub fn entry_for_node(
+        &self,
+        file_node: impl Into<NodePrefix>,
+    ) -> Result<FilelogEntry, RevlogError> {
+        let file_rev = self.revlog.rev_from_node(file_node.into())?;
+        self.entry_for_rev(file_rev)
+    }
+
+    /// The given revision is that of the file as found in a filelog, not of a
+    /// changeset.
+    pub fn entry_for_rev(
+        &self,
+        file_rev: Revision,
+    ) -> Result<FilelogEntry, RevlogError> {
+        Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
+    }
 }
 
 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
@@ -50,6 +70,14 @@ 
     get_path_from_bytes(&encoded_bytes).into()
 }
 
+pub struct FilelogEntry<'a>(RevlogEntry<'a>);
+
+impl FilelogEntry<'_> {
+    pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
+        Ok(FilelogRevisionData(self.0.data()?.into_owned()))
+    }
+}
+
 /// The data for one revision in a filelog, uncompressed and delta-resolved.
 pub struct FilelogRevisionData(Vec<u8>);