Patchwork D12439: [RFC] rhg: start parsing changeset data

login
register
mail settings
Submitter phabricator
Date April 5, 2022, 6:11 p.m.
Message ID <differential-rev-PHID-DREV-hnalrsqiq7br56wf5b7s-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50785/
State New
Headers show

Comments

phabricator - April 5, 2022, 6:11 p.m.
martinvonz created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D12439

AFFECTED FILES
  rust/hg-core/src/revlog/changelog.rs

CHANGE DETAILS




To: martinvonz, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/revlog/changelog.rs b/rust/hg-core/src/revlog/changelog.rs
--- a/rust/hg-core/src/revlog/changelog.rs
+++ b/rust/hg-core/src/revlog/changelog.rs
@@ -3,6 +3,10 @@ 
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::Revision;
 use crate::revlog::{Node, NodePrefix};
+use crate::utils::hg_path::HgPath;
+use std::ascii::escape_default;
+use std::fmt::{Debug, Formatter};
+use std::ops::Range;
 
 /// A specialized `Revlog` to work with `changelog` data format.
 pub struct Changelog {
@@ -35,7 +39,12 @@ 
         if bytes.is_empty() {
             Ok(ChangelogRevisionData::null())
         } else {
-            Ok(ChangelogRevisionData::new(bytes))
+            Ok(ChangelogRevisionData::new(bytes).ok_or_else(|| {
+                RevlogError::Other(HgError::CorruptedRepository(format!(
+                    "Invalid changelog data for revision {}",
+                    rev
+                )))
+            })?)
         }
     }
 
@@ -45,21 +54,66 @@ 
 }
 
 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
-#[derive(Debug)]
+#[derive(PartialEq)]
 pub struct ChangelogRevisionData {
     /// The data bytes of the `changelog` entry.
     bytes: Vec<u8>,
+    /// The byte range for the hex manifest (not including the newline)
+    manifest_range: Range<usize>,
+    /// The byte range for the user+email (not including the newline)
+    user_range: Range<usize>,
+    /// The byte range for the timestamp+timezone+extras (not including the
+    /// newline)
+    timestamp_range: Range<usize>,
+    /// The byte range for the file list (including newlines between, but not
+    /// after)
+    files_range: Range<usize>,
+    /// The byte range for the description (including newlines)
+    description_range: Range<usize>,
 }
 
 impl ChangelogRevisionData {
-    fn new(bytes: Vec<u8>) -> Self {
-        Self { bytes }
+    /// Index the fields of `bytes`; `None` if the layout is invalid.
+    fn new(bytes: Vec<u8>) -> Option<Self> {
+        let mut line_iter = bytes.split(|b| b == &b'\n');
+        let manifest_range = 0..line_iter.next().unwrap().len();
+        let mut start_pos = manifest_range.end + 1;
+        let user_slice = line_iter.next()?;
+        let user_range = start_pos..start_pos + user_slice.len();
+        start_pos += user_slice.len() + 1;
+        let timestamp_slice = line_iter.next()?;
+        let timestamp_range = start_pos..start_pos + timestamp_slice.len();
+        start_pos += timestamp_slice.len() + 1;
+        let mut files_end_pos = start_pos;
+        loop {
+            // Intentionally `None` if the list does not end in a newline
+            let line = line_iter.next()?;
+            if line.is_empty() {
+                break;
+            }
+            files_end_pos += line.len() + 1;
+        }
+        // No files: there is no newline to drop, so keep the range empty
+        let files_range = start_pos..(files_end_pos - 1).max(start_pos);
+        if files_end_pos >= bytes.len() {
+            return None;
+        }
+        let description_range = files_end_pos + 1..bytes.len();
+        Some(Self {
+            bytes,
+            manifest_range,
+            user_range,
+            timestamp_range,
+            files_range,
+            description_range,
+        })
+    }
 
     fn null() -> Self {
         Self::new(
             b"0000000000000000000000000000000000000000\n\n0 0\n\n".to_vec(),
         )
+        .unwrap()
     }
 
     /// Return an iterator over the lines of the entry.
@@ -70,7 +124,92 @@ 
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
     pub fn manifest_node(&self) -> Result<Node, HgError> {
-        let manifest_node_hex = self.lines().next().unwrap();
+        let manifest_node_hex = &self.bytes[self.manifest_range.clone()];
         Node::from_hex_for_repo(manifest_node_hex)
     }
+
+    /// Return an iterator over the paths in the file list of this
+    /// `changelog` entry.
+    pub fn files(&self) -> impl Iterator<Item = &HgPath> {
+        self.bytes[self.files_range.clone()]
+            .split(|b| b == &b'\n')
+            .map(|path| HgPath::new(path))
+    }
+
+    /// Return the raw bytes of the description of this `changelog` entry
+    /// (including newlines).
+    pub fn description(&self) -> &[u8] {
+        &self.bytes[self.description_range.clone()]
+    }
 }
+
+impl Debug for ChangelogRevisionData {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ChangelogRevisionData")
+            .field("bytes", &debug_bytes(&self.bytes))
+            .field(
+                "manifest",
+                &debug_bytes(&self.bytes[self.manifest_range.clone()]),
+            )
+            .field("user", &debug_bytes(&self.bytes[self.user_range.clone()]))
+            .field(
+                "timestamp",
+                &debug_bytes(&self.bytes[self.timestamp_range.clone()]),
+            )
+            .field(
+                "files",
+                &debug_bytes(&self.bytes[self.files_range.clone()]),
+            )
+            .field(
+                "description",
+                &debug_bytes(&self.bytes[self.description_range.clone()]),
+            )
+            .finish()
+    }
+}
+
+fn debug_bytes(bytes: &[u8]) -> String {
+    String::from_utf8(bytes.iter().flat_map(|b| escape_default(*b)).collect())
+        .unwrap()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use itertools::Itertools;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_create_changelogrevisiondata_invalid() {
+        assert_eq!(
+            ChangelogRevisionData::new(b"abcd\n\n0 0\nfile1".to_vec()),
+            None
+        );
+        assert_eq!(
+            ChangelogRevisionData::new(b"abcd\n\n0 0\nfile1\n".to_vec()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_create_changelogrevisiondata() {
+        let data = ChangelogRevisionData::new(
+            b"abcd
+Some One <someone@example.com>
+0 0
+file1
+file2
+
+some
+commit
+message"
+                .to_vec(),
+        )
+        .unwrap();
+        assert_eq!(
+            data.files().collect_vec(),
+            vec![HgPath::new("file1"), HgPath::new("file2")]
+        );
+        assert_eq!(data.description(), b"some\ncommit\nmessage");
+    }
+}