Patchwork D10366: rust: Refactor parse_dirstate() to take a callback

login
register
mail settings
Submitter phabricator
Date April 12, 2021, 12:24 p.m.
Message ID <differential-rev-PHID-DREV-tpcmqypk43zlnohp55ff-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48684/
State Superseded
Headers show

Comments

phabricator - April 12, 2021, 12:24 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The callback is called each entry, with optional copy source,
  instead of having parse_dirstate() return separate `Vec`s for copy mappings
  and the rest of the data.
  
  This avoids an intermediate heap allocation, but more importantly will allow
  the dirstate tree experiment to store copy data next to the rest of data
  instead of a separate mapping.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10366

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/operations/list_tracked_files.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -14,8 +14,8 @@ 
     PythonObject, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateEntry,
-    DirstateParents, FastHashMap, PARENT_SIZE,
+    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateParents,
+    FastHashMap, PARENT_SIZE,
 };
 use std::convert::TryInto;
 
@@ -28,33 +28,39 @@ 
     copymap: PyDict,
     st: PyBytes,
 ) -> PyResult<PyTuple> {
-    match parse_dirstate(st.data(py)) {
-        Ok((parents, entries, copies)) => {
-            let dirstate_map: FastHashMap<HgPathBuf, DirstateEntry> = entries
-                .into_iter()
-                .map(|(path, entry)| (path.to_owned(), entry))
-                .collect();
-            let copy_map: FastHashMap<HgPathBuf, HgPathBuf> = copies
-                .into_iter()
-                .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
-                .collect();
+    let mut py_result = Ok(());
+    let result = parse_dirstate(st.data(py), |path, entry, copy_source| {
+        if py_result.is_err() {
+            return;
+        }
+        macro_rules! handle_py_result {
+            ($e: expr) => {
+                match $e {
+                    Ok(value) => value,
+                    Err(error) => {
+                        py_result = Err(error);
+                        return;
+                    }
+                }
+            };
+        }
+        handle_py_result!(dmap.set_item(
+            py,
+            PyBytes::new(py, path.as_bytes()),
+            handle_py_result!(make_dirstate_tuple(py, entry)),
+        ));
 
-            for (filename, entry) in &dirstate_map {
-                dmap.set_item(
-                    py,
-                    PyBytes::new(py, filename.as_bytes()),
-                    make_dirstate_tuple(py, entry)?,
-                )?;
-            }
-            for (path, copy_path) in copy_map {
-                copymap.set_item(
-                    py,
-                    PyBytes::new(py, path.as_bytes()),
-                    PyBytes::new(py, copy_path.as_bytes()),
-                )?;
-            }
-            Ok(dirstate_parents_to_pytuple(py, parents))
+        if let Some(copy) = copy_source {
+            handle_py_result!(copymap.set_item(
+                py,
+                PyBytes::new(py, path.as_bytes()),
+                PyBytes::new(py, copy.as_bytes()),
+            ));
         }
+    });
+    py_result?;
+    match result {
+        Ok(parents) => Ok(dirstate_parents_to_pytuple(py, parents)),
         Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
     }
 }
diff --git a/rust/hg-core/src/operations/list_tracked_files.rs b/rust/hg-core/src/operations/list_tracked_files.rs
--- a/rust/hg-core/src/operations/list_tracked_files.rs
+++ b/rust/hg-core/src/operations/list_tracked_files.rs
@@ -30,14 +30,11 @@ 
     }
 
     pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
-        let (_, entries, _) = parse_dirstate(&self.content)?;
-        let mut files: Vec<&HgPath> = entries
-            .into_iter()
-            .filter_map(|(path, entry)| match entry.state {
-                EntryState::Removed => None,
-                _ => Some(path),
-            })
-            .collect();
+        let mut files = Vec::new();
+        parse_dirstate(&self.content, |path, entry, _| match entry.state {
+            EntryState::Removed => {}
+            _ => files.push(path),
+        })?;
         files.par_sort_unstable();
         Ok(files)
     }
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -20,12 +20,6 @@ 
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
-type ParseResult<'a> = (
-    &'a DirstateParents,
-    Vec<(&'a HgPath, DirstateEntry)>,
-    Vec<(&'a HgPath, &'a HgPath)>,
-);
-
 pub fn parse_dirstate_parents(
     contents: &[u8],
 ) -> Result<&DirstateParents, HgError> {
@@ -34,11 +28,13 @@ 
     Ok(parents)
 }
 
+/// `each_entry` is called for each entry with (path, metadata, optional copy
+/// source)
 #[timed]
-pub fn parse_dirstate(mut contents: &[u8]) -> Result<ParseResult, HgError> {
-    let mut copies = Vec::new();
-    let mut entries = Vec::new();
-
+pub fn parse_dirstate<'a>(
+    mut contents: &'a [u8],
+    mut each_entry: impl FnMut(&'a HgPath, &DirstateEntry, Option<&'a HgPath>),
+) -> Result<&'a DirstateParents, HgError> {
     let (parents, rest) = DirstateParents::from_bytes(contents)
         .map_err(|_| HgError::corrupted("Too little data for dirstate."))?;
     contents = rest;
@@ -62,14 +58,13 @@ 
         let path = HgPath::new(
             iter.next().expect("splitn always yields at least one item"),
         );
-        if let Some(copy_source) = iter.next() {
-            copies.push((path, HgPath::new(copy_source)));
-        }
+        let copy_source = iter.next().map(HgPath::new);
 
-        entries.push((path, entry));
+        each_entry(path, &entry, copy_source);
+
         contents = rest;
     }
-    Ok((parents, entries, copies))
+    Ok(parents)
 }
 
 /// `now` is the duration in seconds since the Unix epoch
@@ -276,16 +271,16 @@ 
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let (new_parents, entries, copies) =
-            parse_dirstate(result.as_slice()).unwrap();
-        let new_state_map: StateMap = entries
-            .into_iter()
-            .map(|(path, entry)| (path.to_owned(), entry))
-            .collect();
-        let new_copy_map: CopyMap = copies
-            .into_iter()
-            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
-            .collect();
+        let mut new_state_map = StateMap::default();
+        let mut new_copy_map = CopyMap::default();
+        let new_parents =
+            parse_dirstate(&result, |path, entry, copy_source| {
+                new_state_map.insert(path.to_owned(), *entry);
+                if let Some(copy) = copy_source {
+                    new_copy_map.insert(path.to_owned(), copy.to_owned());
+                }
+            })
+            .unwrap();
 
         assert_eq!(
             (&parents, state_map, copymap),
@@ -354,16 +349,16 @@ 
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let (new_parents, entries, copies) =
-            parse_dirstate(result.as_slice()).unwrap();
-        let new_state_map: StateMap = entries
-            .into_iter()
-            .map(|(path, entry)| (path.to_owned(), entry))
-            .collect();
-        let new_copy_map: CopyMap = copies
-            .into_iter()
-            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
-            .collect();
+        let mut new_state_map = StateMap::default();
+        let mut new_copy_map = CopyMap::default();
+        let new_parents =
+            parse_dirstate(&result, |path, entry, copy_source| {
+                new_state_map.insert(path.to_owned(), *entry);
+                if let Some(copy) = copy_source {
+                    new_copy_map.insert(path.to_owned(), copy.to_owned());
+                }
+            })
+            .unwrap();
 
         assert_eq!(
             (&parents, state_map, copymap),
@@ -400,16 +395,16 @@ 
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let (new_parents, entries, copies) =
-            parse_dirstate(result.as_slice()).unwrap();
-        let new_state_map: StateMap = entries
-            .into_iter()
-            .map(|(path, entry)| (path.to_owned(), entry))
-            .collect();
-        let new_copy_map: CopyMap = copies
-            .into_iter()
-            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
-            .collect();
+        let mut new_state_map = StateMap::default();
+        let mut new_copy_map = CopyMap::default();
+        let new_parents =
+            parse_dirstate(&result, |path, entry, copy_source| {
+                new_state_map.insert(path.to_owned(), *entry);
+                if let Some(copy) = copy_source {
+                    new_copy_map.insert(path.to_owned(), copy.to_owned());
+                }
+            })
+            .unwrap();
 
         assert_eq!(
             (
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -367,20 +367,16 @@ 
             return Ok(None);
         }
 
-        let (parents, entries, copies) = parse_dirstate(file_contents)?;
-        self.state_map.extend(
-            entries
-                .into_iter()
-                .map(|(path, entry)| (path.to_owned(), entry)),
-        );
-        self.copy_map.extend(
-            copies
-                .into_iter()
-                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
-        );
+        let parents =
+            parse_dirstate(file_contents, |path, entry, copy_source| {
+                self.state_map.insert(path.to_owned(), *entry);
+                if let Some(copy) = copy_source {
+                    self.copy_map.insert(path.to_owned(), copy.to_owned());
+                }
+            })?;
 
         if !self.dirty_parents {
-            self.set_parents(&parents);
+            self.set_parents(parents);
         }
 
         Ok(Some(parents))