Patchwork D12518: rust-dirstatemap: implement part of the `setparents` logic

login
register
mail settings
Submitter phabricator
Date April 12, 2022, 4:04 p.m.
Message ID <differential-rev-PHID-DREV-gsslov4wwaer4kwjglaj-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50866/
State New
Headers show

Comments

phabricator - April 12, 2022, 4:04 p.m.
Alphare created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The Python code does many round-trip calls to the Rust dirstatemap when copy
  information needs to be dropped in `setparents`.
  
  This may result in improved performance on `commit`, `update` and other such
  commands, but was mostly done to drop the last use of `set_dirstate_item`.
  See inline comments for an asterisk about performance, and see next patch for
  why `set_dirstate_item` has to go.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D12518

AFFECTED FILES
  mercurial/dirstatemap.py
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -489,6 +489,19 @@ 
         Ok(dirs)
     }
 
+    def setparents_fixup(&self) -> PyResult<PyDict> {
+        let dict = PyDict::new(py);
+        let copies = self.inner(py).borrow_mut().setparents_fixup();
+        for (key, value) in copies.map_err(|e| v2_error(py, e))? {
+            dict.set_item(
+                py,
+                PyBytes::new(py, key.as_bytes()),
+                PyBytes::new(py, value.as_bytes()),
+            )?;
+        }
+        Ok(dict)
+    }
+
     def debug_iter(&self, all: bool) -> PyResult<PyList> {
         let dirs = PyList::new(py, &[]);
         for item in self.inner(py).borrow().debug_iter(all) {
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -1375,6 +1375,41 @@ 
         )))
     }
 
+    /// Only public because it needs to be exposed to the Python layer.
+    /// It is not the full `setparents` logic, only the parts that mutate the
+    /// entries.
+    pub fn setparents_fixup(
+        &mut self,
+    ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
+        // XXX
+        // All the copying and re-querying is quite inefficient, but this is
+        // still a lot better than doing it from Python.
+        //
+        // The better solution is to develop a mechanism for `iter_mut`,
+        // which will be a lot more involved: we're dealing with a lazy,
+        // append-mostly, tree-like data structure. This will do for now.
+        let mut copies = vec![];
+        let mut files_with_p2_info = vec![];
+        for res in self.iter() {
+            let (path, entry) = res?;
+            if entry.p2_info() {
+                files_with_p2_info.push(path.to_owned())
+            }
+        }
+        self.with_dmap_mut(|map| {
+            for path in files_with_p2_info.iter() {
+                let node = map.get_or_insert(path)?;
+                let entry =
+                    node.data.as_entry_mut().expect("entry should exist");
+                entry.drop_merge_data();
+                if let Some(source) = node.copy_source.take().as_deref() {
+                    copies.push((path.to_owned(), source.to_owned()));
+                }
+            }
+            Ok(copies)
+        })
+    }
+
     pub fn debug_iter(
         &self,
         all: bool,
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -593,22 +593,7 @@ 
             self._dirtyparents = True
             copies = {}
             if fold_p2:
-                # Collect into an intermediate list to avoid a `RuntimeError`
-                # exception due to mutation during iteration.
-                # TODO: move this the whole loop to Rust where `iter_mut`
-                # enables in-place mutation of elements of a collection while
-                # iterating it, without mutating the collection itself.
-                files_with_p2_info = [
-                    f for f, s in self._map.items() if s.p2_info
-                ]
-                rust_map = self._map
-                for f in files_with_p2_info:
-                    e = rust_map.get(f)
-                    source = self.copymap.pop(f, None)
-                    if source:
-                        copies[f] = source
-                    e.drop_merge_data()
-                    rust_map.set_dirstate_item(f, e)
+                copies = self._map.setparents_fixup()
             return copies
 
         ### disk interaction