Patchwork D11089: dirstate-v2: Enforce data size read from the docket file

login
register
mail settings
Submitter phabricator
Date July 12, 2021, 9:11 p.m.
Message ID <differential-rev-PHID-DREV-yw7gne75klzcdz7xzzso-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49404/
State Superseded
Headers show

Comments

phabricator - July 12, 2021, 9:11 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The data file may not be shorter than its size given by the docket.
  It may be longer, but additional data is ignored.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11089

AFFECTED FILES
  mercurial/dirstatemap.py
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/owning.rs
  rust/rhg/src/commands/status.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs
--- a/rust/rhg/src/commands/status.rs
+++ b/rust/rhg/src/commands/status.rs
@@ -170,11 +170,13 @@ 
     let (mut dmap, parents) = if repo.has_dirstate_v2() {
         let parents;
         let dirstate_data;
+        let data_size;
         if let Some(docket_data) =
             repo.hg_vfs().read("dirstate").io_not_found_as_none()?
         {
             let docket = on_disk::read_docket(&docket_data)?;
             parents = Some(docket.parents());
+            data_size = docket.data_size();
             dirstate_data_mmap = repo
                 .hg_vfs()
                 .mmap_open(docket.data_filename())
@@ -182,9 +184,10 @@ 
             dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
         } else {
             parents = None;
+            data_size = 0;
             dirstate_data = b"";
         }
-        let dmap = DirstateMap::new_v2(dirstate_data)?;
+        let dmap = DirstateMap::new_v2(dirstate_data, data_size)?;
         (dmap, parents)
     } else {
         dirstate_data_mmap =
diff --git a/rust/hg-cpython/src/dirstate/owning.rs b/rust/hg-cpython/src/dirstate/owning.rs
--- a/rust/hg-cpython/src/dirstate/owning.rs
+++ b/rust/hg-cpython/src/dirstate/owning.rs
@@ -48,9 +48,10 @@ 
     pub fn new_v2(
         py: Python,
         on_disk: PyBytes,
+        data_size: usize,
     ) -> Result<Self, DirstateError> {
         let bytes: &'_ [u8] = on_disk.data(py);
-        let map = DirstateMap::new_v2(bytes)?;
+        let map = DirstateMap::new_v2(bytes, data_size)?;
 
         // Like in `bytes` above, this `'_` lifetime parameter borrows from
         // the bytes buffer owned by `on_disk`.
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -83,11 +83,12 @@ 
     @staticmethod
     def new_v2(
         on_disk: PyBytes,
+        data_size: usize,
     ) -> PyResult<PyObject> {
         let dirstate_error = |e: DirstateError| {
             PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
         };
-        let inner = OwningDirstateMap::new_v2(py, on_disk)
+        let inner = OwningDirstateMap::new_v2(py, on_disk, data_size)
                 .map_err(dirstate_error)?;
         let map = Self::create_instance(py, Box::new(inner))?;
         Ok(map.into_object())
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -21,7 +21,7 @@ 
 use bytes_cast::BytesCast;
 use format_bytes::format_bytes;
 use std::borrow::Cow;
-use std::convert::TryFrom;
+use std::convert::{TryFrom, TryInto};
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
@@ -224,6 +224,11 @@ 
         DirstateParents { p1, p2 }
     }
 
+    pub fn data_size(&self) -> usize {
+        // This `unwrap` could only panic on a 16-bit CPU
+        self.header.data_size.get().try_into().unwrap()
+    }
+
     pub fn data_filename(&self) -> String {
         String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
     }
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -410,8 +410,15 @@ 
     }
 
     #[timed]
-    pub fn new_v2(on_disk: &'on_disk [u8]) -> Result<Self, DirstateError> {
-        Ok(on_disk::read(on_disk)?)
+    pub fn new_v2(
+        on_disk: &'on_disk [u8],
+        data_size: usize,
+    ) -> Result<Self, DirstateError> {
+        if let Some(data) = on_disk.get(..data_size) {
+            Ok(on_disk::read(data)?)
+        } else {
+            Err(DirstateV2ParseError.into())
+        }
     }
 
     #[timed]
diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py
--- a/mercurial/dirstatemap.py
+++ b/mercurial/dirstatemap.py
@@ -639,12 +639,14 @@ 
                     data = self._opener.read(self.docket.data_filename())
                 else:
                     data = b''
-                self._rustmap = rustmod.DirstateMap.new_v2(data)
+                self._rustmap = rustmod.DirstateMap.new_v2(
+                    data, self.docket.data_size
+                )
                 parents = self.docket.parents
             else:
                 st = self._readdirstatefile()
                 self._rustmap, parents = rustmod.DirstateMap.new_v1(
-                    self._use_dirstate_tree, st
+                    self._use_dirstate_tree, self._readdirstatefile()
                 )
 
             if parents and not self._dirtyparents: