Patchwork D6629: rust-dirstate: use EntryState enum instead of literals

login
register
mail settings
Submitter phabricator
Date July 10, 2019, 11:48 a.m.
Message ID <differential-rev-PHID-DREV-kjku552lhdbvl627awb4-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/40861/
State Superseded
Headers show

Comments

phabricator - July 10, 2019, 11:48 a.m.
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This improves code readability quite a bit, while also adding a layer of
  safety because we're checking the state byte against the enum.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6629

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/dirstate/dirs_multiset.rs
  rust/hg-cpython/src/dirstate/mod.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
phabricator - July 15, 2019, 4:08 p.m.
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> lib.rs:85
> +            Overflow => "Overflow in dirstate.".to_string(),
> +            CorruptedEntry(e) => format!("Corrupted entry: {}.", e),
> +            Damaged => "Dirstate appears to be damaged.".to_string(),

I like using `{!r}` so that the substituted entry is quoted and unambiguous.

> parsers.rs:42
> +                // platform-specific `c_char`.
> +                let state: u8 = entry.state.into();
> +

If you are going to a comment I would say why. However in this case it probably isn't worth having a comment.

You can also do this on one line `entry.state.into::<u8>() as c_char` which I think is simple enough.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6629/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6629

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
phabricator - July 17, 2019, 10:29 a.m.
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in lib.rs:85
> I like using `{!r}` so that the substituted entry is quoted and unambiguous.

Did you mean `{:?}`. I can't find anything related to your syntax.

> kevincox wrote in parsers.rs:42
> If you are going to a comment I would say why. However in this case it probably isn't worth having a comment.
> 
> You can also do this on one line `entry.state.into::<u8>() as c_char` which I think is simple enough.

The one-liner does not compile, so I improved the comment to explain why (which it really should have in the first place).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6629/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6629

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
phabricator - July 17, 2019, 11:35 a.m.
kevincox added inline comments.

INLINE COMMENTS

> Alphare wrote in lib.rs:85
> Did you mean `{:?}`. I can't find anything related to your syntax.

Sorry yes. I've been writing too much python at work.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6629/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6629

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -37,11 +37,15 @@ 
     match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
         Ok(parents) => {
             for (filename, entry) in dirstate_map {
+                // Explicitly go through u8 first, then cast to
+                // platform-specific `c_char`.
+                let state: u8 = entry.state.into();
+
                 dmap.set_item(
                     py,
                     PyBytes::new(py, &filename),
                     decapsule_make_dirstate_tuple(py)?(
-                        entry.state as c_char,
+                        state as c_char,
                         entry.mode,
                         entry.size,
                         entry.mtime,
@@ -123,6 +127,9 @@ 
                 },
             ) in dirstate_map
             {
+                // Explicitly go through u8 first, then cast to
+                // platform-specific `c_char`.
+                let state: u8 = state.into();
                 dmap.set_item(
                     py,
                     PyBytes::new(py, &filename[..]),
diff --git a/rust/hg-cpython/src/dirstate/mod.rs b/rust/hg-cpython/src/dirstate/mod.rs
--- a/rust/hg-cpython/src/dirstate/mod.rs
+++ b/rust/hg-cpython/src/dirstate/mod.rs
@@ -11,9 +11,10 @@ 
 //! From Python, this will be seen as `mercurial.rustext.dirstate`
 
 use cpython::{
-    PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence, Python,
+    exc, PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence,
+    Python,
 };
-use hg::{DirstateEntry, StateMap};
+use hg::{DirstateEntry, DirstateParseError, EntryState, StateMap};
 use std::ffi::CStr;
 
 #[cfg(feature = "python27")]
@@ -27,6 +28,7 @@ 
 
 mod dirs_multiset;
 use dirstate::dirs_multiset::Dirs;
+use std::convert::TryFrom;
 
 /// C code uses a custom `dirstate_tuple` type, checks in multiple instances
 /// for this type, and raises a Python `Exception` if the check does not pass.
@@ -65,7 +67,11 @@ 
         .map(|(filename, stats)| {
             let stats = stats.extract::<PySequence>(py)?;
             let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
-            let state = state.data(py)[0] as i8;
+            let state = EntryState::try_from(state.data(py)[0]).map_err(
+                |e: DirstateParseError| {
+                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                },
+            )?;
             let mode = stats.get_item(py, 1)?.extract(py)?;
             let size = stats.get_item(py, 2)?.extract(py)?;
             let mtime = stats.get_item(py, 3)?.extract(py)?;
diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
@@ -16,7 +16,11 @@ 
 };
 
 use dirstate::extract_dirstate;
-use hg::{DirsIterable, DirsMultiset, DirstateMapError};
+use hg::{
+    DirsIterable, DirsMultiset, DirstateMapError, DirstateParseError,
+    EntryState,
+};
+use std::convert::TryInto;
 
 py_class!(pub class Dirs |py| {
     data dirs_map: RefCell<DirsMultiset>;
@@ -28,9 +32,15 @@ 
         map: PyObject,
         skip: Option<PyObject> = None
     ) -> PyResult<Self> {
-        let mut skip_state: Option<i8> = None;
+        let mut skip_state: Option<EntryState> = None;
         if let Some(skip) = skip {
-            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+            skip_state = Some(
+                skip.extract::<PyBytes>(py)?.data(py)[0]
+                    .try_into()
+                    .map_err(|e: DirstateParseError| {
+                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                    })?,
+            );
         }
         let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
             let dirstate = extract_dirstate(py, &map)?;
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -6,10 +6,12 @@ 
 extern crate memchr;
 #[macro_use]
 extern crate lazy_static;
+extern crate core;
 extern crate regex;
 
 mod ancestors;
 pub mod dagops;
+pub mod utils;
 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
 mod dirstate;
 pub mod discovery;
@@ -17,10 +19,10 @@ 
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
     parsers::{pack_dirstate, parse_dirstate},
-    CopyMap, DirsIterable, DirstateEntry, DirstateParents, StateMap,
+    CopyMap, DirsIterable, DirstateEntry, DirstateParents, EntryState,
+    StateMap,
 };
 mod filepatterns;
-mod utils;
 
 pub use filepatterns::{
     build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
@@ -68,6 +70,24 @@ 
     Damaged,
 }
 
+impl From<std::io::Error> for DirstateParseError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateParseError::CorruptedEntry(e.to_string())
+    }
+}
+
+impl ToString for DirstateParseError {
+    fn to_string(&self) -> String {
+        use DirstateParseError::*;
+        match self {
+            TooLittleData => "Too little data for dirstate.".to_string(),
+            Overflow => "Overflow in dirstate.".to_string(),
+            CorruptedEntry(e) => format!("Corrupted entry: {}.", e),
+            Damaged => "Dirstate appears to be damaged.".to_string(),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub enum DirstatePackError {
     CorruptedEntry(String),
@@ -75,21 +95,33 @@ 
     BadSize(usize, usize),
 }
 
+impl From<std::io::Error> for DirstatePackError {
+    fn from(e: std::io::Error) -> Self {
+        DirstatePackError::CorruptedEntry(e.to_string())
+    }
+}
 #[derive(Debug, PartialEq)]
 pub enum DirstateMapError {
     PathNotFound(Vec<u8>),
     EmptyPath,
 }
 
-impl From<std::io::Error> for DirstatePackError {
-    fn from(e: std::io::Error) -> Self {
-        DirstatePackError::CorruptedEntry(e.to_string())
+pub enum DirstateError {
+    Parse(DirstateParseError),
+    Pack(DirstatePackError),
+    Map(DirstateMapError),
+    IO(std::io::Error),
+}
+
+impl From<DirstateParseError> for DirstateError {
+    fn from(e: DirstateParseError) -> Self {
+        DirstateError::Parse(e)
     }
 }
 
-impl From<std::io::Error> for DirstateParseError {
-    fn from(e: std::io::Error) -> Self {
-        DirstateParseError::CorruptedEntry(e.to_string())
+impl From<DirstatePackError> for DirstateError {
+    fn from(e: DirstatePackError) -> Self {
+        DirstateError::Pack(e)
     }
 }
 
@@ -109,3 +141,15 @@ 
         PatternFileError::IO(e)
     }
 }
+
+impl From<DirstateMapError> for DirstateError {
+    fn from(e: DirstateMapError) -> Self {
+        DirstateError::Map(e)
+    }
+}
+
+impl From<std::io::Error> for DirstateError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateError::IO(e)
+    }
+}
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -4,13 +4,17 @@ 
 // GNU General Public License version 2 or any later version.
 
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use dirstate::{CopyMap, StateMap};
+use dirstate::EntryState;
+use std::convert::TryFrom;
 use std::io::Cursor;
 use std::time::Duration;
-use {DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError};
+use {
+    CopyMap, DirstateEntry, DirstatePackError, DirstateParents,
+    DirstateParseError, StateMap,
+};
 
 /// Parents are stored in the dirstate as byte hashes.
-const PARENT_SIZE: usize = 20;
+pub const PARENT_SIZE: usize = 20;
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
@@ -38,7 +42,7 @@ 
         let entry_bytes = &contents[curr_pos..];
 
         let mut cursor = Cursor::new(entry_bytes);
-        let state = cursor.read_i8()?;
+        let state = EntryState::try_from(cursor.read_u8()?)?;
         let mode = cursor.read_i32::<BigEndian>()?;
         let size = cursor.read_i32::<BigEndian>()?;
         let mtime = cursor.read_i32::<BigEndian>()?;
@@ -107,7 +111,7 @@ 
     for (ref filename, entry) in state_map.iter() {
         let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
-        if entry.state == 'n' as i8 && entry.mtime == now.into() {
+        if entry.state == EntryState::Normal && entry.mtime == now {
             // The file was last modified "simultaneously" with the current
             // write to dirstate (i.e. within the same second for file-
             // systems with a granularity of 1 sec). This commonly happens
@@ -132,7 +136,7 @@ 
             new_filename.extend(copy);
         }
 
-        packed.write_i8(entry.state)?;
+        packed.write_u8(entry.state.into())?;
         packed.write_i32::<BigEndian>(entry.mode)?;
         packed.write_i32::<BigEndian>(entry.size)?;
         packed.write_i32::<BigEndian>(new_mtime)?;
@@ -148,6 +152,7 @@ 
 
     Ok(packed)
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -176,7 +181,7 @@ 
         let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -213,7 +218,7 @@ 
         let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -251,7 +256,7 @@ 
         let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -291,7 +296,7 @@ 
             (
                 b"f1".to_vec(),
                 DirstateEntry {
-                    state: 'n' as i8,
+                    state: EntryState::Normal,
                     mode: 0o644,
                     size: 0,
                     mtime: 791231220,
@@ -300,7 +305,7 @@ 
             (
                 b"f2".to_vec(),
                 DirstateEntry {
-                    state: 'm' as i8,
+                    state: EntryState::Merged,
                     mode: 0o777,
                     size: 1000,
                     mtime: 791231220,
@@ -309,7 +314,7 @@ 
             (
                 b"f3".to_vec(),
                 DirstateEntry {
-                    state: 'r' as i8,
+                    state: EntryState::Removed,
                     mode: 0o644,
                     size: 234553,
                     mtime: 791231220,
@@ -318,7 +323,7 @@ 
             (
                 b"f4\xF6".to_vec(),
                 DirstateEntry {
-                    state: 'a' as i8,
+                    state: EntryState::Added,
                     mode: 0o644,
                     size: -1,
                     mtime: -1,
@@ -360,7 +365,7 @@ 
         let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 15000000,
@@ -395,7 +400,7 @@ 
                 [(
                     b"f1".to_vec(),
                     DirstateEntry {
-                        state: 'n' as i8,
+                        state: EntryState::Normal,
                         mode: 0o644,
                         size: 0,
                         mtime: -1
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -6,6 +6,8 @@ 
 // GNU General Public License version 2 or any later version.
 
 use std::collections::HashMap;
+use std::convert::TryFrom;
+use DirstateParseError;
 
 pub mod dirs_multiset;
 pub mod parsers;
@@ -21,7 +23,7 @@ 
 /// comes first.
 #[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
-    pub state: i8,
+    pub state: EntryState,
     pub mode: i32,
     pub mtime: i32,
     pub size: i32,
@@ -36,3 +38,42 @@ 
     Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
     Manifest(&'a Vec<Vec<u8>>),
 }
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum EntryState {
+    Normal,
+    Added,
+    Removed,
+    Merged,
+    Unknown,
+}
+
+impl TryFrom<u8> for EntryState {
+    type Error = DirstateParseError;
+
+    fn try_from(value: u8) -> Result<Self, Self::Error> {
+        match value {
+            b'n' => Ok(EntryState::Normal),
+            b'a' => Ok(EntryState::Added),
+            b'r' => Ok(EntryState::Removed),
+            b'm' => Ok(EntryState::Merged),
+            b'?' => Ok(EntryState::Unknown),
+            _ => Err(DirstateParseError::CorruptedEntry(format!(
+                "Incorrect entry state {}",
+                value
+            ))),
+        }
+    }
+}
+
+impl Into<u8> for EntryState {
+    fn into(self) -> u8 {
+        match self {
+            EntryState::Normal => b'n',
+            EntryState::Added => b'a',
+            EntryState::Removed => b'r',
+            EntryState::Merged => b'm',
+            EntryState::Unknown => b'?',
+        }
+    }
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -8,6 +8,7 @@ 
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
+use dirstate::EntryState;
 use std::collections::hash_map::{Entry, Iter};
 use std::collections::HashMap;
 use {DirsIterable, DirstateEntry, DirstateMapError};
@@ -21,7 +22,10 @@ 
     /// Initializes the multiset from a dirstate or a manifest.
     ///
     /// If `skip_state` is provided, skips dirstate entries with equal state.
-    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+    pub fn new(
+        iterable: DirsIterable,
+        skip_state: Option<EntryState>,
+    ) -> Self {
         let mut multiset = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -140,6 +144,7 @@ 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use dirstate::EntryState;
     use std::collections::HashMap;
 
     #[test]
@@ -289,7 +294,7 @@ 
                 (
                     f.as_bytes().to_vec(),
                     DirstateEntry {
-                        state: 0,
+                        state: EntryState::Normal,
                         mode: 0,
                         mtime: 0,
                         size: 0,
@@ -322,28 +327,33 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(&input_vec), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Manifest(&input_vec), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         // Skip does not affect a manifest
         assert_eq!(expected, new);
 
-        let input_map =
-            [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
-                .iter()
-                .map(|(f, state)| {
-                    (
-                        f.as_bytes().to_vec(),
-                        DirstateEntry {
-                            state: *state as i8,
-                            mode: 0,
-                            mtime: 0,
-                            size: 0,
-                        },
-                    )
-                })
-                .collect();
+        let input_map = [
+            ("a/", EntryState::Normal),
+            ("a/b/", EntryState::Normal),
+            ("a/c", EntryState::Removed),
+            ("a/d/", EntryState::Merged),
+        ]
+        .iter()
+        .map(|(f, state)| {
+            (
+                f.as_bytes().to_vec(),
+                DirstateEntry {
+                    state: *state,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                },
+            )
+        })
+        .collect();
 
         // "a" incremented with "a/c" and "a/d/"
         let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
@@ -351,10 +361,12 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(&input_map), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Dirstate(&input_map), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         assert_eq!(expected, new);
     }
+
 }