rust-dirstate: use EntryState enum instead of literals
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 09 Jul 2019 12:15:09 +0200
changeset 42749 7ceded4419a3
parent 42748 7cae6bc29ff9
child 42750 849e744b925d
rust-dirstate: use EntryState enum instead of literals This improves code readability quite a bit, while also adding a layer of safety because we're checking the state byte against the enum. Differential Revision: https://phab.mercurial-scm.org/D6629
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate/dirs_multiset.rs
rust/hg-core/src/dirstate/parsers.rs
rust/hg-core/src/lib.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirs_multiset.rs
rust/hg-cpython/src/parsers.rs
--- a/rust/hg-core/src/dirstate.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-core/src/dirstate.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -5,7 +5,9 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
+use crate::DirstateParseError;
 use std::collections::HashMap;
+use std::convert::TryFrom;
 
 pub mod dirs_multiset;
 pub mod parsers;
@@ -21,7 +23,7 @@
 /// comes first.
 #[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
-    pub state: i8,
+    pub state: EntryState,
     pub mode: i32,
     pub mtime: i32,
     pub size: i32,
@@ -36,3 +38,42 @@
     Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
     Manifest(&'a Vec<Vec<u8>>),
 }
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum EntryState {
+    Normal,
+    Added,
+    Removed,
+    Merged,
+    Unknown,
+}
+
+impl TryFrom<u8> for EntryState {
+    type Error = DirstateParseError;
+
+    fn try_from(value: u8) -> Result<Self, Self::Error> {
+        match value {
+            b'n' => Ok(EntryState::Normal),
+            b'a' => Ok(EntryState::Added),
+            b'r' => Ok(EntryState::Removed),
+            b'm' => Ok(EntryState::Merged),
+            b'?' => Ok(EntryState::Unknown),
+            _ => Err(DirstateParseError::CorruptedEntry(format!(
+                "Incorrect entry state {}",
+                value
+            ))),
+        }
+    }
+}
+
+impl Into<u8> for EntryState {
+    fn into(self) -> u8 {
+        match self {
+            EntryState::Normal => b'n',
+            EntryState::Added => b'a',
+            EntryState::Removed => b'r',
+            EntryState::Merged => b'm',
+            EntryState::Unknown => b'?',
+        }
+    }
+}
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -8,7 +8,10 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
-use crate::{utils::files, DirsIterable, DirstateEntry, DirstateMapError};
+use crate::{
+    dirstate::EntryState, utils::files, DirsIterable, DirstateEntry,
+    DirstateMapError,
+};
 use std::collections::hash_map::{Entry, Iter};
 use std::collections::HashMap;
 
@@ -21,7 +24,10 @@
     /// Initializes the multiset from a dirstate or a manifest.
     ///
     /// If `skip_state` is provided, skips dirstate entries with equal state.
-    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+    pub fn new(
+        iterable: DirsIterable,
+        skip_state: Option<EntryState>,
+    ) -> Self {
         let mut multiset = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -257,7 +263,7 @@
                 (
                     f.as_bytes().to_vec(),
                     DirstateEntry {
-                        state: 0,
+                        state: EntryState::Normal,
                         mode: 0,
                         mtime: 0,
                         size: 0,
@@ -290,28 +296,33 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(&input_vec), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Manifest(&input_vec), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         // Skip does not affect a manifest
         assert_eq!(expected, new);
 
-        let input_map =
-            [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
-                .iter()
-                .map(|(f, state)| {
-                    (
-                        f.as_bytes().to_vec(),
-                        DirstateEntry {
-                            state: *state as i8,
-                            mode: 0,
-                            mtime: 0,
-                            size: 0,
-                        },
-                    )
-                })
-                .collect();
+        let input_map = [
+            ("a/", EntryState::Normal),
+            ("a/b/", EntryState::Normal),
+            ("a/c", EntryState::Removed),
+            ("a/d/", EntryState::Merged),
+        ]
+        .iter()
+        .map(|(f, state)| {
+            (
+                f.as_bytes().to_vec(),
+                DirstateEntry {
+                    state: *state,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                },
+            )
+        })
+        .collect();
 
         // "a" incremented with "a/c" and "a/d/"
         let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
@@ -319,10 +330,12 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(&input_map), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Dirstate(&input_map), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         assert_eq!(expected, new);
     }
+
 }
--- a/rust/hg-core/src/dirstate/parsers.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -4,12 +4,12 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::{
-    dirstate::{CopyMap, StateMap},
+    dirstate::{CopyMap, EntryState, StateMap},
     utils::copy_into_array,
     DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
 };
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use std::convert::TryInto;
+use std::convert::{TryFrom, TryInto};
 use std::io::Cursor;
 use std::time::Duration;
 
@@ -42,7 +42,7 @@
         let entry_bytes = &contents[curr_pos..];
 
         let mut cursor = Cursor::new(entry_bytes);
-        let state = cursor.read_i8()?;
+        let state = EntryState::try_from(cursor.read_u8()?)?;
         let mode = cursor.read_i32::<BigEndian>()?;
         let size = cursor.read_i32::<BigEndian>()?;
         let mtime = cursor.read_i32::<BigEndian>()?;
@@ -109,7 +109,7 @@
     for (ref filename, entry) in state_map.iter() {
         let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
-        if entry.state == 'n' as i8 && entry.mtime == now {
+        if entry.state == EntryState::Normal && entry.mtime == now {
             // The file was last modified "simultaneously" with the current
             // write to dirstate (i.e. within the same second for file-
             // systems with a granularity of 1 sec). This commonly happens
@@ -134,7 +134,7 @@
             new_filename.extend(copy);
         }
 
-        packed.write_i8(entry.state)?;
+        packed.write_u8(entry.state.into())?;
         packed.write_i32::<BigEndian>(entry.mode)?;
         packed.write_i32::<BigEndian>(entry.size)?;
         packed.write_i32::<BigEndian>(new_mtime)?;
@@ -179,7 +179,7 @@
         let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -216,7 +216,7 @@
         let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -254,7 +254,7 @@
         let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
@@ -294,7 +294,7 @@
             (
                 b"f1".to_vec(),
                 DirstateEntry {
-                    state: 'n' as i8,
+                    state: EntryState::Normal,
                     mode: 0o644,
                     size: 0,
                     mtime: 791231220,
@@ -303,7 +303,7 @@
             (
                 b"f2".to_vec(),
                 DirstateEntry {
-                    state: 'm' as i8,
+                    state: EntryState::Merged,
                     mode: 0o777,
                     size: 1000,
                     mtime: 791231220,
@@ -312,7 +312,7 @@
             (
                 b"f3".to_vec(),
                 DirstateEntry {
-                    state: 'r' as i8,
+                    state: EntryState::Removed,
                     mode: 0o644,
                     size: 234553,
                     mtime: 791231220,
@@ -321,7 +321,7 @@
             (
                 b"f4\xF6".to_vec(),
                 DirstateEntry {
-                    state: 'a' as i8,
+                    state: EntryState::Added,
                     mode: 0o644,
                     size: -1,
                     mtime: -1,
@@ -363,7 +363,7 @@
         let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 15000000,
@@ -398,7 +398,7 @@
                 [(
                     b"f1".to_vec(),
                     DirstateEntry {
-                        state: 'n' as i8,
+                        state: EntryState::Normal,
                         mode: 0o644,
                         size: 0,
                         mtime: -1
--- a/rust/hg-core/src/lib.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-core/src/lib.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -11,7 +11,8 @@
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
     parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
-    CopyMap, DirsIterable, DirstateEntry, DirstateParents, StateMap,
+    CopyMap, DirsIterable, DirstateEntry, DirstateParents, EntryState,
+    StateMap,
 };
 mod filepatterns;
 pub mod utils;
@@ -62,6 +63,24 @@
     Damaged,
 }
 
+impl From<std::io::Error> for DirstateParseError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateParseError::CorruptedEntry(e.to_string())
+    }
+}
+
+impl ToString for DirstateParseError {
+    fn to_string(&self) -> String {
+        use crate::DirstateParseError::*;
+        match self {
+            TooLittleData => "Too little data for dirstate.".to_string(),
+            Overflow => "Overflow in dirstate.".to_string(),
+            CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
+            Damaged => "Dirstate appears to be damaged.".to_string(),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub enum DirstatePackError {
     CorruptedEntry(String),
@@ -69,21 +88,33 @@
     BadSize(usize, usize),
 }
 
+impl From<std::io::Error> for DirstatePackError {
+    fn from(e: std::io::Error) -> Self {
+        DirstatePackError::CorruptedEntry(e.to_string())
+    }
+}
 #[derive(Debug, PartialEq)]
 pub enum DirstateMapError {
     PathNotFound(Vec<u8>),
     EmptyPath,
 }
 
-impl From<std::io::Error> for DirstatePackError {
-    fn from(e: std::io::Error) -> Self {
-        DirstatePackError::CorruptedEntry(e.to_string())
+pub enum DirstateError {
+    Parse(DirstateParseError),
+    Pack(DirstatePackError),
+    Map(DirstateMapError),
+    IO(std::io::Error),
+}
+
+impl From<DirstateParseError> for DirstateError {
+    fn from(e: DirstateParseError) -> Self {
+        DirstateError::Parse(e)
     }
 }
 
-impl From<std::io::Error> for DirstateParseError {
-    fn from(e: std::io::Error) -> Self {
-        DirstateParseError::CorruptedEntry(e.to_string())
+impl From<DirstatePackError> for DirstateError {
+    fn from(e: DirstatePackError) -> Self {
+        DirstateError::Pack(e)
     }
 }
 
@@ -103,3 +134,15 @@
         PatternFileError::IO(e)
     }
 }
+
+impl From<DirstateMapError> for DirstateError {
+    fn from(e: DirstateMapError) -> Self {
+        DirstateError::Map(e)
+    }
+}
+
+impl From<std::io::Error> for DirstateError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateError::IO(e)
+    }
+}
--- a/rust/hg-cpython/src/dirstate.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -12,14 +12,16 @@
 mod dirs_multiset;
 use crate::dirstate::dirs_multiset::Dirs;
 use cpython::{
-    PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence, Python,
+    exc, PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence,
+    Python,
 };
-use hg::{DirstateEntry, StateMap};
+use hg::{DirstateEntry, DirstateParseError, EntryState, StateMap};
 use libc::{c_char, c_int};
 #[cfg(feature = "python27")]
 use python27_sys::PyCapsule_Import;
 #[cfg(feature = "python3")]
 use python3_sys::PyCapsule_Import;
+use std::convert::TryFrom;
 use std::ffi::CStr;
 use std::mem::transmute;
 
@@ -60,7 +62,11 @@
         .map(|(filename, stats)| {
             let stats = stats.extract::<PySequence>(py)?;
             let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
-            let state = state.data(py)[0] as i8;
+            let state = EntryState::try_from(state.data(py)[0]).map_err(
+                |e: DirstateParseError| {
+                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                },
+            )?;
             let mode = stats.get_item(py, 1)?.extract(py)?;
             let size = stats.get_item(py, 2)?.extract(py)?;
             let mtime = stats.get_item(py, 3)?.extract(py)?;
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -16,7 +16,11 @@
 };
 
 use crate::dirstate::extract_dirstate;
-use hg::{DirsIterable, DirsMultiset, DirstateMapError};
+use hg::{
+    DirsIterable, DirsMultiset, DirstateMapError, DirstateParseError,
+    EntryState,
+};
+use std::convert::TryInto;
 
 py_class!(pub class Dirs |py| {
     data dirs_map: RefCell<DirsMultiset>;
@@ -28,9 +32,15 @@
         map: PyObject,
         skip: Option<PyObject> = None
     ) -> PyResult<Self> {
-        let mut skip_state: Option<i8> = None;
+        let mut skip_state: Option<EntryState> = None;
         if let Some(skip) = skip {
-            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+            skip_state = Some(
+                skip.extract::<PyBytes>(py)?.data(py)[0]
+                    .try_into()
+                    .map_err(|e: DirstateParseError| {
+                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                    })?,
+            );
         }
         let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
             let dirstate = extract_dirstate(py, &map)?;
--- a/rust/hg-cpython/src/parsers.rs	Tue Jul 09 11:49:49 2019 +0200
+++ b/rust/hg-cpython/src/parsers.rs	Tue Jul 09 12:15:09 2019 +0200
@@ -37,11 +37,17 @@
     match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
         Ok(parents) => {
             for (filename, entry) in dirstate_map {
+                // Explicitly go through u8 first, then cast to
+                // platform-specific `c_char` because Into<u8> has a specific
+                // implementation while `as c_char` would just do a naive enum
+                // cast.
+                let state: u8 = entry.state.into();
+
                 dmap.set_item(
                     py,
                     PyBytes::new(py, &filename),
                     decapsule_make_dirstate_tuple(py)?(
-                        entry.state as c_char,
+                        state as c_char,
                         entry.mode,
                         entry.size,
                         entry.mtime,
@@ -130,6 +136,11 @@
                 },
             ) in dirstate_map
             {
+                // Explicitly go through u8 first, then cast to
+                // platform-specific `c_char` because Into<u8> has a specific
+                // implementation while `as c_char` would just do a naive enum
+                // cast.
+                let state: u8 = state.into();
                 dmap.set_item(
                     py,
                     PyBytes::new(py, &filename[..]),