rhg: internally, return a structured representation from hg cat
author Arseniy Alekseyev <aalekseyev@janestreet.com>
Fri, 15 Oct 2021 14:05:20 +0100
changeset 48237 027ebad952ac
parent 48236 f8dc78716ad2
child 48238 ea50caada82d
rhg: internally, return a structured representation from hg cat

The purpose of this change is to make it possible to support limited templating in `hg cat`, so we could print separators between files etc.

The templating itself is not implemented yet, so this functionality is unused in `rhg cat`. However, in our fork of hg we're implementing a slightly different command `hg jscat` which makes use of this. So accepting this change will let us minimize the size of the patch we're maintaining on our side.

Differential Revision: https://phab.mercurial-scm.org/D11679
rust/hg-core/src/operations/cat.rs
rust/hg-core/src/revlog/filelog.rs
rust/rhg/src/commands/cat.rs
--- a/rust/hg-core/src/operations/cat.rs	Thu Oct 14 19:02:08 2021 +0100
+++ b/rust/hg-core/src/operations/cat.rs	Fri Oct 15 14:05:20 2021 +0100
@@ -10,20 +10,19 @@
 use crate::revlog::Node;
 
 use crate::utils::hg_path::HgPath;
-use crate::utils::hg_path::HgPathBuf;
 
 use itertools::put_back;
 use itertools::PutBack;
 use std::cmp::Ordering;
 
-pub struct CatOutput {
+pub struct CatOutput<'a> {
     /// Whether any file in the manifest matched the paths given as CLI
     /// arguments
     pub found_any: bool,
     /// The contents of matching files, in manifest order
-    pub concatenated: Vec<u8>,
+    pub results: Vec<(&'a HgPath, Vec<u8>)>,
     /// Which of the CLI arguments did not match any manifest file
-    pub missing: Vec<HgPathBuf>,
+    pub missing: Vec<&'a HgPath>,
     /// The node ID that the given revset was resolved to
     pub node: Node,
 }
@@ -32,7 +31,7 @@
 fn find_item<'a, 'b, 'c, D, I: Iterator<Item = (&'a HgPath, D)>>(
     i: &mut PutBack<I>,
     needle: &'b HgPath,
-) -> Option<I::Item> {
+) -> Option<D> {
     loop {
         match i.next() {
             None => return None,
@@ -42,30 +41,30 @@
                     return None;
                 }
                 Ordering::Greater => continue,
-                Ordering::Equal => return Some(val),
+                Ordering::Equal => return Some(val.1),
             },
         }
     }
 }
 
 fn find_files_in_manifest<
-    'a,
-    'b,
-    D,
-    I: Iterator<Item = (&'a HgPath, D)>,
-    J: Iterator<Item = &'b HgPath>,
+    'manifest,
+    'query,
+    Data,
+    Manifest: Iterator<Item = (&'manifest HgPath, Data)>,
+    Query: Iterator<Item = &'query HgPath>,
 >(
-    manifest: I,
-    files: J,
-) -> (Vec<(&'a HgPath, D)>, Vec<&'b HgPath>) {
+    manifest: Manifest,
+    query: Query,
+) -> (Vec<(&'query HgPath, Data)>, Vec<&'query HgPath>) {
     let mut manifest = put_back(manifest);
     let mut res = vec![];
     let mut missing = vec![];
 
-    for file in files {
+    for file in query {
         match find_item(&mut manifest, file) {
             None => missing.push(file),
-            Some(item) => res.push(item),
+            Some(item) => res.push((file, item)),
         }
     }
     return (res, missing);
@@ -79,36 +78,37 @@
 pub fn cat<'a>(
     repo: &Repo,
     revset: &str,
-    mut files: Vec<HgPathBuf>,
-) -> Result<CatOutput, RevlogError> {
+    mut files: Vec<&'a HgPath>,
+) -> Result<CatOutput<'a>, RevlogError> {
     let rev = crate::revset::resolve_single(revset, repo)?;
     let manifest = repo.manifest_for_rev(rev)?;
     let node = *repo
         .changelog()?
         .node_from_rev(rev)
         .expect("should succeed when repo.manifest did");
-    let mut bytes: Vec<u8> = vec![];
+    let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![];
     let mut found_any = false;
 
     files.sort_unstable();
 
     let (found, missing) = find_files_in_manifest(
         manifest.files_with_nodes(),
-        files.iter().map(|f| f.as_ref()),
+        files.into_iter().map(|f| f.as_ref()),
     );
 
-    for (manifest_file, node_bytes) in found {
+    for (file_path, node_bytes) in found {
         found_any = true;
-        let file_log = repo.filelog(manifest_file)?;
+        let file_log = repo.filelog(file_path)?;
         let file_node = Node::from_hex_for_repo(node_bytes)?;
-        bytes.extend(file_log.data_for_node(file_node)?.data()?);
+        results.push((
+            file_path,
+            file_log.data_for_node(file_node)?.into_data()?,
+        ));
     }
 
-    let missing: Vec<HgPathBuf> =
-        missing.iter().map(|file| (*file).to_owned()).collect();
     Ok(CatOutput {
         found_any,
-        concatenated: bytes,
+        results,
         missing,
         node,
     })
--- a/rust/hg-core/src/revlog/filelog.rs	Thu Oct 14 19:02:08 2021 +0100
+++ b/rust/hg-core/src/revlog/filelog.rs	Fri Oct 15 14:05:20 2021 +0100
@@ -7,7 +7,6 @@
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
-use std::borrow::Cow;
 use std::path::PathBuf;
 
 /// A specialized `Revlog` to work with file data logs.
@@ -40,7 +39,7 @@
         &self,
         file_rev: Revision,
     ) -> Result<FilelogEntry, RevlogError> {
-        let data = self.revlog.get_rev_data(file_rev)?;
+        let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?;
         Ok(FilelogEntry(data.into()))
     }
 }
@@ -51,22 +50,32 @@
     get_path_from_bytes(&encoded_bytes).into()
 }
 
-pub struct FilelogEntry<'filelog>(Cow<'filelog, [u8]>);
+pub struct FilelogEntry(Vec<u8>);
 
-impl<'filelog> FilelogEntry<'filelog> {
+impl FilelogEntry {
     /// Split into metadata and data
-    pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
+    /// Returns None if there is no metadata, so the entire entry is data.
+    fn split_metadata(&self) -> Result<Option<(&[u8], &[u8])>, HgError> {
         const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
 
         if let Some(rest) = self.0.drop_prefix(DELIMITER) {
             if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
-                Ok((Some(metadata), data))
+                Ok(Some((metadata, data)))
             } else {
                 Err(HgError::corrupted(
                     "Missing metadata end delimiter in filelog entry",
                 ))
             }
         } else {
+            Ok(None)
+        }
+    }
+
+    /// Split into metadata and data
+    pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
+        if let Some((metadata, data)) = self.split_metadata()? {
+            Ok((Some(metadata), data))
+        } else {
             Ok((None, &self.0))
         }
     }
@@ -76,4 +85,14 @@
         let (_metadata, data) = self.split()?;
         Ok(data)
     }
+
+    /// Consume the entry, and convert it into data, discarding any metadata,
+    /// if present.
+    pub fn into_data(self) -> Result<Vec<u8>, HgError> {
+        if let Some((_metadata, data)) = self.split_metadata()? {
+            Ok(data.to_owned())
+        } else {
+            Ok(self.0)
+        }
+    }
 }
--- a/rust/rhg/src/commands/cat.rs	Thu Oct 14 19:02:08 2021 +0100
+++ b/rust/rhg/src/commands/cat.rs	Fri Oct 15 14:05:20 2021 +0100
@@ -66,6 +66,7 @@
             .map_err(|e| CommandError::abort(e.to_string()))?;
         files.push(hg_file);
     }
+    let files = files.iter().map(|file| file.as_ref()).collect();
     // TODO probably move this to a util function like `repo.default_rev` or
     // something when it's used somewhere else
     let rev = match rev {
@@ -74,7 +75,9 @@
     };
 
     let output = cat(&repo, &rev, files).map_err(|e| (e, rev.as_str()))?;
-    invocation.ui.write_stdout(&output.concatenated)?;
+    for (_file, contents) in output.results {
+        invocation.ui.write_stdout(&contents)?;
+    }
     if !output.missing.is_empty() {
         let short = format!("{:x}", output.node.short()).into_bytes();
         for path in &output.missing {