merge-lists: make it possible to specify pattern to match
authorMartin von Zweigbergk <martinvonz@google.com>
Fri, 18 Mar 2022 12:23:47 -0700
changeset 49011 b999edb15f8c
parent 49010 681b25ea579e
child 49012 5b65721a75eb
merge-lists: make it possible to specify pattern to match. The `merge-lists` tool doesn't know anything about Python other than its regex that attempts to match import lines. Let's make it possible to pass in a custom regex so it's easy to use the tool for e.g. C/C++ `#include` lines or Rust `use` lines (given the tool's limited, regex-based matching). Differential Revision: https://phab.mercurial-scm.org/D12392
contrib/merge-lists/src/main.rs
contrib/merge-lists/tests/test-merge-lists.rs
--- a/contrib/merge-lists/src/main.rs	Fri Mar 04 16:12:56 2022 -0800
+++ b/contrib/merge-lists/src/main.rs	Fri Mar 18 12:23:47 2022 -0700
@@ -1,4 +1,4 @@
-use clap::Parser;
+use clap::{ArgGroup, Parser};
 use itertools::Itertools;
 use regex::bytes::Regex;
 use similar::ChangeTag;
@@ -150,6 +150,7 @@
 /// for partial merge tools (configured in `[partial-merge-tools]`).
 #[derive(Parser, Debug)]
 #[clap(version, about, long_about = None)]
+#[clap(group(ArgGroup::new("match").required(true).args(&["pattern", "python-imports"])))]
 struct Args {
     /// Path to the file's content in the "local" side
     local: OsString,
@@ -159,6 +160,26 @@
 
     /// Path to the file's content in the "other" side
     other: OsString,
+
+    /// Regular expression to use
+    #[clap(long, short)]
+    pattern: Option<String>,
+
+    /// Use built-in regular expression for Python imports
+    #[clap(long)]
+    python_imports: bool,
+}
+
+/// Builds the regex used to recognize the lines that may be merged.
+///
+/// `--python-imports` selects the built-in Python import pattern; otherwise
+/// the pattern given with `--pattern` is used, falling back to `.*` when
+/// neither is set. An optional line ending (`\r?\n?`) is appended in every
+/// case.
+///
+/// Prints a message to stderr and exits with status 2 if the resulting
+/// expression does not compile, since the pattern may come straight from the
+/// command line.
+fn get_regex(args: &Args) -> Regex {
+    let pattern = if args.python_imports {
+        // Kept verbatim (suffix included) so the built-in behavior is
+        // unchanged.
+        String::from(
+            r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?",
+        )
+    } else {
+        // Wrap the pattern in a non-capturing group: without it, a top-level
+        // alternation such as `a|b` would become `a|b\r?\n?`, attaching the
+        // line-ending suffix to the last alternative only.
+        format!(r"(?:{})\r?\n?", args.pattern.as_deref().unwrap_or(".*"))
+    };
+    Regex::new(&pattern).unwrap_or_else(|err| {
+        eprintln!("merge-lists: invalid pattern: {}", err);
+        std::process::exit(2)
+    })
 }
 
 fn main() {
@@ -172,8 +193,7 @@
     let local_bytes = std::fs::read(&local_path).unwrap();
     let other_bytes = std::fs::read(&other_path).unwrap();
 
-    let regex =
-        regex::bytes::Regex::new(r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?").unwrap();
+    let regex = get_regex(&args);
     let (new_base_bytes, new_local_bytes, new_other_bytes) =
         resolve(&base_bytes, &local_bytes, &other_bytes, &regex);
 
--- a/contrib/merge-lists/tests/test-merge-lists.rs	Fri Mar 04 16:12:56 2022 -0800
+++ b/contrib/merge-lists/tests/test-merge-lists.rs	Fri Mar 18 12:23:47 2022 -0700
@@ -1,7 +1,8 @@
 use similar::DiffableStr;
+use std::ffi::OsStr;
 use tempdir::TempDir;
 
-fn run_test(input: &str) -> String {
+fn run_test(arg: &str, input: &str) -> String {
     let mut cmd = assert_cmd::Command::cargo_bin("merge-lists").unwrap();
     let temp_dir = TempDir::new("test").unwrap();
     let base_path = temp_dir.path().join("base");
@@ -16,6 +17,7 @@
     std::fs::write(&local_path, split.next().unwrap()).unwrap();
     std::fs::write(&other_path, split.next().unwrap()).unwrap();
     cmd.args(&[
+        OsStr::new(arg),
         local_path.as_os_str(),
         base_path.as_os_str(),
         other_path.as_os_str(),
@@ -38,6 +40,7 @@
 #[test]
 fn test_merge_lists_basic() {
     let output = run_test(
+        "--python-imports",
         r"
 base:
 import lib1
@@ -72,6 +75,7 @@
     // Test some "from x import y" statements and some non-import conflicts
     // (unresolvable)
     let output = run_test(
+        "--python-imports",
         r"
 base:
 from . import x
@@ -116,6 +120,7 @@
     // Test that nothing is done if the elements in the conflicting hunks are
     // not sorted
     let output = run_test(
+        "--python-imports",
         r"
 base:
 import x
@@ -154,3 +159,46 @@
     3+3
     "###);
 }
+
+#[test]
+fn test_custom_regex() {
+    // With a catch-all pattern every line is eligible for merging, so the
+    // lines added on each side are expected to show up in all three outputs.
+    let input = r"
+base:
+aardvark
+baboon
+camel
+
+local:
+aardvark
+camel
+eagle
+
+other:
+aardvark
+camel
+deer
+";
+    let output = run_test("--pattern=.*", input);
+    insta::assert_snapshot!(output, @r###"
+    base:
+    aardvark
+    camel
+    deer
+    eagle
+
+    local:
+    aardvark
+    camel
+    deer
+    eagle
+
+    other:
+    aardvark
+    camel
+    deer
+    eagle
+    "###);
+}