rust/hg-core/src/matchers.rs
branch stable
changeset 49581 04f1dba53c96
parent 49558 363923bd51cd
child 49630 c7fb9b74e753
equal deleted inserted replaced
49580:08fe5c4d4471 49581:04f1dba53c96
   571         }
   571         }
   572         new
   572         new
   573     }
   573     }
   574 }
   574 }
   575 
   575 
       
   576 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
       
   577 /// contexts.
       
   578 ///
       
   579 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
       
   580 /// from many threads at once is prone to contention, probably within the
       
   581 /// scratch space needed as the regex DFA is built lazily.
       
   582 ///
       
   583 /// We are in the process of raising the issue upstream, but for now
       
   584 /// the workaround used here is to store the `Regex` in a lazily populated
       
   585 /// thread-local variable, sharing the initial read-only compilation, but
       
   586 /// not the lazy DFA scratch space mentioned above.
       
   587 ///
       
   588 /// This reduces the contention observed with 16+ threads, but does not
       
   589 /// completely remove it. Hopefully this can be addressed upstream.
       
   590 struct RegexMatcher {
       
   591     /// Compiled at the start of the status algorithm, used as the base that
       
   592     /// each thread's entry in `self.local` is cloned from, thus sharing the
       
   593     /// expensive first compilation.
       
   594     base: regex::bytes::Regex,
       
   595     /// Thread-local variable that holds the `Regex` that is actually queried
       
   596     /// from each thread; filled on first use with a clone of `self.base`.
       
   597     local: thread_local::ThreadLocal<regex::bytes::Regex>,
       
   598 }
       
   599 
       
   600 impl RegexMatcher {
       
   601     /// Returns whether the bytes of `path` match the calling thread's `Regex`.
       
   602     pub fn is_match(&self, path: &HgPath) -> bool {
       
   603         self.local
       
   604             .get_or(|| self.base.clone()) // closure clones `base` to fill this thread's entry
       
   605             .is_match(path.as_bytes())
       
   606     }
       
   607 }
       
   608 
   576 /// Returns a function that matches an `HgPath` against the given regex
   609 /// Returns a function that matches an `HgPath` against the given regex
   577 /// pattern.
   610 /// pattern.
   578 ///
   611 ///
   579 /// This can fail when the pattern is invalid or not supported by the
   612 /// This can fail when the pattern is invalid or not supported by the
   580 /// underlying engine (the `regex` crate), for instance anything with
   613 /// underlying engine (the `regex` crate), for instance anything with
   581 /// back-references.
   614 /// back-references.
   582 #[timed]
   615 #[timed]
   583 fn re_matcher(
   616 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
   584     pattern: &[u8],
       
   585 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
       
   586     use std::io::Write;
   617     use std::io::Write;
   587 
   618 
   588     // The `regex` crate adds `.*` to the start and end of expressions if there
   619     // The `regex` crate adds `.*` to the start and end of expressions if there
   589     // are no anchors, so add the start anchor.
   620     // are no anchors, so add the start anchor.
   590     let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
   621     let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
   609         // multiple *minutes*.
   640         // multiple *minutes*.
   610         .dfa_size_limit(50 * (1 << 20))
   641         .dfa_size_limit(50 * (1 << 20))
   611         .build()
   642         .build()
   612         .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
   643         .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
   613 
   644 
   614     Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
   645     Ok(RegexMatcher {
       
   646         base: re,
       
   647         local: Default::default(),
       
   648     })
   615 }
   649 }
   616 
   650 
   617 /// Returns the regex pattern and a function that matches an `HgPath` against
   651 /// Returns the regex pattern and a function that matches an `HgPath` against
   618 /// said regex formed by the given ignore patterns.
   652 /// said regex formed by the given ignore patterns.
   619 fn build_regex_match<'a, 'b>(
   653 fn build_regex_match<'a, 'b>(
   636     // An empty pattern would cause the regex engine to incorrectly match the
   670     // An empty pattern would cause the regex engine to incorrectly match the
   637     // (empty) root directory
   671     // (empty) root directory
   638     let func = if !(regexps.is_empty()) {
   672     let func = if !(regexps.is_empty()) {
   639         let matcher = re_matcher(&full_regex)?;
   673         let matcher = re_matcher(&full_regex)?;
   640         let func = move |filename: &HgPath| {
   674         let func = move |filename: &HgPath| {
   641             exact_set.contains(filename) || matcher(filename)
   675             exact_set.contains(filename) || matcher.is_match(filename)
   642         };
   676         };
   643         Box::new(func) as IgnoreFnType
   677         Box::new(func) as IgnoreFnType
   644     } else {
   678     } else {
   645         let func = move |filename: &HgPath| exact_set.contains(filename);
   679         let func = move |filename: &HgPath| exact_set.contains(filename);
   646         Box::new(func) as IgnoreFnType
   680         Box::new(func) as IgnoreFnType