571 } |
571 } |
572 new |
572 new |
573 } |
573 } |
574 } |
574 } |
575 |
575 |
|
576 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded |

577 /// contexts. |

578 /// |

579 /// The `status` algorithm makes heavy use of threads, and calling `is_match` |

580 /// from many threads at once is prone to contention, probably within the |

581 /// scratch space needed as the regex DFA is built lazily. |

582 /// |

583 /// We are in the process of raising the issue upstream, but for now |

584 /// the workaround used here is to store the `Regex` in a lazily populated |

585 /// thread-local variable, sharing the initial read-only compilation, but |

586 /// not the lazy DFA scratch space mentioned above. |

587 /// |

588 /// This reduces the contention observed with 16+ threads, but does not |

589 /// completely remove it. Hopefully this can be addressed upstream. |

590 struct RegexMatcher { |

591 /// Compiled at the start of the status algorithm, used as a base for |

592 /// cloning into each thread-local `self.local`, thus sharing the expensive |

593 /// first compilation. |

594 base: regex::bytes::Regex, |

595 /// Thread-local variable that holds the `Regex` that is actually queried |

596 /// from each thread; filled on first use with a clone of `base`. |

597 local: thread_local::ThreadLocal<regex::bytes::Regex>, |

598 } |
|
599 |
|
600 impl RegexMatcher { |

601 /// Returns whether the bytes of `path` match the stored `Regex`. |

/// |

/// The query runs against the calling thread's own `Regex`, which is |

/// cloned from `self.base` if this thread does not have one yet. |

602 pub fn is_match(&self, path: &HgPath) -> bool { |

603 self.local |

604 .get_or(|| self.base.clone()) |

605 .is_match(path.as_bytes()) |

606 } |

607 } |
|
608 |
576 /// Returns a function that matches an `HgPath` against the given regex |
609 /// Returns a [`RegexMatcher`] that matches an `HgPath` against the given regex |
577 /// pattern. |
610 /// pattern. |
578 /// |
611 /// |
579 /// This can fail when the pattern is invalid or not supported by the |
612 /// This can fail when the pattern is invalid or not supported by the |
580 /// underlying engine (the `regex` crate), for instance anything with |
613 /// underlying engine (the `regex` crate), for instance anything with |
581 /// back-references. |
614 /// back-references. |
582 #[timed] |
615 #[timed] |
583 fn re_matcher( |
616 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { |
584 pattern: &[u8], |
|
585 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { |
|
586 use std::io::Write; |
617 use std::io::Write; |
587 |
618 |
588 // The `regex` crate adds `.*` to the start and end of expressions if there |
619 // The `regex` crate adds `.*` to the start and end of expressions if there |
589 // are no anchors, so add the start anchor. |
620 // are no anchors, so add the start anchor. |
590 let mut escaped_bytes = vec![b'^', b'(', b'?', b':']; |
621 let mut escaped_bytes = vec![b'^', b'(', b'?', b':']; |
609 // multiple *minutes*. |
640 // multiple *minutes*. |
610 .dfa_size_limit(50 * (1 << 20)) |
641 .dfa_size_limit(50 * (1 << 20)) |
611 .build() |
642 .build() |
612 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; |
643 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; |
613 |
644 |
614 Ok(move |path: &HgPath| re.is_match(path.as_bytes())) |
645 Ok(RegexMatcher { |
|
646 base: re, |
|
647 local: Default::default(), |
|
648 }) |
615 } |
649 } |
616 |
650 |
617 /// Returns the regex pattern and a function that matches an `HgPath` against |
651 /// Returns the regex pattern and a function that matches an `HgPath` against |
618 /// said regex formed by the given ignore patterns. |
652 /// said regex formed by the given ignore patterns. |
619 fn build_regex_match<'a, 'b>( |
653 fn build_regex_match<'a, 'b>( |
636 // An empty pattern would cause the regex engine to incorrectly match the |
670 // An empty pattern would cause the regex engine to incorrectly match the |
637 // (empty) root directory |
671 // (empty) root directory |
638 let func = if !(regexps.is_empty()) { |
672 let func = if !(regexps.is_empty()) { |
639 let matcher = re_matcher(&full_regex)?; |
673 let matcher = re_matcher(&full_regex)?; |
640 let func = move |filename: &HgPath| { |
674 let func = move |filename: &HgPath| { |
641 exact_set.contains(filename) || matcher(filename) |
675 exact_set.contains(filename) || matcher.is_match(filename) |
642 }; |
676 }; |
643 Box::new(func) as IgnoreFnType |
677 Box::new(func) as IgnoreFnType |
644 } else { |
678 } else { |
645 let func = move |filename: &HgPath| exact_set.contains(filename); |
679 let func = move |filename: &HgPath| exact_set.contains(filename); |
646 Box::new(func) as IgnoreFnType |
680 Box::new(func) as IgnoreFnType |