revset: reduce nesting of chained 'or' operations (issue4624)
authorYuya Nishihara <yuya@tcha.org>
Sun, 26 Apr 2015 18:13:48 +0900
changeset 25309 b333ca94403d
parent 25308 036c26b08b71
child 25310 c1f5ef76d1c2
revset: reduce nesting of chained 'or' operations (issue4624) This reduces the stack depth of chained 'or' operations: - from O(n) to O(1) at the parsing, alias expansion and optimization phases - from O(n) to O(log(n)) at the evaluation phase simplifyinfixops() must be applied immediately after the parsing phase. Otherwise, alias expansion would crash by "maximum recursion depth exceeded" error. Test cases use 'x:y|y:z' instead of 'x|y' because I'm planning to optimize 'x|y' in a different way. Benchmarks: 0) 605b1d32c1c0 1) this patch revset #0: 0 + 1 + 2 + ... + 200 0) wall 0.026347 comb 0.030000 user 0.030000 sys 0.000000 (best of 101) 1) wall 0.023858 comb 0.030000 user 0.030000 sys 0.000000 (best of 112) revset #1: 0 + 1 + 2 + ... + 1000 0) maximum recursion depth exceeded 1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20) revset #2: sort(0 + 1 + 2 + ... + 200) 0) wall 0.013404 comb 0.010000 user 0.010000 sys 0.000000 (best of 196) 1) wall 0.006814 comb 0.010000 user 0.010000 sys 0.000000 (best of 375) revset #3: sort(0 + 1 + 2 + ... + 1000) 0) maximum recursion depth exceeded 1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
mercurial/revset.py
tests/test-glog.t
tests/test-revset.t
--- a/mercurial/revset.py	Sun May 24 14:10:52 2015 +0900
+++ b/mercurial/revset.py	Sun Apr 26 18:13:48 2015 +0900
@@ -356,10 +356,9 @@
 def andset(repo, subset, x, y):
     return getset(repo, getset(repo, subset, x), y)
 
-def orset(repo, subset, x, y):
-    xl = getset(repo, subset, x)
-    yl = getset(repo, subset, y)
-    return xl + yl
+def orset(repo, subset, *xs):
+    rs = [getset(repo, subset, x) for x in xs]
+    return _combinesets(rs)
 
 def notset(repo, subset, x):
     return subset - getset(repo, subset, x)
@@ -2160,13 +2159,11 @@
             return w, (op, tb, ta)
         return w, (op, ta, tb)
     elif op == 'or':
-        wa, ta = optimize(x[1], False)
-        wb, tb = optimize(x[2], False)
+        ws, ts = zip(*[optimize(y, False) for y in x[1:]])
         # we can't reorder trees by weight because it would change the order.
         # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
-        #   if wb < wa:
-        #       tb, ta = ta, tb
-        return max(wa, wb), (op, ta, tb)
+        #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
+        return max(ws), (op,) + ts
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
@@ -2384,7 +2381,7 @@
     tree, pos = p.parse(defn)
     if pos != len(defn):
         raise error.ParseError(_('invalid token'), pos)
-    return tree
+    return parser.simplifyinfixops(tree, ('or',))
 
 class revsetalias(object):
     # whether own `error` information is already shown or not.
@@ -2515,7 +2512,7 @@
     tree, pos = p.parse(spec, lookup=lookup)
     if pos != len(spec):
         raise error.ParseError(_("invalid token"), pos)
-    return tree
+    return parser.simplifyinfixops(tree, ('or',))
 
 def posttreebuilthook(tree, repo):
     # hook for extensions to execute code on the optimized tree
--- a/tests/test-glog.t	Sun May 24 14:10:52 2015 +0900
+++ b/tests/test-glog.t	Sun Apr 26 18:13:48 2015 +0900
@@ -1469,13 +1469,12 @@
   (group
     (group
       (or
-        (or
-          (func
-            ('symbol', 'branch')
-            ('string', 'default'))
-          (func
-            ('symbol', 'branch')
-            ('string', 'branch')))
+        (func
+          ('symbol', 'branch')
+          ('string', 'default'))
+        (func
+          ('symbol', 'branch')
+          ('string', 'branch'))
         (func
           ('symbol', 'branch')
           ('string', 'branch')))))
--- a/tests/test-revset.t	Sun May 24 14:10:52 2015 +0900
+++ b/tests/test-revset.t	Sun Apr 26 18:13:48 2015 +0900
@@ -128,16 +128,15 @@
   6
   $ try '0|1|2'
   (or
-    (or
-      ('symbol', '0')
-      ('symbol', '1'))
+    ('symbol', '0')
+    ('symbol', '1')
     ('symbol', '2'))
   * set:
   <addset
+    <baseset [0]>,
     <addset
-      <baseset [0]>,
-      <baseset [1]>>,
-    <baseset [2]>>
+      <baseset [1]>,
+      <baseset [2]>>>
   0
   1
   2
@@ -910,6 +909,49 @@
   4
   5
 
+test that chained `or` operations make balanced addsets
+
+  $ try '0:1|1:2|2:3|3:4|4:5'
+  (or
+    (range
+      ('symbol', '0')
+      ('symbol', '1'))
+    (range
+      ('symbol', '1')
+      ('symbol', '2'))
+    (range
+      ('symbol', '2')
+      ('symbol', '3'))
+    (range
+      ('symbol', '3')
+      ('symbol', '4'))
+    (range
+      ('symbol', '4')
+      ('symbol', '5')))
+  * set:
+  <addset
+    <addset
+      <spanset+ 0:1>,
+      <spanset+ 1:2>>,
+    <addset
+      <spanset+ 2:3>,
+      <addset
+        <spanset+ 3:4>,
+        <spanset+ 4:5>>>>
+  0
+  1
+  2
+  3
+  4
+  5
+
+test that chained `or` operations never eat up stack (issue4624)
+(uses `0:1` instead of `0` to avoid future optimization of trivial revisions)
+
+  $ hg log -T '{rev}\n' -r "`python -c "print '|'.join(['0:1'] * 500)"`"
+  0
+  1
+
 check that conversion to only works
   $ try --optimize '::3 - ::1'
   (minus
@@ -1352,6 +1394,44 @@
   <baseset [5]>
   5
 
+test chained `or` operations are flattened at parsing phase
+
+  $ echo 'chainedorops($1, $2, $3) = $1|$2|$3' >> .hg/hgrc
+  $ try 'chainedorops(0:1, 1:2, 2:3)'
+  (func
+    ('symbol', 'chainedorops')
+    (list
+      (list
+        (range
+          ('symbol', '0')
+          ('symbol', '1'))
+        (range
+          ('symbol', '1')
+          ('symbol', '2')))
+      (range
+        ('symbol', '2')
+        ('symbol', '3'))))
+  (or
+    (range
+      ('symbol', '0')
+      ('symbol', '1'))
+    (range
+      ('symbol', '1')
+      ('symbol', '2'))
+    (range
+      ('symbol', '2')
+      ('symbol', '3')))
+  * set:
+  <addset
+    <spanset+ 0:1>,
+    <addset
+      <spanset+ 1:2>,
+      <spanset+ 2:3>>>
+  0
+  1
+  2
+  3
+
 test variable isolation, variable placeholders are rewritten as string
 then parsed and matched again as string. Check they do not leak too
 far away.