smartset: split generatorset classes to avoid cycle
authorGregory Szorc <gregory.szorc@gmail.com>
Wed, 27 Dec 2017 11:08:32 -0700
changeset 35501 12a46ad67a3c
parent 35500 87918218da70
child 35502 67611e06ff08
smartset: split generatorset classes to avoid cycle I uncovered a cycle manifesting in a memory leak by running `hgperfrevset '::tip'`. The cycle was due to generatorset.__init__ assigning a bound method to self.__contains__. Internet sleuthing revealed that assigning a bound method to an instance attribute always creates a cycle. This commit creates two new variants of generatorset for the special cases of ascending and descending generators. The special implementations of __contains__ have been extracted to these classes where they are defined as __contains__. generatorset now implements __new__ and changes the spawned type to one of the new classes if needed. Differential Revision: https://phab.mercurial-scm.org/D1780
mercurial/smartset.py
tests/test-revset.t
tests/test-revset2.t
--- a/mercurial/smartset.py	Wed Dec 27 13:53:21 2017 -0600
+++ b/mercurial/smartset.py	Wed Dec 27 11:08:32 2017 -0700
@@ -772,6 +772,16 @@
     >>> xs.last()  # cached
     4
     """
+    def __new__(cls, gen, iterasc=None):
+        if iterasc is None:
+            typ = cls
+        elif iterasc:
+            typ = _generatorsetasc
+        else:
+            typ = _generatorsetdesc
+
+        return super(generatorset, cls).__new__(typ)
+
     def __init__(self, gen, iterasc=None):
         """
         gen: a generator producing the values for the generatorset.
@@ -782,13 +792,6 @@
         self._genlist = []
         self._finished = False
         self._ascending = True
-        if iterasc is not None:
-            if iterasc:
-                self.fastasc = self._iterator
-                self.__contains__ = self._asccontains
-            else:
-                self.fastdesc = self._iterator
-                self.__contains__ = self._desccontains
 
     def __nonzero__(self):
         # Do not use 'for r in self' because it will enforce the iteration
@@ -814,36 +817,6 @@
         self._cache[x] = False
         return False
 
-    def _asccontains(self, x):
-        """version of contains optimised for ascending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l > x:
-                break
-
-        self._cache[x] = False
-        return False
-
-    def _desccontains(self, x):
-        """version of contains optimised for descending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l < x:
-                break
-
-        self._cache[x] = False
-        return False
-
     def __iter__(self):
         if self._ascending:
             it = self.fastasc
@@ -947,7 +920,45 @@
 
     def __repr__(self):
         d = {False: '-', True: '+'}[self._ascending]
-        return '<%s%s>' % (type(self).__name__, d)
+        return '<%s%s>' % (type(self).__name__.lstrip('_'), d)
+
+class _generatorsetasc(generatorset):
+    """Special case of generatorset optimized for ascending generators."""
+
+    fastasc = generatorset._iterator
+
+    def __contains__(self, x):
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l > x:
+                break
+
+        self._cache[x] = False
+        return False
+
+class _generatorsetdesc(generatorset):
+    """Special case of generatorset optimized for descending generators."""
+
+    fastdesc = generatorset._iterator
+
+    def __contains__(self, x):
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l < x:
+                break
+
+        self._cache[x] = False
+        return False
 
 def spanset(repo, start=0, end=None):
     """Create a spanset that represents a range of repository revisions
--- a/tests/test-revset.t	Wed Dec 27 13:53:21 2017 -0600
+++ b/tests/test-revset.t	Wed Dec 27 11:08:32 2017 -0700
@@ -1484,7 +1484,7 @@
   $ hg debugrevspec -s 'last(0::)'
   * set:
   <baseset slice=0:1
-    <generatorset->>
+    <generatorsetasc->>
   9
   $ hg identify -r '0::' --num
   9
--- a/tests/test-revset2.t	Wed Dec 27 13:53:21 2017 -0600
+++ b/tests/test-revset2.t	Wed Dec 27 11:08:32 2017 -0700
@@ -152,7 +152,7 @@
   * set:
   <addset
     <baseset- [1, 3, 5]>,
-    <generatorset+>>
+    <generatorsetdesc+>>
   5
   3
   1
@@ -174,7 +174,7 @@
             (symbol '5'))))))
   * set:
   <addset+
-    <generatorset+>,
+    <generatorsetdesc+>,
     <baseset- [1, 3, 5]>>
   0
   1
@@ -927,7 +927,7 @@
       (symbol 'merge')
       None))
   * set:
-  <generatorset+>
+  <generatorsetasc+>
   6
   7