cext: fix memory leak in phases computation
Without this fix, a buffer whose size in bytes equals the number of
changesets in the repository is leaked each time the repository is
opened and changeset phases are computed.
Impact: the current code in hgwebdir creates a new `localrepository`
instance for each HTTP request. Since any pull or push is made of several
requests, a team of 100 people can easily produce thousands of such
requests per day.
Because the buffer comes from a low-level malloc, this leak can't be seen
with the gc module or tools relying on it, but valgrind spotted it immediately.
Reproduction
------------
for i in range(cl_args.iterations):
    repo = hg.repository(baseui, repo_path)
    rev = repo.revs(rev).first()
    ctx = repo[rev]
    del ctx
    del repo
    # avoid any pollution by other types of leaks
    # (that should be fixed in 5.8)
    repoview._filteredrepotypes.clear()
    gc.collect()
Measurements
------------
Resident Set Size (RSS), taken on a clone of
mozilla-central for performance analysis (440 000
changesets).
before:
5.8+hg19.5ac0f2a8ba72 1000 iterations: 1606MB
5.8+hg19.5ac0f2a8ba72 10000 iterations: 5723MB
after:
5.8+hg20.e2084d39e145 1000 iterations: 555MB
5.8+hg20.e2084d39e145 10000 iterations: 555MB
(double checked, not a copy/paste error)
(e2084d39e145 is the present changeset, before amendment
of the message to add the measurements)
# Tests to ensure that sha1dc.sha1 is exactly a drop-in for
# hashlib.sha1 for our needs.
from __future__ import absolute_import
import hashlib
import unittest
import silenttestrunner
try:
from mercurial.thirdparty import sha1dc
except ImportError:
sha1dc = None
class hashertestsbase(object):
    """Checks shared by every sha1 implementation under test.

    This mixin reads ``self.hasher`` (called with no argument or with
    initial data) and ``self.assertEqual``; the concrete cases in this file
    supply both by also inheriting from ``unittest.TestCase`` and setting
    ``hasher``.
    """

    # The test methods carry comments instead of docstrings so that the
    # descriptions unittest derives from docstrings stay as they were.

    def test_basic_hash(self):
        # hexdigest() is read mid-stream here; the later update() must keep
        # extending the same running hash.
        digester = self.hasher()
        digester.update(b'foo')
        after_foo = digester.hexdigest()
        self.assertEqual('0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33', after_foo)
        digester.update(b'bar')
        after_foobar = digester.hexdigest()
        self.assertEqual(
            '8843d7f92416211de9ebb963ff4ce28125932878', after_foobar
        )

    def test_copy_hasher(self):
        # A copy starts from the original's state and then diverges from it.
        source = self.hasher()
        source.update(b'foo')
        clone = source.copy()
        source.update(b'baz')
        clone.update(b'bar')
        foobaz = '21eb6533733a5e4763acacd1d45a60c2e0e404e1'
        self.assertEqual(foobaz, source.hexdigest())
        foobar = '8843d7f92416211de9ebb963ff4ce28125932878'
        self.assertEqual(foobar, clone.hexdigest())

    def test_init_hasher(self):
        # Data handed to the constructor is hashed like an update() call.
        seeded = self.hasher(b'initial string')
        expected = b'\xc9y|n\x1f3S\xa4:\xbaJ\xca,\xc1\x1a\x9e\xb8\xd8\xdd\x86'
        self.assertEqual(expected, seeded.digest())

    def test_bytes_like_types(self):
        # Every combination below feeds b'foo' followed by b'baz'.
        foobaz = '21eb6533733a5e4763acacd1d45a60c2e0e404e1'

        # bytearray and memoryview passed to update()
        digester = self.hasher()
        digester.update(bytearray(b'foo'))
        digester.update(memoryview(b'baz'))
        self.assertEqual(foobaz, digester.hexdigest())

        # bytearray, then memoryview, passed to the constructor
        for wrap in (bytearray, memoryview):
            digester = self.hasher(wrap(b'foo'))
            digester.update(b'baz')
            self.assertEqual(foobaz, digester.hexdigest())
class hashlibtests(unittest.TestCase, hashertestsbase):
    """Run the shared hasher checks against ``hashlib.sha1``."""

    # Constructor that the mixin's tests reach through ``self.hasher``.
    hasher = hashlib.sha1
# sha1dc is None when the import at the top of the file failed; in that case
# no sha1dc test case is defined at all.
if sha1dc is not None:

    class sha1dctests(unittest.TestCase, hashertestsbase):
        """Run the shared hasher checks against ``sha1dc.sha1``."""

        # Constructor that the mixin's tests reach through ``self.hasher``.
        hasher = sha1dc.sha1
# Script entry point: nothing runs when this module is merely imported.
if __name__ == '__main__':
    # Pass this module's name to the project's test runner.
    silenttestrunner.main(__name__)