mercurial/similar.py
changeset 31584 985a98c6bad0
parent 31583 2efd9771323e
child 32202 ded48ad55146
equal deleted inserted replaced
31583:2efd9771323e 31584:985a98c6bad0
     4 #
     4 #
     5 # This software may be used and distributed according to the terms of the
     5 # This software may be used and distributed according to the terms of the
     6 # GNU General Public License version 2 or any later version.
     6 # GNU General Public License version 2 or any later version.
     7 
     7 
     8 from __future__ import absolute_import
     8 from __future__ import absolute_import
     9 
       
    10 import hashlib
       
    11 
     9 
    12 from .i18n import _
    10 from .i18n import _
    13 from . import (
    11 from . import (
    14     bdiff,
    12     bdiff,
    15     mdiff,
    13     mdiff,
    21     Takes a list of new filectxs and a list of removed filectxs, and yields
    19     Takes a list of new filectxs and a list of removed filectxs, and yields
    22     (before, after) tuples of exact matches.
    20     (before, after) tuples of exact matches.
    23     '''
    21     '''
    24     numfiles = len(added) + len(removed)
    22     numfiles = len(added) + len(removed)
    25 
    23 
    26     # Get hashes of removed files.
    24     # Build table of removed files: {hash(fctx.data()): [fctx, ...]}.
       
    25     # We use hash() to discard fctx.data() from memory.
    27     hashes = {}
    26     hashes = {}
    28     for i, fctx in enumerate(reversed(removed)):
    27     for i, fctx in enumerate(removed):
    29         repo.ui.progress(_('searching for exact renames'), i, total=numfiles,
    28         repo.ui.progress(_('searching for exact renames'), i, total=numfiles,
    30                          unit=_('files'))
    29                          unit=_('files'))
    31         h = hashlib.sha1(fctx.data()).digest()
    30         h = hash(fctx.data())
    32         hashes[h] = fctx
    31         if h not in hashes:
       
    32             hashes[h] = [fctx]
       
    33         else:
       
    34             hashes[h].append(fctx)
    33 
    35 
    34     # For each added file, see if it corresponds to a removed file.
    36     # For each added file, see if it corresponds to a removed file.
    35     for i, fctx in enumerate(added):
    37     for i, fctx in enumerate(added):
    36         repo.ui.progress(_('searching for exact renames'), i + len(removed),
    38         repo.ui.progress(_('searching for exact renames'), i + len(removed),
    37                 total=numfiles, unit=_('files'))
    39                 total=numfiles, unit=_('files'))
    38         adata = fctx.data()
    40         adata = fctx.data()
    39         h = hashlib.sha1(adata).digest()
    41         h = hash(adata)
    40         if h in hashes:
    42         for rfctx in hashes.get(h, []):
    41             rfctx = hashes[h]
       
    42             # compare between actual file contents for exact identity
    43             # compare between actual file contents for exact identity
    43             if adata == rfctx.data():
    44             if adata == rfctx.data():
    44                 yield (rfctx, fctx)
    45                 yield (rfctx, fctx)
       
    46                 break
    45 
    47 
    46     # Done
    48     # Done
    47     repo.ui.progress(_('searching for exact renames'), None)
    49     repo.ui.progress(_('searching for exact renames'), None)
    48 
    50 
    49 def _ctxdata(fctx):
    51 def _ctxdata(fctx):