mercurial/revlogutils/nodemap.py
changeset 44307 c577bb4a04d4
parent 44034 ab595920de0e
child 44308 5962fd0d1045
equal deleted inserted replaced
44306:a0ec05d93c8e 44307:c577bb4a04d4
     5 #
     5 #
     6 # This software may be used and distributed according to the terms of the
     6 # This software may be used and distributed according to the terms of the
     7 # GNU General Public License version 2 or any later version.
     7 # GNU General Public License version 2 or any later version.
     8 
     8 
     9 from __future__ import absolute_import
     9 from __future__ import absolute_import
    10 from .. import error
    10 
       
    11 import struct
       
    12 
       
    13 from .. import (
       
    14     error,
       
    15     node as nodemod,
       
    16     pycompat,
       
    17 )
    11 
    18 
    12 
    19 
    13 class NodeMap(dict):
    20 class NodeMap(dict):
    14     def __missing__(self, x):
    21     def __missing__(self, x):
    15         raise error.RevlogError(b'unknown node: %s' % x)
    22         raise error.RevlogError(b'unknown node: %s' % x)
       
    23 
       
    24 
       
    25 ### Nodemap Trie
       
    26 #
       
    27 # This is a simple reference implementation to compute and persist a nodemap
       
    28 # trie. This reference implementation is write only. The python version of this
       
    29 # is not expected to be actually used, since it wont provide performance
       
    30 # improvement over existing non-persistent C implementation.
       
    31 #
       
    32 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
       
    33 # revision can be adressed using its node shortest prefix.
       
    34 #
       
    35 # The trie is stored as a sequence of block. Each block contains 16 entries
       
    36 # (signed 64bit integer, big endian). Each entry can be one of the following:
       
    37 #
       
    38 #  * value >=  0 -> index of sub-block
       
    39 #  * value == -1 -> no value
       
    40 #  * value <  -1 -> a revision value: rev = -(value+10)
       
    41 #
       
    42 # The implementation focus on simplicity, not on performance. A Rust
       
    43 # implementation should provide a efficient version of the same binary
       
    44 # persistence. This reference python implementation is never meant to be
       
    45 # extensively use in production.
       
    46 
       
    47 
       
    48 def persistent_data(index):
       
    49     """return the persistent binary form for a nodemap for a given index
       
    50     """
       
    51     trie = _build_trie(index)
       
    52     return _persist_trie(trie)
       
    53 
       
    54 
       
    55 S_BLOCK = struct.Struct(">" + ("l" * 16))
       
    56 
       
    57 NO_ENTRY = -1
       
    58 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
       
    59 REV_OFFSET = 2
       
    60 
       
    61 
       
    62 def _transform_rev(rev):
       
    63     """Return the number used to represent the rev in the tree.
       
    64 
       
    65     (or retrieve a rev number from such representation)
       
    66 
       
    67     Note that this is an involution, a function equal to its inverse (i.e.
       
    68     which gives the identity when applied to itself).
       
    69     """
       
    70     return -(rev + REV_OFFSET)
       
    71 
       
    72 
       
    73 def _to_int(hex_digit):
       
    74     """turn an hexadecimal digit into a proper integer"""
       
    75     return int(hex_digit, 16)
       
    76 
       
    77 
       
    78 def _build_trie(index):
       
    79     """build a nodemap trie
       
    80 
       
    81     The nodemap stores revision number for each unique prefix.
       
    82 
       
    83     Each block is a dictionary with keys in `[0, 15]`. Values are either
       
    84     another block or a revision number.
       
    85     """
       
    86     root = {}
       
    87     for rev in range(len(index)):
       
    88         hex = nodemod.hex(index[rev][7])
       
    89         _insert_into_block(index, 0, root, rev, hex)
       
    90     return root
       
    91 
       
    92 
       
    93 def _insert_into_block(index, level, block, current_rev, current_hex):
       
    94     """insert a new revision in a block
       
    95 
       
    96     index: the index we are adding revision for
       
    97     level: the depth of the current block in the trie
       
    98     block: the block currently being considered
       
    99     current_rev: the revision number we are adding
       
   100     current_hex: the hexadecimal representation of the of that revision
       
   101     """
       
   102     hex_digit = _to_int(current_hex[level : level + 1])
       
   103     entry = block.get(hex_digit)
       
   104     if entry is None:
       
   105         # no entry, simply store the revision number
       
   106         block[hex_digit] = current_rev
       
   107     elif isinstance(entry, dict):
       
   108         # need to recurse to an underlying block
       
   109         _insert_into_block(index, level + 1, entry, current_rev, current_hex)
       
   110     else:
       
   111         # collision with a previously unique prefix, inserting new
       
   112         # vertices to fit both entry.
       
   113         other_hex = nodemod.hex(index[entry][7])
       
   114         other_rev = entry
       
   115         new = {}
       
   116         block[hex_digit] = new
       
   117         _insert_into_block(index, level + 1, new, other_rev, other_hex)
       
   118         _insert_into_block(index, level + 1, new, current_rev, current_hex)
       
   119 
       
   120 
       
   121 def _persist_trie(root):
       
   122     """turn a nodemap trie into persistent binary data
       
   123 
       
   124     See `_build_trie` for nodemap trie structure"""
       
   125     block_map = {}
       
   126     chunks = []
       
   127     for tn in _walk_trie(root):
       
   128         block_map[id(tn)] = len(chunks)
       
   129         chunks.append(_persist_block(tn, block_map))
       
   130     return b''.join(chunks)
       
   131 
       
   132 
       
   133 def _walk_trie(block):
       
   134     """yield all the block in a trie
       
   135 
       
   136     Children blocks are always yield before their parent block.
       
   137     """
       
   138     for (_, item) in sorted(block.items()):
       
   139         if isinstance(item, dict):
       
   140             for sub_block in _walk_trie(item):
       
   141                 yield sub_block
       
   142     yield block
       
   143 
       
   144 
       
   145 def _persist_block(block_node, block_map):
       
   146     """produce persistent binary data for a single block
       
   147 
       
   148     Children block are assumed to be already persisted and present in
       
   149     block_map.
       
   150     """
       
   151     data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
       
   152     return S_BLOCK.pack(*data)
       
   153 
       
   154 
       
   155 def _to_value(item, block_map):
       
   156     """persist any value as an integer"""
       
   157     if item is None:
       
   158         return NO_ENTRY
       
   159     elif isinstance(item, dict):
       
   160         return block_map[id(item)]
       
   161     else:
       
   162         return _transform_rev(item)