5 # |
5 # |
6 # This software may be used and distributed according to the terms of the |
6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2 or any later version. |
7 # GNU General Public License version 2 or any later version. |
8 |
8 |
9 from __future__ import absolute_import |
9 from __future__ import absolute_import |
10 from .. import error |
10 |
|
11 import struct |
|
12 |
|
13 from .. import ( |
|
14 error, |
|
15 node as nodemod, |
|
16 pycompat, |
|
17 ) |
11 |
18 |
12 |
19 |
13 class NodeMap(dict): |
20 class NodeMap(dict): |
14 def __missing__(self, x): |
21 def __missing__(self, x): |
15 raise error.RevlogError(b'unknown node: %s' % x) |
22 raise error.RevlogError(b'unknown node: %s' % x) |
|
23 |
|
24 |
|
25 ### Nodemap Trie |
|
26 # |
|
27 # This is a simple reference implementation to compute and persist a nodemap |
|
28 # trie. This reference implementation is write only. The python version of this |
|
29 # is not expected to be actually used, since it wont provide performance |
|
30 # improvement over existing non-persistent C implementation. |
|
31 # |
|
32 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each |
|
33 # revision can be adressed using its node shortest prefix. |
|
34 # |
|
35 # The trie is stored as a sequence of block. Each block contains 16 entries |
|
36 # (signed 64bit integer, big endian). Each entry can be one of the following: |
|
37 # |
|
38 # * value >= 0 -> index of sub-block |
|
39 # * value == -1 -> no value |
|
40 # * value < -1 -> a revision value: rev = -(value+10) |
|
41 # |
|
42 # The implementation focus on simplicity, not on performance. A Rust |
|
43 # implementation should provide a efficient version of the same binary |
|
44 # persistence. This reference python implementation is never meant to be |
|
45 # extensively use in production. |
|
46 |
|
47 |
|
48 def persistent_data(index): |
|
49 """return the persistent binary form for a nodemap for a given index |
|
50 """ |
|
51 trie = _build_trie(index) |
|
52 return _persist_trie(trie) |
|
53 |
|
54 |
|
55 S_BLOCK = struct.Struct(">" + ("l" * 16)) |
|
56 |
|
57 NO_ENTRY = -1 |
|
58 # rev 0 need to be -2 because 0 is used by block, -1 is a special value. |
|
59 REV_OFFSET = 2 |
|
60 |
|
61 |
|
62 def _transform_rev(rev): |
|
63 """Return the number used to represent the rev in the tree. |
|
64 |
|
65 (or retrieve a rev number from such representation) |
|
66 |
|
67 Note that this is an involution, a function equal to its inverse (i.e. |
|
68 which gives the identity when applied to itself). |
|
69 """ |
|
70 return -(rev + REV_OFFSET) |
|
71 |
|
72 |
|
73 def _to_int(hex_digit): |
|
74 """turn an hexadecimal digit into a proper integer""" |
|
75 return int(hex_digit, 16) |
|
76 |
|
77 |
|
78 def _build_trie(index): |
|
79 """build a nodemap trie |
|
80 |
|
81 The nodemap stores revision number for each unique prefix. |
|
82 |
|
83 Each block is a dictionary with keys in `[0, 15]`. Values are either |
|
84 another block or a revision number. |
|
85 """ |
|
86 root = {} |
|
87 for rev in range(len(index)): |
|
88 hex = nodemod.hex(index[rev][7]) |
|
89 _insert_into_block(index, 0, root, rev, hex) |
|
90 return root |
|
91 |
|
92 |
|
93 def _insert_into_block(index, level, block, current_rev, current_hex): |
|
94 """insert a new revision in a block |
|
95 |
|
96 index: the index we are adding revision for |
|
97 level: the depth of the current block in the trie |
|
98 block: the block currently being considered |
|
99 current_rev: the revision number we are adding |
|
100 current_hex: the hexadecimal representation of the of that revision |
|
101 """ |
|
102 hex_digit = _to_int(current_hex[level : level + 1]) |
|
103 entry = block.get(hex_digit) |
|
104 if entry is None: |
|
105 # no entry, simply store the revision number |
|
106 block[hex_digit] = current_rev |
|
107 elif isinstance(entry, dict): |
|
108 # need to recurse to an underlying block |
|
109 _insert_into_block(index, level + 1, entry, current_rev, current_hex) |
|
110 else: |
|
111 # collision with a previously unique prefix, inserting new |
|
112 # vertices to fit both entry. |
|
113 other_hex = nodemod.hex(index[entry][7]) |
|
114 other_rev = entry |
|
115 new = {} |
|
116 block[hex_digit] = new |
|
117 _insert_into_block(index, level + 1, new, other_rev, other_hex) |
|
118 _insert_into_block(index, level + 1, new, current_rev, current_hex) |
|
119 |
|
120 |
|
121 def _persist_trie(root): |
|
122 """turn a nodemap trie into persistent binary data |
|
123 |
|
124 See `_build_trie` for nodemap trie structure""" |
|
125 block_map = {} |
|
126 chunks = [] |
|
127 for tn in _walk_trie(root): |
|
128 block_map[id(tn)] = len(chunks) |
|
129 chunks.append(_persist_block(tn, block_map)) |
|
130 return b''.join(chunks) |
|
131 |
|
132 |
|
133 def _walk_trie(block): |
|
134 """yield all the block in a trie |
|
135 |
|
136 Children blocks are always yield before their parent block. |
|
137 """ |
|
138 for (_, item) in sorted(block.items()): |
|
139 if isinstance(item, dict): |
|
140 for sub_block in _walk_trie(item): |
|
141 yield sub_block |
|
142 yield block |
|
143 |
|
144 |
|
145 def _persist_block(block_node, block_map): |
|
146 """produce persistent binary data for a single block |
|
147 |
|
148 Children block are assumed to be already persisted and present in |
|
149 block_map. |
|
150 """ |
|
151 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16)) |
|
152 return S_BLOCK.pack(*data) |
|
153 |
|
154 |
|
155 def _to_value(item, block_map): |
|
156 """persist any value as an integer""" |
|
157 if item is None: |
|
158 return NO_ENTRY |
|
159 elif isinstance(item, dict): |
|
160 return block_map[id(item)] |
|
161 else: |
|
162 return _transform_rev(item) |