33 ) |
33 ) |
34 |
34 |
35 parsers = policy.importmod(r'parsers') |
35 parsers = policy.importmod(r'parsers') |
36 propertycache = util.propertycache |
36 propertycache = util.propertycache |
37 |
37 |
|
38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta() |
|
39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000 |
|
40 |
38 def _parse(data): |
41 def _parse(data): |
39 # This method does a little bit of excessive-looking |
42 # This method does a little bit of excessive-looking |
40 # precondition checking. This is so that the behavior of this |
43 # precondition checking. This is so that the behavior of this |
41 # class exactly matches its C counterpart to try and help |
44 # class exactly matches its C counterpart to try and help |
42 # prevent surprise breakage for anyone that develops against |
45 # prevent surprise breakage for anyone that develops against |
121 |
124 |
122 def _cmp(a, b): |
125 def _cmp(a, b): |
123 return (a > b) - (a < b) |
126 return (a > b) - (a < b) |
124 |
127 |
125 class _lazymanifest(object): |
128 class _lazymanifest(object): |
126 def __init__(self, data, positions=None, extrainfo=None, extradata=None): |
129 """A pure python manifest backed by a byte string. It is supplimented with |
|
130 internal lists as it is modified, until it is compacted back to a pure byte |
|
131 string. |
|
132 |
|
133 ``data`` is the initial manifest data. |
|
134 |
|
135 ``positions`` is a list of offsets, one per manifest entry. Positive |
|
136 values are offsets into ``data``, negative values are offsets into the |
|
137 ``extradata`` list. When an entry is removed, its entry is dropped from |
|
138 ``positions``. The values are encoded such that when walking the list and |
|
139 indexing into ``data`` or ``extradata`` as appropriate, the entries are |
|
140 sorted by filename. |
|
141 |
|
142 ``extradata`` is a list of (key, hash, flags) for entries that were added or |
|
143 modified since the manifest was created or compacted. |
|
144 """ |
|
145 def __init__(self, data, positions=None, extrainfo=None, extradata=None, |
|
146 hasremovals=False): |
127 if positions is None: |
147 if positions is None: |
128 self.positions = self.findlines(data) |
148 self.positions = self.findlines(data) |
129 self.extrainfo = [0] * len(self.positions) |
149 self.extrainfo = [0] * len(self.positions) |
130 self.data = data |
150 self.data = data |
131 self.extradata = [] |
151 self.extradata = [] |
|
152 self.hasremovals = False |
132 else: |
153 else: |
133 self.positions = positions[:] |
154 self.positions = positions[:] |
134 self.extrainfo = extrainfo[:] |
155 self.extrainfo = extrainfo[:] |
135 self.extradata = extradata[:] |
156 self.extradata = extradata[:] |
136 self.data = data |
157 self.data = data |
|
158 self.hasremovals = hasremovals |
137 |
159 |
138 def findlines(self, data): |
160 def findlines(self, data): |
139 if not data: |
161 if not data: |
140 return [] |
162 return [] |
141 pos = data.find("\n") |
163 pos = data.find("\n") |
238 raise KeyError |
260 raise KeyError |
239 cur = self.positions[needle] |
261 cur = self.positions[needle] |
240 self.positions = self.positions[:needle] + self.positions[needle + 1:] |
262 self.positions = self.positions[:needle] + self.positions[needle + 1:] |
241 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] |
263 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] |
242 if cur >= 0: |
264 if cur >= 0: |
|
265 # This does NOT unsort the list as far as the search functions are |
|
266 # concerned, as they only examine lines mapped by self.positions. |
243 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] |
267 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] |
|
268 self.hasremovals = True |
244 |
269 |
245 def __setitem__(self, key, value): |
270 def __setitem__(self, key, value): |
246 if not isinstance(key, bytes): |
271 if not isinstance(key, bytes): |
247 raise TypeError("setitem: manifest keys must be a byte string.") |
272 raise TypeError("setitem: manifest keys must be a byte string.") |
248 if not isinstance(value, tuple) or len(value) != 2: |
273 if not isinstance(value, tuple) or len(value) != 2: |
274 self.extrainfo[needle:]) |
299 self.extrainfo[needle:]) |
275 |
300 |
276 def copy(self): |
301 def copy(self): |
277 # XXX call _compact like in C? |
302 # XXX call _compact like in C? |
278 return _lazymanifest(self.data, self.positions, self.extrainfo, |
303 return _lazymanifest(self.data, self.positions, self.extrainfo, |
279 self.extradata) |
304 self.extradata, self.hasremovals) |
280 |
305 |
281 def _compact(self): |
306 def _compact(self): |
282 # hopefully not called TOO often |
307 # hopefully not called TOO often |
283 if len(self.extradata) == 0: |
308 if len(self.extradata) == 0 and not self.hasremovals: |
284 return |
309 return |
285 l = [] |
310 l = [] |
286 i = 0 |
311 i = 0 |
287 offset = 0 |
312 offset = 0 |
288 self.extrainfo = [0] * len(self.positions) |
313 self.extrainfo = [0] * len(self.positions) |
289 while i < len(self.positions): |
314 while i < len(self.positions): |
290 if self.positions[i] >= 0: |
315 if self.positions[i] >= 0: |
291 cur = self.positions[i] |
316 cur = self.positions[i] |
292 last_cut = cur |
317 last_cut = cur |
|
318 |
|
319 # Collect all contiguous entries in the buffer at the current |
|
320 # offset, breaking out only for added/modified items held in |
|
321 # extradata, or a deleted line prior to the next position. |
293 while True: |
322 while True: |
294 self.positions[i] = offset |
323 self.positions[i] = offset |
295 i += 1 |
324 i += 1 |
296 if i == len(self.positions) or self.positions[i] < 0: |
325 if i == len(self.positions) or self.positions[i] < 0: |
297 break |
326 break |
|
327 |
|
328 # A removed file has no positions[] entry, but does have an |
|
329 # overwritten first byte. Break out and find the end of the |
|
330 # current good entry/entries if there is a removed file |
|
331 # before the next position. |
|
332 if (self.hasremovals |
|
333 and self.data.find('\n\x00', cur, |
|
334 self.positions[i]) != -1): |
|
335 break |
|
336 |
298 offset += self.positions[i] - cur |
337 offset += self.positions[i] - cur |
299 cur = self.positions[i] |
338 cur = self.positions[i] |
300 end_cut = self.data.find('\n', cur) |
339 end_cut = self.data.find('\n', cur) |
301 if end_cut != -1: |
340 if end_cut != -1: |
302 end_cut += 1 |
341 end_cut += 1 |
556 start = 0 |
596 start = 0 |
557 # zero copy representation of base as a buffer |
597 # zero copy representation of base as a buffer |
558 addbuf = util.buffer(base) |
598 addbuf = util.buffer(base) |
559 |
599 |
560 changes = list(changes) |
600 changes = list(changes) |
561 if len(changes) < 1000: |
601 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD: |
562 # start with a readonly loop that finds the offset of |
602 # start with a readonly loop that finds the offset of |
563 # each line and creates the deltas |
603 # each line and creates the deltas |
564 for f, todelete in changes: |
604 for f, todelete in changes: |
565 # bs will either be the index of the item or the insert point |
605 # bs will either be the index of the item or the insert point |
566 start, end = _msearch(addbuf, f, start) |
606 start, end = _msearch(addbuf, f, start) |