61 |
77 |
62 If no position is specified, we load the whole index, and replace |
78 If no position is specified, we load the whole index, and replace |
63 the lazy objects in revlog with the underlying objects for |
79 the lazy objects in revlog with the underlying objects for |
64 efficiency in cases where we look at most of the nodes. |
80 efficiency in cases where we look at most of the nodes. |
65 """ |
81 """ |
66 def __init__(self, data, revlog): |
82 def __init__(self, data, revlog, indexformat): |
67 self.data = data |
83 self.data = data |
68 self.s = struct.calcsize(indexformat) |
84 self.s = struct.calcsize(indexformat) |
|
85 self.indexformat = indexformat |
69 self.l = len(data)/self.s |
86 self.l = len(data)/self.s |
70 self.index = [None] * self.l |
87 self.index = [None] * self.l |
71 self.map = {nullid: -1} |
88 self.map = {nullid: -1} |
72 self.all = 0 |
89 self.all = 0 |
73 self.revlog = revlog |
90 self.revlog = revlog |
74 |
|
75 def trunc(self, pos): |
|
76 self.l = pos/self.s |
|
77 |
91 |
78 def load(self, pos=None): |
92 def load(self, pos=None): |
79 if self.all: return |
93 if self.all: return |
80 if pos is not None: |
94 if pos is not None: |
81 block = pos / 1000 |
95 block = pos / 1000 |
106 pos += len(self.p.index) |
121 pos += len(self.p.index) |
107 self.p.load(pos) |
122 self.p.load(pos) |
108 return self.p.index[pos] |
123 return self.p.index[pos] |
109 def __getitem__(self, pos): |
124 def __getitem__(self, pos): |
110 return self.p.index[pos] or self.load(pos) |
125 return self.p.index[pos] or self.load(pos) |
|
126 def __setitem__(self, pos, item): |
|
127 self.p.index[pos] = item |
111 def __delitem__(self, pos): |
128 def __delitem__(self, pos): |
112 del self.p.index[pos] |
129 del self.p.index[pos] |
113 def append(self, e): |
130 def append(self, e): |
114 self.p.index.append(e) |
131 self.p.index.append(e) |
115 def trunc(self, pos): |
|
116 self.p.trunc(pos) |
|
117 |
132 |
118 class lazymap(object): |
133 class lazymap(object): |
119 """a lazy version of the node map""" |
134 """a lazy version of the node map""" |
120 def __init__(self, parser): |
135 def __init__(self, parser): |
121 self.p = parser |
136 self.p = parser |
176 Both pieces of the revlog are written to in an append-only |
191 Both pieces of the revlog are written to in an append-only |
177 fashion, which means we never need to rewrite a file to insert or |
192 fashion, which means we never need to rewrite a file to insert or |
178 remove data, and can use some simple techniques to avoid the need |
193 remove data, and can use some simple techniques to avoid the need |
179 for locking while reading. |
194 for locking while reading. |
180 """ |
195 """ |
181 def __init__(self, opener, indexfile, datafile): |
196 def __init__(self, opener, indexfile, datafile, defversion=0): |
182 """ |
197 """ |
183 create a revlog object |
198 create a revlog object |
184 |
199 |
185 opener is a function that abstracts the file opening operation |
200 opener is a function that abstracts the file opening operation |
186 and can be used to implement COW semantics or the like. |
201 and can be used to implement COW semantics or the like. |
211 if (oldst and st.st_dev == oldst.st_dev |
229 if (oldst and st.st_dev == oldst.st_dev |
212 and st.st_ino == oldst.st_ino |
230 and st.st_ino == oldst.st_ino |
213 and st.st_mtime == oldst.st_mtime |
231 and st.st_mtime == oldst.st_mtime |
214 and st.st_ctime == oldst.st_ctime): |
232 and st.st_ctime == oldst.st_ctime): |
215 return |
233 return |
216 self.indexstat = st |
234 self.indexstat = st |
217 i = f.read() |
235 if len(i) > 0: |
218 |
236 v = struct.unpack(versionformat, i[:4])[0] |
219 if i and i[:4] != "\0\0\0\0": |
237 if v != 0: |
220 raise RevlogError(_("incompatible revlog signature on %s") % |
238 flags = v & ~0xFFFF |
221 self.indexfile) |
239 fmt = v & 0xFFFF |
222 |
240 if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)): |
223 if len(i) > 10000: |
241 raise RevlogError( |
224 # big index, let's parse it on demand |
242 _("unknown version format %d or flags %x on %s") % |
225 parser = lazyparser(i, self) |
243 (v, flags, self.indexfile)) |
226 self.index = lazyindex(parser) |
244 self.version = v |
227 self.nodemap = lazymap(parser) |
245 if v == 0: |
228 else: |
246 self.indexformat = indexformatv0 |
229 s = struct.calcsize(indexformat) |
247 else: |
230 l = len(i) / s |
248 self.indexformat = indexformatng |
231 self.index = [None] * l |
249 |
232 m = [None] * l |
250 if i: |
233 |
251 if st and st.st_size > 10000: |
234 n = 0 |
252 # big index, let's parse it on demand |
235 for f in xrange(0, l * s, s): |
253 parser = lazyparser(i, self, self.indexformat) |
236 # offset, size, base, linkrev, p1, p2, nodeid |
254 self.index = lazyindex(parser) |
237 e = struct.unpack(indexformat, i[f:f + s]) |
255 self.nodemap = lazymap(parser) |
238 m[n] = (e[6], n) |
256 else: |
239 self.index[n] = e |
257 self.parseindex(i) |
240 n += 1 |
258 if self.version != 0: |
241 |
259 e = list(self.index[0]) |
242 self.nodemap = dict(m) |
260 type = self.ngtype(e[0]) |
243 self.nodemap[nullid] = -1 |
261 e[0] = self.offset_type(0, type) |
|
262 self.index[0] = e |
|
263 else: |
|
264 self.nodemap = { nullid: -1} |
|
265 self.index = [] |
|
266 |
|
267 |
|
268 def parseindex(self, data): |
|
269 s = struct.calcsize(self.indexformat) |
|
270 l = len(data) |
|
271 self.index = [] |
|
272 self.nodemap = {nullid: -1} |
|
273 off = 0 |
|
274 n = 0 |
|
275 while off < l: |
|
276 e = struct.unpack(self.indexformat, data[off:off + s]) |
|
277 self.index.append(e) |
|
278 self.nodemap[e[-1]] = n |
|
279 n += 1 |
|
280 off += s |
|
281 |
|
282 def ngoffset(self, q): |
|
283 if q & 0xFFFF: |
|
284 raise RevlogError(_('%s: incompatible revision flag %x') % |
|
285 (self.indexfile, type)) |
|
286 return long(q >> 16) |
|
287 |
|
288 def ngtype(self, q): |
|
289 return int(q & 0xFFFF) |
|
290 |
|
291 def offset_type(self, offset, type): |
|
292 return long(long(offset) << 16 | type) |
|
293 |
|
294 def loadindexmap(self): |
|
295 """loads both the map and the index from the lazy parser""" |
|
296 if isinstance(self.index, lazyindex): |
|
297 p = self.index.p |
|
298 p.load() |
244 |
299 |
245 def tip(self): return self.node(len(self.index) - 1) |
300 def tip(self): return self.node(len(self.index) - 1) |
246 def count(self): return len(self.index) |
301 def count(self): return len(self.index) |
247 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6] |
302 def node(self, rev): |
|
303 return (rev < 0) and nullid or self.index[rev][-1] |
248 def rev(self, node): |
304 def rev(self, node): |
249 try: |
305 try: |
250 return self.nodemap[node] |
306 return self.nodemap[node] |
251 except KeyError: |
307 except KeyError: |
252 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node))) |
308 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node))) |
253 def linkrev(self, node): return self.index[self.rev(node)][3] |
309 def linkrev(self, node): return self.index[self.rev(node)][-4] |
254 def parents(self, node): |
310 def parents(self, node): |
255 if node == nullid: return (nullid, nullid) |
311 if node == nullid: return (nullid, nullid) |
256 return self.index[self.rev(node)][4:6] |
312 r = self.rev(node) |
257 |
313 d = self.index[r][-3:-1] |
258 def start(self, rev): return (rev < 0) and -1 or self.index[rev][0] |
314 if self.version == 0: |
|
315 return d |
|
316 return [ self.node(x) for x in d ] |
|
317 def start(self, rev): |
|
318 if rev < 0: |
|
319 return -1 |
|
320 if self.version != 0: |
|
321 return self.ngoffset(self.index[rev][0]) |
|
322 return self.index[rev][0] |
|
323 def end(self, rev): return self.start(rev) + self.length(rev) |
|
324 |
259 def length(self, rev): |
325 def length(self, rev): |
260 if rev < 0: |
326 if rev < 0: |
261 return 0 |
327 return 0 |
262 else: |
328 else: |
263 return self.index[rev][1] |
329 return self.index[rev][1] |
264 def end(self, rev): return self.start(rev) + self.length(rev) |
330 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5] |
265 def base(self, rev): return (rev < 0) and rev or self.index[rev][2] |
|
266 |
331 |
267 def reachable(self, rev, stop=None): |
332 def reachable(self, rev, stop=None): |
268 reachable = {} |
333 reachable = {} |
269 visit = [rev] |
334 visit = [rev] |
270 reachable[rev] = 1 |
335 reachable[rev] = 1 |
499 |
564 |
500 def patches(self, t, pl): |
565 def patches(self, t, pl): |
501 """apply a list of patches to a string""" |
566 """apply a list of patches to a string""" |
502 return mdiff.patches(t, pl) |
567 return mdiff.patches(t, pl) |
503 |
568 |
504 def chunk(self, rev): |
569 def chunk(self, rev, df=None, cachelen=4096): |
505 start, length = self.start(rev), self.length(rev) |
570 start, length = self.start(rev), self.length(rev) |
506 end = start + length |
571 end = start + length |
507 |
572 def loadcache(df): |
508 def loadcache(): |
573 cache_length = max(cachelen, length) # 4k |
509 cache_length = max(4096 * 1024, length) # 4Mo |
574 if not df: |
510 df = self.opener(self.datafile) |
575 df = self.opener(self.datafile) |
511 df.seek(start) |
576 df.seek(start) |
512 self.chunkcache = (start, df.read(cache_length)) |
577 self.chunkcache = (start, df.read(cache_length)) |
513 |
578 |
514 if not self.chunkcache: |
579 if not self.chunkcache: |
515 loadcache() |
580 loadcache(df) |
516 |
581 |
517 cache_start = self.chunkcache[0] |
582 cache_start = self.chunkcache[0] |
518 cache_end = cache_start + len(self.chunkcache[1]) |
583 cache_end = cache_start + len(self.chunkcache[1]) |
519 if start >= cache_start and end <= cache_end: |
584 if start >= cache_start and end <= cache_end: |
520 # it is cached |
585 # it is cached |
521 offset = start - cache_start |
586 offset = start - cache_start |
522 else: |
587 else: |
523 loadcache() |
588 loadcache(df) |
524 offset = 0 |
589 offset = 0 |
525 |
590 |
526 #def checkchunk(): |
591 #def checkchunk(): |
527 # df = self.opener(self.datafile) |
592 # df = self.opener(self.datafile) |
528 # df.seek(start) |
593 # df.seek(start) |
553 # look up what we need to read |
618 # look up what we need to read |
554 text = None |
619 text = None |
555 rev = self.rev(node) |
620 rev = self.rev(node) |
556 base = self.base(rev) |
621 base = self.base(rev) |
557 |
622 |
|
623 df = self.opener(self.datafile) |
|
624 |
558 # do we have useful data cached? |
625 # do we have useful data cached? |
559 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
626 if self.cache and self.cache[1] >= base and self.cache[1] < rev: |
560 base = self.cache[1] |
627 base = self.cache[1] |
561 text = self.cache[2] |
628 text = self.cache[2] |
562 else: |
629 else: |
563 text = self.chunk(base) |
630 text = self.chunk(base, df=df) |
564 |
631 |
565 bins = [] |
632 bins = [] |
566 for r in xrange(base + 1, rev + 1): |
633 for r in xrange(base + 1, rev + 1): |
567 bins.append(self.chunk(r)) |
634 bins.append(self.chunk(r, df=df)) |
568 |
635 |
569 text = self.patches(text, bins) |
636 text = self.patches(text, bins) |
570 |
637 |
571 p1, p2 = self.parents(node) |
638 p1, p2 = self.parents(node) |
572 if node != hash(text, p1, p2): |
639 if node != hash(text, p1, p2): |
619 |
686 |
620 offset = 0 |
687 offset = 0 |
621 if t >= 0: |
688 if t >= 0: |
622 offset = self.end(t) |
689 offset = self.end(t) |
623 |
690 |
624 e = (offset, l, base, link, p1, p2, node) |
691 if self.version == 0: |
|
692 e = (offset, l, base, link, p1, p2, node) |
|
693 else: |
|
694 e = (self.offset_type(offset, 0), l, len(text), |
|
695 base, link, self.rev(p1), self.rev(p2), node) |
625 |
696 |
626 self.index.append(e) |
697 self.index.append(e) |
627 self.nodemap[node] = n |
698 self.nodemap[node] = n |
628 entry = struct.pack(indexformat, *e) |
699 entry = struct.pack(self.indexformat, *e) |
629 |
700 |
630 transaction.add(self.datafile, e[0]) |
701 transaction.add(self.datafile, offset) |
|
702 transaction.add(self.indexfile, n * len(entry)) |
631 f = self.opener(self.datafile, "a") |
703 f = self.opener(self.datafile, "a") |
632 if data[0]: |
704 if data[0]: |
633 f.write(data[0]) |
705 f.write(data[0]) |
634 f.write(data[1]) |
706 f.write(data[1]) |
635 transaction.add(self.indexfile, n * len(entry)) |
707 f = self.opener(self.indexfile, "a") |
636 self.opener(self.indexfile, "a").write(entry) |
708 |
|
709 if len(self.index) == 1 and self.version != 0: |
|
710 l = struct.pack(versionformat, self.version) |
|
711 f.write(l) |
|
712 entry = entry[4:] |
|
713 |
|
714 f.write(entry) |
637 |
715 |
638 self.cache = (node, n, text) |
716 self.cache = (node, n, text) |
639 return node |
717 return node |
640 |
718 |
641 def ancestor(self, a, b): |
719 def ancestor(self, a, b): |
746 node = None |
824 node = None |
747 |
825 |
748 base = prev = -1 |
826 base = prev = -1 |
749 start = end = measure = 0 |
827 start = end = measure = 0 |
750 if r: |
828 if r: |
751 base = self.base(t) |
|
752 start = self.start(base) |
|
753 end = self.end(t) |
829 end = self.end(t) |
754 measure = self.length(base) |
830 |
755 prev = self.tip() |
831 ifh = self.opener(self.indexfile, "a+") |
756 |
832 transaction.add(self.indexfile, ifh.tell()) |
757 transaction.add(self.datafile, end) |
833 transaction.add(self.datafile, end) |
758 transaction.add(self.indexfile, r * struct.calcsize(indexformat)) |
|
759 dfh = self.opener(self.datafile, "a") |
834 dfh = self.opener(self.datafile, "a") |
760 ifh = self.opener(self.indexfile, "a") |
|
761 |
835 |
762 # loop through our set of deltas |
836 # loop through our set of deltas |
763 chain = None |
837 chain = None |
764 for chunk in revs: |
838 for chunk in revs: |
765 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80]) |
839 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80]) |
792 tempd = compress(delta) |
866 tempd = compress(delta) |
793 cdelta = tempd[0] + tempd[1] |
867 cdelta = tempd[0] + tempd[1] |
794 |
868 |
795 if chain != prev or (end - start + len(cdelta)) > measure * 2: |
869 if chain != prev or (end - start + len(cdelta)) > measure * 2: |
796 # flush our writes here so we can read it in revision |
870 # flush our writes here so we can read it in revision |
797 dfh.flush() |
871 if dfh: |
|
872 dfh.flush() |
798 ifh.flush() |
873 ifh.flush() |
799 text = self.revision(chain) |
874 text = self.revision(chain) |
800 text = self.patches(text, [delta]) |
875 text = self.patches(text, [delta]) |
801 chk = self.addrevision(text, transaction, link, p1, p2) |
876 chk = self.addrevision(text, transaction, link, p1, p2) |
802 if chk != node: |
877 if chk != node: |
803 raise RevlogError(_("consistency error adding group")) |
878 raise RevlogError(_("consistency error adding group")) |
804 measure = len(text) |
879 measure = len(text) |
805 else: |
880 else: |
806 e = (end, len(cdelta), base, link, p1, p2, node) |
881 if self.version == 0: |
|
882 e = (end, len(cdelta), base, link, p1, p2, node) |
|
883 else: |
|
884 e = (self.offset_type(end, 0), len(cdelta), -1, base, |
|
885 link, self.rev(p1), self.rev(p2), node) |
807 self.index.append(e) |
886 self.index.append(e) |
808 self.nodemap[node] = r |
887 self.nodemap[node] = r |
809 dfh.write(cdelta) |
888 dfh.write(cdelta) |
810 ifh.write(struct.pack(indexformat, *e)) |
889 ifh.write(struct.pack(self.indexformat, *e)) |
811 |
890 |
812 t, r, chain, prev = r, r + 1, node, node |
891 t, r, chain, prev = r, r + 1, node, node |
813 base = self.base(t) |
892 base = self.base(t) |
814 start = self.start(base) |
893 start = self.start(base) |
815 end = self.end(t) |
894 end = self.end(t) |
816 |
895 |
817 dfh.close() |
|
818 ifh.close() |
|
819 if node is None: |
896 if node is None: |
820 raise RevlogError(_("group to be added is empty")) |
897 raise RevlogError(_("group to be added is empty")) |
821 return node |
898 return node |
822 |
899 |
823 def strip(self, rev, minlink): |
900 def strip(self, rev, minlink): |
824 if self.count() == 0 or rev >= self.count(): |
901 if self.count() == 0 or rev >= self.count(): |
825 return |
902 return |
|
903 |
|
904 if isinstance(self.index, lazyindex): |
|
905 self.loadindexmap() |
826 |
906 |
827 # When stripping away a revision, we need to make sure it |
907 # When stripping away a revision, we need to make sure it |
828 # does not actually belong to an older changeset. |
908 # does not actually belong to an older changeset. |
829 # The minlink parameter defines the oldest revision |
909 # The minlink parameter defines the oldest revision |
830 # we're allowed to strip away. |
910 # we're allowed to strip away. |
831 while minlink > self.index[rev][3]: |
911 while minlink > self.index[rev][-4]: |
832 rev += 1 |
912 rev += 1 |
833 if rev >= self.count(): |
913 if rev >= self.count(): |
834 return |
914 return |
835 |
915 |
836 # first truncate the files on disk |
916 # first truncate the files on disk |
837 end = self.start(rev) |
917 end = self.start(rev) |
838 self.opener(self.datafile, "a").truncate(end) |
918 df = self.opener(self.datafile, "a") |
839 end = rev * struct.calcsize(indexformat) |
919 df.truncate(end) |
840 self.opener(self.indexfile, "a").truncate(end) |
920 end = rev * struct.calcsize(self.indexformat) |
|
921 |
|
922 indexf = self.opener(self.indexfile, "a") |
|
923 indexf.truncate(end) |
841 |
924 |
842 # then reset internal state in memory to forget those revisions |
925 # then reset internal state in memory to forget those revisions |
843 self.cache = None |
926 self.cache = None |
844 self.chunkcache = None |
927 self.chunkcache = None |
845 for p in self.index[rev:]: |
928 for x in xrange(rev, self.count()): |
846 del self.nodemap[p[6]] |
929 del self.nodemap[self.node(x)] |
|
930 |
847 del self.index[rev:] |
931 del self.index[rev:] |
848 |
|
849 # truncating the lazyindex also truncates the lazymap. |
|
850 if isinstance(self.index, lazyindex): |
|
851 self.index.trunc(end) |
|
852 |
|
853 |
932 |
854 def checksize(self): |
933 def checksize(self): |
855 expected = 0 |
934 expected = 0 |
856 if self.count(): |
935 if self.count(): |
857 expected = self.end(self.count() - 1) |
936 expected = self.end(self.count() - 1) |