mercurial/verify.py
changeset 2778 fdc232d8a193
child 3196 f3b939444c72
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/verify.py	Mon Aug 07 16:27:09 2006 -0500
@@ -0,0 +1,200 @@
+# verify.py - repository integrity checking for Mercurial
+#
+# Copyright 2006 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+from node import *
+from i18n import gettext as _
+import revlog, mdiff
+
+def verify(repo):
+    filelinkrevs = {}
+    filenodes = {}
+    changesets = revisions = files = 0
+    errors = [0]
+    warnings = [0]
+    neededmanifests = {}
+
+    def err(msg):
+        repo.ui.warn(msg + "\n")
+        errors[0] += 1
+
+    def warn(msg):
+        repo.ui.warn(msg + "\n")
+        warnings[0] += 1
+
+    def checksize(obj, name):
+        d = obj.checksize()
+        if d[0]:
+            err(_("%s data length off by %d bytes") % (name, d[0]))
+        if d[1]:
+            err(_("%s index contains %d extra bytes") % (name, d[1]))
+
+    def checkversion(obj, name):
+        if obj.version != revlog.REVLOGV0:
+            if not revlogv1:
+                warn(_("warning: `%s' uses revlog format 1") % name)
+        elif revlogv1:
+            warn(_("warning: `%s' uses revlog format 0") % name)
+
+    revlogv1 = repo.revlogversion != revlog.REVLOGV0
+    if repo.ui.verbose or revlogv1 != repo.revlogv1:
+        repo.ui.status(_("repository uses revlog format %d\n") %
+                       (revlogv1 and 1 or 0))
+
+    seen = {}
+    repo.ui.status(_("checking changesets\n"))
+    checksize(repo.changelog, "changelog")
+
+    for i in range(repo.changelog.count()):
+        changesets += 1
+        n = repo.changelog.node(i)
+        l = repo.changelog.linkrev(n)
+        if l != i:
+            err(_("incorrect link (%d) for changeset revision %d") %(l, i))
+        if n in seen:
+            err(_("duplicate changeset at revision %d") % i)
+        seen[n] = 1
+
+        for p in repo.changelog.parents(n):
+            if p not in repo.changelog.nodemap:
+                err(_("changeset %s has unknown parent %s") %
+                             (short(n), short(p)))
+        try:
+            changes = repo.changelog.read(n)
+        except KeyboardInterrupt:
+            repo.ui.warn(_("interrupted"))
+            raise
+        except Exception, inst:
+            err(_("unpacking changeset %s: %s") % (short(n), inst))
+            continue
+
+        neededmanifests[changes[0]] = n
+
+        for f in changes[3]:
+            filelinkrevs.setdefault(f, []).append(i)
+
+    seen = {}
+    repo.ui.status(_("checking manifests\n"))
+    checkversion(repo.manifest, "manifest")
+    checksize(repo.manifest, "manifest")
+
+    for i in range(repo.manifest.count()):
+        n = repo.manifest.node(i)
+        l = repo.manifest.linkrev(n)
+
+        if l < 0 or l >= repo.changelog.count():
+            err(_("bad manifest link (%d) at revision %d") % (l, i))
+
+        if n in neededmanifests:
+            del neededmanifests[n]
+
+        if n in seen:
+            err(_("duplicate manifest at revision %d") % i)
+
+        seen[n] = 1
+
+        for p in repo.manifest.parents(n):
+            if p not in repo.manifest.nodemap:
+                err(_("manifest %s has unknown parent %s") %
+                    (short(n), short(p)))
+
+        try:
+            delta = mdiff.patchtext(repo.manifest.delta(n))
+        except KeyboardInterrupt:
+            repo.ui.warn(_("interrupted"))
+            raise
+        except Exception, inst:
+            err(_("unpacking manifest %s: %s") % (short(n), inst))
+            continue
+
+        try:
+            ff = [ l.split('\0') for l in delta.splitlines() ]
+            for f, fn in ff:
+                filenodes.setdefault(f, {})[bin(fn[:40])] = 1
+        except (ValueError, TypeError), inst:
+            err(_("broken delta in manifest %s: %s") % (short(n), inst))
+
+    repo.ui.status(_("crosschecking files in changesets and manifests\n"))
+
+    for m, c in neededmanifests.items():
+        err(_("Changeset %s refers to unknown manifest %s") %
+            (short(m), short(c)))
+    del neededmanifests
+
+    for f in filenodes:
+        if f not in filelinkrevs:
+            err(_("file %s in manifest but not in changesets") % f)
+
+    for f in filelinkrevs:
+        if f not in filenodes:
+            err(_("file %s in changeset but not in manifest") % f)
+
+    repo.ui.status(_("checking files\n"))
+    ff = filenodes.keys()
+    ff.sort()
+    for f in ff:
+        if f == "/dev/null":
+            continue
+        files += 1
+        if not f:
+            err(_("file without name in manifest %s") % short(n))
+            continue
+        fl = repo.file(f)
+        checkversion(fl, f)
+        checksize(fl, f)
+
+        nodes = {nullid: 1}
+        seen = {}
+        for i in range(fl.count()):
+            revisions += 1
+            n = fl.node(i)
+
+            if n in seen:
+                err(_("%s: duplicate revision %d") % (f, i))
+            if n not in filenodes[f]:
+                err(_("%s: %d:%s not in manifests") % (f, i, short(n)))
+            else:
+                del filenodes[f][n]
+
+            flr = fl.linkrev(n)
+            if flr not in filelinkrevs.get(f, []):
+                err(_("%s:%s points to unexpected changeset %d")
+                        % (f, short(n), flr))
+            else:
+                filelinkrevs[f].remove(flr)
+
+            # verify contents
+            try:
+                t = fl.read(n)
+            except KeyboardInterrupt:
+                repo.ui.warn(_("interrupted"))
+                raise
+            except Exception, inst:
+                err(_("unpacking file %s %s: %s") % (f, short(n), inst))
+
+            # verify parents
+            (p1, p2) = fl.parents(n)
+            if p1 not in nodes:
+                err(_("file %s:%s unknown parent 1 %s") %
+                    (f, short(n), short(p1)))
+            if p2 not in nodes:
+                err(_("file %s:%s unknown parent 2 %s") %
+                        (f, short(n), short(p1)))
+            nodes[n] = 1
+
+        # cross-check
+        for node in filenodes[f]:
+            err(_("node %s in manifests not in %s") % (hex(node), f))
+
+    repo.ui.status(_("%d files, %d changesets, %d total revisions\n") %
+                   (files, changesets, revisions))
+
+    if warnings[0]:
+        repo.ui.warn(_("%d warnings encountered!\n") % warnings[0])
+    if errors[0]:
+        repo.ui.warn(_("%d integrity errors encountered!\n") % errors[0])
+        return 1
+