transaction: change list of journal entries into a dictionary
author	Joerg Sonnenberger <joerg@bec.de>
date	Sat, 07 Nov 2020 21:34:09 +0100
changeset 45871 a985c4fb23ca
parent 45870 a6f08085edfe
child 45872 ec73a6a75985
transaction: change list of journal entries into a dictionary

The transaction object used to keep a mapping table of path names to journal entries and a list of journal entries consisting of path and file offset to truncate on rollback.

The offsets are used in three cases. repair.strip and rollback process all of them in one go, but they care about the order. For them, it is perfectly reasonable to read the journal back from disk, as both operations already involve at least one system call per journal entry. The other consumer is the revlog logic for moving from inline to external data storage. It doesn't care about the order of the journal and just needs the original offset stored.

Further optimisations are possible here to move the in-memory journal to a set(), but without memoisation of the original revlog size this could turn into O(n^2) behavior in the worst case when many revlogs need to be migrated.

Differential Revision: https://phab.mercurial-scm.org/D9277
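For illustration only, a minimal standalone sketch of reading back on-disk journal records, assuming the b"%s\0%d\n" record layout written by _addentry in the diff below (the helper name read_journal is hypothetical, not part of this change):

    # Hypothetical sketch mirroring the new readjournal(): parse journal
    # records of the form b"<path>\0<offset>\n" back into (path, offset) pairs.
    def read_journal(fp):
        fp.seek(0)
        entries = []
        for line in fp:
            name, offset = line.split(b'\0')
            # int() tolerates the trailing newline left on the offset field
            entries.append((name, int(offset)))
        return entries

Because the journal file is append-only, the pairs come back in insertion order, which is why consumers that care about ordering (repair.strip, rollback) can rely on it instead of the in-memory dictionary.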
mercurial/repair.py
mercurial/transaction.py
--- a/mercurial/repair.py	Sat Nov 07 19:24:12 2020 +0100
+++ b/mercurial/repair.py	Sat Nov 07 21:34:09 2020 +0100
@@ -209,7 +209,7 @@
                 # transaction and makes assumptions that file storage is
                 # using append-only files. We'll need some kind of storage
                 # API to handle stripping for us.
-                offset = len(tr._entries)
+                oldfiles = set(tr._offsetmap.keys())
 
                 tr.startgroup()
                 cl.strip(striprev, tr)
@@ -219,8 +219,11 @@
                     repo.file(fn).strip(striprev, tr)
                 tr.endgroup()
 
-                for i in pycompat.xrange(offset, len(tr._entries)):
-                    file, troffset = tr._entries[i]
+                entries = tr.readjournal()
+
+                for file, troffset in entries:
+                    if file in oldfiles:
+                        continue
                     with repo.svfs(file, b'a', checkambig=True) as fp:
                         fp.truncate(troffset)
                     if troffset == 0:
--- a/mercurial/transaction.py	Sat Nov 07 19:24:12 2020 +0100
+++ b/mercurial/transaction.py	Sat Nov 07 21:34:09 2020 +0100
@@ -158,8 +158,7 @@
         vfsmap[b''] = opener  # set default value
         self._vfsmap = vfsmap
         self._after = after
-        self._entries = []
-        self._map = {}
+        self._offsetmap = {}
         self._journal = journalname
         self._undoname = undoname
         self._queue = []
@@ -180,7 +179,7 @@
 
         # a dict of arguments to be passed to hooks
         self.hookargs = {}
-        self._file = opener.open(self._journal, b"w")
+        self._file = opener.open(self._journal, b"w+")
 
         # a list of ('location', 'path', 'backuppath', cache) entries.
         # - if 'backuppath' is empty, no file existed at backup time
@@ -249,7 +248,7 @@
     @active
     def add(self, file, offset):
         """record the state of an append-only file before update"""
-        if file in self._map or file in self._backupmap:
+        if file in self._offsetmap or file in self._backupmap:
             return
         if self._queue:
             self._queue[-1].append((file, offset))
@@ -259,10 +258,9 @@
 
     def _addentry(self, file, offset):
         """add a append-only entry to memory and on-disk state"""
-        if file in self._map or file in self._backupmap:
+        if file in self._offsetmap or file in self._backupmap:
             return
-        self._entries.append((file, offset))
-        self._map[file] = len(self._entries) - 1
+        self._offsetmap[file] = offset
         # add enough data to the journal to do the truncate
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
@@ -282,7 +280,7 @@
             msg = b'cannot use transaction.addbackup inside "group"'
             raise error.ProgrammingError(msg)
 
-        if file in self._map or file in self._backupmap:
+        if file in self._offsetmap or file in self._backupmap:
             return
         vfs = self._vfsmap[location]
         dirname, filename = vfs.split(file)
@@ -396,9 +394,16 @@
 
     @active
     def findoffset(self, file):
-        if file in self._map:
-            return self._entries[self._map[file]][1]
-        return None
+        return self._offsetmap.get(file)
+
+    @active
+    def readjournal(self):
+        self._file.seek(0)
+        entries = []
+        for l in self._file:
+            file, troffset = l.split(b'\0')
+            entries.append((file, int(troffset)))
+        return entries
 
     @active
     def replace(self, file, offset):
@@ -407,10 +412,9 @@
         that are not pending in the queue
         '''
 
-        if file not in self._map:
+        if file not in self._offsetmap:
             raise KeyError(file)
-        index = self._map[file]
-        self._entries[index] = (file, offset)
+        self._offsetmap[file] = offset
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
 
@@ -550,7 +554,7 @@
                     self._report(
                         b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
                     )
-        self._entries = []
+        self._offsetmap = {}
         self._writeundo()
         if self._after:
             self._after()
@@ -627,13 +631,14 @@
         undobackupfile.close()
 
     def _abort(self):
+        entries = self.readjournal()
         self._count = 0
         self._usages = 0
         self._file.close()
         self._backupsfile.close()
 
         try:
-            if not self._entries and not self._backupentries:
+            if not self._offsetmap and not self._backupentries:
                 if self._backupjournal:
                     self._opener.unlink(self._backupjournal)
                 if self._journal:
@@ -652,7 +657,7 @@
                     self._report,
                     self._opener,
                     self._vfsmap,
-                    self._entries,
+                    entries,
                     self._backupentries,
                     False,
                     checkambigfiles=self._checkambigfiles,