transaction: split new files into a separate set
author Joerg Sonnenberger <joerg@bec.de>
Sat, 07 Nov 2020 22:31:29 +0100
changeset 45872 ec73a6a75985
parent 45871 a985c4fb23ca
child 45873 c8860a212770
transaction: split new files into a separate set

Journal entries with size 0 are common as they represent new revlog files.
Move them from the dictionary into a set as the former is more dense. This
reduces peak RSS by 70MB for the NetBSD test repository with around 450k
files under .hg/store.

Differential Revision: https://phab.mercurial-scm.org/D9278
mercurial/repair.py
mercurial/transaction.py
--- a/mercurial/repair.py	Sat Nov 07 21:34:09 2020 +0100
+++ b/mercurial/repair.py	Sat Nov 07 22:31:29 2020 +0100
@@ -210,6 +210,7 @@
                 # using append-only files. We'll need some kind of storage
                 # API to handle stripping for us.
                 oldfiles = set(tr._offsetmap.keys())
+                oldfiles.update(tr._newfiles)
 
                 tr.startgroup()
                 cl.strip(striprev, tr)
--- a/mercurial/transaction.py	Sat Nov 07 21:34:09 2020 +0100
+++ b/mercurial/transaction.py	Sat Nov 07 22:31:29 2020 +0100
@@ -159,6 +159,7 @@
         self._vfsmap = vfsmap
         self._after = after
         self._offsetmap = {}
+        self._newfiles = set()
         self._journal = journalname
         self._undoname = undoname
         self._queue = []
@@ -248,7 +249,11 @@
     @active
     def add(self, file, offset):
         """record the state of an append-only file before update"""
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
         if self._queue:
             self._queue[-1].append((file, offset))
@@ -258,9 +263,16 @@
 
     def _addentry(self, file, offset):
         """add a append-only entry to memory and on-disk state"""
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
-        self._offsetmap[file] = offset
+        if offset:
+            self._offsetmap[file] = offset
+        else:
+            self._newfiles.add(file)
         # add enough data to the journal to do the truncate
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
@@ -280,7 +292,11 @@
             msg = b'cannot use transaction.addbackup inside "group"'
             raise error.ProgrammingError(msg)
 
-        if file in self._offsetmap or file in self._backupmap:
+        if (
+            file in self._newfiles
+            or file in self._offsetmap
+            or file in self._backupmap
+        ):
             return
         vfs = self._vfsmap[location]
         dirname, filename = vfs.split(file)
@@ -394,6 +410,8 @@
 
     @active
     def findoffset(self, file):
+        if file in self._newfiles:
+            return 0
         return self._offsetmap.get(file)
 
     @active
@@ -411,10 +429,19 @@
         replace can only replace already committed entries
         that are not pending in the queue
         '''
-
-        if file not in self._offsetmap:
+        if file in self._newfiles:
+            if not offset:
+                return
+            self._newfiles.remove(file)
+            self._offsetmap[file] = offset
+        elif file in self._offsetmap:
+            if not offset:
+                del self._offsetmap[file]
+                self._newfiles.add(file)
+            else:
+                self._offsetmap[file] = offset
+        else:
             raise KeyError(file)
-        self._offsetmap[file] = offset
         self._file.write(b"%s\0%d\n" % (file, offset))
         self._file.flush()
 
@@ -555,6 +582,7 @@
                         b"couldn't remove %s: %s\n" % (vfs.join(b), inst)
                     )
         self._offsetmap = {}
+        self._newfiles = set()
         self._writeundo()
         if self._after:
             self._after()
@@ -638,7 +666,7 @@
         self._backupsfile.close()
 
         try:
-            if not self._offsetmap and not self._backupentries:
+            if not entries and not self._backupentries:
                 if self._backupjournal:
                     self._opener.unlink(self._backupjournal)
                 if self._journal: