revlog: add method for obtaining storage info (API)
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 24 Sep 2018 11:56:48 -0700
changeset 39869 14e500b58263
parent 39868 b06303a208be
child 39870 b399ff55ee6d
revlog: add method for obtaining storage info (API) We currently have a handful of methods on the file and manifest storage interfaces for obtaining metadata about storage. e.g. files() is used to obtain the files backing storage. rawsize() is to quickly compute the size of tracked revisions without resolving their fulltext. Code in upgrade and stream clone makes heavy use of these methods. The existing APIs are generic and don't necessarily have the specialization that we need going forward. For example, files() doesn't distinguish between exclusive storage and shared storage. This makes stream clone difficult to implement when e.g. there may be a single file backing storage for multiple tracked paths. It also makes reporting difficult, as we don't know how many bytes are actually used by storage since we can't easily identify shared files. This commit implements a new method for obtaining storage metadata. It is designed to accept arguments specifying what metadata to request and to return a dict with those fields populated. We /could/ make each of these attributes a separate method. But this is a specialized API and I'm trying to avoid method bloat on the interfaces. There is also the possibility that certain callers will want to obtain multiple fields in different combinations and some backends may have performance issues obtaining all that data via separate method calls. Simple storage integration tests have been added. For now, we assume fields can't be "None" (ignoring the interface documentation). We can revisit this later. Differential Revision: https://phab.mercurial-scm.org/D4747
mercurial/filelog.py
mercurial/manifest.py
mercurial/repository.py
mercurial/revlog.py
mercurial/testing/storage.py
--- a/mercurial/filelog.py	Wed Sep 26 11:27:41 2018 -0700
+++ b/mercurial/filelog.py	Mon Sep 24 11:56:48 2018 -0700
@@ -187,6 +187,14 @@
     def verifyintegrity(self, state):
         return self._revlog.verifyintegrity(state)
 
+    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
+                    revisionscount=False, trackedsize=False,
+                    storedsize=False):
+        return self._revlog.storageinfo(
+            exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
+            revisionscount=revisionscount, trackedsize=trackedsize,
+            storedsize=storedsize)
+
     # TODO these aren't part of the interface and aren't internal methods.
     # Callers should be fixed to not use them.
 
--- a/mercurial/manifest.py	Wed Sep 26 11:27:41 2018 -0700
+++ b/mercurial/manifest.py	Mon Sep 24 11:56:48 2018 -0700
@@ -1592,6 +1592,14 @@
 
         return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
 
+    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
+                    revisionscount=False, trackedsize=False,
+                    storedsize=False):
+        return self._revlog.storageinfo(
+            exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
+            revisionscount=revisionscount, trackedsize=trackedsize,
+            storedsize=storedsize)
+
     @property
     def indexfile(self):
         return self._revlog.indexfile
--- a/mercurial/repository.py	Wed Sep 26 11:27:41 2018 -0700
+++ b/mercurial/repository.py	Mon Sep 24 11:56:48 2018 -0700
@@ -748,6 +748,41 @@
         be a better API for that.
         """
 
+    def storageinfo(exclusivefiles=False, sharedfiles=False,
+                    revisionscount=False, trackedsize=False,
+                    storedsize=False):
+        """Obtain information about storage for this file's data.
+
+        Returns a dict describing storage for this tracked path. The keys
+        in the dict map to arguments of the same name. The arguments are
+        bools indicating whether to calculate and obtain that data.
+
+        exclusivefiles
+           Iterable of (vfs, path) describing files that are exclusively
+           used to back storage for this tracked path.
+
+        sharedfiles
+           Iterable of (vfs, path) describing files that are used to back
+           storage for this tracked path. Those files may also provide storage
+           for other stored entities.
+
+        revisionscount
+           Number of revisions available for retrieval.
+
+        trackedsize
+           Total size in bytes of all tracked revisions. This is a sum of the
+           length of the fulltext of all revisions.
+
+        storedsize
+           Total size in bytes used to store data for all tracked revisions.
+           This is commonly less than ``trackedsize`` due to internal usage
+           of deltas rather than fulltext revisions.
+
+        Not all storage backends may support all queries or have a reasonable
+        value to use. In that case, the value should be set to ``None`` and
+        callers are expected to handle this special value.
+        """
+
     def verifyintegrity(state):
         """Verifies the integrity of file storage.
 
@@ -1199,6 +1234,15 @@
         manifest including files that did not match.
         """
 
+    def storageinfo(exclusivefiles=False, sharedfiles=False,
+                    revisionscount=False, trackedsize=False,
+                    storedsize=False):
+        """Obtain information about storage for this manifest's data.
+
+        See ``ifilestorage.storageinfo()`` for a description of this method.
+        This one behaves the same way, except for manifest data.
+        """
+
 class imanifestlog(interfaceutil.Interface):
     """Interface representing a collection of manifest snapshots.
 
--- a/mercurial/revlog.py	Wed Sep 26 11:27:41 2018 -0700
+++ b/mercurial/revlog.py	Mon Sep 24 11:56:48 2018 -0700
@@ -2643,3 +2643,28 @@
             yield revlogproblem(
                 warning=_("warning: '%s' uses revlog format %d; expected %d") %
                         (self.indexfile, version, state['expectedversion']))
+
+    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
+                    revisionscount=False, trackedsize=False,
+                    storedsize=False):
+        d = {}
+
+        if exclusivefiles:
+            d['exclusivefiles'] = [(self.opener, self.indexfile)]
+            if not self._inline:
+                d['exclusivefiles'].append((self.opener, self.datafile))
+
+        if sharedfiles:
+            d['sharedfiles'] = []
+
+        if revisionscount:
+            d['revisionscount'] = len(self)
+
+        if trackedsize:
+            d['trackedsize'] = sum(map(self.rawsize, iter(self)))
+
+        if storedsize:
+            d['storedsize'] = sum(self.opener.stat(path).st_size
+                                  for path in self.files())
+
+        return d
--- a/mercurial/testing/storage.py	Wed Sep 26 11:27:41 2018 -0700
+++ b/mercurial/testing/storage.py	Mon Sep 24 11:56:48 2018 -0700
@@ -388,6 +388,10 @@
     def testempty(self):
         f = self._makefilefn()
 
+        self.assertEqual(f.storageinfo(), {})
+        self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True),
+                         {'revisionscount': 0, 'trackedsize': 0})
+
         self.assertEqual(f.rawsize(nullrev), 0)
 
         for i in range(-5, 5):
@@ -466,6 +470,10 @@
         with self._maketransactionfn() as tr:
             node = f.add(fulltext, None, tr, 0, nullid, nullid)
 
+        self.assertEqual(f.storageinfo(), {})
+        self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True),
+                         {'revisionscount': 1, 'trackedsize': len(fulltext)})
+
         self.assertEqual(f.rawsize(0), len(fulltext))
 
         with self.assertRaises(IndexError):
@@ -553,6 +561,14 @@
             node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
             node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
 
+        self.assertEqual(f.storageinfo(), {})
+        self.assertEqual(
+            f.storageinfo(revisionscount=True, trackedsize=True),
+            {
+                'revisionscount': 3,
+                'trackedsize': len(fulltext0) + len(fulltext1) + len(fulltext2),
+            })
+
         self.assertEqual(f.rawsize(0), len(fulltext0))
         self.assertEqual(f.rawsize(1), len(fulltext1))
         self.assertEqual(f.rawsize(2), len(fulltext2))