sha1dc: use buffer protocol when parsing arguments
author Gregory Szorc <gregory.szorc@gmail.com>
Tue, 14 Jan 2020 18:59:49 -0800
changeset 44087 dc9b53482689
parent 44086 ffac09da7a19
child 44088 b3ec1ea95ee6
sha1dc: use buffer protocol when parsing arguments Without this, functions won't accept bytearray, memoryview, or other types that can be exposed as bytes to the C API. The most resilient way to obtain a bytes-like object from the C API is using the Py_buffer interface. This commit converts use of s#/y# to s*/y* and uses Py_buffer for accessing the underlying bytes array. I checked how hashlib is implemented in CPython and the implementation agrees with its use of the Py_buffer interface as well as using BufferError in cases of bad buffer types. Sadly, there's no good way to test for ndim > 1 without writing our own C-backed Python type. Differential Revision: https://phab.mercurial-scm.org/D7879
mercurial/thirdparty/sha1dc/cext.c
tests/test-hashutil.py
--- a/mercurial/thirdparty/sha1dc/cext.c	Tue Jan 14 20:05:37 2020 -0500
+++ b/mercurial/thirdparty/sha1dc/cext.c	Tue Jan 14 18:59:49 2020 -0800
@@ -25,8 +25,8 @@
 
 static int pysha1ctx_init(pysha1ctx *self, PyObject *args)
 {
-	const char *data = NULL;
-	Py_ssize_t len;
+	Py_buffer data;
+	data.obj = NULL;
 
 	SHA1DCInit(&(self->ctx));
 	/* We don't want "safe" sha1s, wherein sha1dc can give you a
@@ -34,11 +34,19 @@
 	   collision. We just want to detect collisions.
 	 */
 	SHA1DCSetSafeHash(&(self->ctx), 0);
-	if (!PyArg_ParseTuple(args, PY23("|s#", "|y#"), &data, &len)) {
+	if (!PyArg_ParseTuple(args, PY23("|s*", "|y*"), &data)) {
 		return -1;
 	}
-	if (data) {
-		SHA1DCUpdate(&(self->ctx), data, len);
+	if (data.obj) {
+		if (!PyBuffer_IsContiguous(&data, 'C') || data.ndim > 1) {
+			PyErr_SetString(PyExc_BufferError,
+			                "buffer must be contiguous and single dimension");
+			PyBuffer_Release(&data);
+			return -1;
+		}
+
+		SHA1DCUpdate(&(self->ctx), data.buf, data.len);
+		PyBuffer_Release(&data);
 	}
 	return 0;
 }
@@ -50,12 +58,18 @@
 
 static PyObject *pysha1ctx_update(pysha1ctx *self, PyObject *args)
 {
-	const char *data;
-	Py_ssize_t len;
-	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &data, &len)) {
+	Py_buffer data;
+	if (!PyArg_ParseTuple(args, PY23("s*", "y*"), &data)) {
 		return NULL;
 	}
-	SHA1DCUpdate(&(self->ctx), data, len);
+	if (!PyBuffer_IsContiguous(&data, 'C') || data.ndim > 1) {
+		PyErr_SetString(PyExc_BufferError,
+		                "buffer must be contiguous and single dimension");
+		PyBuffer_Release(&data);
+		return NULL;
+	}
+	SHA1DCUpdate(&(self->ctx), data.buf, data.len);
+	PyBuffer_Release(&data);
 	Py_RETURN_NONE;
 }
 
--- a/tests/test-hashutil.py	Tue Jan 14 20:05:37 2020 -0500
+++ b/tests/test-hashutil.py	Tue Jan 14 18:59:49 2020 -0800
@@ -45,6 +45,26 @@
             h.digest(),
         )
 
+    def test_bytes_like_types(self):
+        h = self.hasher()
+        h.update(bytearray(b'foo'))
+        h.update(memoryview(b'baz'))
+        self.assertEqual(
+            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+        )
+
+        h = self.hasher(bytearray(b'foo'))
+        h.update(b'baz')
+        self.assertEqual(
+            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+        )
+
+        h = self.hasher(memoryview(b'foo'))
+        h.update(b'baz')
+        self.assertEqual(
+            '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest()
+        )
+
 
 class hashlibtests(unittest.TestCase, hashertestsbase):
     hasher = hashlib.sha1