mercurial/bdiff_module.c
branchstable
changeset 33572 857876ebaed4
parent 33202 c1994c986d77
parent 33571 9a944e908ecf
child 33573 9e0fea06ae2c
equal deleted inserted replaced
33202:c1994c986d77 33572:857876ebaed4
     1 /*
       
     2  bdiff.c - efficient binary diff extension for Mercurial
       
     3 
       
     4  Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
       
     5 
       
     6  This software may be used and distributed according to the terms of
       
     7  the GNU General Public License, incorporated herein by reference.
       
     8 
       
     9  Based roughly on Python difflib
       
    10 */
       
    11 
       
    12 #define PY_SSIZE_T_CLEAN
       
    13 #include <Python.h>
       
    14 #include <stdlib.h>
       
    15 #include <string.h>
       
    16 #include <limits.h>
       
    17 
       
    18 #include "bdiff.h"
       
    19 #include "bitmanipulation.h"
       
    20 #include "util.h"
       
    21 
       
    22 
       
    23 static PyObject *blocks(PyObject *self, PyObject *args)
       
    24 {
       
    25 	PyObject *sa, *sb, *rl = NULL, *m;
       
    26 	struct bdiff_line *a, *b;
       
    27 	struct bdiff_hunk l, *h;
       
    28 	int an, bn, count, pos = 0;
       
    29 
       
    30 	l.next = NULL;
       
    31 
       
    32 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
       
    33 		return NULL;
       
    34 
       
    35 	an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
       
    36 	bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
       
    37 
       
    38 	if (!a || !b)
       
    39 		goto nomem;
       
    40 
       
    41 	count = bdiff_diff(a, an, b, bn, &l);
       
    42 	if (count < 0)
       
    43 		goto nomem;
       
    44 
       
    45 	rl = PyList_New(count);
       
    46 	if (!rl)
       
    47 		goto nomem;
       
    48 
       
    49 	for (h = l.next; h; h = h->next) {
       
    50 		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
       
    51 		PyList_SetItem(rl, pos, m);
       
    52 		pos++;
       
    53 	}
       
    54 
       
    55 nomem:
       
    56 	free(a);
       
    57 	free(b);
       
    58 	bdiff_freehunks(l.next);
       
    59 	return rl ? rl : PyErr_NoMemory();
       
    60 }
       
    61 
       
    62 static PyObject *bdiff(PyObject *self, PyObject *args)
       
    63 {
       
    64 	char *sa, *sb, *rb, *ia, *ib;
       
    65 	PyObject *result = NULL;
       
    66 	struct bdiff_line *al, *bl;
       
    67 	struct bdiff_hunk l, *h;
       
    68 	int an, bn, count;
       
    69 	Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
       
    70 	PyThreadState *_save;
       
    71 
       
    72 	l.next = NULL;
       
    73 
       
    74 	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
       
    75 		return NULL;
       
    76 
       
    77 	if (la > UINT_MAX || lb > UINT_MAX) {
       
    78 		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
       
    79 		return NULL;
       
    80 	}
       
    81 
       
    82 	_save = PyEval_SaveThread();
       
    83 
       
    84 	lmax = la > lb ? lb : la;
       
    85 	for (ia = sa, ib = sb;
       
    86 	     li < lmax && *ia == *ib;
       
    87 	     ++li, ++ia, ++ib)
       
    88 		if (*ia == '\n')
       
    89 			lcommon = li + 1;
       
    90 	/* we can almost add: if (li == lmax) lcommon = li; */
       
    91 
       
    92 	an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
       
    93 	bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
       
    94 	if (!al || !bl)
       
    95 		goto nomem;
       
    96 
       
    97 	count = bdiff_diff(al, an, bl, bn, &l);
       
    98 	if (count < 0)
       
    99 		goto nomem;
       
   100 
       
   101 	/* calculate length of output */
       
   102 	la = lb = 0;
       
   103 	for (h = l.next; h; h = h->next) {
       
   104 		if (h->a1 != la || h->b1 != lb)
       
   105 			len += 12 + bl[h->b1].l - bl[lb].l;
       
   106 		la = h->a2;
       
   107 		lb = h->b2;
       
   108 	}
       
   109 	PyEval_RestoreThread(_save);
       
   110 	_save = NULL;
       
   111 
       
   112 	result = PyBytes_FromStringAndSize(NULL, len);
       
   113 
       
   114 	if (!result)
       
   115 		goto nomem;
       
   116 
       
   117 	/* build binary patch */
       
   118 	rb = PyBytes_AsString(result);
       
   119 	la = lb = 0;
       
   120 
       
   121 	for (h = l.next; h; h = h->next) {
       
   122 		if (h->a1 != la || h->b1 != lb) {
       
   123 			len = bl[h->b1].l - bl[lb].l;
       
   124 			putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
       
   125 			putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4);
       
   126 			putbe32((uint32_t)len, rb + 8);
       
   127 			memcpy(rb + 12, bl[lb].l, len);
       
   128 			rb += 12 + len;
       
   129 		}
       
   130 		la = h->a2;
       
   131 		lb = h->b2;
       
   132 	}
       
   133 
       
   134 nomem:
       
   135 	if (_save)
       
   136 		PyEval_RestoreThread(_save);
       
   137 	free(al);
       
   138 	free(bl);
       
   139 	bdiff_freehunks(l.next);
       
   140 	return result ? result : PyErr_NoMemory();
       
   141 }
       
   142 
       
   143 /*
       
   144  * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
       
   145  * reduce whitespace sequences to a single space and trim remaining whitespace
       
   146  * from end of lines.
       
   147  */
       
   148 static PyObject *fixws(PyObject *self, PyObject *args)
       
   149 {
       
   150 	PyObject *s, *result = NULL;
       
   151 	char allws, c;
       
   152 	const char *r;
       
   153 	Py_ssize_t i, rlen, wlen = 0;
       
   154 	char *w;
       
   155 
       
   156 	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
       
   157 		return NULL;
       
   158 	r = PyBytes_AsString(s);
       
   159 	rlen = PyBytes_Size(s);
       
   160 
       
   161 	w = (char *)PyMem_Malloc(rlen ? rlen : 1);
       
   162 	if (!w)
       
   163 		goto nomem;
       
   164 
       
   165 	for (i = 0; i != rlen; i++) {
       
   166 		c = r[i];
       
   167 		if (c == ' ' || c == '\t' || c == '\r') {
       
   168 			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
       
   169 				w[wlen++] = ' ';
       
   170 		} else if (c == '\n' && !allws
       
   171 			  && wlen > 0 && w[wlen - 1] == ' ') {
       
   172 			w[wlen - 1] = '\n';
       
   173 		} else {
       
   174 			w[wlen++] = c;
       
   175 		}
       
   176 	}
       
   177 
       
   178 	result = PyBytes_FromStringAndSize(w, wlen);
       
   179 
       
   180 nomem:
       
   181 	PyMem_Free(w);
       
   182 	return result ? result : PyErr_NoMemory();
       
   183 }
       
   184 
       
   185 
       
   186 static char mdiff_doc[] = "Efficient binary diff.";
       
   187 
       
   188 static PyMethodDef methods[] = {
       
   189 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
       
   190 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
       
   191 	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
       
   192 	{NULL, NULL}
       
   193 };
       
   194 
       
   195 #ifdef IS_PY3K
       
   196 static struct PyModuleDef bdiff_module = {
       
   197 	PyModuleDef_HEAD_INIT,
       
   198 	"bdiff",
       
   199 	mdiff_doc,
       
   200 	-1,
       
   201 	methods
       
   202 };
       
   203 
       
   204 PyMODINIT_FUNC PyInit_bdiff(void)
       
   205 {
       
   206 	return PyModule_Create(&bdiff_module);
       
   207 }
       
   208 #else
       
   209 PyMODINIT_FUNC initbdiff(void)
       
   210 {
       
   211 	Py_InitModule3("bdiff", methods, mdiff_doc);
       
   212 }
       
   213 #endif