mercurial/bdiff.c
author Bryan O'Sullivan <bryano@fb.com>
Tue, 18 Sep 2012 15:42:19 -0700
changeset 17616 9535a0dc41f2
parent 16750 5b1f869b5548
child 18551 d6fb7bbec16a
permissions -rw-r--r--
store: implement fncache basic path encoding in C (This is not yet enabled; it will be turned on in a followup patch.) The path encoding performed by fncache is complex and (perhaps surprisingly) slow enough to negatively affect the overall performance of Mercurial. For a short path (< 120 bytes), the Python code can be reduced to a fairly tractable state machine that either determines that nothing needs to be done in a single pass, or performs the encoding in a second pass. For longer paths, we avoid the more complicated hashed encoding scheme for now, and fall back to Python. Raw performance: I measured in a repo containing 150,000 files in its tip manifest, with a median path name length of 57 bytes, and 95th percentile of 96 bytes. In this repo, the Python code takes 3.1 seconds to encode all path names, while the hybrid C-and-Python code (called from Python) takes 0.21 seconds, for a speedup of about 14. Across several other large repositories, I've measured the speedup from the C code at between 26x and 40x. For path names above 120 bytes where we must fall back to Python for hashed encoding, the speedup is about 1.7x. Thus absolute performance will depend strongly on the characteristics of a particular repository.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     1
/*
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     3
2859
345bac2bc4ec update copyrights.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 2600
diff changeset
     4
 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     5
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     8
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
     9
 Based roughly on Python difflib
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    10
*/
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    11
16749
eab8ca175262 bdiff: use Py_ssize_t instead of int
Adrian Buehlmann <adrian@cadifra.com>
parents: 16477
diff changeset
    12
#define PY_SSIZE_T_CLEAN
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    13
#include <Python.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    14
#include <stdlib.h>
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    15
#include <string.h>
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
    16
#include <limits.h>
867
0cd2ee61b10a Allow Mercurial to build on HP-UX 11
tksoh@users.sourceforge.net
parents: 839
diff changeset
    17
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
    18
#include "util.h"
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
    19
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    20
struct line {
16749
eab8ca175262 bdiff: use Py_ssize_t instead of int
Adrian Buehlmann <adrian@cadifra.com>
parents: 16477
diff changeset
    21
	int hash, n, e;
eab8ca175262 bdiff: use Py_ssize_t instead of int
Adrian Buehlmann <adrian@cadifra.com>
parents: 16477
diff changeset
    22
	Py_ssize_t len;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    23
	const char *l;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    24
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    25
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    26
/*
 * A (position, length) pair.  It is used both as a hash-table slot
 * (pos: line index at the head of a chain, len: number of lines in the
 * chain) and as per-line match bookkeeping (pos: index of the matching
 * line in a, len: length of the match ending there).
 */
struct pos {
	int pos;
	int len;
};
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    29
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
    30
struct hunk;

/*
 * One matching block: lines a1..a2-1 of the first input correspond to
 * lines b1..b2-1 of the second.  Hunks are chained into a singly
 * linked list through next, which is NULL on the last element.
 */
struct hunk {
	int a1;
	int a2;
	int b1;
	int b2;
	struct hunk *next;
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    35
16749
eab8ca175262 bdiff: use Py_ssize_t instead of int
Adrian Buehlmann <adrian@cadifra.com>
parents: 16477
diff changeset
    36
/*
 * Split the buffer a[0..len) into lines and hash each of them.
 *
 * A line ends at every '\n' (the newline belongs to the line) and at
 * the last byte of the buffer.  On success *lr receives a newly
 * allocated array with one entry per line followed by a zero-length
 * sentinel entry positioned at a + len, and the number of lines (the
 * sentinel not counted) is returned.  The entries point into the
 * buffer; nothing is copied and the array is not freed here.
 *
 * On failure (the allocation failed, or the buffer holds more lines
 * than an int can count) *lr is set to NULL and -1 is returned.
 */
static int splitlines(const char *a, Py_ssize_t len, struct line **lr)
{
	unsigned hash;
	int i;
	const char *p, *b = a;
	/*
	 * One past the last byte.  Testing p + 1 == end instead of
	 * p == a + len - 1 avoids forming a pointer in front of the
	 * buffer when len is 0.
	 */
	const char * const end = a + len;
	struct line *l;

	/* count the lines */
	i = 1; /* extra line for sentinel */
	for (p = a; p < end; p++) {
		if (*p == '\n' || p + 1 == end) {
			/* the count is kept and returned as an int */
			if (i == INT_MAX) {
				*lr = NULL;
				return -1;
			}
			i++;
		}
	}

	/*
	 * calloc checks the count * size multiplication for overflow and
	 * leaves every field that is not assigned below zeroed.
	 */
	*lr = l = (struct line *)calloc(i, sizeof(struct line));
	if (!l)
		return -1;

	/* build the line array and calculate hashes */
	hash = 0;
	for (p = a; p < end; p++) {
		/* Leonid Yuriev's hash */
		hash = (hash * 1664525) + (unsigned char)*p + 1013904223;

		if (*p == '\n' || p + 1 == end) {
			l->hash = hash;
			hash = 0; /* every line is hashed independently */
			l->len = p - b + 1;
			l->l = b;
			l->n = INT_MAX; /* not linked to any other line yet */
			l++;
			b = p + 1;
		}
	}

	/* set up a sentinel */
	l->hash = 0;
	l->len = 0;
	l->l = end;
	return i - 1;
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    77
13729
4a9c09239ba1 bdiff.c: make all local functions static
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13302
diff changeset
    78
static inline int cmp(struct line *a, struct line *b)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    79
{
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
    80
	return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    81
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    82
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    83
static int equatelines(struct line *a, int an, struct line *b, int bn)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    84
{
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    85
	int i, j, buckets = 1, t, scale;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    86
	struct pos *h = NULL;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    87
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    88
	/* build a hash table of the next highest power of 2 */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    89
	while (buckets < bn + 1)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    90
		buckets *= 2;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
    91
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
    92
	/* try to allocate a large hash table to avoid collisions */
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    93
	for (scale = 4; scale; scale /= 2) {
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
    94
		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
5452
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    95
		if (h)
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    96
			break;
82b4ff3abbcd bdiff: tweaks for large files
Matt Mackall <mpm@selenic.com>
parents: 5342
diff changeset
    97
	}
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
    98
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
    99
	if (!h)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   100
		return 0;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   101
5339
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   102
	buckets = buckets * scale - 1;
058e93c3d07d I have spotted the biggest bottleneck in "bdiff.c". Actually it was
Christoph Spiel <cspiel@freenet.de>
parents: 4134
diff changeset
   103
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   104
	/* clear the hash table */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   105
	for (i = 0; i <= buckets; i++) {
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   106
		h[i].pos = INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   107
		h[i].len = 0;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   108
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   109
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   110
	/* add lines to the hash table chains */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   111
	for (i = bn - 1; i >= 0; i--) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   112
		/* find the equivalence class */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
   113
		for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   114
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   115
			if (!cmp(b + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   116
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   117
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   118
		/* add to the head of the equivalence class */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   119
		b[i].n = h[j].pos;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   120
		b[i].e = j;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   121
		h[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   122
		h[j].len++; /* keep track of popularity */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   123
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   124
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   125
	/* compute popularity threshold */
9534
8e202431d620 bdiff: gradually enable the popularity hack
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8858
diff changeset
   126
	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   127
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   128
	/* match items in a to their equivalence class in b */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   129
	for (i = 0; i < an; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   130
		/* find the equivalence class */
13732
afe9269dccec bdiff.c: rename all variables which hold a hash value to "hash"
Markus F.X.J. Oberhumer <markus@oberhumer.com>
parents: 13731
diff changeset
   131
		for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   132
		     j = (j + 1) & buckets)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   133
			if (!cmp(a + i, b + h[j].pos))
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   134
				break;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   135
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   136
		a[i].e = j; /* use equivalence class for quick compare */
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   137
		if (h[j].len <= t)
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   138
			a[i].n = h[j].pos; /* point to head of match list */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   139
		else
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   140
			a[i].n = INT_MAX; /* too popular */
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   141
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   142
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   143
	/* discard hash tables */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   144
	free(h);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   145
	return 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   146
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   147
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   148
static int longest_match(struct line *a, struct line *b, struct pos *pos,
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   149
			 int a1, int a2, int b1, int b2, int *omi, int *omj)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   150
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   151
	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   152
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   153
	for (i = a1; i < a2; i++) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   154
		/* skip things before the current block */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   155
		for (j = a[i].n; j < b1; j = b[j].n)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   156
			;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   157
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   158
		/* loop through all lines match a[i] in b */
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   159
		for (; j < b2; j = b[j].n) {
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   160
			/* does this extend an earlier match? */
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   161
			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   162
				k = pos[j - 1].len + 1;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   163
			else
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   164
				k = 1;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   165
			pos[j].pos = i;
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   166
			pos[j].len = k;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   167
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   168
			/* best match so far? */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   169
			if (k > mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   170
				mi = i;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   171
				mj = j;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   172
				mk = k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   173
			}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   174
		}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   175
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   176
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   177
	if (mk) {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   178
		mi = mi - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   179
		mj = mj - mk + 1;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   180
	}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   181
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   182
	/* expand match to include neighboring popular lines */
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   183
	while (mi - mb > a1 && mj - mb > b1 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   184
	       a[mi - mb - 1].e == b[mj - mb - 1].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   185
		mb++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   186
	while (mi + mk < a2 && mj + mk < b2 &&
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   187
	       a[mi + mk].e == b[mj + mk].e)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   188
		mk++;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   189
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   190
	*omi = mi - mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   191
	*omj = mj - mb;
5341
458acf92b49e bdiff: use INT_MAX to avoid some inner loop comparisons
Matt Mackall <mpm@selenic.com>
parents: 5340
diff changeset
   192
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   193
	return mk + mb;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   194
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   195
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   196
static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   197
			    int a1, int a2, int b1, int b2, struct hunk *l)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   198
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   199
	int i, j, k;
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   200
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   201
	while (1) {
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   202
		/* find the longest match in this chunk */
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   203
		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   204
		if (!k)
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   205
			return l;
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   206
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   207
		/* and recurse on the remaining chunks on either side */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   208
		l = recurse(a, b, pos, a1, i, b1, j, l);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   209
		if (!l)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   210
			return NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   211
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   212
		l->next = (struct hunk *)malloc(sizeof(struct hunk));
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   213
		if (!l->next)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   214
			return NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   215
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   216
		l = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   217
		l->a1 = i;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   218
		l->a2 = i + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   219
		l->b1 = j;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   220
		l->b2 = j + k;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   221
		l->next = NULL;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   222
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   223
		/* tail-recursion didn't happen, so do equivalent iteration */
10500
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   224
		a1 = i + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   225
		b1 = j + k;
e96597c8d0ea bdiff: do not use recursion / avoid stackoverflow (issue1940)
Alistair Bell <alistair@bellsonline.com>
parents: 10282
diff changeset
   226
	}
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   227
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   228
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   229
static int diff(struct line *a, int an, struct line *b, int bn,
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   230
		 struct hunk *base)
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   231
{
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   232
	struct hunk *curr;
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   233
	struct pos *pos;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   234
	int t, count = 0;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   235
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   236
	/* allocate and fill arrays */
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   237
	t = equatelines(a, an, b, bn);
5571
f84bb2e1cc3a fix calloc(0, ...) issue
Jim Hague <jim.hague@acm.org>
parents: 5452
diff changeset
   238
	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   239
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   240
	if (pos && t) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   241
		/* generate the matching block list */
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   242
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   243
		curr = recurse(a, b, pos, 0, an, 0, bn, base);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   244
		if (!curr)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   245
			return -1;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   246
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   247
		/* sentinel end hunk */
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   248
		curr->next = (struct hunk *)malloc(sizeof(struct hunk));
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   249
		if (!curr->next)
13090
c73745762f33 bdiff: Fix bogus NULL return
Matt Mackall <mpm@selenic.com>
parents: 13089
diff changeset
   250
			return -1;
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   251
		curr = curr->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   252
		curr->a1 = curr->a2 = an;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   253
		curr->b1 = curr->b2 = bn;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   254
		curr->next = NULL;
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   255
	}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   256
474
b2ae8283d1a6 Minor speed improvements for bdiff
mpm@selenic.com
parents: 472
diff changeset
   257
	free(pos);
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   258
7625
930a2be7e875 bdiff: add comment about normalization
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7189
diff changeset
   259
	/* normalize the hunk list, try to push each hunk towards the end */
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   260
	for (curr = base->next; curr; curr = curr->next) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   261
		struct hunk *next = curr->next;
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   262
		int shift = 0;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   263
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   264
		if (!next)
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   265
			break;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   266
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   267
		if (curr->a2 == next->a1)
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   268
			while (curr->a2 + shift < an && curr->b2 + shift < bn
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   269
			       && !cmp(a + curr->a2 + shift,
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   270
				       b + curr->b2 + shift))
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   271
				shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   272
		else if (curr->b2 == next->b1)
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   273
			while (curr->b2 + shift < bn && curr->a2 + shift < an
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   274
			       && !cmp(b + curr->b2 + shift,
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 9534
diff changeset
   275
				       a + curr->a2 + shift))
7104
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   276
				shift++;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   277
		if (!shift)
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   278
			continue;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   279
		curr->b2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   280
		next->b1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   281
		curr->a2 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   282
		next->a1 += shift;
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   283
	}
9514cbb6e4f6 bdiff: normalize the diff (issue1295)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7036
diff changeset
   284
13089
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   285
	for (curr = base->next; curr; curr = curr->next)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   286
		count++;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   287
	return count;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   288
}
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   289
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   290
static void freehunks(struct hunk *l)
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   291
{
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   292
	struct hunk *n;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   293
	for (; l; l = n) {
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   294
		n = l->next;
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   295
		free(l);
faee0ffbc24b bdiff: dynamically allocate hunks
Matt Mackall <mpm@selenic.com>
parents: 11364
diff changeset
   296
	}
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   297
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   298
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   299
/*
 * blocks(a, b) -> list of (a1, a2, b1, b2) tuples
 *
 * Split both byte strings into lines, run diff() on them and return one
 * tuple per hunk in the resulting list, in list order.
 *
 * Returns NULL with an exception set on failure: the exception raised by
 * argument parsing or object creation if there is one, MemoryError
 * otherwise.
 */
static PyObject *blocks(PyObject *self, PyObject *args)
{
	PyObject *sa, *sb, *rl = NULL, *m;
	struct line *a = NULL, *b = NULL;
	struct hunk l, *h;
	int an, bn, count, pos = 0;

	/* the cleanup code walks l.next, so it must be valid before any goto */
	l.next = NULL;

	/* NOTE(review): the ":bdiff" suffix names bdiff() in error messages */
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
		return NULL;

	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

	if (!a || !b)
		goto nomem;

	count = diff(a, an, b, bn, &l);
	if (count < 0)
		goto nomem;

	rl = PyList_New(count);
	if (!rl)
		goto nomem;

	for (h = l.next; h; h = h->next) {
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
		if (!m) {
			/* never hand back a list holding NULL slots */
			Py_DECREF(rl);
			rl = NULL;
			goto nomem;
		}
		/* PyList_SetItem takes over the reference to m */
		PyList_SetItem(rl, pos, m);
		pos++;
	}

nomem:
	free(a);
	free(b);
	freehunks(l.next);
	/* keep an exception that is already set; otherwise it was malloc */
	if (!rl && !PyErr_Occurred())
		PyErr_NoMemory();
	return rl;
}
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   336
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   337
/*
 * bdiff(a, b) -> bytes
 *
 * Build a binary delta from buffer a to buffer b. Both buffers are split
 * into lines and matched with diff(). For every gap between consecutive
 * matching hunks the output receives a 12-byte header written with
 * putbe32() -- start offset in a, end offset in a, length of the
 * replacement data -- followed by that many bytes copied from b.
 *
 * The GIL is released while splitting, diffing and sizing the output.
 *
 * Returns NULL with an exception set on failure: ValueError when an
 * offset or length does not fit the 32-bit header fields, the pending
 * exception if one is already set, MemoryError otherwise.
 */
static PyObject *bdiff(PyObject *self, PyObject *args)
{
	char *sa, *sb, *rb;
	PyObject *result = NULL;
	struct line *al = NULL, *bl = NULL;
	struct hunk l, *h;
	int an, bn, count;
	Py_ssize_t len = 0, la, lb;
	PyThreadState *_save;

	/* the cleanup code walks l.next, so it must be valid before any goto */
	l.next = NULL;

	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
		return NULL;

	_save = PyEval_SaveThread();
	an = splitlines(sa, la, &al);
	bn = splitlines(sb, lb, &bl);
	if (!al || !bl)
		goto nomem;

	count = diff(al, an, bl, bn, &l);
	if (count < 0)
		goto nomem;

	/* calculate length of output; la/lb now track line indexes */
	la = lb = 0;
	for (h = l.next; h; h = h->next) {
		if (h->a1 != la || h->b1 != lb)
			len += 12 + bl[h->b1].l - bl[lb].l;
		la = h->a2;
		lb = h->b2;
	}
	/* Python objects are touched from here on, so take the GIL back */
	PyEval_RestoreThread(_save);
	_save = NULL;

	result = PyBytes_FromStringAndSize(NULL, len);

	if (!result)
		goto nomem;

	/* build binary patch */
	rb = PyBytes_AsString(result);
	la = lb = 0;

	for (h = l.next; h; h = h->next) {
		if (h->a1 != la || h->b1 != lb) {
			Py_ssize_t start = al[la].l - al->l;
			Py_ssize_t end = al[h->a1].l - al->l;

			len = bl[h->b1].l - bl[lb].l;

			if (start > UINT_MAX || end > UINT_MAX
			    || len > UINT_MAX) {
				PyErr_SetString(PyExc_ValueError,
				                "bdiff: value too large for putbe32");
				/* drop the half-written buffer, report the error */
				Py_DECREF(result);
				result = NULL;
				goto nomem;
			}
			putbe32((uint32_t)start, rb);
			putbe32((uint32_t)end, rb + 4);
			putbe32((uint32_t)len, rb + 8);
			memcpy(rb + 12, bl[lb].l, len);
			rb += 12 + len;
		}
		la = h->a2;
		lb = h->b2;
	}

nomem:
	if (_save)
		PyEval_RestoreThread(_save);
	free(al);
	free(bl);
	freehunks(l.next);
	/* keep an exception that is already set; otherwise it was malloc */
	if (!result && !PyErr_Occurred())
		PyErr_NoMemory();
	return result;
}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   411
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   412
/*
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   413
 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   414
 * reduce whitespace sequences to a single space and trim remaining whitespace
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   415
 * from end of lines.
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   416
 */
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   417
static PyObject *fixws(PyObject *self, PyObject *args)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   418
{
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   419
	PyObject *s, *result = NULL;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   420
	char allws, c;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   421
	const char *r;
16749
eab8ca175262 bdiff: use Py_ssize_t instead of int
Adrian Buehlmann <adrian@cadifra.com>
parents: 16477
diff changeset
   422
	Py_ssize_t i, rlen, wlen = 0;
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   423
	char *w;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   424
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   425
	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   426
		return NULL;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   427
	r = PyBytes_AsString(s);
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   428
	rlen = PyBytes_Size(s);
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   429
16071
8134ec8627e7 bdiff: fix malloc(0) issue in fixws()
Jim Hague <jim.hague@acm.org>
parents: 15530
diff changeset
   430
	w = (char *)malloc(rlen ? rlen : 1);
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   431
	if (!w)
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   432
		goto nomem;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   433
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   434
	for (i = 0; i != rlen; i++) {
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   435
		c = r[i];
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   436
		if (c == ' ' || c == '\t' || c == '\r') {
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   437
			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   438
				w[wlen++] = ' ';
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   439
		} else if (c == '\n' && !allws
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   440
			  && wlen > 0 && w[wlen - 1] == ' ') {
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   441
			w[wlen - 1] = '\n';
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   442
		} else {
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   443
			w[wlen++] = c;
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   444
		}
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   445
	}
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   446
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   447
	result = PyBytes_FromStringAndSize(w, wlen);
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   448
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   449
nomem:
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   450
	free(w);
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   451
	return result ? result : PyErr_NoMemory();
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   452
}
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   453
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   454
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   455
/* Module-level docstring; handed to module creation on both the
 * Python 2 (Py_InitModule3) and Python 3 (PyModuleDef) init paths. */
static char mdiff_doc[] = "Efficient binary diff.";
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   456
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   457
static PyMethodDef methods[] = {
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   458
	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
433
79c694462294 Add bdiff.blocks / minor performance tweaks
mpm@selenic.com
parents: 411
diff changeset
   459
	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 15222
diff changeset
   460
	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   461
	{NULL, NULL}
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   462
};
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   463
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   464
#ifdef IS_PY3K
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   465
static struct PyModuleDef bdiff_module = {
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   466
	PyModuleDef_HEAD_INIT,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   467
	"bdiff",
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   468
	mdiff_doc,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   469
	-1,
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   470
	methods
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   471
};
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   472
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   473
PyMODINIT_FUNC PyInit_bdiff(void)
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   474
{
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   475
	return PyModule_Create(&bdiff_module);
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   476
}
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   477
#else
400
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   478
PyMODINIT_FUNC initbdiff(void)
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   479
{
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   480
	Py_InitModule3("bdiff", methods, mdiff_doc);
8b067bde6679 Add a fast binary diff extension (not yet used)
mpm@selenic.com
parents:
diff changeset
   481
}
11364
0044193a1c45 bdiff.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10500
diff changeset
   482
#endif
1542
8e80eefb3de6 made C src formatting more consistent
twaldmann@thinkmo.de
parents: 1397
diff changeset
   483