mercurial/bdiff.c
changeset 474 b2ae8283d1a6
parent 472 aa3d592df9b9
child 484 934279f3ca53
equal deleted inserted replaced
473:5914e27dc717 474:b2ae8283d1a6
    26 #endif
    26 #endif
    27 
    27 
    28 struct line {
    28 struct line {
    29 	int h, len, n, e;
    29 	int h, len, n, e;
    30 	const char *l;
    30 	const char *l;
       
    31 };
       
    32 
       
    33 struct pos {
       
    34 	int pos, len;
    31 };
    35 };
    32 
    36 
    33 struct hunk {
    37 struct hunk {
    34 	int a1, a2, b1, b2;
    38 	int a1, a2, b1, b2;
    35 };
    39 };
    85 	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
    89 	return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
    86 }
    90 }
    87 
    91 
    88 static int equatelines(struct line *a, int an, struct line *b, int bn)
    92 static int equatelines(struct line *a, int an, struct line *b, int bn)
    89 {
    93 {
    90 	int i, j, buckets = 1, t, *h, *l;
    94 	int i, j, buckets = 1, t;
       
    95 	struct pos *h;
    91 
    96 
    92 	/* build a hash table of the next highest power of 2 */
    97 	/* build a hash table of the next highest power of 2 */
    93 	while (buckets < bn + 1)
    98 	while (buckets < bn + 1)
    94 		buckets *= 2;
    99 		buckets *= 2;
    95 
   100 
    96 	h = malloc(buckets * sizeof(int));
   101 	h = malloc(buckets * sizeof(struct pos));
    97 	l = calloc(buckets, sizeof(int));
       
    98 	buckets = buckets - 1;
   102 	buckets = buckets - 1;
    99 	if (!h || !l) {
   103 	if (!h)
   100 		free(h);
       
   101 		return 0;
   104 		return 0;
   102 	}
       
   103 
   105 
   104 	/* clear the hash table */
   106 	/* clear the hash table */
   105 	for (i = 0; i <= buckets; i++)
   107 	for (i = 0; i <= buckets; i++) {
   106 		h[i] = -1;
   108 		h[i].pos = -1;
       
   109 		h[i].len = 0;
       
   110 	}
   107 
   111 
   108 	/* add lines to the hash table chains */
   112 	/* add lines to the hash table chains */
   109 	for (i = bn - 1; i >= 0; i--) {
   113 	for (i = bn - 1; i >= 0; i--) {
   110 		/* find the equivalence class */
   114 		/* find the equivalence class */
   111 		for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
   115 		for (j = b[i].h & buckets; h[j].pos != -1;
   112 			if (!cmp(b + i, b + h[j]))
   116 		     j = (j + 1) & buckets)
       
   117 			if (!cmp(b + i, b + h[j].pos))
   113 				break;
   118 				break;
   114 
   119 
   115 		/* add to the head of the equivalence class */
   120 		/* add to the head of the equivalence class */
   116 		b[i].n = h[j];
   121 		b[i].n = h[j].pos;
   117 		b[i].e = j;
   122 		b[i].e = j;
   118 		h[j] = i;
   123 		h[j].pos = i;
   119 		l[j]++; /* keep track of popularity */
   124 		h[j].len++; /* keep track of popularity */
   120 	}
   125 	}
   121 
   126 
   122 	/* compute popularity threshold */
   127 	/* compute popularity threshold */
   123 	t = (bn >= 200) ? bn / 100 : bn + 1;
   128 	t = (bn >= 200) ? bn / 100 : bn + 1;
   124 
   129 
   125 	/* match items in a to their equivalence class in b */
   130 	/* match items in a to their equivalence class in b */
   126 	for (i = 0; i < an; i++) {
   131 	for (i = 0; i < an; i++) {
   127 		/* find the equivalence class */
   132 		/* find the equivalence class */
   128 		for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
   133 		for (j = a[i].h & buckets; h[j].pos != -1;
   129 			if (!cmp(a + i, b + h[j]))
   134 		     j = (j + 1) & buckets)
       
   135 			if (!cmp(a + i, b + h[j].pos))
   130 				break;
   136 				break;
   131 
   137 
   132 		a[i].e = j; /* use equivalence class for quick compare */
   138 		a[i].e = j; /* use equivalence class for quick compare */
   133 		if(l[j] <= t)
   139 		if(h[j].len <= t)
   134 			a[i].n = h[j]; /* point to head of match list */
   140 			a[i].n = h[j].pos; /* point to head of match list */
   135 		else
   141 		else
   136 			a[i].n = -1; /* too popular */
   142 			a[i].n = -1; /* too popular */
   137 	}
   143 	}
   138 
   144 
   139 	/* discard hash tables */
   145 	/* discard hash tables */
   140 	free(h);
   146 	free(h);
   141 	free(l);
       
   142 	return 1;
   147 	return 1;
   143 }
   148 }
   144 
   149 
   145 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen,
   150 static int longest_match(struct line *a, struct line *b, struct pos *pos,
   146 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
   151 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
   147 {
   152 {
   148 	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
   153 	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
   149 
   154 
   150 	for (i = a1; i < a2; i++) {
   155 	for (i = a1; i < a2; i++) {
   153 			;
   158 			;
   154 
   159 
   155 		/* loop through all lines match a[i] in b */
   160 		/* loop through all lines match a[i] in b */
   156 		for (; j != -1 && j < b2; j = b[j].n) {
   161 		for (; j != -1 && j < b2; j = b[j].n) {
   157 			/* does this extend an earlier match? */
   162 			/* does this extend an earlier match? */
   158 			if (i > a1 && j > b1 && jpos[j - 1] == i - 1)
   163 			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
   159 				k = jlen[j - 1] + 1;
   164 				k = pos[j - 1].len + 1;
   160 			else
   165 			else
   161 				k = 1;
   166 				k = 1;
   162 			jpos[j] = i;
   167 			pos[j].pos = i;
   163 			jlen[j] = k;
   168 			pos[j].len = k;
   164 
   169 
   165 			/* best match so far? */
   170 			/* best match so far? */
   166 			if (k > mk) {
   171 			if (k > mk) {
   167 				mi = i;
   172 				mi = i;
   168 				mj = j;
   173 				mj = j;
   187 	*omi = mi - mb;
   192 	*omi = mi - mb;
   188 	*omj = mj - mb;
   193 	*omj = mj - mb;
   189 	return mk + mb;
   194 	return mk + mb;
   190 }
   195 }
   191 
   196 
   192 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen,
   197 static void recurse(struct line *a, struct line *b, struct pos *pos,
   193 		    int a1, int a2, int b1, int b2, struct hunklist *l)
   198 		    int a1, int a2, int b1, int b2, struct hunklist *l)
   194 {
   199 {
   195 	int i, j, k;
   200 	int i, j, k;
   196 
   201 
   197 	/* find the longest match in this chunk */
   202 	/* find the longest match in this chunk */
   198 	k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j);
   203 	k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
   199 	if (!k)
   204 	if (!k)
   200 		return;
   205 		return;
   201 
   206 
   202 	/* and recurse on the remaining chunks on either side */
   207 	/* and recurse on the remaining chunks on either side */
   203 	recurse(a, b, jpos, jlen, a1, i, b1, j, l);
   208 	recurse(a, b, pos, a1, i, b1, j, l);
   204 	l->head->a1 = i;
   209 	l->head->a1 = i;
   205 	l->head->a2 = i + k;
   210 	l->head->a2 = i + k;
   206 	l->head->b1 = j;
   211 	l->head->b1 = j;
   207 	l->head->b2 = j + k;
   212 	l->head->b2 = j + k;
   208 	l->head++;
   213 	l->head++;
   209 	recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
   214 	recurse(a, b, pos, i + k, a2, j + k, b2, l);
   210 }
   215 }
   211 
   216 
   212 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
   217 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
   213 {
   218 {
   214 	struct hunklist l;
   219 	struct hunklist l;
   215 	int *jpos, *jlen, t;
   220 	struct pos *pos;
       
   221 	int t;
   216 
   222 
   217 	/* allocate and fill arrays */
   223 	/* allocate and fill arrays */
   218 	t = equatelines(a, an, b, bn);
   224 	t = equatelines(a, an, b, bn);
   219 	jpos = calloc(bn, sizeof(int));
   225 	pos = calloc(bn, sizeof(struct pos));
   220 	jlen = calloc(bn, sizeof(int));
       
   221 	l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
   226 	l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
   222 
   227 
   223 	if (jpos && jlen && l.base && t) {
   228 	if (pos && l.base && t) {
   224 		/* generate the matching block list */
   229 		/* generate the matching block list */
   225 		recurse(a, b, jpos, jlen, 0, an, 0, bn, &l);
   230 		recurse(a, b, pos, 0, an, 0, bn, &l);
   226 		l.head->a1 = an;
   231 		l.head->a1 = an;
   227 		l.head->b1 = bn;
   232 		l.head->b1 = bn;
   228 		l.head++;
   233 		l.head++;
   229 	}
   234 	}
   230 
   235 
   231 	free(jpos);
   236 	free(pos);
   232 	free(jlen);
       
   233 	return l;
   237 	return l;
   234 }
   238 }
   235 
   239 
   236 static PyObject *blocks(PyObject *self, PyObject *args)
   240 static PyObject *blocks(PyObject *self, PyObject *args)
   237 {
   241 {