mercurial/pathencode.c
branchstable
changeset 33572 857876ebaed4
parent 33202 c1994c986d77
parent 33571 9a944e908ecf
child 33573 9e0fea06ae2c
equal deleted inserted replaced
33202:c1994c986d77 33572:857876ebaed4
     1 /*
       
     2  pathencode.c - efficient path name encoding
       
     3 
       
     4  Copyright 2012 Facebook
       
     5 
       
     6  This software may be used and distributed according to the terms of
       
     7  the GNU General Public License, incorporated herein by reference.
       
     8 */
       
     9 
       
    10 /*
       
    11  * An implementation of the name encoding scheme used by the fncache
       
    12  * store.  The common case is of a path < 120 bytes long, which is
       
    13  * handled either in a single pass with no allocations or two passes
       
    14  * with a single allocation.  For longer paths, multiple passes are
       
    15  * required.
       
    16  */
       
    17 
       
    18 #define PY_SSIZE_T_CLEAN
       
    19 #include <Python.h>
       
    20 #include <assert.h>
       
    21 #include <ctype.h>
       
    22 #include <stdlib.h>
       
    23 #include <string.h>
       
    24 
       
    25 #include "util.h"
       
    26 
       
    27 /* state machine for the fast path */
       
    28 enum path_state {
       
    29 	START,   /* first byte of a path component */
       
    30 	A,       /* "AUX" */
       
    31 	AU,
       
    32 	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
       
    33 	C,       /* "CON" or "COMn" */
       
    34 	CO,
       
    35 	COMLPT,  /* "COM" or "LPT" */
       
    36 	COMLPTn,
       
    37 	L,
       
    38 	LP,
       
    39 	N,
       
    40 	NU,
       
    41 	P,       /* "PRN" */
       
    42 	PR,
       
    43 	LDOT,    /* leading '.' */
       
    44 	DOT,     /* '.' in a non-leading position */
       
    45 	H,       /* ".h" */
       
    46 	HGDI,    /* ".hg", ".d", or ".i" */
       
    47 	SPACE,
       
    48 	DEFAULT  /* byte of a path component after the first */
       
    49 };
       
    50 
       
    51 /* state machine for dir-encoding */
       
    52 enum dir_state {
       
    53 	DDOT,
       
    54 	DH,
       
    55 	DHGDI,
       
    56 	DDEFAULT
       
    57 };
       
    58 
       
    59 static inline int inset(const uint32_t bitset[], char c)
       
    60 {
       
    61 	return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
       
    62 }
       
    63 
       
    64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
       
    65                             char c)
       
    66 {
       
    67 	if (dest) {
       
    68 		assert(*destlen < destsize);
       
    69 		dest[*destlen] = c;
       
    70 	}
       
    71 	(*destlen)++;
       
    72 }
       
    73 
       
    74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
       
    75                            const void *src, Py_ssize_t len)
       
    76 {
       
    77 	if (dest) {
       
    78 		assert(*destlen + len < destsize);
       
    79 		memcpy((void *)&dest[*destlen], src, len);
       
    80 	}
       
    81 	*destlen += len;
       
    82 }
       
    83 
       
    84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
       
    85 			     uint8_t c)
       
    86 {
       
    87 	static const char hexdigit[] = "0123456789abcdef";
       
    88 
       
    89 	charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
       
    90 	charcopy(dest, destlen, destsize, hexdigit[c & 15]);
       
    91 }
       
    92 
       
    93 /* 3-byte escape: tilde followed by two hex digits */
       
    94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
       
    95 			   char c)
       
    96 {
       
    97 	charcopy(dest, destlen, destsize, '~');
       
    98 	hexencode(dest, destlen, destsize, c);
       
    99 }
       
   100 
       
   101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
       
   102                              const char *src, Py_ssize_t len)
       
   103 {
       
   104 	enum dir_state state = DDEFAULT;
       
   105 	Py_ssize_t i = 0, destlen = 0;
       
   106 
       
   107 	while (i < len) {
       
   108 		switch (state) {
       
   109 		case DDOT:
       
   110 			switch (src[i]) {
       
   111 			case 'd':
       
   112 			case 'i':
       
   113 				state = DHGDI;
       
   114 				charcopy(dest, &destlen, destsize, src[i++]);
       
   115 				break;
       
   116 			case 'h':
       
   117 				state = DH;
       
   118 				charcopy(dest, &destlen, destsize, src[i++]);
       
   119 				break;
       
   120 			default:
       
   121 				state = DDEFAULT;
       
   122 				break;
       
   123 			}
       
   124 			break;
       
   125 		case DH:
       
   126 			if (src[i] == 'g') {
       
   127 				state = DHGDI;
       
   128 				charcopy(dest, &destlen, destsize, src[i++]);
       
   129 			}
       
   130 			else state = DDEFAULT;
       
   131 			break;
       
   132 		case DHGDI:
       
   133 			if (src[i] == '/') {
       
   134 				memcopy(dest, &destlen, destsize, ".hg", 3);
       
   135 				charcopy(dest, &destlen, destsize, src[i++]);
       
   136 			}
       
   137 			state = DDEFAULT;
       
   138 			break;
       
   139 		case DDEFAULT:
       
   140 			if (src[i] == '.')
       
   141 				state = DDOT;
       
   142 			charcopy(dest, &destlen, destsize, src[i++]);
       
   143 			break;
       
   144 		}
       
   145 	}
       
   146 
       
   147 	return destlen;
       
   148 }
       
   149 
       
   150 PyObject *encodedir(PyObject *self, PyObject *args)
       
   151 {
       
   152 	Py_ssize_t len, newlen;
       
   153 	PyObject *pathobj, *newobj;
       
   154 	char *path;
       
   155 
       
   156 	if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
       
   157 		return NULL;
       
   158 
       
   159 	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
       
   160 		PyErr_SetString(PyExc_TypeError, "expected a string");
       
   161 		return NULL;
       
   162 	}
       
   163 
       
   164 	newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
       
   165 
       
   166 	if (newlen == len + 1) {
       
   167 		Py_INCREF(pathobj);
       
   168 		return pathobj;
       
   169 	}
       
   170 
       
   171 	newobj = PyBytes_FromStringAndSize(NULL, newlen);
       
   172 
       
   173 	if (newobj) {
       
   174 		assert(PyBytes_Check(newobj));
       
   175 		Py_SIZE(newobj)--;
       
   176 		_encodedir(PyBytes_AS_STRING(newobj), newlen, path,
       
   177 			   len + 1);
       
   178 	}
       
   179 
       
   180 	return newobj;
       
   181 }
       
   182 
       
/*
 * Core state-machine encoder shared by basicencode and auxencode.
 *
 * twobytes:  set of bytes emitted as '_' followed by a second byte
 *            ('_' itself for '_', otherwise the byte plus 32).
 * onebyte:   set of bytes copied through unchanged.
 *            Bytes in neither set are emitted as a 3-byte "~xx" escape.
 * dest:      output buffer, or NULL to only measure (see charcopy).
 * destlen:   output offset to start from.
 * destsize:  size of dest, used only for bounds assertions.
 * src, len:  input bytes.  The '\0' cases below act as an end-of-path
 *            token, so callers are expected to include the terminating
 *            zero byte in len.
 * encodedir: when non-zero, ".hg" is inserted before a '/' that
 *            directly follows ".hg", ".d" or ".i".
 *
 * On top of the per-byte sets, the machine escapes a leading '.' or ' '
 * of a path component, a '.' or ' ' that ends a component, and one byte
 * of the names "aux", "con", "nul", "prn", "com1".."com9" and
 * "lpt1".."lpt9" when such a name starts a component and is followed by
 * '.', '/' or the terminator.
 *
 * Returns the output offset after the last byte produced.
 */
static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
			  char *dest, Py_ssize_t destlen, size_t destsize,
			  const char *src, Py_ssize_t len,
			  int encodedir)
{
	enum path_state state = START;
	Py_ssize_t i = 0;

	/*
	 * Python strings end with a zero byte, which we use as a
	 * terminal token as they are not valid inside path names.
	 */

	while (i < len) {
		switch (state) {
		case START:
			switch (src[i]) {
			case '/':
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case '.':
				/* a leading '.' is always escaped */
				state = LDOT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case ' ':
				/* a leading ' ' is always escaped */
				state = DEFAULT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case 'a':
				state = A;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'c':
				state = C;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'l':
				state = L;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'n':
				state = N;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'p':
				state = P;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				/* nothing consumed: the byte is handled again
				   in the DEFAULT state */
				state = DEFAULT;
				break;
			}
			break;
		case A:
			if (src[i] == 'u') {
				state = AU;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case AU:
			if (src[i] == 'x') {
				/* consume the third byte without emitting it;
				   THIRD decides how it is written */
				state = THIRD;
				i++;
			}
			else state = DEFAULT;
			break;
		case THIRD:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* the 3-byte name is a complete component (or
				   is followed by '.'): escape its held-back
				   third byte */
				escape3(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* ordinary name: step back so the held-back
				   byte is emitted by the DEFAULT state */
				i--;
				break;
			}
			break;
		case C:
			if (src[i] == 'o') {
				state = CO;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case CO:
			if (src[i] == 'm') {
				/* "com": third byte held back */
				state = COMLPT;
				i++;
			}
			else if (src[i] == 'n') {
				/* "con": third byte held back */
				state = THIRD;
				i++;
			}
			else state = DEFAULT;
			break;
		case COMLPT:
			switch (src[i]) {
			case '1': case '2': case '3': case '4': case '5':
			case '6': case '7': case '8': case '9':
				/* hold back the digit as well */
				state = COMLPTn;
				i++;
				break;
			default:
				/* no digit follows: emit the held-back third
				   byte unchanged */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			}
			break;
		case COMLPTn:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* escape the third byte, then emit the digit */
				escape3(dest, &destlen, destsize, src[i - 2]);
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* ordinary name: emit both held-back bytes
				   unchanged */
				memcopy(dest, &destlen, destsize,
					&src[i - 2], 2);
				break;
			}
			break;
		case L:
			if (src[i] == 'p') {
				state = LP;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case LP:
			if (src[i] == 't') {
				/* "lpt": third byte held back */
				state = COMLPT;
				i++;
			}
			else state = DEFAULT;
			break;
		case N:
			if (src[i] == 'u') {
				state = NU;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case NU:
			if (src[i] == 'l') {
				/* "nul": third byte held back */
				state = THIRD;
				i++;
			}
			else state = DEFAULT;
			break;
		case P:
			if (src[i] == 'r') {
				state = PR;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case PR:
			if (src[i] == 'n') {
				/* "prn": third byte held back */
				state = THIRD;
				i++;
			}
			else state = DEFAULT;
			break;
		case LDOT:
			/* the leading '.' itself was already escaped in
			   START */
			switch (src[i]) {
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				break;
			}
			break;
		case DOT:
			/* a non-leading '.' was consumed in DEFAULT but not
			   yet emitted */
			switch (src[i]) {
			case '/':
			case '\0':
				/* '.' ends the component: escape it */
				state = START;
				memcopy(dest, &destlen, destsize, "~2e", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, '.');
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				memcopy(dest, &destlen, destsize, ".h", 2);
				i++;
				break;
			default:
				/* emit the '.' and re-examine this byte in
				   DEFAULT */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, '.');
				break;
			}
			break;
		case H:
			if (src[i] == 'g') {
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case HGDI:
			if (src[i] == '/') {
				state = START;
				/* directory encoding: ".hg/", ".d/" and ".i/"
				   gain an extra ".hg" before the slash */
				if (encodedir)
					memcopy(dest, &destlen, destsize, ".hg",
						3);
				charcopy(dest, &destlen, destsize, src[i++]);
			}
			else state = DEFAULT;
			break;
		case SPACE:
			/* a non-leading ' ' was consumed in DEFAULT but not
			   yet emitted */
			switch (src[i]) {
			case '/':
			case '\0':
				/* ' ' ends the component: escape it */
				state = START;
				memcopy(dest, &destlen, destsize, "~20", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				/* emit the ' ' and re-examine this byte in
				   DEFAULT */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, ' ');
				break;
			}
			break;
		case DEFAULT:
			/* copy a run of pass-through bytes in a tight loop */
			while (inset(onebyte, src[i])) {
				charcopy(dest, &destlen, destsize, src[i++]);
				if (i == len)
					goto done;
			}
			switch (src[i]) {
			case '.':
				/* defer the '.' until the next byte is known */
				state = DOT;
				i++;
				break;
			case ' ':
				/* defer the ' ' until the next byte is known */
				state = SPACE;
				i++;
				break;
			case '/':
				state = START;
				charcopy(dest, &destlen, destsize, '/');
				i++;
				break;
			default:
				if (inset(onebyte, src[i])) {
					do {
						charcopy(dest, &destlen,
							 destsize, src[i++]);
					} while (i < len &&
						 inset(onebyte, src[i]));
				}
				else if (inset(twobytes, src[i])) {
					/* two-byte form: '_' plus either '_'
					   or the byte shifted up by 32 */
					char c = src[i++];
					charcopy(dest, &destlen, destsize, '_');
					charcopy(dest, &destlen, destsize,
						 c == '_' ? '_' : c + 32);
				}
				else
					escape3(dest, &destlen, destsize,
						src[i++]);
				break;
			}
			break;
		}
	}
done:
	return destlen;
}
       
   468 
       
   469 static Py_ssize_t basicencode(char *dest, size_t destsize,
       
   470 			      const char *src, Py_ssize_t len)
       
   471 {
       
   472 	static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
       
   473 
       
   474 	static const uint32_t onebyte[8] = {
       
   475 		1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
       
   476 	};
       
   477 
       
   478 	Py_ssize_t destlen = 0;
       
   479 
       
   480 	return _encode(twobytes, onebyte, dest, destlen, destsize,
       
   481 		       src, len, 1);
       
   482 }
       
   483 
       
/* Longest encoded path, in bytes, that pathencode returns without
   falling back to the hashed form; hashmangle also uses it as the
   length budget for the hashed name. */
static const Py_ssize_t maxstorepathlen = 120;
       
   485 
       
   486 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
       
   487 			       const char *src, Py_ssize_t len)
       
   488 {
       
   489 	static const uint32_t onebyte[8] = {
       
   490 		1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
       
   491 	};
       
   492 
       
   493 	static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
       
   494 
       
   495 	Py_ssize_t i, destlen = 0;
       
   496 
       
   497 	for (i = 0; i < len; i++) {
       
   498 		if (inset(onebyte, src[i]))
       
   499 			charcopy(dest, &destlen, destsize, src[i]);
       
   500 		else if (inset(lower, src[i]))
       
   501 			charcopy(dest, &destlen, destsize, src[i] + 32);
       
   502 		else
       
   503 			escape3(dest, &destlen, destsize, src[i]);
       
   504 	}
       
   505 
       
   506 	return destlen;
       
   507 }
       
   508 
       
   509 PyObject *lowerencode(PyObject *self, PyObject *args)
       
   510 {
       
   511 	char *path;
       
   512 	Py_ssize_t len, newlen;
       
   513 	PyObject *ret;
       
   514 
       
   515 	if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
       
   516 		return NULL;
       
   517 
       
   518 	newlen = _lowerencode(NULL, 0, path, len);
       
   519 	ret = PyBytes_FromStringAndSize(NULL, newlen);
       
   520 	if (ret)
       
   521 		_lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
       
   522 
       
   523 	return ret;
       
   524 }
       
   525 
       
   526 /* See store.py:_auxencode for a description. */
       
   527 static Py_ssize_t auxencode(char *dest, size_t destsize,
       
   528 			    const char *src, Py_ssize_t len)
       
   529 {
       
   530 	static const uint32_t twobytes[8];
       
   531 
       
   532 	static const uint32_t onebyte[8] = {
       
   533 		~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
       
   534 	};
       
   535 
       
   536 	return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
       
   537 }
       
   538 
       
   539 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
       
   540 {
       
   541 	static const Py_ssize_t dirprefixlen = 8;
       
   542 	static const Py_ssize_t maxshortdirslen = 68;
       
   543 	char *dest;
       
   544 	PyObject *ret;
       
   545 
       
   546 	Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
       
   547 	Py_ssize_t destsize, destlen = 0, slop, used;
       
   548 
       
   549 	while (lastslash >= 0 && src[lastslash] != '/') {
       
   550 		if (src[lastslash] == '.' && lastdot == -1)
       
   551 			lastdot = lastslash;
       
   552 		lastslash--;
       
   553 	}
       
   554 
       
   555 #if 0
       
   556 	/* All paths should end in a suffix of ".i" or ".d".
       
   557            Unfortunately, the file names in test-hybridencode.py
       
   558            violate this rule.  */
       
   559 	if (lastdot != len - 3) {
       
   560 		PyErr_SetString(PyExc_ValueError,
       
   561 				"suffix missing or wrong length");
       
   562 		return NULL;
       
   563 	}
       
   564 #endif
       
   565 
       
   566 	/* If src contains a suffix, we will append it to the end of
       
   567 	   the new string, so make room. */
       
   568 	destsize = 120;
       
   569 	if (lastdot >= 0)
       
   570 		destsize += len - lastdot - 1;
       
   571 
       
   572 	ret = PyBytes_FromStringAndSize(NULL, destsize);
       
   573 	if (ret == NULL)
       
   574 		return NULL;
       
   575 
       
   576 	dest = PyBytes_AS_STRING(ret);
       
   577 	memcopy(dest, &destlen, destsize, "dh/", 3);
       
   578 
       
   579 	/* Copy up to dirprefixlen bytes of each path component, up to
       
   580 	   a limit of maxshortdirslen bytes. */
       
   581 	for (i = d = p = 0; i < lastslash; i++, p++) {
       
   582 		if (src[i] == '/') {
       
   583 			char d = dest[destlen - 1];
       
   584 			/* After truncation, a directory name may end
       
   585 			   in a space or dot, which are unportable. */
       
   586 			if (d == '.' || d == ' ')
       
   587 				dest[destlen - 1] = '_';
       
   588 			/* The + 3 is to account for "dh/" in the beginning */
       
   589 			if (destlen > maxshortdirslen + 3)
       
   590 				break;
       
   591 			charcopy(dest, &destlen, destsize, src[i]);
       
   592 			p = -1;
       
   593 		}
       
   594 		else if (p < dirprefixlen)
       
   595 			charcopy(dest, &destlen, destsize, src[i]);
       
   596 	}
       
   597 
       
   598 	/* Rewind to just before the last slash copied. */
       
   599 	if (destlen > maxshortdirslen + 3)
       
   600 		do {
       
   601 			destlen--;
       
   602 		} while (destlen > 0 && dest[destlen] != '/');
       
   603 
       
   604 	if (destlen > 3) {
       
   605 		if (lastslash > 0) {
       
   606 			char d = dest[destlen - 1];
       
   607 			/* The last directory component may be
       
   608 			   truncated, so make it safe. */
       
   609 			if (d == '.' || d == ' ')
       
   610 				dest[destlen - 1] = '_';
       
   611 		}
       
   612 
       
   613 		charcopy(dest, &destlen, destsize, '/');
       
   614 	}
       
   615 
       
   616 	/* Add a prefix of the original file's name. Its length
       
   617 	   depends on the number of bytes left after accounting for
       
   618 	   hash and suffix. */
       
   619 	used = destlen + 40;
       
   620 	if (lastdot >= 0)
       
   621 		used += len - lastdot - 1;
       
   622 	slop = maxstorepathlen - used;
       
   623 	if (slop > 0) {
       
   624 		Py_ssize_t basenamelen =
       
   625 			lastslash >= 0 ? len - lastslash - 2 : len - 1;
       
   626 
       
   627 		if (basenamelen > slop)
       
   628 			basenamelen = slop;
       
   629 		if (basenamelen > 0)
       
   630 			memcopy(dest, &destlen, destsize, &src[lastslash + 1],
       
   631 				basenamelen);
       
   632 	}
       
   633 
       
   634 	/* Add hash and suffix. */
       
   635 	for (i = 0; i < 20; i++)
       
   636 		hexencode(dest, &destlen, destsize, sha[i]);
       
   637 
       
   638 	if (lastdot >= 0)
       
   639 		memcopy(dest, &destlen, destsize, &src[lastdot],
       
   640 			len - lastdot - 1);
       
   641 
       
   642 	assert(PyBytes_Check(ret));
       
   643 	Py_SIZE(ret) = destlen;
       
   644 
       
   645 	return ret;
       
   646 }
       
   647 
       
   648 /*
       
   649  * Avoiding a trip through Python would improve performance by 50%,
       
   650  * but we don't encounter enough long names to be worth the code.
       
   651  */
       
   652 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
       
   653 {
       
   654 	static PyObject *shafunc;
       
   655 	PyObject *shaobj, *hashobj;
       
   656 
       
   657 	if (shafunc == NULL) {
       
   658 		PyObject *hashlib, *name = PyBytes_FromString("hashlib");
       
   659 
       
   660 		if (name == NULL)
       
   661 			return -1;
       
   662 
       
   663 		hashlib = PyImport_Import(name);
       
   664 		Py_DECREF(name);
       
   665 
       
   666 		if (hashlib == NULL) {
       
   667 			PyErr_SetString(PyExc_ImportError, "hashlib");
       
   668 			return -1;
       
   669 		}
       
   670 		shafunc = PyObject_GetAttrString(hashlib, "sha1");
       
   671 		Py_DECREF(hashlib);
       
   672 
       
   673 		if (shafunc == NULL) {
       
   674 			PyErr_SetString(PyExc_AttributeError,
       
   675 					"module 'hashlib' has no "
       
   676 					"attribute 'sha1'");
       
   677 			return -1;
       
   678 		}
       
   679 	}
       
   680 
       
   681 	shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
       
   682 
       
   683 	if (shaobj == NULL)
       
   684 		return -1;
       
   685 
       
   686 	hashobj = PyObject_CallMethod(shaobj, "digest", "");
       
   687 	Py_DECREF(shaobj);
       
   688 	if (hashobj == NULL)
       
   689 		return -1;
       
   690 
       
   691 	if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
       
   692 		PyErr_SetString(PyExc_TypeError,
       
   693 				"result of digest is not a 20-byte hash");
       
   694 		Py_DECREF(hashobj);
       
   695 		return -1;
       
   696 	}
       
   697 
       
   698 	memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
       
   699 	Py_DECREF(hashobj);
       
   700 	return 0;
       
   701 }
       
   702 
       
   703 #define MAXENCODE 4096 * 4
       
   704 
       
   705 static PyObject *hashencode(const char *src, Py_ssize_t len)
       
   706 {
       
   707 	char dired[MAXENCODE];
       
   708 	char lowered[MAXENCODE];
       
   709 	char auxed[MAXENCODE];
       
   710 	Py_ssize_t dirlen, lowerlen, auxlen, baselen;
       
   711 	char sha[20];
       
   712 
       
   713 	baselen = (len - 5) * 3;
       
   714 	if (baselen >= MAXENCODE) {
       
   715 		PyErr_SetString(PyExc_ValueError, "string too long");
       
   716 		return NULL;
       
   717 	}
       
   718 
       
   719 	dirlen = _encodedir(dired, baselen, src, len);
       
   720 	if (sha1hash(sha, dired, dirlen - 1) == -1)
       
   721 		return NULL;
       
   722 	lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
       
   723 	auxlen = auxencode(auxed, baselen, lowered, lowerlen);
       
   724 	return hashmangle(auxed, auxlen, sha);
       
   725 }
       
   726 
       
   727 PyObject *pathencode(PyObject *self, PyObject *args)
       
   728 {
       
   729 	Py_ssize_t len, newlen;
       
   730 	PyObject *pathobj, *newobj;
       
   731 	char *path;
       
   732 
       
   733 	if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
       
   734 		return NULL;
       
   735 
       
   736 	if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
       
   737 		PyErr_SetString(PyExc_TypeError, "expected a string");
       
   738 		return NULL;
       
   739 	}
       
   740 
       
   741 	if (len > maxstorepathlen)
       
   742 		newlen = maxstorepathlen + 2;
       
   743 	else
       
   744 		newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
       
   745 
       
   746 	if (newlen <= maxstorepathlen + 1) {
       
   747 		if (newlen == len + 1) {
       
   748 			Py_INCREF(pathobj);
       
   749 			return pathobj;
       
   750 		}
       
   751 
       
   752 		newobj = PyBytes_FromStringAndSize(NULL, newlen);
       
   753 
       
   754 		if (newobj) {
       
   755 			assert(PyBytes_Check(newobj));
       
   756 			Py_SIZE(newobj)--;
       
   757 			basicencode(PyBytes_AS_STRING(newobj), newlen, path,
       
   758 				    len + 1);
       
   759 		}
       
   760 	}
       
   761 	else
       
   762 		newobj = hashencode(path, len + 1);
       
   763 
       
   764 	return newobj;
       
   765 }