hgext/convert/subversion.py
branch stable
changeset 45027 0ea9c86fac89
parent 45026 ddf66c218104
child 45032 df3660cc60f5
equal deleted inserted replaced
45026:ddf66c218104 45027:0ea9c86fac89
   352     b'https': httpcheck,
   352     b'https': httpcheck,
   353     b'file': filecheck,
   353     b'file': filecheck,
   354 }
   354 }
   355 
   355 
   356 
   356 
       
   357 class NonUtf8PercentEncodedBytes(Exception):
       
   358     pass
       
   359 
       
   360 
       
   361 # Subversion paths are Unicode. Since the percent-decoding is done on
       
   362 # UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
       
   363 def url2pathname_like_subversion(unicodepath):
       
   364     if pycompat.ispy3:
       
   365         # On Python 3, we have to pass unicode to urlreq.url2pathname().
       
   366         # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
       
   367         # handler.
       
   368         unicodepath = urlreq.url2pathname(unicodepath)
       
   369         if u'\N{REPLACEMENT CHARACTER}' in unicodepath:
       
   370             raise NonUtf8PercentEncodedBytes
       
   371         else:
       
   372             return unicodepath
       
   373     else:
       
   374         # If we passed unicode on Python 2, it would be converted using the
       
   375         # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
       
   376         unicodepath = urlreq.url2pathname(unicodepath.encode('utf-8'))
       
   377         try:
       
   378             return unicodepath.decode('utf-8')
       
   379         except UnicodeDecodeError:
       
   380             raise NonUtf8PercentEncodedBytes
       
   381 
       
   382 
   357 def issvnurl(ui, url):
   383 def issvnurl(ui, url):
   358     try:
   384     try:
   359         proto, path = url.split(b'://', 1)
   385         proto, path = url.split(b'://', 1)
   360         if proto == b'file':
   386         if proto == b'file':
   361             if (
   387             if (
   364                 and path[1:2].isalpha()
   390                 and path[1:2].isalpha()
   365                 and path[2:6].lower() == b'%3a/'
   391                 and path[2:6].lower() == b'%3a/'
   366             ):
   392             ):
   367                 path = path[:2] + b':/' + path[6:]
   393                 path = path[:2] + b':/' + path[6:]
   368             try:
   394             try:
   369                 path.decode(fsencoding)
   395                 unicodepath = path.decode(fsencoding)
   370             except UnicodeDecodeError:
   396             except UnicodeDecodeError:
   371                 ui.warn(
   397                 ui.warn(
   372                     _(
   398                     _(
   373                         b'Subversion requires that file URLs can be converted '
   399                         b'Subversion requires that file URLs can be converted '
   374                         b'to Unicode using the current locale encoding (%s)\n'
   400                         b'to Unicode using the current locale encoding (%s)\n'
   375                     )
   401                     )
   376                     % pycompat.sysbytes(fsencoding)
   402                     % pycompat.sysbytes(fsencoding)
   377                 )
   403                 )
   378                 return False
   404                 return False
   379             # FIXME: The following reasoning and logic is wrong and will be
   405             try:
   380             # fixed in a following changeset.
   406                 unicodepath = url2pathname_like_subversion(unicodepath)
   381             # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
   407             except NonUtf8PercentEncodedBytes:
   382             # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
       
   383             # py3 will decode percent-encoded bytes using the utf-8 encoding
       
   384             # and the "replace" error handler. This means that it will not
       
   385             # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
       
   386             # url.open() uses the reverse function (urlreq.pathname2url()) and
       
   387             # has a similar problem
       
   388             # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
       
   389             # sense to solve both problems together and handle all file URLs
       
   390             # consistently. For now, we warn.
       
   391             unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
       
   392             if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
       
   393                 ui.warn(
   408                 ui.warn(
   394                     _(
   409                     _(
   395                         b'on Python 3, we currently do not support non-UTF-8 '
   410                         b'Subversion does not support non-UTF-8 '
   396                         b'percent-encoded bytes in file URLs for Subversion '
   411                         b'percent-encoded bytes in file URLs\n'
   397                         b'repositories\n'
       
   398                     )
   412                     )
   399                 )
   413                 )
   400             path = pycompat.fsencode(unicodepath)
   414                 return False
       
   415             # Below, we approximate how Subversion checks the path. On Unix, we
       
   416             # should therefore convert the path to bytes using `fsencoding`
       
   417             # (like Subversion does). On Windows, the right thing would
       
   418             # actually be to leave the path as unicode. For now, we restrict
       
   419             # the path to MBCS.
       
   420             path = unicodepath.encode(fsencoding)
   401     except ValueError:
   421     except ValueError:
   402         proto = b'file'
   422         proto = b'file'
   403         path = os.path.abspath(url)
   423         path = os.path.abspath(url)
   404         try:
   424         try:
   405             path.decode(fsencoding)
   425             path.decode(fsencoding)