fsmonitor: match watchman and filesystem encoding
author Olivier Trempe <oliviertrempe@gmail.com>
Wed, 08 Mar 2017 09:03:42 -0500
changeset 31846 1064a296a2a7
parent 31845 86246530b8d2
child 31847 39d36c2db68e
fsmonitor: match watchman and filesystem encoding

watchman's path encoding can differ from the filesystem encoding. For example, on Windows, it's always utf-8.

Before this patch, on Windows, a mismatch in path comparison between the fsmonitor state and osutil.statfiles would yield a clean status for added/modified files.

In addition to status reporting wrong results, this leads to files being discarded from changesets while doing history editing operations such as rebase.

Benchmark:

There is a little overhead at module import:
  python -m timeit "import hgext.fsmonitor"
  Windows before patch: 1000000 loops, best of 3: 0.563 usec per loop
  Windows after patch: 1000000 loops, best of 3: 0.583 usec per loop
  Linux before patch: 1000000 loops, best of 3: 0.579 usec per loop
  Linux after patch: 1000000 loops, best of 3: 0.588 usec per loop

10000 calls to _watchmantofsencoding:
  python -m timeit -s "from hgext.fsmonitor import _watchmantofsencoding, _fixencoding" "fname = '/path/to/file'" "for i in range(10000):" " if _fixencoding: fname = _watchmantofsencoding(fname)"
  Windows (_fixencoding is True): 100 loops, best of 3: 19.5 msec per loop
  Linux (_fixencoding is False): 100 loops, best of 3: 3.08 msec per loop
hgext/fsmonitor/__init__.py
tests/test-check-py3-compat.t
--- a/hgext/fsmonitor/__init__.py	Fri Apr 07 06:31:50 2017 -0700
+++ b/hgext/fsmonitor/__init__.py	Wed Mar 08 09:03:42 2017 -0500
@@ -91,14 +91,17 @@
 
 from __future__ import absolute_import
 
+import codecs
 import hashlib
 import os
 import stat
+import sys
 
 from mercurial.i18n import _
 from mercurial import (
     context,
     encoding,
+    error,
     extensions,
     localrepo,
     merge,
@@ -110,6 +113,7 @@
 from mercurial import match as matchmod
 
 from . import (
+    pywatchman,
     state,
     watchmanclient,
 )
@@ -159,6 +163,28 @@
     sha1.update('\0')
     return sha1.hexdigest()
 
+_watchmanencoding = pywatchman.encoding.get_local_encoding()
+_fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+_fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
+
+def _watchmantofsencoding(path):
+    """Fix path to match watchman and local filesystem encoding
+
+    watchman's paths encoding can differ from filesystem encoding. For example,
+    on Windows, it's always utf-8.
+    """
+    try:
+        decoded = path.decode(_watchmanencoding)
+    except UnicodeDecodeError as e:
+        raise error.Abort(str(e), hint='watchman encoding error')
+
+    try:
+        encoded = decoded.encode(_fsencoding, 'strict')
+    except UnicodeEncodeError as e:
+        raise error.Abort(str(e))
+
+    return encoded
+
 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
     '''Replacement for dirstate.walk, hooking into Watchman.
 
@@ -303,6 +329,8 @@
     # for name case changes.
     for entry in result['files']:
         fname = entry['name']
+        if _fixencoding:
+            fname = _watchmantofsencoding(fname)
         if switch_slashes:
             fname = fname.replace('\\', '/')
         if normalize:
--- a/tests/test-check-py3-compat.t	Fri Apr 07 06:31:50 2017 -0700
+++ b/tests/test-check-py3-compat.t	Wed Mar 08 09:03:42 2017 -0500
@@ -26,8 +26,8 @@
   > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \
   > | sed 's/[0-9][0-9]*)$/*)/'
   hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob)
-  hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
-  hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
+  hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at __init__.py:*)
+  hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at __init__.py:*)
   mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
   mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
   mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)