tests/test-encoding-func.py
author Matt Harbison <matt_harbison@yahoo.com>
Tue, 06 Sep 2022 15:08:52 -0400
branchstable
changeset 49490 37debd850c16
parent 48875 6000f5b25c9b
permissions -rw-r--r--
packaging: update dulwich to drop the certifi dependency on Windows The presence of `certifi` causes the system certificate store to be ignored, which was reported as a bug against TortoiseHg[1]. It was only pulled in on Windows because of `dulwich`, which was copied from the old TortoiseHg install scripts, in order to support `hg-git`. This version of `dulwich` raises the minimum `urllib3` to a version (1.25) that does certificate verification by default, without the help of `certifi`[2]. We already bundle a newer version of `urllib3`. Note that `certifi` can still be imported from the user site directory, if installed there. But the installer no longer disables the system certificates by default. [1] https://foss.heptapod.net/mercurial/tortoisehg/thg/-/issues/5825 [2] https://github.com/jelmer/dulwich/issues/1025
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     1
import unittest
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     2
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
     3
from mercurial import encoding
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
     4
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     5
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     6
class IsasciistrTest(unittest.TestCase):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     7
    asciistrs = [
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     8
        b'a',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     9
        b'ab',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    10
        b'abc',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    11
        b'abcd',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    12
        b'abcde',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    13
        b'abcdefghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    14
        b'abcd\0fghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    15
    ]
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    16
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    17
    def testascii(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    18
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    19
            self.assertTrue(encoding.isasciistr(s))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    20
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    21
    def testnonasciichar(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    22
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    23
            for i in range(len(s)):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    24
                t = bytearray(s)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    25
                t[i] |= 0x80
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    26
                self.assertFalse(encoding.isasciistr(bytes(t)))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    27
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
    28
33927
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    29
class LocalEncodingTest(unittest.TestCase):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    30
    def testasciifastpath(self):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    31
        s = b'\0' * 100
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    32
        self.assertTrue(s is encoding.tolocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    33
        self.assertTrue(s is encoding.fromlocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    34
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
    35
33928
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    36
class Utf8bEncodingTest(unittest.TestCase):
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    37
    def setUp(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    38
        self.origencoding = encoding.encoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    39
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    40
    def tearDown(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    41
        encoding.encoding = self.origencoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    42
33928
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    43
    def testasciifastpath(self):
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    44
        s = b'\0' * 100
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    45
        self.assertTrue(s is encoding.toutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    46
        self.assertTrue(s is encoding.fromutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    47
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    48
    def testlossylatin(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    49
        encoding.encoding = b'ascii'
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    50
        s = u'\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    51
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    52
        self.assertEqual(l, b'?')  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    53
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    54
37947
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    55
    def testlosslesslatin(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    56
        encoding.encoding = b'latin-1'
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    57
        s = u'\xc0'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    58
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    59
        self.assertEqual(l, b'\xc0')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    60
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    61
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    62
    def testlossy0xed(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    63
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    64
        s = u'\ud1bc\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    65
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    66
        self.assertIn(b'\xed', l)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    67
        self.assertTrue(l.endswith(b'?'))  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    68
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    69
37947
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    70
    def testlossless0xed(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    71
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    72
        s = u'\ud1bc'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    73
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    74
        self.assertEqual(l, b'\xc5\xed')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    75
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    76
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
    77
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    78
if __name__ == '__main__':
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    79
    import silenttestrunner
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37947
diff changeset
    80
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    81
    silenttestrunner.main(__name__)