tests/test-hybridencode.py
author Augie Fackler <augie@google.com>
Mon, 14 May 2018 22:02:44 -0400
changeset 38101 6acf41bb8d40
parent 37871 6574c81b6831
child 43076 2372284d9457
permissions -rw-r--r--
status: add default of --terse=u to tweakdefaults (BC) This is in line with both Git and Subversion, and strikes me as a more humane behavior. Test output changes are expected once you read test-status.t, and I feel like we've still got adequate coverage on things. Differential Revision: https://phab.mercurial-scm.org/D3628

from __future__ import absolute_import, print_function

import unittest

from mercurial import (
    store,
)

class hybridencodetests(unittest.TestCase):
    def hybridencode(self, input, want):
        """Assert that both store path encoders turn ``input`` into ``want``.

        ``store._pathencode`` is checked first; the reference
        ``store._hybridencode`` (called with ``True`` as its second
        argument) is only checked if the first assertion passed.
        """
        # The C implementation, when that is the one in use.
        self.assertEqual(want, store._pathencode(input))
        # The reference implementation written in Python.
        self.assertEqual(want, store._hybridencode(input, True))

    def testnoencodingrequired(self):
        # Lowercase letters, digits, space and this set of punctuation are
        # expected to come back unchanged, so the input doubles as the
        # expected value.
        unchanged = (
            b'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}')
        self.hybridencode(unchanged, unchanged)

    def testuppercasechars(self):
        # An uppercase char X is encoded as _x.
        uppercase = b'data/ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        encoded = b'data/_a_b_c_d_e_f_g_h_i_j_k_l_m_n_o_p_q_r_s_t_u_v_w_x_y_z'
        self.hybridencode(uppercase, encoded)

    def testunderbar(self):
        # A literal underbar is encoded by doubling it.
        single, doubled = b'data/_', b'data/__'
        self.hybridencode(single, doubled)

    def testtilde(self):
        # The tilde itself is character-encoded, as ~7e.
        plain = b'data/~'
        escaped = b'data/~7e'
        self.hybridencode(plain, escaped)

    def testcontrolchars(self):
        # Every character in the ASCII code range 1..31 is expected as ~XX,
        # where XX is its two-digit lowercase hex code.
        controls = (
            b'data/\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
            b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e'
            b'\x1f')
        escaped = (
            b'data/~01~02~03~04~05~06~07~08~09~0a~0b~0c~0d~0e~0f~10~11~12~13'
            b'~14~15~16~17~18~19~1a~1b~1c~1d~1e~1f')
        self.hybridencode(controls, escaped)

    def testhighascii(self):
        # Characters in the code range 126..255 are all expected as ~XX hex
        # escapes. The range is exercised in four consecutive slices, each a
        # (path, expected encoding) pair checked in the order listed.
        slices = (
            # '~' (0x7e) through 0x9f
            (b'data/~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c'
             b'\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b'
             b'\x9c\x9d\x9e\x9f',
             b'data/~7e~7f~80~81~82~83~84~85~86~87~88~89~8a~8b~8c~8d~8e~8f~90'
             b'~91~92~93~94~95~96~97~98~99~9a~9b~9c~9d~9e~9f'),
            # 0xa0 through 0xbf
            (b'data/\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad'
             b'\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc'
             b'\xbd\xbe\xbf',
             b'data/~a0~a1~a2~a3~a4~a5~a6~a7~a8~a9~aa~ab~ac~ad~ae~af~b0~b1~b2'
             b'~b3~b4~b5~b6~b7~b8~b9~ba~bb~bc~bd~be~bf'),
            # 0xc0 through 0xdf
            (b'data/\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca'
             b'\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6'
             b'\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf',
             b'data/~c0~c1~c2~c3~c4~c5~c6~c7~c8~c9~ca~cb~cc~cd~ce~cf~d0~d1~d2'
             b'~d3~d4~d5~d6~d7~d8~d9~da~db~dc~dd~de~df'),
            # 0xe0 through 0xff
            (b'data/\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed'
             b'\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd'
             b'\xfe\xff',
             b'data/~e0~e1~e2~e3~e4~e5~e6~e7~e8~e9~ea~eb~ec~ed~ee~ef~f0~f1~f2'
             b'~f3~f4~f5~f6~f7~f8~f9~fa~fb~fc~fd~fe~ff'),
        )
        for raw, escaped in slices:
            self.hybridencode(raw, escaped)

    def testwinreserved(self):
        # Windows reserved characters: each one is expected as a ~XX escape.
        reserved = (
            b'data/less <, greater >, colon :, double-quote ", backslash \\, '
            b'pipe |, question-mark ?, asterisk *')
        escaped = (
            b'data/less ~3c, greater ~3e, colon ~3a, double-quote ~22, '
            b'backslash ~5c, pipe ~7c, question-mark ~3f, asterisk ~2a')
        self.hybridencode(reserved, escaped)

    def testhgreserved(self):
        # Each table holds (path, expected encoding) pairs; the tables are
        # checked one after the other, entries in the order listed.

        # Directories ending in .hg, .i or .d get an extra '.hg' suffix.
        directories = (
            (b'data/x.h.i/x.hg/x.i/x.d/foo',
             b'data/x.h.i.hg/x.hg.hg/x.i.hg/x.d.hg/foo'),
            (b'data/a.hg/a.i/a.d/foo',
             b'data/a.hg.hg/a.i.hg/a.d.hg/foo'),
            (b'data/au.hg/au.i/au.d/foo',
             b'data/au.hg.hg/au.i.hg/au.d.hg/foo'),
            (b'data/aux.hg/aux.i/aux.d/foo',
             b'data/au~78.hg.hg/au~78.i.hg/au~78.d.hg/foo'),
            (b'data/auxy.hg/auxy.i/auxy.d/foo',
             b'data/auxy.hg.hg/auxy.i.hg/auxy.d.hg/foo'),
        )
        # The same endings on *filenames* get no extra suffix ('aux' still
        # becomes 'au~78').
        filenames = (
            (b'data/foo/x.hg', b'data/foo/x.hg'),
            (b'data/foo/x.i', b'data/foo/x.i'),
            (b'data/foo/x.d', b'data/foo/x.d'),
            (b'data/foo/a.hg', b'data/foo/a.hg'),
            (b'data/foo/a.i', b'data/foo/a.i'),
            (b'data/foo/a.d', b'data/foo/a.d'),
            (b'data/foo/au.hg', b'data/foo/au.hg'),
            (b'data/foo/au.i', b'data/foo/au.i'),
            (b'data/foo/au.d', b'data/foo/au.d'),
            (b'data/foo/aux.hg', b'data/foo/au~78.hg'),
            (b'data/foo/aux.i', b'data/foo/au~78.i'),
            (b'data/foo/aux.d', b'data/foo/au~78.d'),
            (b'data/foo/auxy.hg', b'data/foo/auxy.hg'),
            (b'data/foo/auxy.i', b'data/foo/auxy.i'),
            (b'data/foo/auxy.d', b'data/foo/auxy.d'),
        )
        # Plain .hg, .i and .d directories also have the leading dot encoded.
        dotonly = (
            (b'data/.hg/.i/.d/foo',
             b'data/~2ehg.hg/~2ei.hg/~2ed.hg/foo'),
        )
        for path, encoded in directories + filenames + dotonly:
            self.hybridencode(path, encoded)

    def testmisclongcases(self):
        # Table of (path, expected encoding) pairs, checked in the order
        # listed; the first mismatch fails the test.
        cases = (
            # several encodings mixed in a single path
            (b'data/aux.bla/bla.aux/prn/PRN/lpt/com3/nul/'
             b'coma/foo.NUL/normal.c.i',
             b'data/au~78.bla/bla.aux/pr~6e/_p_r_n/lpt/co~6d3'
             b'/nu~6c/coma/foo._n_u_l/normal.c.i'),
            # long paths: the expected form lives under dh/, with shortened
            # directory names and a hex digest in front of the extension
            (b'data/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH'
             b'/TENTH/ELEVENTH/LOREMIPSUM.TXT.i',
             b'dh/au~78/second/x.prn/fourth/fi~3afth/sixth/seventh/eighth/'
             b'nineth/tenth/loremia20419e358ddff1bf8751e38288aff1d7c32ec05.i'),
            (b'data/enterprise/openesbaddons/contrib-imola/corba-bc/'
             b'netbeansplugin/wsdlExtension/src/main/java/META-INF/services'
             b'/org.netbeans.modules.xml.wsdl.bindingsupport.spi.'
             b'ExtensibilityElementTemplateProvider.i',
             b'dh/enterpri/openesba/contrib-/corba-bc/netbeans/wsdlexte/src/'
             b'main/java/org.net7018f27961fdf338a598a40c4683429e7ffb9743.i'),
            (b'data/AUX.THE-QUICK-BROWN-FOX-JU:MPS-OVER-THE-LAZY-DOG-THE-'
             b'QUICK-BROWN-FOX-JUMPS-OVER-THE-LAZY-DOG.TXT.i',
             b'dh/au~78.the-quick-brown-fox-ju~3amps-over-the-lazy-dog-the-'
             b'quick-brown-fox-jud4dcadd033000ab2b26eb66bae1906bcb15d4a70.i'),
            (b'data/Project Planning/Resources/AnotherLongDirectoryName/Follow'
             b'edbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt',
             b'dh/project_/resource/anotherl/followed/andanoth/andthenanextrem'
             b'elylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'),
            (b'data/Project.Planning/Resources/AnotherLongDirectoryName/Follo'
             b'wedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt',
             b'dh/project_/resource/anotherl/followed/andanoth/andthenanextre'
             b'melylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'),
            # dots and spaces at the start or end of a path component
            (b'data/foo.../foo   / /a./_. /__/.x../    bla/.FOO/something.i',
             b'data/foo..~2e/foo  ~20/~20/a~2e/__.~20/____/~2ex.~2e/~20   bla/'
             b'~2e_f_o_o/something.i'),
            # com1..com9: escaped when the name is the whole component or is
            # followed by a dot, not when it has a prefix or a longer stem
            (b'data/c/co/com/com0/com1/com2/com3/com4/com5/com6/com7/com8/com9',
             b'data/c/co/com/com0/co~6d1/co~6d2/co~6d3/co~6d4/co~6d5/co~6d6/'
             b'co~6d7/co~6d8/co~6d9'),
            (b'data/C/CO/COM/COM0/COM1/COM2/COM3/COM4/COM5/COM6/COM7/COM8/COM9',
             b'data/_c/_c_o/_c_o_m/_c_o_m0/_c_o_m1/_c_o_m2/_c_o_m3/_c_o_m4/'
             b'_c_o_m5/_c_o_m6/_c_o_m7/_c_o_m8/_c_o_m9'),
            (b'data/c.x/co.x/com.x/com0.x/com1.x/com2.x/com3.x/com4.x/com5.x/'
             b'com6.x/com7.x/com8.x/com9.x',
             b'data/c.x/co.x/com.x/com0.x/co~6d1.x/co~6d2.x/co~6d3.x/co~6d4.x'
             b'/co~6d5.x/co~6d6.x/co~6d7.x/co~6d8.x/co~6d9.x'),
            (b'data/x.c/x.co/x.com0/x.com1/x.com2/x.com3/x.com4/x.com5/x.com6'
             b'/x.com7/x.com8/x.com9',
             b'data/x.c/x.co/x.com0/x.com1/x.com2/x.com3/x.com4/x.com5/x.com6'
             b'/x.com7/x.com8/x.com9'),
            (b'data/cx/cox/comx/com0x/com1x/com2x/com3x/com4x/com5x/com6x/'
             b'com7x/com8x/com9x',
             b'data/cx/cox/comx/com0x/com1x/com2x/com3x/com4x/com5x/com6x/'
             b'com7x/com8x/com9x'),
            (b'data/xc/xco/xcom0/xcom1/xcom2/xcom3/xcom4/xcom5/xcom6/xcom7/'
             b'xcom8/xcom9',
             b'data/xc/xco/xcom0/xcom1/xcom2/xcom3/xcom4/xcom5/xcom6/xcom7/'
             b'xcom8/xcom9'),
            # lpt1..lpt9: same pattern as com1..com9
            (b'data/l/lp/lpt/lpt0/lpt1/lpt2/lpt3/lpt4/lpt5/lpt6/lpt7/lpt8/lpt9',
             b'data/l/lp/lpt/lpt0/lp~741/lp~742/lp~743/lp~744/lp~745/lp~746/'
             b'lp~747/lp~748/lp~749'),
            (b'data/L/LP/LPT/LPT0/LPT1/LPT2/LPT3/LPT4/LPT5/LPT6/LPT7/LPT8/LPT9',
             b'data/_l/_l_p/_l_p_t/_l_p_t0/_l_p_t1/_l_p_t2/_l_p_t3/_l_p_t4/'
             b'_l_p_t5/_l_p_t6/_l_p_t7/_l_p_t8/_l_p_t9'),
            (b'data/l.x/lp.x/lpt.x/lpt0.x/lpt1.x/lpt2.x/lpt3.x/lpt4.x/lpt5.x/'
             b'lpt6.x/lpt7.x/lpt8.x/lpt9.x',
             b'data/l.x/lp.x/lpt.x/lpt0.x/lp~741.x/lp~742.x/lp~743.x/lp~744.x/'
             b'lp~745.x/lp~746.x/lp~747.x/lp~748.x/lp~749.x'),
            (b'data/x.l/x.lp/x.lpt/x.lpt0/x.lpt1/x.lpt2/x.lpt3/x.lpt4/x.lpt5/'
             b'x.lpt6/x.lpt7/x.lpt8/x.lpt9',
             b'data/x.l/x.lp/x.lpt/x.lpt0/x.lpt1/x.lpt2/x.lpt3/x.lpt4/x.lpt5'
             b'/x.lpt6/x.lpt7/x.lpt8/x.lpt9'),
            (b'data/lx/lpx/lptx/lpt0x/lpt1x/lpt2x/lpt3x/lpt4x/lpt5x/lpt6x/'
             b'lpt7x/lpt8x/lpt9x',
             b'data/lx/lpx/lptx/lpt0x/lpt1x/lpt2x/lpt3x/lpt4x/lpt5x/lpt6x/'
             b'lpt7x/lpt8x/lpt9x'),
            (b'data/xl/xlp/xlpt/xlpt0/xlpt1/xlpt2/xlpt3/xlpt4/xlpt5/xlpt6/'
             b'xlpt7/xlpt8/xlpt9',
             b'data/xl/xlp/xlpt/xlpt0/xlpt1/xlpt2/xlpt3/xlpt4/xlpt5/xlpt6/'
             b'xlpt7/xlpt8/xlpt9'),
            # con, prn, aux and nul, alone and with prefixes/suffixes
            (b'data/con/p/pr/prn/a/au/aux/n/nu/nul',
             b'data/co~6e/p/pr/pr~6e/a/au/au~78/n/nu/nu~6c'),
            (b'data/CON/P/PR/PRN/A/AU/AUX/N/NU/NUL',
             b'data/_c_o_n/_p/_p_r/_p_r_n/_a/_a_u/_a_u_x/_n/_n_u/_n_u_l'),
            (b'data/con.x/p.x/pr.x/prn.x/a.x/au.x/aux.x/n.x/nu.x/nul.x',
             b'data/co~6e.x/p.x/pr.x/pr~6e.x/a.x/au.x/au~78.x/n.x/nu.x/'
             b'nu~6c.x'),
            (b'data/x.con/x.p/x.pr/x.prn/x.a/x.au/x.aux/x.n/x.nu/x.nul',
             b'data/x.con/x.p/x.pr/x.prn/x.a/x.au/x.aux/x.n/x.nu/x.nul'),
            (b'data/conx/px/prx/prnx/ax/aux/auxx/nx/nux/nulx',
             b'data/conx/px/prx/prnx/ax/au~78/auxx/nx/nux/nulx'),
            (b'data/xcon/xp/xpr/xprn/xa/xau/xaux/xn/xnu/xnul',
             b'data/xcon/xp/xpr/xprn/xa/xau/xaux/xn/xnu/xnul'),
            # the same names followed by a trailing dot
            (b'data/a./au./aux./auxy./aux.',
             b'data/a~2e/au~2e/au~78~2e/auxy~2e/au~78~2e'),
            (b'data/c./co./con./cony./con.',
             b'data/c~2e/co~2e/co~6e~2e/cony~2e/co~6e~2e'),
            (b'data/p./pr./prn./prny./prn.',
             b'data/p~2e/pr~2e/pr~6e~2e/prny~2e/pr~6e~2e'),
            (b'data/n./nu./nul./nuly./nul.',
             b'data/n~2e/nu~2e/nu~6c~2e/nuly~2e/nu~6c~2e'),
            (b'data/l./lp./lpt./lpt1./lpt1y./lpt1.',
             b'data/l~2e/lp~2e/lpt~2e/lp~741~2e/lpt1y~2e/lp~741~2e'),
            (b'data/lpt9./lpt9y./lpt9.',
             b'data/lp~749~2e/lpt9y~2e/lp~749~2e'),
            (b'data/com./com1./com1y./com1.',
             b'data/com~2e/co~6d1~2e/com1y~2e/co~6d1~2e'),
            (b'data/com9./com9y./com9.',
             b'data/co~6d9~2e/com9y~2e/co~6d9~2e'),
            # followed by a trailing space: only the space is escaped
            (b'data/a /au /aux /auxy /aux ',
             b'data/a~20/au~20/aux~20/auxy~20/aux~20'),
        )
        for path, encoded in cases:
            self.hybridencode(path, encoded)

    def testhashingboundarycases(self):
        # Largest unhashed path: expected to come back exactly as given.
        largestplain = (
            b'data/123456789-123456789-123456789-123456789-123456789-unhashed'
            b'--xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345')
        self.hybridencode(largestplain, largestplain)

        # Shortest hashed path: one character longer, and expected under dh/
        # with a hex digest at the end.
        shortesthashed = (
            b'data/123456789-123456789-123456789-123456789-123456789-hashed'
            b'----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-123456')
        hashedform = (
            b'dh/123456789-123456789-123456789-123456789-123456789-hashed---'
            b'-xxxxxxxxx-xxxxxxxe9c55002b50bf5181e7a6fc1f60b126e2a6fcf71')
        self.hybridencode(shortesthashed, hashedform)

    def testhashing(self):
        # Table of (path, expected encoding) pairs, checked in the order
        # listed; the first mismatch fails the test.
        cases = (
            # changing one char in part that's hashed away produces a
            # different hash
            (b'data/123456789-123456789-123456789-123456789-123456789-hashed'
             b'----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxy-123456789-123456',
             b'dh/123456789-123456789-123456789-123456789-123456789-hashed---'
             b'-xxxxxxxxx-xxxxxxxd24fa4455faf8a94350c18e5eace7c2bb17af706'),
            # uppercase hitting length limit due to encoding
            (b'data/A23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-'
             b'123456789-12345',
             b'dh/a23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxx'
             b'cbbc657029b41b94ed510d05feb6716a5c03bc6b'),
            (b'data/Z23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-'
             b'123456789-12345',
             b'dh/z23456789-123456789-123456789-123456789-123456789-xxxxxxxxx'
             b'-xxxxxxxxx-xxxxxxx938f32a725c89512833fb96b6602dd9ebff51ddd'),
            # compare with lowercase not hitting limit
            (b'data/a23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-'
             b'12345',
             b'data/a23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-'
             b'12345'),
            (b'data/z23456789-123456789-123456789-123456789-123456789'
             b'-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789'
             b'-12345',
             b'data/z23456789-123456789-123456789-123456789-123456789'
             b'-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-'
             b'12345'),
            # not hitting limit with any of these
            (b'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}'
             b'xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345',
             b'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}'
             b'xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'),
            # underbar hitting length limit due to encoding
            (b'data/_23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-'
             b'12345',
             b'dh/_23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-'
             b'xxxxxxxxx-xxxxxxx9921a01af50feeabc060ce00eee4cba6efc31d2b'),
            # tilde hitting length limit due to encoding
            (b'data/~23456789-123456789-123456789-123456789-123456789-'
             b'xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-'
             b'12345',
             b'dh/~7e23456789-123456789-123456789-123456789-123456789'
             b'-xxxxxxxxx-xxxxxxxxx-xxxxx'
             b'9cec6f97d569c10995f785720044ea2e4227481b'),
        )
        for path, encoded in cases:
            self.hybridencode(path, encoded)

    def testwinreservedoverlimit(self):
        # Windows reserved characters hitting length limit
        # The leading '<' shows up as '~3c' in the expected value, which is
        # the hashed dh/ form (hex digest at the end) rather than the plain
        # data/ form.
        self.hybridencode(
            (b'data/<23456789-123456789-123456789-123456789-123456789'
             b'-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx'
             b'-123456789-12345'),
            (b'dh/~3c23456789-123456789-123456789-123456789-123456789'
             b'-xxxxxxxxx-xxxxxxxxx-xxxxxee'
             b'67d8f275876ca1ef2500fc542e63c885c4e62d'))