contrib/python-zstandard/tests/test_train_dictionary.py
author Raphaël Gomès <rgomes@octobus.net>
Mon, 09 Jan 2023 18:54:57 +0100
changeset 49926 0780371d6b1e
parent 44147 5e84a96d865b
permissions -rw-r--r--
rust-clippy: tell `clippy` we don't need to declare a default here This is a struct only useful for tests.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
     1
import struct
30435
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     2
import sys
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
     3
import unittest
30435
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     4
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
     5
import zstandard as zstd
30435
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     6
43994
de7838053207 zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42937
diff changeset
     7
from .common import (
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
     8
    generate_samples,
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30435
diff changeset
     9
    make_cffi,
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
    10
    random_input_data,
43994
de7838053207 zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42937
diff changeset
    11
    TestCase,
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30435
diff changeset
    12
)
30435
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    13
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    14
# Pick the integer type that dictionary IDs are checked against in the tests
# below.  Python 2 has a separate ``long`` type; on Python 3 only ``int``
# exists, so the ``long`` name must never be evaluated there.
if sys.version_info[0] >= 3:
    int_type = int
else:
    int_type = long
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    18
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    19
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30435
diff changeset
    20
@make_cffi
class TestTrainDictionary(TestCase):
    """Tests for ``zstd.train_dictionary()`` and the dictionary it returns."""

    def test_no_args(self):
        """Calling ``train_dictionary()`` without arguments is a TypeError."""
        with self.assertRaises(TypeError):
            zstd.train_dictionary()

    def test_bad_args(self):
        """Invalid ``samples`` arguments are rejected."""
        # A text string passed where the samples are expected.
        with self.assertRaises(TypeError):
            zstd.train_dictionary(8192, u"foo")

        # A list whose element is a text string.
        with self.assertRaises(ValueError):
            zstd.train_dictionary(8192, [u"foo"])

    def test_no_params(self):
        """Training with only a size and samples yields a usable dictionary."""
        d = zstd.train_dictionary(8192, random_input_data())
        self.assertIsInstance(d.dict_id(), int_type)

        # The dictionary ID may be different across platforms, so the
        # expected header is built from the ID actually produced: the fixed
        # 4-byte prefix (the dictionary magic number in the zstd format)
        # followed by the ID packed as a little-endian unsigned 32-bit int.
        expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())

        data = d.as_bytes()
        self.assertEqual(data[0:8], expected)

    def test_basic(self):
        """Explicit ``k`` and ``d`` values are exposed on the result."""
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
        self.assertIsInstance(d.dict_id(), int_type)

        # Only the fixed 4-byte prefix is checked here, not the ID.
        data = d.as_bytes()
        self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")

        self.assertEqual(d.k, 64)
        self.assertEqual(d.d, 16)

    def test_set_dict_id(self):
        """An explicit ``dict_id`` is reported back by ``dict_id()``."""
        d = zstd.train_dictionary(
            8192, generate_samples(), k=64, d=16, dict_id=42
        )
        self.assertEqual(d.dict_id(), 42)

    def test_optimize(self):
        """Training with ``threads``/``steps`` and no ``k`` still sets ``k``."""
        d = zstd.train_dictionary(
            8192, generate_samples(), threads=-1, steps=1, d=16
        )

        # This varies by platform.
        self.assertIn(d.k, (50, 2000))
        self.assertEqual(d.d, 16)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
    67
43994
de7838053207 zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42937
diff changeset
    68
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31796
diff changeset
    69
@make_cffi
class TestCompressionDict(TestCase):
    """Tests for ``zstd.ZstdCompressionDict`` construction and precompute."""

    def test_bad_mode(self):
        """An unknown ``dict_type`` value is rejected with ValueError."""
        with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
            zstd.ZstdCompressionDict(b"foo", dict_type=42)

    def test_bad_precompute_compress(self):
        """``precompute_compress()`` needs exactly one of its two arguments."""
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

        # Neither ``level`` nor ``compression_params`` given.
        with self.assertRaisesRegex(
            ValueError, "must specify one of level or "
        ):
            d.precompute_compress()

        # Both ``level`` and ``compression_params`` given.
        with self.assertRaisesRegex(
            ValueError, "must only specify one of level or "
        ):
            d.precompute_compress(
                level=3, compression_params=zstd.CompressionParameters()
            )

    def test_precompute_compress_rawcontent(self):
        """Precompute works for raw content but fails when forced full-dict."""
        # Declared as raw content: precomputing is expected not to raise.
        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
        )
        d.precompute_compress(level=1)

        # Same bytes declared as a full dictionary: precomputing must fail.
        # NOTE(review): presumably because the bytes are not a structured
        # zstd dictionary -- confirm against the library documentation.
        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
        )
        with self.assertRaisesRegex(
            zstd.ZstdError, "unable to precompute dictionary"
        ):
            d.precompute_compress(level=1)