# HG changeset patch # User Augie Fackler # Date 1579630491 18000 # Node ID 84a0102c05c7852c8215ef6cf21d809927586b69 # Parent 61881b1701405a12f25302d219fc7ea0e6ff0d70# Parent ff396501e8410ba455baabc4090fb1209c5af22b merge to stable for 5.3 release freeze diff -r 61881b170140 -r 84a0102c05c7 .arcconfig --- a/.arcconfig Thu Jan 09 14:19:20 2020 -0500 +++ b/.arcconfig Tue Jan 21 13:14:51 2020 -0500 @@ -1,5 +1,6 @@ { "conduit_uri": "https://phab.mercurial-scm.org/api", + "phabricator.uri": "https://phab.mercurial-scm.org/", "repository.callsign": "HG", "arc.land.onto.default": "@", "base": "hg:.^" diff -r 61881b170140 -r 84a0102c05c7 .hgignore --- a/.hgignore Thu Jan 09 14:19:20 2020 -0500 +++ b/.hgignore Tue Jan 21 13:14:51 2020 -0500 @@ -51,6 +51,7 @@ cscope.* .idea/* .asv/* +.pytype/* i18n/hg.pot locale/*/LC_MESSAGES/hg.mo hgext/__index__.py diff -r 61881b170140 -r 84a0102c05c7 Makefile --- a/Makefile Thu Jan 09 14:19:20 2020 -0500 +++ b/Makefile Tue Jan 21 13:14:51 2020 -0500 @@ -11,7 +11,7 @@ PURE= PYFILESCMD=find mercurial hgext doc -name '*.py' PYFILES:=$(shell $(PYFILESCMD)) -DOCFILES=mercurial/help/*.txt +DOCFILES=mercurial/helptext/*.txt export LANGUAGE=C export LC_ALL=C TESTFLAGS ?= $(shell echo $$HGTESTFLAGS) @@ -189,7 +189,8 @@ docker-centos6 \ docker-centos7 \ docker-centos8 \ - docker-debian-jessie \ + docker-debian-bullseye \ + docker-debian-buster \ docker-debian-stretch \ docker-fedora \ docker-ubuntu-trusty \ diff -r 61881b170140 -r 84a0102c05c7 contrib/automation/hgautomation/aws.py --- a/contrib/automation/hgautomation/aws.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/automation/hgautomation/aws.py Tue Jan 21 13:14:51 2020 -0500 @@ -59,7 +59,7 @@ UBUNTU_ACCOUNT_ID = '099720109477' -WINDOWS_BASE_IMAGE_NAME = 'Windows_Server-2019-English-Full-Base-2019.07.12' +WINDOWS_BASE_IMAGE_NAME = 'Windows_Server-2019-English-Full-Base-2019.11.13' KEY_PAIRS = { diff -r 61881b170140 -r 84a0102c05c7 contrib/automation/hgautomation/windows.py --- 
a/contrib/automation/hgautomation/windows.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/automation/hgautomation/windows.py Tue Jan 21 13:14:51 2020 -0500 @@ -71,7 +71,7 @@ BUILD_INNO = r''' Set-Location C:\hgdev\src $python = "C:\hgdev\python27-{arch}\python.exe" -C:\hgdev\python37-x64\python.exe contrib\packaging\inno\build.py --python $python +C:\hgdev\python37-x64\python.exe contrib\packaging\packaging.py inno --python $python if ($LASTEXITCODE -ne 0) {{ throw "process exited non-0: $LASTEXITCODE" }} @@ -88,7 +88,7 @@ BUILD_WIX = r''' Set-Location C:\hgdev\src $python = "C:\hgdev\python27-{arch}\python.exe" -C:\hgdev\python37-x64\python.exe contrib\packaging\wix\build.py --python $python {extra_args} +C:\hgdev\python37-x64\python.exe contrib\packaging\packaging.py wix --python $python {extra_args} if ($LASTEXITCODE -ne 0) {{ throw "process exited non-0: $LASTEXITCODE" }} diff -r 61881b170140 -r 84a0102c05c7 contrib/check-code.py --- a/contrib/check-code.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/check-code.py Tue Jan 21 13:14:51 2020 -0500 @@ -281,10 +281,10 @@ for tp in testpats[i]: p = tp[0] m = tp[1] - if p.startswith(r'^'): - p = r"^ [$>] (%s)" % p[1:] + if p.startswith('^'): + p = "^ [$>] (%s)" % p[1:] else: - p = r"^ [$>] .*(%s)" % p + p = "^ [$>] .*(%s)" % p utestpats[i].append((p, m) + tp[2:]) # don't transform the following rules: diff -r 61881b170140 -r 84a0102c05c7 contrib/check-commit --- a/contrib/check-commit Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/check-commit Tue Jan 21 13:14:51 2020 -0500 @@ -27,32 +27,42 @@ errors = [ (beforepatch + r".*[(]bc[)]", "(BC) needs to be uppercase"), - (beforepatch + r".*[(]issue \d\d\d", - "no space allowed between issue and number"), + ( + beforepatch + r".*[(]issue \d\d\d", + "no space allowed between issue and number", + ), (beforepatch + r".*[(]bug(\d|\s)", "use (issueDDDD) instead of bug"), (commitheader + r"# User [^@\n]+\n", "username is not an email address"), - (commitheader + r"(?!merge with 
)[^#]\S+[^:] ", - "summary line doesn't start with 'topic: '"), + ( + commitheader + r"(?!merge with )[^#]\S+[^:] ", + "summary line doesn't start with 'topic: '", + ), (afterheader + r"[A-Z][a-z]\S+", "don't capitalize summary lines"), (afterheader + r"^\S+: *[A-Z][a-z]\S+", "don't capitalize summary lines"), - (afterheader + r"\S*[^A-Za-z0-9-_]\S*: ", - "summary keyword should be most user-relevant one-word command or topic"), + ( + afterheader + r"\S*[^A-Za-z0-9-_]\S*: ", + "summary keyword should be most user-relevant one-word command or topic", + ), (afterheader + r".*\.\s*\n", "don't add trailing period on summary line"), (afterheader + r".{79,}", "summary line too long (limit is 78)"), ] word = re.compile(r'\S') + + def nonempty(first, second): if word.search(first): return first return second + def checkcommit(commit, node=None): exitcode = 0 printed = node is None hits = [] - signtag = (afterheader + - r'Added (tag [^ ]+|signature) for changeset [a-f0-9]{12}') + signtag = ( + afterheader + r'Added (tag [^ ]+|signature) for changeset [a-f0-9]{12}' + ) if re.search(signtag, commit): return 0 for exp, msg in errors: @@ -84,9 +94,11 @@ return exitcode + def readcommit(node): return os.popen("hg export %s" % node).read() + if __name__ == "__main__": exitcode = 0 node = os.environ.get("HG_NODE") diff -r 61881b170140 -r 84a0102c05c7 contrib/clang-format-ignorelist --- a/contrib/clang-format-ignorelist Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/clang-format-ignorelist Tue Jan 21 13:14:51 2020 -0500 @@ -3,100 +3,9 @@ mercurial/cext/manifest.c mercurial/cext/osutil.c # Vendored code that we should never format: -contrib/python-zstandard/c-ext/bufferutil.c -contrib/python-zstandard/c-ext/compressionchunker.c -contrib/python-zstandard/c-ext/compressiondict.c -contrib/python-zstandard/c-ext/compressionparams.c -contrib/python-zstandard/c-ext/compressionreader.c -contrib/python-zstandard/c-ext/compressionwriter.c -contrib/python-zstandard/c-ext/compressobj.c 
-contrib/python-zstandard/c-ext/compressor.c -contrib/python-zstandard/c-ext/compressoriterator.c -contrib/python-zstandard/c-ext/constants.c -contrib/python-zstandard/c-ext/decompressionreader.c -contrib/python-zstandard/c-ext/decompressionwriter.c -contrib/python-zstandard/c-ext/decompressobj.c -contrib/python-zstandard/c-ext/decompressor.c -contrib/python-zstandard/c-ext/decompressoriterator.c -contrib/python-zstandard/c-ext/frameparams.c -contrib/python-zstandard/c-ext/python-zstandard.h -contrib/python-zstandard/zstd.c -contrib/python-zstandard/zstd/common/bitstream.h -contrib/python-zstandard/zstd/common/compiler.h -contrib/python-zstandard/zstd/common/cpu.h -contrib/python-zstandard/zstd/common/debug.c -contrib/python-zstandard/zstd/common/debug.h -contrib/python-zstandard/zstd/common/entropy_common.c -contrib/python-zstandard/zstd/common/error_private.c -contrib/python-zstandard/zstd/common/error_private.h -contrib/python-zstandard/zstd/common/fse_decompress.c -contrib/python-zstandard/zstd/common/fse.h -contrib/python-zstandard/zstd/common/huf.h -contrib/python-zstandard/zstd/common/mem.h -contrib/python-zstandard/zstd/common/pool.c -contrib/python-zstandard/zstd/common/pool.h -contrib/python-zstandard/zstd/common/threading.c -contrib/python-zstandard/zstd/common/threading.h -contrib/python-zstandard/zstd/common/xxhash.c -contrib/python-zstandard/zstd/common/xxhash.h -contrib/python-zstandard/zstd/common/zstd_common.c -contrib/python-zstandard/zstd/common/zstd_errors.h -contrib/python-zstandard/zstd/common/zstd_internal.h -contrib/python-zstandard/zstd/compress/fse_compress.c -contrib/python-zstandard/zstd/compress/hist.c -contrib/python-zstandard/zstd/compress/hist.h -contrib/python-zstandard/zstd/compress/huf_compress.c -contrib/python-zstandard/zstd/compress/zstd_compress.c -contrib/python-zstandard/zstd/compress/zstd_compress_internal.h -contrib/python-zstandard/zstd/compress/zstd_compress_literals.c 
-contrib/python-zstandard/zstd/compress/zstd_compress_literals.h -contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c -contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h -contrib/python-zstandard/zstd/compress/zstd_double_fast.c -contrib/python-zstandard/zstd/compress/zstd_double_fast.h -contrib/python-zstandard/zstd/compress/zstd_fast.c -contrib/python-zstandard/zstd/compress/zstd_fast.h -contrib/python-zstandard/zstd/compress/zstd_lazy.c -contrib/python-zstandard/zstd/compress/zstd_lazy.h -contrib/python-zstandard/zstd/compress/zstd_ldm.c -contrib/python-zstandard/zstd/compress/zstd_ldm.h -contrib/python-zstandard/zstd/compress/zstdmt_compress.c -contrib/python-zstandard/zstd/compress/zstdmt_compress.h -contrib/python-zstandard/zstd/compress/zstd_opt.c -contrib/python-zstandard/zstd/compress/zstd_opt.h -contrib/python-zstandard/zstd/decompress/huf_decompress.c -contrib/python-zstandard/zstd/decompress/zstd_ddict.c -contrib/python-zstandard/zstd/decompress/zstd_ddict.h -contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c -contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h -contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h -contrib/python-zstandard/zstd/decompress/zstd_decompress.c -contrib/python-zstandard/zstd/deprecated/zbuff_common.c -contrib/python-zstandard/zstd/deprecated/zbuff_compress.c -contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c -contrib/python-zstandard/zstd/deprecated/zbuff.h -contrib/python-zstandard/zstd/dictBuilder/cover.c -contrib/python-zstandard/zstd/dictBuilder/cover.h -contrib/python-zstandard/zstd/dictBuilder/divsufsort.c -contrib/python-zstandard/zstd/dictBuilder/divsufsort.h -contrib/python-zstandard/zstd/dictBuilder/fastcover.c -contrib/python-zstandard/zstd/dictBuilder/zdict.c -contrib/python-zstandard/zstd/dictBuilder/zdict.h -contrib/python-zstandard/zstd/zstd.h -hgext/fsmonitor/pywatchman/bser.c -mercurial/thirdparty/xdiff/xdiff.h 
-mercurial/thirdparty/xdiff/xdiffi.c -mercurial/thirdparty/xdiff/xdiffi.h -mercurial/thirdparty/xdiff/xemit.c -mercurial/thirdparty/xdiff/xemit.h -mercurial/thirdparty/xdiff/xhistogram.c -mercurial/thirdparty/xdiff/xinclude.h -mercurial/thirdparty/xdiff/xmacros.h -mercurial/thirdparty/xdiff/xmerge.c -mercurial/thirdparty/xdiff/xpatience.c -mercurial/thirdparty/xdiff/xprepare.c -mercurial/thirdparty/xdiff/xprepare.h -mercurial/thirdparty/xdiff/xtypes.h -mercurial/thirdparty/xdiff/xutils.c -mercurial/thirdparty/xdiff/xutils.h -mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c +syntax: glob +contrib/python-zstandard/**.c +contrib/python-zstandard/**.h +hgext/fsmonitor/pywatchman/**.c +mercurial/thirdparty/**.c +mercurial/thirdparty/**.h diff -r 61881b170140 -r 84a0102c05c7 contrib/dumprevlog --- a/contrib/dumprevlog Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/dumprevlog Tue Jan 21 13:14:51 2020 -0500 @@ -11,23 +11,26 @@ pycompat, revlog, ) -from mercurial.utils import ( - procutil, -) +from mercurial.utils import procutil for fp in (sys.stdin, sys.stdout, sys.stderr): procutil.setbinary(fp) + def binopen(path, mode=b'rb'): if b'b' not in mode: mode = mode + b'b' return open(path, pycompat.sysstr(mode)) + + binopen.options = {} + def printb(data, end=b'\n'): sys.stdout.flush() pycompat.stdout.write(data + end) + for f in sys.argv[1:]: r = revlog.revlog(binopen, encoding.strtolocal(f)) print("file:", f) diff -r 61881b170140 -r 84a0102c05c7 contrib/examples/fix.hgrc --- a/contrib/examples/fix.hgrc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/examples/fix.hgrc Tue Jan 21 13:14:51 2020 -0500 @@ -1,9 +1,14 @@ [fix] -clang-format:command = clang-format --style file -i -clang-format:pattern = (**.c or **.cc or **.h) and not "listfile:contrib/clang-format-ignorelist" +clang-format:command = clang-format --style file +clang-format:pattern = set:(**.c or **.cc or **.h) and not "include:contrib/clang-format-ignorelist" -rustfmt:command = rustfmt {rootpath} 
+rustfmt:command = rustfmt +nightly rustfmt:pattern = set:**.rs black:command = black --config=black.toml - black:pattern = set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**" + +# Mercurial doesn't have any Go code, but if we did this is how we +# would configure `hg fix` for Go: +go:command = gofmt +go:pattern = set:**.go diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/FuzzedDataProvider.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/FuzzedDataProvider.h Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,368 @@ +//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// A single header library providing an utility class to break up an array of +// bytes. Whenever run on the same input, provides the same output, as long as +// its methods are called in the same order, with the same arguments. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ +#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// In addition to the comments below, the API is also briefly documented at +// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider +class FuzzedDataProvider +{ + public: + // |data| is an array of length |size| that the FuzzedDataProvider wraps + // to provide more granular access. |data| must outlive the + // FuzzedDataProvider. 
+ FuzzedDataProvider(const uint8_t *data, size_t size) + : data_ptr_(data), remaining_bytes_(size) + { + } + ~FuzzedDataProvider() = default; + + // Returns a std::vector containing |num_bytes| of input data. If fewer + // than |num_bytes| of data remain, returns a shorter std::vector + // containing all of the data that's left. Can be used with any byte + // sized type, such as char, unsigned char, uint8_t, etc. + template std::vector ConsumeBytes(size_t num_bytes) + { + num_bytes = std::min(num_bytes, remaining_bytes_); + return ConsumeBytes(num_bytes, num_bytes); + } + + // Similar to |ConsumeBytes|, but also appends the terminator value at + // the end of the resulting vector. Useful, when a mutable + // null-terminated C-string is needed, for example. But that is a rare + // case. Better avoid it, if possible, and prefer using |ConsumeBytes| + // or |ConsumeBytesAsString| methods. + template + std::vector ConsumeBytesWithTerminator(size_t num_bytes, + T terminator = 0) + { + num_bytes = std::min(num_bytes, remaining_bytes_); + std::vector result = + ConsumeBytes(num_bytes + 1, num_bytes); + result.back() = terminator; + return result; + } + + // Returns a std::string containing |num_bytes| of input data. Using + // this and + // |.c_str()| on the resulting string is the best way to get an + // immutable null-terminated C string. If fewer than |num_bytes| of data + // remain, returns a shorter std::string containing all of the data + // that's left. + std::string ConsumeBytesAsString(size_t num_bytes) + { + static_assert(sizeof(std::string::value_type) == + sizeof(uint8_t), + "ConsumeBytesAsString cannot convert the data to " + "a string."); + + num_bytes = std::min(num_bytes, remaining_bytes_); + std::string result( + reinterpret_cast( + data_ptr_), + num_bytes); + Advance(num_bytes); + return result; + } + + // Returns a number in the range [min, max] by consuming bytes from the + // input data. 
The value might not be uniformly distributed in the given + // range. If there's no input data left, always returns |min|. |min| + // must be less than or equal to |max|. + template T ConsumeIntegralInRange(T min, T max) + { + static_assert(std::is_integral::value, + "An integral type is required."); + static_assert(sizeof(T) <= sizeof(uint64_t), + "Unsupported integral type."); + + if (min > max) + abort(); + + // Use the biggest type possible to hold the range and the + // result. + uint64_t range = static_cast(max) - min; + uint64_t result = 0; + size_t offset = 0; + + while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && + remaining_bytes_ != 0) { + // Pull bytes off the end of the seed data. + // Experimentally, this seems to allow the fuzzer to + // more easily explore the input space. This makes + // sense, since it works by modifying inputs that caused + // new code to run, and this data is often used to + // encode length of data read by |ConsumeBytes|. + // Separating out read lengths makes it easier modify + // the contents of the data that is actually read. + --remaining_bytes_; + result = + (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; + offset += CHAR_BIT; + } + + // Avoid division by 0, in case |range + 1| results in overflow. + if (range != std::numeric_limits::max()) + result = result % (range + 1); + + return static_cast(min + result); + } + + // Returns a std::string of length from 0 to |max_length|. When it runs + // out of input data, returns what remains of the input. Designed to be + // more stable with respect to a fuzzer inserting characters than just + // picking a random length and then consuming that many bytes with + // |ConsumeBytes|. + std::string ConsumeRandomLengthString(size_t max_length) + { + // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", + // and maps "\" followed by anything else to the end of the + // string. 
As a result of this logic, a fuzzer can insert + // characters into the string, and the string will be lengthened + // to include those new characters, resulting in a more stable + // fuzzer than picking the length of a string independently from + // picking its contents. + std::string result; + + // Reserve the anticipated capaticity to prevent several + // reallocations. + result.reserve(std::min(max_length, remaining_bytes_)); + for (size_t i = 0; i < max_length && remaining_bytes_ != 0; + ++i) { + char next = ConvertUnsignedToSigned(data_ptr_[0]); + Advance(1); + if (next == '\\' && remaining_bytes_ != 0) { + next = + ConvertUnsignedToSigned(data_ptr_[0]); + Advance(1); + if (next != '\\') + break; + } + result += next; + } + + result.shrink_to_fit(); + return result; + } + + // Returns a std::vector containing all remaining bytes of the input + // data. + template std::vector ConsumeRemainingBytes() + { + return ConsumeBytes(remaining_bytes_); + } + + // Returns a std::string containing all remaining bytes of the input + // data. Prefer using |ConsumeRemainingBytes| unless you actually need a + // std::string object. + std::string ConsumeRemainingBytesAsString() + { + return ConsumeBytesAsString(remaining_bytes_); + } + + // Returns a number in the range [Type's min, Type's max]. The value + // might not be uniformly distributed in the given range. If there's no + // input data left, always returns |min|. + template T ConsumeIntegral() + { + return ConsumeIntegralInRange(std::numeric_limits::min(), + std::numeric_limits::max()); + } + + // Reads one byte and returns a bool, or false when no data remains. + bool ConsumeBool() + { + return 1 & ConsumeIntegral(); + } + + // Returns a copy of the value selected from the given fixed-size + // |array|. 
+ template + T PickValueInArray(const T (&array)[size]) + { + static_assert(size > 0, "The array must be non empty."); + return array[ConsumeIntegralInRange(0, size - 1)]; + } + + template + T PickValueInArray(std::initializer_list list) + { + // TODO(Dor1s): switch to static_assert once C++14 is allowed. + if (!list.size()) + abort(); + + return *(list.begin() + + ConsumeIntegralInRange(0, list.size() - 1)); + } + + // Returns an enum value. The enum must start at 0 and be contiguous. It + // must also contain |kMaxValue| aliased to its largest (inclusive) + // value. Such as: enum class Foo { SomeValue, OtherValue, kMaxValue = + // OtherValue }; + template T ConsumeEnum() + { + static_assert(std::is_enum::value, + "|T| must be an enum type."); + return static_cast(ConsumeIntegralInRange( + 0, static_cast(T::kMaxValue))); + } + + // Returns a floating point number in the range [0.0, 1.0]. If there's + // no input data left, always returns 0. + template T ConsumeProbability() + { + static_assert(std::is_floating_point::value, + "A floating point type is required."); + + // Use different integral types for different floating point + // types in order to provide better density of the resulting + // values. + using IntegralType = + typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), + uint32_t, uint64_t>::type; + + T result = static_cast(ConsumeIntegral()); + result /= + static_cast(std::numeric_limits::max()); + return result; + } + + // Returns a floating point value in the range [Type's lowest, Type's + // max] by consuming bytes from the input data. If there's no input data + // left, always returns approximately 0. + template T ConsumeFloatingPoint() + { + return ConsumeFloatingPointInRange( + std::numeric_limits::lowest(), + std::numeric_limits::max()); + } + + // Returns a floating point value in the given range by consuming bytes + // from the input data. If there's no input data left, returns |min|. 
+ // Note that |min| must be less than or equal to |max|. + template T ConsumeFloatingPointInRange(T min, T max) + { + if (min > max) + abort(); + + T range = .0; + T result = min; + constexpr T zero(.0); + if (max > zero && min < zero && + max > min + std::numeric_limits::max()) { + // The diff |max - min| would overflow the given + // floating point type. Use the half of the diff as the + // range and consume a bool to decide whether the result + // is in the first of the second part of the diff. + range = (max / 2.0) - (min / 2.0); + if (ConsumeBool()) { + result += range; + } + } else { + range = max - min; + } + + return result + range * ConsumeProbability(); + } + + // Reports the remaining bytes available for fuzzed input. + size_t remaining_bytes() + { + return remaining_bytes_; + } + + private: + FuzzedDataProvider(const FuzzedDataProvider &) = delete; + FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete; + + void Advance(size_t num_bytes) + { + if (num_bytes > remaining_bytes_) + abort(); + + data_ptr_ += num_bytes; + remaining_bytes_ -= num_bytes; + } + + template + std::vector ConsumeBytes(size_t size, size_t num_bytes_to_consume) + { + static_assert(sizeof(T) == sizeof(uint8_t), + "Incompatible data type."); + + // The point of using the size-based constructor below is to + // increase the odds of having a vector object with capacity + // being equal to the length. That part is always implementation + // specific, but at least both libc++ and libstdc++ allocate the + // requested number of bytes in that constructor, which seems to + // be a natural choice for other implementations as well. To + // increase the odds even more, we also call |shrink_to_fit| + // below. 
+ std::vector result(size); + if (size == 0) { + if (num_bytes_to_consume != 0) + abort(); + return result; + } + + std::memcpy(result.data(), data_ptr_, num_bytes_to_consume); + Advance(num_bytes_to_consume); + + // Even though |shrink_to_fit| is also implementation specific, + // we expect it to provide an additional assurance in case + // vector's constructor allocated a buffer which is larger than + // the actual amount of data we put inside it. + result.shrink_to_fit(); + return result; + } + + template TS ConvertUnsignedToSigned(TU value) + { + static_assert(sizeof(TS) == sizeof(TU), + "Incompatible data types."); + static_assert(!std::numeric_limits::is_signed, + "Source type must be unsigned."); + + // TODO(Dor1s): change to `if constexpr` once C++17 becomes + // mainstream. + if (std::numeric_limits::is_modulo) + return static_cast(value); + + // Avoid using implementation-defined unsigned to signer + // conversions. To learn more, see + // https://stackoverflow.com/questions/13150449. + if (value <= std::numeric_limits::max()) { + return static_cast(value); + } else { + constexpr auto TS_min = std::numeric_limits::min(); + return TS_min + static_cast(value - TS_min); + } + } + + const uint8_t *data_ptr_; + size_t remaining_bytes_; +}; + +#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ +// no-check-code since this is from a third party diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/Makefile --- a/contrib/fuzz/Makefile Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/Makefile Tue Jan 21 13:14:51 2020 -0500 @@ -1,184 +1,129 @@ CC = clang CXX = clang++ -all: bdiff mpatch xdiff +# By default, use our own standalone_fuzz_target_runner. +# This runner does no fuzzing, but simply executes the inputs +# provided via parameters. +# Run e.g. "make all LIB_FUZZING_ENGINE=/path/to/libFuzzer.a" +# to link the fuzzer(s) against a real fuzzing engine. +# +# OSS-Fuzz will define its own value for LIB_FUZZING_ENGINE. 
+LIB_FUZZING_ENGINE ?= standalone_fuzz_target_runner.o -fuzzutil.o: fuzzutil.cc fuzzutil.h - $(CXX) $(CXXFLAGS) -g -O1 \ - -std=c++17 \ - -I../../mercurial -c -o fuzzutil.o fuzzutil.cc +PYTHON_CONFIG ?= $$OUT/sanpy/bin/python-config + +CXXFLAGS += -Wno-deprecated-register -fuzzutil-oss-fuzz.o: fuzzutil.cc fuzzutil.h - $(CXX) $(CXXFLAGS) -std=c++17 \ - -I../../mercurial -c -o fuzzutil-oss-fuzz.o fuzzutil.cc +all: standalone_fuzz_target_runner.o oss-fuzz + +standalone_fuzz_target_runner.o: standalone_fuzz_target_runner.cc + +$$OUT/%_fuzzer_seed_corpus.zip: %_corpus.py + python $< $@ pyutil.o: pyutil.cc pyutil.h $(CXX) $(CXXFLAGS) -g -O1 \ - `$$OUT/sanpy/bin/python-config --cflags` \ + `$(PYTHON_CONFIG) --cflags` \ -I../../mercurial -c -o pyutil.o pyutil.cc -bdiff.o: ../../mercurial/bdiff.c - $(CC) $(CFLAGS) -fsanitize=fuzzer-no-link,address -c -o bdiff.o \ - ../../mercurial/bdiff.c - -bdiff: bdiff.cc bdiff.o fuzzutil.o - $(CXX) $(CXXFLAGS) -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ - -std=c++17 \ - -I../../mercurial bdiff.cc bdiff.o fuzzutil.o -o bdiff - bdiff-oss-fuzz.o: ../../mercurial/bdiff.c $(CC) $(CFLAGS) -c -o bdiff-oss-fuzz.o ../../mercurial/bdiff.c -bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o fuzzutil-oss-fuzz.o +bdiff_fuzzer: bdiff.cc bdiff-oss-fuzz.o $(CXX) $(CXXFLAGS) -std=c++17 -I../../mercurial bdiff.cc \ - bdiff-oss-fuzz.o fuzzutil-oss-fuzz.o -lFuzzingEngine -o \ + bdiff-oss-fuzz.o $(LIB_FUZZING_ENGINE) -o \ $$OUT/bdiff_fuzzer mpatch.o: ../../mercurial/mpatch.c $(CC) -g -O1 -fsanitize=fuzzer-no-link,address -c -o mpatch.o \ ../../mercurial/mpatch.c -mpatch: CXXFLAGS += -std=c++17 -mpatch: mpatch.cc mpatch.o fuzzutil.o - $(CXX) $(CXXFLAGS) -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ - -I../../mercurial mpatch.cc mpatch.o fuzzutil.o -o mpatch - mpatch-oss-fuzz.o: ../../mercurial/mpatch.c $(CC) $(CFLAGS) -c -o mpatch-oss-fuzz.o ../../mercurial/mpatch.c -mpatch_fuzzer: mpatch.cc mpatch-oss-fuzz.o 
fuzzutil-oss-fuzz.o +mpatch_fuzzer: mpatch.cc mpatch-oss-fuzz.o $$OUT/mpatch_fuzzer_seed_corpus.zip $(CXX) $(CXXFLAGS) -std=c++17 -I../../mercurial mpatch.cc \ - mpatch-oss-fuzz.o fuzzutil-oss-fuzz.o -lFuzzingEngine -o \ + mpatch-oss-fuzz.o $(LIB_FUZZING_ENGINE) -o \ $$OUT/mpatch_fuzzer -mpatch_corpus.zip: - python mpatch_corpus.py $$OUT/mpatch_fuzzer_seed_corpus.zip - -x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h - $(CC) -g -O1 -fsanitize=fuzzer-no-link,address -c \ - -o $@ \ - $< - -xdiff: CXXFLAGS += -std=c++17 -xdiff: xdiff.cc xdiffi.o xprepare.o xutils.o fuzzutil.o - $(CXX) $(CXXFLAGS) -DHG_FUZZER_INCLUDE_MAIN=1 -g -O1 -fsanitize=fuzzer-no-link,address \ - -I../../mercurial xdiff.cc \ - xdiffi.o xprepare.o xutils.o fuzzutil.o -o xdiff - fuzz-x%.o: ../../mercurial/thirdparty/xdiff/x%.c ../../mercurial/thirdparty/xdiff/*.h $(CC) $(CFLAGS) -c \ -o $@ \ $< -xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o +xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o $(CXX) $(CXXFLAGS) -std=c++17 -I../../mercurial xdiff.cc \ - fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \ - -lFuzzingEngine -o $$OUT/xdiff_fuzzer - -manifest.o: ../../mercurial/cext/manifest.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o manifest.o ../../mercurial/cext/manifest.c - -charencode.o: ../../mercurial/cext/charencode.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o charencode.o ../../mercurial/cext/charencode.c + fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o \ + $(LIB_FUZZING_ENGINE) -o $$OUT/xdiff_fuzzer -parsers.o: ../../mercurial/cext/parsers.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o parsers.o ../../mercurial/cext/parsers.c - -dirs.o: ../../mercurial/cext/dirs.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o 
dirs.o ../../mercurial/cext/dirs.c +parsers-%.o: ../../mercurial/cext/%.c + $(CC) -I../../mercurial `$(PYTHON_CONFIG) --cflags` $(CFLAGS) -c \ + -o $@ $< -pathencode.o: ../../mercurial/cext/pathencode.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o pathencode.o ../../mercurial/cext/pathencode.c +PARSERS_OBJS=parsers-manifest.o parsers-charencode.o parsers-parsers.o parsers-dirs.o parsers-pathencode.o parsers-revlog.o -revlog.o: ../../mercurial/cext/revlog.c - $(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ - -I../../mercurial \ - -c -o revlog.o ../../mercurial/cext/revlog.c - -dirs_fuzzer: dirs.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ +dirs_fuzzer: dirs.cc pyutil.o $(PARSERS_OBJS) $$OUT/dirs_fuzzer_seed_corpus.zip + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial dirs.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/dirs_fuzzer -fncache_fuzzer: fncache.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ +fncache_fuzzer: fncache.cc + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial fncache.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/fncache_fuzzer -jsonescapeu8fast_fuzzer: jsonescapeu8fast.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ 
+jsonescapeu8fast_fuzzer: jsonescapeu8fast.cc pyutil.o $(PARSERS_OBJS) + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial jsonescapeu8fast.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/jsonescapeu8fast_fuzzer -manifest_corpus.zip: - python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip - -manifest_fuzzer: manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ +manifest_fuzzer: manifest.cc pyutil.o $(PARSERS_OBJS) $$OUT/manifest_fuzzer_seed_corpus.zip + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial manifest.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/manifest_fuzzer -revlog_fuzzer: revlog.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ +revlog_fuzzer: revlog.cc pyutil.o $(PARSERS_OBJS) $$OUT/revlog_fuzzer_seed_corpus.zip + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial revlog.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/revlog_fuzzer -revlog_corpus.zip: - python revlog_corpus.py $$OUT/revlog_fuzzer_seed_corpus.zip - -dirstate_fuzzer: dirstate.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) 
`$$OUT/sanpy/bin/python-config --cflags` \ +dirstate_fuzzer: dirstate.cc pyutil.o $(PARSERS_OBJS) $$OUT/dirstate_fuzzer_seed_corpus.zip + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial dirstate.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/dirstate_fuzzer -dirstate_corpus.zip: - python dirstate_corpus.py $$OUT/dirstate_fuzzer_seed_corpus.zip - -fm1readmarkers_fuzzer: fm1readmarkers.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o - $(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \ +fm1readmarkers_fuzzer: fm1readmarkers.cc pyutil.o $(PARSERS_OBJS) $$OUT/fm1readmarkers_fuzzer_seed_corpus.zip + $(CXX) $(CXXFLAGS) `$(PYTHON_CONFIG) --cflags` \ -Wno-register -Wno-macro-redefined \ -I../../mercurial fm1readmarkers.cc \ - manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o pyutil.o \ - -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \ + pyutil.o $(PARSERS_OBJS) \ + $(LIB_FUZZING_ENGINE) `$(PYTHON_CONFIG) --ldflags` \ -o $$OUT/fm1readmarkers_fuzzer -fm1readmarkers_corpus.zip: - python fm1readmarkers_corpus.py $$OUT/fm1readmarkers_fuzzer_seed_corpus.zip - clean: $(RM) *.o *_fuzzer \ bdiff \ mpatch \ xdiff -oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer dirs_fuzzer fncache_fuzzer jsonescapeu8fast_fuzzer manifest_fuzzer manifest_corpus.zip revlog_fuzzer revlog_corpus.zip dirstate_fuzzer dirstate_corpus.zip fm1readmarkers_fuzzer fm1readmarkers_corpus.zip +oss-fuzz: bdiff_fuzzer mpatch_fuzzer xdiff_fuzzer dirs_fuzzer fncache_fuzzer jsonescapeu8fast_fuzzer manifest_fuzzer revlog_fuzzer dirstate_fuzzer fm1readmarkers_fuzzer .PHONY: all clean oss-fuzz diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/bdiff.cc --- a/contrib/fuzz/bdiff.cc Thu Jan 09 14:19:20 
2020 -0500 +++ b/contrib/fuzz/bdiff.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,22 +9,25 @@ #include #include -#include "fuzzutil.h" +#include "FuzzedDataProvider.h" extern "C" { #include "bdiff.h" +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + return 0; +} + int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - auto maybe_inputs = SplitInputs(Data, Size); - if (!maybe_inputs) { - return 0; - } - auto inputs = std::move(maybe_inputs.value()); + FuzzedDataProvider provider(Data, Size); + std::string left = provider.ConsumeRandomLengthString(Size); + std::string right = provider.ConsumeRemainingBytesAsString(); struct bdiff_line *a, *b; - int an = bdiff_splitlines(inputs.left.get(), inputs.left_size, &a); - int bn = bdiff_splitlines(inputs.right.get(), inputs.right_size, &b); + int an = bdiff_splitlines(left.c_str(), left.size(), &a); + int bn = bdiff_splitlines(right.c_str(), right.size(), &b); struct bdiff_hunk l; bdiff_diff(a, an, b, bn, &l); free(a); @@ -33,12 +36,4 @@ return 0; // Non-zero return values are reserved for future use. } -#ifdef HG_FUZZER_INCLUDE_MAIN -int main(int argc, char **argv) -{ - const char data[] = "asdf"; - return LLVMFuzzerTestOneInput((const uint8_t *)data, 4); -} -#endif - } // extern "C" diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/dirs.cc --- a/contrib/fuzz/dirs.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/dirs.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,16 +9,15 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import dirs + code = (PYCODETYPE *)Py_CompileString(R"py( try: files = mdata.split('\n') - d = dirs(files) + d = parsers.dirs(files) list(d) 'a' in d if files: @@ -29,7 +28,7 @@ # to debug failures. 
# print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/dirs_corpus.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/dirs_corpus.py Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,29 @@ +from __future__ import absolute_import, print_function + +import argparse +import zipfile + +ap = argparse.ArgumentParser() +ap.add_argument("out", metavar="some.zip", type=str, nargs=1) +args = ap.parse_args() + +with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: + zf.writestr( + "greek-tree", + "\n".join( + [ + "iota", + "A/mu", + "A/B/lambda", + "A/B/E/alpha", + "A/B/E/beta", + "A/D/gamma", + "A/D/G/pi", + "A/D/G/rho", + "A/D/G/tau", + "A/D/H/chi", + "A/D/H/omega", + "A/D/H/psi", + ] + ), + ) diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/dirstate.cc --- a/contrib/fuzz/dirstate.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/dirstate.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,24 +9,23 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import parse_dirstate + code = (PYCODETYPE *)Py_CompileString(R"py( try: dmap = {} copymap = {} - p = parse_dirstate(dmap, copymap, data) + p = parsers.parse_dirstate(dmap, copymap, data) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. 
# print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/dirstate_corpus.py --- a/contrib/fuzz/dirstate_corpus.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/dirstate_corpus.py Tue Jan 21 13:14:51 2020 -0500 @@ -13,5 +13,5 @@ with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: if os.path.exists(dirstate): - with open(dirstate) as f: + with open(dirstate, 'rb') as f: zf.writestr("dirstate", f.read()) diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/fm1readmarkers.cc --- a/contrib/fuzz/fm1readmarkers.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/fm1readmarkers.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,13 +9,12 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import fm1readmarkers + code = (PYCODETYPE *)Py_CompileString(R"py( def maybeint(s, default): try: return int(s) @@ -31,14 +30,14 @@ else: offset = stop = 0 offset, stop = maybeint(offset, 0), maybeint(stop, len(data)) - fm1readmarkers(data, offset, stop) + parsers.fm1readmarkers(data, offset, stop) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. 
# print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/fncache.cc --- a/contrib/fuzz/fncache.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/fncache.cc Tue Jan 21 13:14:51 2020 -0500 @@ -10,29 +10,20 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import ( - isasciistr, - asciilower, - asciiupper, - encodedir, - pathencode, - lowerencode, -) - + code = (PYCODETYPE *)Py_CompileString(R"py( try: for fn in ( - isasciistr, - asciilower, - asciiupper, - encodedir, - pathencode, - lowerencode, + parsers.isasciistr, + parsers.asciilower, + parsers.asciiupper, + parsers.encodedir, + parsers.pathencode, + parsers.lowerencode, ): try: fn(data) @@ -53,7 +44,7 @@ # to debug failures. # print(e) )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); if (!code) { std::cerr << "failed to compile Python code!" << std::endl; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/fuzzutil.cc --- a/contrib/fuzz/fuzzutil.cc Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#include "fuzzutil.h" - -#include -#include - -contrib::optional SplitInputs(const uint8_t *Data, size_t Size) -{ - if (!Size) { - return contrib::nullopt; - } - // figure out a random point in [0, Size] to split our input. - size_t left_size = (Data[0] / 255.0) * (Size - 1); - - // Copy inputs to new allocations so if bdiff over-reads - // AddressSanitizer can detect it. 
- std::unique_ptr left(new char[left_size]); - std::memcpy(left.get(), Data + 1, left_size); - // right starts at the next byte after left ends - size_t right_size = Size - (left_size + 1); - std::unique_ptr right(new char[right_size]); - std::memcpy(right.get(), Data + 1 + left_size, right_size); - LOG(2) << "inputs are " << left_size << " and " << right_size - << " bytes" << std::endl; - two_inputs result = {std::move(right), right_size, std::move(left), - left_size}; - return result; -} diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/fuzzutil.h --- a/contrib/fuzz/fuzzutil.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/fuzzutil.h Tue Jan 21 13:14:51 2020 -0500 @@ -34,14 +34,4 @@ if (level <= DEBUG) \ std::cout -struct two_inputs { - std::unique_ptr right; - size_t right_size; - std::unique_ptr left; - size_t left_size; -}; - -/* Split a non-zero-length input into two inputs. */ -contrib::optional SplitInputs(const uint8_t *Data, size_t Size); - #endif /* CONTRIB_FUZZ_FUZZUTIL_H */ diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/jsonescapeu8fast.cc --- a/contrib/fuzz/jsonescapeu8fast.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/jsonescapeu8fast.cc Tue Jan 21 13:14:51 2020 -0500 @@ -5,29 +5,27 @@ #include "pyutil.h" -#include #include #include +#include "FuzzedDataProvider.h" extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import jsonescapeu8fast - + code = (PYCODETYPE *)Py_CompileString(R"py( try: - jsonescapeu8fast(data, paranoid) + parsers.jsonescapeu8fast(data, paranoid) except Exception as e: pass # uncomment this print if you're editing this Python code # to debug failures. # print(e) )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); if (!code) { std::cerr << "failed to compile Python code!" 
<< std::endl; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/manifest.cc --- a/contrib/fuzz/manifest.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/manifest.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,15 +9,14 @@ extern "C" { -static PyCodeObject *code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import lazymanifest + code = (PYCODETYPE *)Py_CompileString(R"py( try: - lm = lazymanifest(mdata) + lm = parsers.lazymanifest(mdata) # iterate the whole thing, which causes the code to fully parse # every line in the manifest for e, _, _ in lm.iterentries(): @@ -41,7 +40,7 @@ # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/mpatch.cc --- a/contrib/fuzz/mpatch.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/mpatch.cc Tue Jan 21 13:14:51 2020 -0500 @@ -14,6 +14,11 @@ #include "fuzzutil.h" +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + return 0; +} + // To avoid having too many OOMs from the fuzzer infrastructure, we'll // skip patch application if the resulting fulltext would be bigger // than 10MiB. @@ -106,17 +111,4 @@ return 0; } -#ifdef HG_FUZZER_INCLUDE_MAIN -int main(int argc, char **argv) -{ - // One text, one patch. 
- const char data[] = "\x02\x00\0x1\x00\x0d" - // base text - "a" - // binary delta that will append a single b - "\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01b"; - return LLVMFuzzerTestOneInput((const uint8_t *)data, 19); -} -#endif - } // extern "C" diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/mpatch_corpus.py --- a/contrib/fuzz/mpatch_corpus.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/mpatch_corpus.py Tue Jan 21 13:14:51 2020 -0500 @@ -2,6 +2,7 @@ import argparse import struct +import sys import zipfile from mercurial import ( @@ -14,34 +15,48 @@ args = ap.parse_args() -class deltafrag(object): +if sys.version_info[0] < 3: + + class py2reprhack(object): + def __repr__(self): + """Py2 calls __repr__ for `bytes(foo)`, forward to __bytes__""" + return self.__bytes__() + + +else: + + class py2reprhack(object): + """Not needed on py3.""" + + +class deltafrag(py2reprhack): def __init__(self, start, end, data): self.start = start self.end = end self.data = data - def __str__(self): + def __bytes__(self): return ( struct.pack(">lll", self.start, self.end, len(self.data)) + self.data ) -class delta(object): +class delta(py2reprhack): def __init__(self, frags): self.frags = frags - def __str__(self): - return ''.join(str(f) for f in self.frags) + def __bytes__(self): + return b''.join(bytes(f) for f in self.frags) -class corpus(object): +class corpus(py2reprhack): def __init__(self, base, deltas): self.base = base self.deltas = deltas - def __str__(self): - deltas = [str(d) for d in self.deltas] + def __bytes__(self): + deltas = [bytes(d) for d in self.deltas] parts = ( [ struct.pack(">B", len(deltas) + 1), @@ -51,300 +66,301 @@ + [self.base] + deltas ) - return "".join(parts) + return b''.join(parts) with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: # Manually constructed entries zf.writestr( - "one_delta_applies", str(corpus('a', [delta([deltafrag(0, 1, 'b')])])) + "one_delta_applies", + bytes(corpus(b'a', [delta([deltafrag(0, 1, 
b'b')])])), ) zf.writestr( "one_delta_starts_late", - str(corpus('a', [delta([deltafrag(3, 1, 'b')])])), + bytes(corpus(b'a', [delta([deltafrag(3, 1, b'b')])])), ) zf.writestr( "one_delta_ends_late", - str(corpus('a', [delta([deltafrag(0, 20, 'b')])])), + bytes(corpus(b'a', [delta([deltafrag(0, 20, b'b')])])), ) try: # Generated from repo data - r = hg.repository(uimod.ui(), '../..') - fl = r.file('mercurial/manifest.py') + r = hg.repository(uimod.ui(), b'../..') + fl = r.file(b'mercurial/manifest.py') rl = getattr(fl, '_revlog', fl) bins = rl._chunks(rl._deltachain(10)[0]) - zf.writestr('manifest_py_rev_10', str(corpus(bins[0], bins[1:]))) + zf.writestr('manifest_py_rev_10', bytes(corpus(bins[0], bins[1:]))) except: # skip this, so no re-raises print('skipping seed file from repo data') # Automatically discovered by running the fuzzer zf.writestr( - "mpatch_decode_old_overread", "\x02\x00\x00\x00\x02\x00\x00\x00" + "mpatch_decode_old_overread", b"\x02\x00\x00\x00\x02\x00\x00\x00" ) # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=8876 zf.writestr( "mpatch_ossfuzz_getbe32_ubsan", - "\x02\x00\x00\x00\x0c \xff\xff\xff\xff ", + b"\x02\x00\x00\x00\x0c \xff\xff\xff\xff ", ) zf.writestr( "mpatch_apply_over_memcpy", - '\x13\x01\x00\x05\xd0\x00\x00\x00\x00\x00\x00\x00\x00\n \x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8c\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00)\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00A\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x94\x18' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\xff\xfa\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x94\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xfa\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00se\x00\x00' - '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - '\x00\x00\x00\x00', + b'\x13\x01\x00\x05\xd0\x00\x00\x00\x00\x00\x00\x00\x00\n \x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8c\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00)\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00A\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x94\x18' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\xff\xfa\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x94\x18\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xfa\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00se\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00', ) diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/pyutil.cc --- a/contrib/fuzz/pyutil.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/pyutil.cc Tue Jan 21 13:14:51 2020 -0500 @@ -1,21 +1,31 @@ #include "pyutil.h" +#include #include namespace contrib { +#if PY_MAJOR_VERSION >= 3 +#define HG_FUZZER_PY3 1 +PyMODINIT_FUNC PyInit_parsers(void); +#else +PyMODINIT_FUNC initparsers(void); +#endif + static char cpypath[8192] = 
"\0"; static PyObject *mainmod; static PyObject *globals; -/* TODO: use Python 3 for this fuzzing? */ -PyMODINIT_FUNC initparsers(void); - void initpy(const char *cselfpath) { +#ifdef HG_FUZZER_PY3 + const std::string subdir = "/sanpy/lib/python3.7"; +#else const std::string subdir = "/sanpy/lib/python2.7"; +#endif + /* HACK ALERT: we need a full Python installation built without pymalloc and with ASAN, so we dump one in $OUT/sanpy/lib/python2.7. This helps us wire that up. */ @@ -24,7 +34,11 @@ auto pos = selfpath.rfind("/"); if (pos == std::string::npos) { char wd[8192]; - getcwd(wd, 8192); + if (!getcwd(wd, 8192)) { + std::cerr << "Failed to call getcwd: errno " << errno + << std::endl; + exit(1); + } pypath = std::string(wd) + subdir; } else { pypath = selfpath.substr(0, pos) + subdir; @@ -34,11 +48,24 @@ setenv("PYTHONNOUSERSITE", "1", 1); /* prevent Python from looking up users in the fuzz environment */ setenv("PYTHONUSERBASE", cpypath, 1); +#ifdef HG_FUZZER_PY3 + std::wstring wcpypath(pypath.begin(), pypath.end()); + Py_SetPythonHome(wcpypath.c_str()); +#else Py_SetPythonHome(cpypath); +#endif Py_InitializeEx(0); mainmod = PyImport_AddModule("__main__"); globals = PyModule_GetDict(mainmod); + +#ifdef HG_FUZZER_PY3 + PyObject *mod = PyInit_parsers(); +#else initparsers(); + PyObject *mod = PyImport_ImportModule("parsers"); +#endif + + PyDict_SetItemString(globals, "parsers", mod); } PyObject *pyglobals() diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/pyutil.h --- a/contrib/fuzz/pyutil.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/pyutil.h Tue Jan 21 13:14:51 2020 -0500 @@ -1,5 +1,11 @@ #include +#if PY_MAJOR_VERSION >= 3 +#define PYCODETYPE PyObject +#else +#define PYCODETYPE PyCodeObject +#endif + namespace contrib { diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/revlog.cc --- a/contrib/fuzz/revlog.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/revlog.cc Tue Jan 21 13:14:51 2020 -0500 @@ -9,16 +9,15 @@ extern "C" { -static PyCodeObject 
*code; +static PYCODETYPE *code; extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { contrib::initpy(*argv[0]); - code = (PyCodeObject *)Py_CompileString(R"py( -from parsers import parse_index2 + code = (PYCODETYPE *)Py_CompileString(R"py( for inline in (True, False): try: - index, cache = parse_index2(data, inline) + index, cache = parsers.parse_index2(data, inline) index.slicechunktodensity(list(range(len(index))), 0.5, 262144) index.stats() index.findsnapshots({}, 0) @@ -35,7 +34,7 @@ # to debug failures. # print e )py", - "fuzzer", Py_file_input); + "fuzzer", Py_file_input); return 0; } diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/revlog_corpus.py --- a/contrib/fuzz/revlog_corpus.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/revlog_corpus.py Tue Jan 21 13:14:51 2020 -0500 @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function +from __future__ import absolute_import import argparse import os @@ -16,13 +16,10 @@ reporoot, '.hg', 'store', 'data', 'contrib', 'fuzz', 'mpatch.cc.i' ) -print(changelog, os.path.exists(changelog)) -print(contributing, os.path.exists(contributing)) - with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf: if os.path.exists(changelog): - with open(changelog) as f: + with open(changelog, 'rb') as f: zf.writestr("00changelog.i", f.read()) if os.path.exists(contributing): - with open(contributing) as f: + with open(contributing, 'rb') as f: zf.writestr("contributing.i", f.read()) diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/standalone_fuzz_target_runner.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/fuzz/standalone_fuzz_target_runner.cc Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,45 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); + +// Example of a standalone runner for "fuzz targets". 
+// It reads all files passed as parameters and feeds their contents +// one by one into the fuzz target (LLVMFuzzerTestOneInput). +// This runner does not do any fuzzing, but allows us to run the fuzz target +// on the test corpus (e.g. "do_stuff_test_data") or on a single file, +// e.g. the one that comes from a bug report. + +#include +#include +#include +#include + +// Forward declare the "fuzz target" interface. +// We deliberately keep this inteface simple and header-free. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); + +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv); + +int main(int argc, char **argv) +{ + LLVMFuzzerInitialize(&argc, &argv); + + for (int i = 1; i < argc; i++) { + std::ifstream in(argv[i]); + in.seekg(0, in.end); + size_t length = in.tellg(); + in.seekg(0, in.beg); + std::cout << "Reading " << length << " bytes from " << argv[i] + << std::endl; + // Allocate exactly length bytes so that we reliably catch + // buffer overflows. + std::vector bytes(length); + in.read(bytes.data(), bytes.size()); + assert(in); + LLVMFuzzerTestOneInput( + reinterpret_cast(bytes.data()), + bytes.size()); + std::cout << "Execution successful" << std::endl; + } + return 0; +} +// no-check-code since this is from a third party diff -r 61881b170140 -r 84a0102c05c7 contrib/fuzz/xdiff.cc --- a/contrib/fuzz/xdiff.cc Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/fuzz/xdiff.cc Tue Jan 21 13:14:51 2020 -0500 @@ -10,10 +10,15 @@ #include #include -#include "fuzzutil.h" +#include "FuzzedDataProvider.h" extern "C" { +int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + return 0; +} + int hunk_consumer(long a1, long a2, long b1, long b2, void *priv) { // TODO: probably also test returning -1 from this when things break? 
@@ -27,17 +32,15 @@ if (Size > 100000) { return 0; } - auto maybe_inputs = SplitInputs(Data, Size); - if (!maybe_inputs) { - return 0; - } - auto inputs = std::move(maybe_inputs.value()); + FuzzedDataProvider provider(Data, Size); + std::string left = provider.ConsumeRandomLengthString(Size); + std::string right = provider.ConsumeRemainingBytesAsString(); mmfile_t a, b; - a.ptr = inputs.left.get(); - a.size = inputs.left_size; - b.ptr = inputs.right.get(); - b.size = inputs.right_size; + a.ptr = (char *)left.c_str(); + a.size = left.size(); + b.ptr = (char *)right.c_str(); + b.size = right.size(); xpparam_t xpp = { XDF_INDENT_HEURISTIC, /* flags */ }; @@ -52,12 +55,4 @@ return 0; // Non-zero return values are reserved for future use. } -#ifdef HG_FUZZER_INCLUDE_MAIN -int main(int argc, char **argv) -{ - const char data[] = "asdf"; - return LLVMFuzzerTestOneInput((const uint8_t *)data, 4); -} -#endif - } // extern "C" diff -r 61881b170140 -r 84a0102c05c7 contrib/hg-ssh --- a/contrib/hg-ssh Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/hg-ssh Tue Jan 21 13:14:51 2020 -0500 @@ -35,7 +35,9 @@ import sys # enable importing on demand to reduce startup time -import hgdemandimport ; hgdemandimport.enable() +import hgdemandimport + +hgdemandimport.enable() from mercurial import ( dispatch, @@ -43,6 +45,7 @@ ui as uimod, ) + def main(): # Prevent insertion/deletion of CRs dispatch.initstdio() @@ -56,9 +59,10 @@ args.pop(0) else: break - allowed_paths = [os.path.normpath(os.path.join(cwd, - os.path.expanduser(path))) - for path in args] + allowed_paths = [ + os.path.normpath(os.path.join(cwd, os.path.expanduser(path))) + for path in args + ] orig_cmd = os.getenv('SSH_ORIGINAL_COMMAND', '?') try: cmdargv = shlex.split(orig_cmd) @@ -75,10 +79,18 @@ if readonly: if not req.ui: req.ui = uimod.ui.load() - req.ui.setconfig(b'hooks', b'pretxnopen.hg-ssh', - b'python:__main__.rejectpush', b'hg-ssh') - req.ui.setconfig(b'hooks', b'prepushkey.hg-ssh', - b'python:__main__.rejectpush', 
b'hg-ssh') + req.ui.setconfig( + b'hooks', + b'pretxnopen.hg-ssh', + b'python:__main__.rejectpush', + b'hg-ssh', + ) + req.ui.setconfig( + b'hooks', + b'prepushkey.hg-ssh', + b'python:__main__.rejectpush', + b'hg-ssh', + ) dispatch.dispatch(req) else: sys.stderr.write('Illegal repository "%s"\n' % repo) @@ -87,11 +99,13 @@ sys.stderr.write('Illegal command "%s"\n' % orig_cmd) sys.exit(255) + def rejectpush(ui, **kwargs): ui.warn((b"Permission denied\n")) # mercurial hooks use unix process conventions for hook return values # so a truthy return means failure return True + if __name__ == '__main__': main() diff -r 61881b170140 -r 84a0102c05c7 contrib/hgclient.py --- a/contrib/hgclient.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/hgclient.py Tue Jan 21 13:14:51 2020 -0500 @@ -39,7 +39,7 @@ cmdline.extend(extraargs) def tonative(cmdline): - if os.name != r'nt': + if os.name != 'nt': return cmdline return [arg.decode("utf-8") for arg in cmdline] diff -r 61881b170140 -r 84a0102c05c7 contrib/hgperf --- a/contrib/hgperf Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/hgperf Tue Jan 21 13:14:51 2020 -0500 @@ -37,18 +37,24 @@ if libdir != '@' 'LIBDIR' '@': if not os.path.isabs(libdir): - libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), - libdir) + libdir = os.path.join( + os.path.dirname(os.path.realpath(__file__)), libdir + ) libdir = os.path.abspath(libdir) sys.path.insert(0, libdir) # enable importing on demand to reduce startup time try: - from mercurial import demandimport; demandimport.enable() + from mercurial import demandimport + + demandimport.enable() except ImportError: import sys - sys.stderr.write("abort: couldn't find mercurial libraries in [%s]\n" % - ' '.join(sys.path)) + + sys.stderr.write( + "abort: couldn't find mercurial libraries in [%s]\n" + % ' '.join(sys.path) + ) sys.stderr.write("(check your install and PYTHONPATH)\n") sys.exit(-1) @@ -57,6 +63,7 @@ util, ) + def timer(func, title=None): results = [] begin = util.timer() @@ -69,7 
+76,7 @@ ostop = os.times() count += 1 a, b = ostart, ostop - results.append((cstop - cstart, b[0] - a[0], b[1]-a[1])) + results.append((cstop - cstart, b[0] - a[0], b[1] - a[1])) if cstop - begin > 3 and count >= 100: break if cstop - begin > 10 and count >= 3: @@ -79,19 +86,27 @@ if r: sys.stderr.write("! result: %s\n" % r) m = min(results) - sys.stderr.write("! wall %f comb %f user %f sys %f (best of %d)\n" - % (m[0], m[1] + m[2], m[1], m[2], count)) + sys.stderr.write( + "! wall %f comb %f user %f sys %f (best of %d)\n" + % (m[0], m[1] + m[2], m[1], m[2], count) + ) + orgruncommand = dispatch.runcommand + def runcommand(lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions): ui.pushbuffer() lui.pushbuffer() - timer(lambda : orgruncommand(lui, repo, cmd, fullargs, ui, - options, d, cmdpats, cmdoptions)) + timer( + lambda: orgruncommand( + lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions + ) + ) ui.popbuffer() lui.popbuffer() + dispatch.runcommand = runcommand dispatch.run() diff -r 61881b170140 -r 84a0102c05c7 contrib/hgweb.fcgi --- a/contrib/hgweb.fcgi Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/hgweb.fcgi Tue Jan 21 13:14:51 2020 -0500 @@ -7,13 +7,16 @@ # Uncomment and adjust if Mercurial is not installed system-wide # (consult "installed modules" path from 'hg debuginstall'): -#import sys; sys.path.insert(0, "/path/to/python/lib") +# import sys; sys.path.insert(0, "/path/to/python/lib") # Uncomment to send python tracebacks to the browser if an error occurs: -#import cgitb; cgitb.enable() +# import cgitb; cgitb.enable() -from mercurial import demandimport; demandimport.enable() +from mercurial import demandimport + +demandimport.enable() from mercurial.hgweb import hgweb from flup.server.fcgi import WSGIServer + application = hgweb(config) WSGIServer(application).run() diff -r 61881b170140 -r 84a0102c05c7 contrib/import-checker.py --- a/contrib/import-checker.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/import-checker.py Tue Jan 21 
13:14:51 2020 -0500 @@ -535,6 +535,8 @@ if fullname != '__future__': if not fullname or ( fullname in stdlib_modules + # allow standard 'from typing import ...' style + and fullname.startswith('.') and fullname not in localmods and fullname + '.__init__' not in localmods ): diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/docker/ubuntu.template --- a/contrib/packaging/docker/ubuntu.template Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/docker/ubuntu.template Tue Jan 21 13:14:51 2020 -0500 @@ -10,7 +10,8 @@ dh-python \ less \ python \ - python-all-dev \ - python-docutils \ + python3-all \ + python3-all-dev \ + python3-docutils \ unzip \ zip diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hg-docker --- a/contrib/packaging/hg-docker Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/hg-docker Tue Jan 21 13:14:51 2020 -0500 @@ -11,6 +11,7 @@ import subprocess import sys + def get_docker() -> str: docker = shutil.which('docker.io') or shutil.which('docker') if not docker: @@ -21,15 +22,16 @@ out = subprocess.check_output([docker, '-h'], stderr=subprocess.STDOUT) if b'Jansens' in out: - print('%s is the Docking System Tray; try installing docker.io' % - docker) + print( + '%s is the Docking System Tray; try installing docker.io' + % docker + ) sys.exit(1) except subprocess.CalledProcessError as e: print('error calling `%s -h`: %s' % (docker, e.output)) sys.exit(1) - out = subprocess.check_output([docker, 'version'], - stderr=subprocess.STDOUT) + out = subprocess.check_output([docker, 'version'], stderr=subprocess.STDOUT) lines = out.splitlines() if not any(l.startswith((b'Client:', b'Client version:')) for l in lines): @@ -42,6 +44,7 @@ return docker + def get_dockerfile(path: pathlib.Path, args: list) -> bytes: with path.open('rb') as fh: df = fh.read() @@ -51,6 +54,7 @@ return df + def build_docker_image(dockerfile: pathlib.Path, params: list, tag: str): """Build a Docker image from a templatized Dockerfile.""" docker = get_docker() @@ -65,9 
+69,12 @@ args = [ docker, 'build', - '--build-arg', 'http_proxy', - '--build-arg', 'https_proxy', - '--tag', tag, + '--build-arg', + 'http_proxy', + '--build-arg', + 'https_proxy', + '--tag', + tag, '-', ] @@ -76,8 +83,10 @@ p.communicate(input=dockerfile) if p.returncode: raise subprocess.CalledProcessException( - p.returncode, 'failed to build docker image: %s %s' - % (p.stdout, p.stderr)) + p.returncode, + 'failed to build docker image: %s %s' % (p.stdout, p.stderr), + ) + def command_build(args): build_args = [] @@ -85,13 +94,13 @@ k, v = arg.split('=', 1) build_args.append((k.encode('utf-8'), v.encode('utf-8'))) - build_docker_image(pathlib.Path(args.dockerfile), - build_args, - args.tag) + build_docker_image(pathlib.Path(args.dockerfile), build_args, args.tag) + def command_docker(args): print(get_docker()) + def main() -> int: parser = argparse.ArgumentParser() @@ -99,9 +108,12 @@ build = subparsers.add_parser('build', help='Build a Docker image') build.set_defaults(func=command_build) - build.add_argument('--build-arg', action='append', default=[], - help='Substitution to perform in Dockerfile; ' - 'format: key=value') + build.add_argument( + '--build-arg', + action='append', + default=[], + help='Substitution to perform in Dockerfile; ' 'format: key=value', + ) build.add_argument('dockerfile', help='path to Dockerfile to use') build.add_argument('tag', help='Tag to apply to created image') @@ -112,5 +124,6 @@ return args.func(args) + if __name__ == '__main__': sys.exit(main()) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hgpackaging/cli.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/hgpackaging/cli.py Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,153 @@ +# cli.py - Command line interface for automation +# +# Copyright 2019 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +# no-check-code because Python 3 native. 
+ +import argparse +import os +import pathlib + +from . import ( + inno, + wix, +) + +HERE = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) +SOURCE_DIR = HERE.parent.parent.parent + + +def build_inno(python=None, iscc=None, version=None): + if not os.path.isabs(python): + raise Exception("--python arg must be an absolute path") + + if iscc: + iscc = pathlib.Path(iscc) + else: + iscc = ( + pathlib.Path(os.environ["ProgramFiles(x86)"]) + / "Inno Setup 5" + / "ISCC.exe" + ) + + build_dir = SOURCE_DIR / "build" + + inno.build( + SOURCE_DIR, build_dir, pathlib.Path(python), iscc, version=version, + ) + + +def build_wix( + name=None, + python=None, + version=None, + sign_sn=None, + sign_cert=None, + sign_password=None, + sign_timestamp_url=None, + extra_packages_script=None, + extra_wxs=None, + extra_features=None, +): + fn = wix.build_installer + kwargs = { + "source_dir": SOURCE_DIR, + "python_exe": pathlib.Path(python), + "version": version, + } + + if not os.path.isabs(python): + raise Exception("--python arg must be an absolute path") + + if extra_packages_script: + kwargs["extra_packages_script"] = extra_packages_script + if extra_wxs: + kwargs["extra_wxs"] = dict( + thing.split("=") for thing in extra_wxs.split(",") + ) + if extra_features: + kwargs["extra_features"] = extra_features.split(",") + + if sign_sn or sign_cert: + fn = wix.build_signed_installer + kwargs["name"] = name + kwargs["subject_name"] = sign_sn + kwargs["cert_path"] = sign_cert + kwargs["cert_password"] = sign_password + kwargs["timestamp_url"] = sign_timestamp_url + + fn(**kwargs) + + +def get_parser(): + parser = argparse.ArgumentParser() + + subparsers = parser.add_subparsers() + + sp = subparsers.add_parser("inno", help="Build Inno Setup installer") + sp.add_argument("--python", required=True, help="path to python.exe to use") + sp.add_argument("--iscc", help="path to iscc.exe to use") + sp.add_argument( + "--version", + help="Mercurial version string to use " + "(detected from 
__version__.py if not defined", + ) + sp.set_defaults(func=build_inno) + + sp = subparsers.add_parser( + "wix", help="Build Windows installer with WiX Toolset" + ) + sp.add_argument("--name", help="Application name", default="Mercurial") + sp.add_argument( + "--python", help="Path to Python executable to use", required=True + ) + sp.add_argument( + "--sign-sn", + help="Subject name (or fragment thereof) of certificate " + "to use for signing", + ) + sp.add_argument( + "--sign-cert", help="Path to certificate to use for signing" + ) + sp.add_argument("--sign-password", help="Password for signing certificate") + sp.add_argument( + "--sign-timestamp-url", + help="URL of timestamp server to use for signing", + ) + sp.add_argument("--version", help="Version string to use") + sp.add_argument( + "--extra-packages-script", + help=( + "Script to execute to include extra packages in " "py2exe binary." + ), + ) + sp.add_argument( + "--extra-wxs", help="CSV of path_to_wxs_file=working_dir_for_wxs_file" + ) + sp.add_argument( + "--extra-features", + help=( + "CSV of extra feature names to include " + "in the installer from the extra wxs files" + ), + ) + sp.set_defaults(func=build_wix) + + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + + if not hasattr(args, "func"): + parser.print_help() + return + + kwargs = dict(vars(args)) + del kwargs["func"] + + args.func(**kwargs) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hgpackaging/inno.py --- a/contrib/packaging/hgpackaging/inno.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/hgpackaging/inno.py Tue Jan 21 13:14:51 2020 -0500 @@ -12,9 +12,16 @@ import shutil import subprocess -from .py2exe import build_py2exe -from .util import find_vc_runtime_files +import jinja2 +from .py2exe import ( + build_py2exe, + stage_install, +) +from .util import ( + find_vc_runtime_files, + read_version_py, +) EXTRA_PACKAGES = { 'dulwich', @@ -23,6 +30,10 @@ 'win32ctypes', } 
+PACKAGE_FILES_METADATA = { + 'ReadMe.html': 'Flags: isreadme', +} + def build( source_dir: pathlib.Path, @@ -43,11 +54,17 @@ raise Exception('%s does not exist' % iscc_exe) vc_x64 = r'\x64' in os.environ.get('LIB', '') + arch = 'x64' if vc_x64 else 'x86' + inno_source_dir = source_dir / 'contrib' / 'packaging' / 'inno' + inno_build_dir = build_dir / ('inno-%s' % arch) + staging_dir = inno_build_dir / 'stage' requirements_txt = ( source_dir / 'contrib' / 'packaging' / 'inno' / 'requirements.txt' ) + inno_build_dir.mkdir(parents=True, exist_ok=True) + build_py2exe( source_dir, build_dir, @@ -57,6 +74,15 @@ extra_packages=EXTRA_PACKAGES, ) + # Purge the staging directory for every build so packaging is + # pristine. + if staging_dir.exists(): + print('purging %s' % staging_dir) + shutil.rmtree(staging_dir) + + # Now assemble all the packaged files into the staging directory. + stage_install(source_dir, staging_dir) + # hg.exe depends on VC9 runtime DLLs. Copy those into place. for f in find_vc_runtime_files(vc_x64): if f.name.endswith('.manifest'): @@ -64,22 +90,74 @@ else: basename = f.name - dest_path = source_dir / 'dist' / basename + dest_path = staging_dir / basename print('copying %s to %s' % (f, dest_path)) shutil.copyfile(f, dest_path) + # The final package layout is simply a mirror of the staging directory. + package_files = [] + for root, dirs, files in os.walk(staging_dir): + dirs.sort() + + root = pathlib.Path(root) + + for f in sorted(files): + full = root / f + rel = full.relative_to(staging_dir) + if str(rel.parent) == '.': + dest_dir = '{app}' + else: + dest_dir = '{app}\\%s' % rel.parent + + package_files.append( + { + 'source': rel, + 'dest_dir': dest_dir, + 'metadata': PACKAGE_FILES_METADATA.get(str(rel), None), + } + ) + print('creating installer') + # Install Inno files by rendering a template. + jinja_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(str(inno_source_dir)), + # Need to change these to prevent conflict with Inno Setup. 
+ comment_start_string='{##', + comment_end_string='##}', + ) + + try: + template = jinja_env.get_template('mercurial.iss') + except jinja2.TemplateSyntaxError as e: + raise Exception( + 'template syntax error at %s:%d: %s' + % (e.name, e.lineno, e.message,) + ) + + content = template.render(package_files=package_files) + + with (inno_build_dir / 'mercurial.iss').open('w', encoding='utf-8') as fh: + fh.write(content) + + # Copy additional files used by Inno. + for p in ('mercurial.ico', 'postinstall.txt'): + shutil.copyfile( + source_dir / 'contrib' / 'win32' / p, inno_build_dir / p + ) + args = [str(iscc_exe)] if vc_x64: args.append('/dARCH=x64') - if version: - args.append('/dVERSION=%s' % version) + if not version: + version = read_version_py(source_dir) + + args.append('/dVERSION=%s' % version) args.append('/Odist') - args.append('contrib/packaging/inno/mercurial.iss') + args.append(str(inno_build_dir / 'mercurial.iss')) subprocess.run(args, cwd=str(source_dir), check=True) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hgpackaging/py2exe.py --- a/contrib/packaging/hgpackaging/py2exe.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/hgpackaging/py2exe.py Tue Jan 21 13:14:51 2020 -0500 @@ -15,10 +15,48 @@ from .util import ( extract_tar_to_directory, extract_zip_to_directory, + process_install_rules, python_exe_info, ) +STAGING_RULES = [ + ('contrib/bash_completion', 'Contrib/'), + ('contrib/hgk', 'Contrib/hgk.tcl'), + ('contrib/hgweb.fcgi', 'Contrib/'), + ('contrib/hgweb.wsgi', 'Contrib/'), + ('contrib/logo-droplets.svg', 'Contrib/'), + ('contrib/mercurial.el', 'Contrib/'), + ('contrib/mq.el', 'Contrib/'), + ('contrib/tcsh_completion', 'Contrib/'), + ('contrib/tcsh_completion_build.sh', 'Contrib/'), + ('contrib/vim/*', 'Contrib/Vim/'), + ('contrib/win32/postinstall.txt', 'ReleaseNotes.txt'), + ('contrib/win32/ReadMe.html', 'ReadMe.html'), + ('contrib/xml.rnc', 'Contrib/'), + ('contrib/zsh_completion', 'Contrib/'), + ('dist/hg.exe', './'), + 
('dist/lib/*.dll', 'lib/'), + ('dist/lib/*.pyd', 'lib/'), + ('dist/lib/library.zip', 'lib/'), + ('dist/Microsoft.VC*.CRT.manifest', './'), + ('dist/msvc*.dll', './'), + ('dist/python*.dll', './'), + ('doc/*.html', 'doc/'), + ('doc/style.css', 'doc/'), + ('mercurial/helptext/**/*.txt', 'helptext/'), + ('mercurial/defaultrc/*.rc', 'hgrc.d/'), + ('mercurial/locale/**/*', 'locale/'), + ('mercurial/templates/**/*', 'Templates/'), + ('COPYING', 'Copying.txt'), +] + +# List of paths to exclude from the staging area. +STAGING_EXCLUDES = [ + 'doc/hg-ssh.8.html', +] + + def build_py2exe( source_dir: pathlib.Path, build_dir: pathlib.Path, @@ -169,3 +207,39 @@ env=env, check=True, ) + + +def stage_install( + source_dir: pathlib.Path, staging_dir: pathlib.Path, lower_case=False +): + """Copy all files to be installed to a directory. + + This allows packaging to simply walk a directory tree to find source + files. + """ + if lower_case: + rules = [] + for source, dest in STAGING_RULES: + # Only lower directory names. + if '/' in dest: + parent, leaf = dest.rsplit('/', 1) + dest = '%s/%s' % (parent.lower(), leaf) + rules.append((source, dest)) + else: + rules = STAGING_RULES + + process_install_rules(rules, source_dir, staging_dir) + + # Write out a default editor.rc file to configure notepad as the + # default editor. + with (staging_dir / 'hgrc.d' / 'editor.rc').open( + 'w', encoding='utf-8' + ) as fh: + fh.write('[ui]\neditor = notepad\n') + + # Purge any files we don't want to be there. 
+ for f in STAGING_EXCLUDES: + p = staging_dir / f + if p.exists(): + print('removing %s' % p) + p.unlink() diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hgpackaging/util.py --- a/contrib/packaging/hgpackaging/util.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/hgpackaging/util.py Tue Jan 21 13:14:51 2020 -0500 @@ -9,8 +9,11 @@ import distutils.version import getpass +import glob import os import pathlib +import re +import shutil import subprocess import tarfile import zipfile @@ -164,3 +167,60 @@ 'version': version, 'py3': version >= distutils.version.LooseVersion('3'), } + + +def process_install_rules( + rules: list, source_dir: pathlib.Path, dest_dir: pathlib.Path +): + for source, dest in rules: + if '*' in source: + if not dest.endswith('/'): + raise ValueError('destination must end in / when globbing') + + # We strip off the source path component before the first glob + # character to construct the relative install path. + prefix_end_index = source[: source.index('*')].rindex('/') + relative_prefix = source_dir / source[0:prefix_end_index] + + for res in glob.glob(str(source_dir / source), recursive=True): + source_path = pathlib.Path(res) + + if source_path.is_dir(): + continue + + rel_path = source_path.relative_to(relative_prefix) + + dest_path = dest_dir / dest[:-1] / rel_path + + dest_path.parent.mkdir(parents=True, exist_ok=True) + print('copying %s to %s' % (source_path, dest_path)) + shutil.copy(source_path, dest_path) + + # Simple file case. 
+ else: + source_path = pathlib.Path(source) + + if dest.endswith('/'): + dest_path = pathlib.Path(dest) / source_path.name + else: + dest_path = pathlib.Path(dest) + + full_source_path = source_dir / source_path + full_dest_path = dest_dir / dest_path + + full_dest_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(full_source_path, full_dest_path) + print('copying %s to %s' % (full_source_path, full_dest_path)) + + +def read_version_py(source_dir): + """Read the mercurial/__version__.py file to resolve the version string.""" + p = source_dir / 'mercurial' / '__version__.py' + + with p.open('r', encoding='utf-8') as fh: + m = re.search('version = b"([^"]+)"', fh.read(), re.MULTILINE) + + if not m: + raise Exception('could not parse %s' % p) + + return m.group(1) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/hgpackaging/wix.py --- a/contrib/packaging/hgpackaging/wix.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/hgpackaging/wix.py Tue Jan 21 13:14:51 2020 -0500 @@ -7,39 +7,60 @@ # no-check-code because Python 3 native. +import collections import os import pathlib import re +import shutil import subprocess -import tempfile import typing +import uuid import xml.dom.minidom from .downloads import download_entry -from .py2exe import build_py2exe +from .py2exe import ( + build_py2exe, + stage_install, +) from .util import ( extract_zip_to_directory, + process_install_rules, sign_with_signtool, ) -SUPPORT_WXS = [ - ('contrib.wxs', r'contrib'), - ('dist.wxs', r'dist'), - ('doc.wxs', r'doc'), - ('help.wxs', r'mercurial\help'), - ('i18n.wxs', r'i18n'), - ('locale.wxs', r'mercurial\locale'), - ('templates.wxs', r'mercurial\templates'), -] - - EXTRA_PACKAGES = { 'distutils', 'pygments', } +EXTRA_INSTALL_RULES = [ + ('contrib/packaging/wix/COPYING.rtf', 'COPYING.rtf'), + ('contrib/win32/mercurial.ini', 'hgrc.d/mercurial.rc'), +] + +STAGING_REMOVE_FILES = [ + # We use the RTF variant. 
+ 'copying.txt', +] + +SHORTCUTS = { + # hg.1.html' + 'hg.file.5d3e441c_28d9_5542_afd0_cdd4234f12d5': { + 'Name': 'Mercurial Command Reference', + }, + # hgignore.5.html + 'hg.file.5757d8e0_f207_5e10_a2ec_3ba0a062f431': { + 'Name': 'Mercurial Ignore Files', + }, + # hgrc.5.html + 'hg.file.92e605fd_1d1a_5dc6_9fc0_5d2998eb8f5e': { + 'Name': 'Mercurial Configuration Files', + }, +} + + def find_version(source_dir: pathlib.Path): version_py = source_dir / 'mercurial' / '__version__.py' @@ -148,49 +169,165 @@ return post_build_sign -LIBRARIES_XML = ''' - - - - - +def make_files_xml(staging_dir: pathlib.Path, is_x64) -> str: + """Create XML string listing every file to be installed.""" - - - - - - - - - -'''.lstrip() + # We derive GUIDs from a deterministic file path identifier. + # We shoehorn the name into something that looks like a URL because + # the UUID namespaces are supposed to work that way (even though + # the input data probably is never validated). - -def make_libraries_xml(wix_dir: pathlib.Path, dist_dir: pathlib.Path): - """Make XML data for library components WXS.""" - # We can't use ElementTree because it doesn't handle the - # directives. doc = xml.dom.minidom.parseString( - LIBRARIES_XML.format(wix_dir=str(wix_dir)) + '' + '' + '' ) - component = doc.getElementsByTagName('Component')[0] + # Assemble the install layout by directory. This makes it easier to + # emit XML, since each directory has separate entities. + manifest = collections.defaultdict(dict) + + for root, dirs, files in os.walk(staging_dir): + dirs.sort() + + root = pathlib.Path(root) + rel_dir = root.relative_to(staging_dir) + + for i in range(len(rel_dir.parts)): + parent = '/'.join(rel_dir.parts[0 : i + 1]) + manifest.setdefault(parent, {}) + + for f in sorted(files): + full = root / f + manifest[str(rel_dir).replace('\\', '/')][full.name] = full + + component_groups = collections.defaultdict(list) + + # Now emit a for each directory. 
+ # Each directory is composed of a pointing to its parent + # and defines child 's and a with all the files. + for dir_name, entries in sorted(manifest.items()): + # The directory id is derived from the path. But the root directory + # is special. + if dir_name == '.': + parent_directory_id = 'INSTALLDIR' + else: + parent_directory_id = 'hg.dir.%s' % dir_name.replace('/', '.') - f = doc.createElement('File') - f.setAttribute('Name', 'library.zip') - f.setAttribute('KeyPath', 'yes') - component.appendChild(f) + fragment = doc.createElement('Fragment') + directory_ref = doc.createElement('DirectoryRef') + directory_ref.setAttribute('Id', parent_directory_id) + + # Add entries for immediate children directories. + for possible_child in sorted(manifest.keys()): + if ( + dir_name == '.' + and '/' not in possible_child + and possible_child != '.' + ): + child_directory_id = 'hg.dir.%s' % possible_child + name = possible_child + else: + if not possible_child.startswith('%s/' % dir_name): + continue + name = possible_child[len(dir_name) + 1 :] + if '/' in name: + continue + + child_directory_id = 'hg.dir.%s' % possible_child.replace( + '/', '.' + ) + + directory = doc.createElement('Directory') + directory.setAttribute('Id', child_directory_id) + directory.setAttribute('Name', name) + directory_ref.appendChild(directory) + + # Add s for files in this directory. 
+ for rel, source_path in sorted(entries.items()): + if dir_name == '.': + full_rel = rel + else: + full_rel = '%s/%s' % (dir_name, rel) - lib_dir = dist_dir / 'lib' + component_unique_id = ( + 'https://www.mercurial-scm.org/wix-installer/0/component/%s' + % full_rel + ) + component_guid = uuid.uuid5(uuid.NAMESPACE_URL, component_unique_id) + component_id = 'hg.component.%s' % str(component_guid).replace( + '-', '_' + ) + + component = doc.createElement('Component') + + component.setAttribute('Id', component_id) + component.setAttribute('Guid', str(component_guid).upper()) + component.setAttribute('Win64', 'yes' if is_x64 else 'no') + + # Assign this component to a top-level group. + if dir_name == '.': + component_groups['ROOT'].append(component_id) + elif '/' in dir_name: + component_groups[dir_name[0 : dir_name.index('/')]].append( + component_id + ) + else: + component_groups[dir_name].append(component_id) + + unique_id = ( + 'https://www.mercurial-scm.org/wix-installer/0/%s' % full_rel + ) + file_guid = uuid.uuid5(uuid.NAMESPACE_URL, unique_id) + + # IDs have length limits. So use GUID to derive them. 
+ file_guid_normalized = str(file_guid).replace('-', '_') + file_id = 'hg.file.%s' % file_guid_normalized - for p in sorted(lib_dir.iterdir()): - if not p.name.endswith(('.dll', '.pyd')): - continue + file_element = doc.createElement('File') + file_element.setAttribute('Id', file_id) + file_element.setAttribute('Source', str(source_path)) + file_element.setAttribute('KeyPath', 'yes') + file_element.setAttribute('ReadOnly', 'yes') + + component.appendChild(file_element) + directory_ref.appendChild(component) + + fragment.appendChild(directory_ref) + doc.documentElement.appendChild(fragment) + + for group, component_ids in sorted(component_groups.items()): + fragment = doc.createElement('Fragment') + component_group = doc.createElement('ComponentGroup') + component_group.setAttribute('Id', 'hg.group.%s' % group) + + for component_id in component_ids: + component_ref = doc.createElement('ComponentRef') + component_ref.setAttribute('Id', component_id) + component_group.appendChild(component_ref) - f = doc.createElement('File') - f.setAttribute('Name', p.name) - component.appendChild(f) + fragment.appendChild(component_group) + doc.documentElement.appendChild(fragment) + + # Add to files that have it defined. 
+ for file_id, metadata in sorted(SHORTCUTS.items()): + els = doc.getElementsByTagName('File') + els = [el for el in els if el.getAttribute('Id') == file_id] + + if not els: + raise Exception('could not find File[Id=%s]' % file_id) + + for el in els: + shortcut = doc.createElement('Shortcut') + shortcut.setAttribute('Id', 'hg.shortcut.%s' % file_id) + shortcut.setAttribute('Directory', 'ProgramMenuDir') + shortcut.setAttribute('Icon', 'hgIcon.ico') + shortcut.setAttribute('IconIndex', '0') + shortcut.setAttribute('Advertise', 'yes') + for k, v in sorted(metadata.items()): + shortcut.setAttribute(k, v) + + el.appendChild(shortcut) return doc.toprettyxml() @@ -249,9 +386,27 @@ post_build_fn(source_dir, hg_build_dir, dist_dir, version) build_dir = hg_build_dir / ('wix-%s' % arch) + staging_dir = build_dir / 'stage' build_dir.mkdir(exist_ok=True) + # Purge the staging directory for every build so packaging is pristine. + if staging_dir.exists(): + print('purging %s' % staging_dir) + shutil.rmtree(staging_dir) + + stage_install(source_dir, staging_dir, lower_case=True) + + # We also install some extra files. + process_install_rules(EXTRA_INSTALL_RULES, source_dir, staging_dir) + + # And remove some files we don't want. + for f in STAGING_REMOVE_FILES: + p = staging_dir / f + if p.exists(): + print('removing %s' % p) + p.unlink() + wix_pkg, wix_entry = download_entry('wix', hg_build_dir) wix_path = hg_build_dir / ('wix-%s' % wix_entry['version']) @@ -264,25 +419,16 @@ defines = {'Platform': arch} - for wxs, rel_path in SUPPORT_WXS: - wxs = wix_dir / wxs - wxs_source_dir = source_dir / rel_path - run_candle(wix_path, build_dir, wxs, wxs_source_dir, defines=defines) + # Derive a .wxs file with the staged files. 
+ manifest_wxs = build_dir / 'stage.wxs' + with manifest_wxs.open('w', encoding='utf-8') as fh: + fh.write(make_files_xml(staging_dir, is_x64=arch == 'x64')) + + run_candle(wix_path, build_dir, manifest_wxs, staging_dir, defines=defines) for source, rel_path in sorted((extra_wxs or {}).items()): run_candle(wix_path, build_dir, source, rel_path, defines=defines) - # candle.exe doesn't like when we have an open handle on the file. - # So use TemporaryDirectory() instead of NamedTemporaryFile(). - with tempfile.TemporaryDirectory() as td: - td = pathlib.Path(td) - - tf = td / 'library.wxs' - with tf.open('w') as fh: - fh.write(make_libraries_xml(wix_dir, dist_dir)) - - run_candle(wix_path, build_dir, tf, dist_dir, defines=defines) - source = wix_dir / 'mercurial.wxs' defines['Version'] = version defines['Comments'] = 'Installs Mercurial version %s' % version @@ -308,20 +454,13 @@ str(msi_path), ] - for source, rel_path in SUPPORT_WXS: - assert source.endswith('.wxs') - args.append(str(build_dir / ('%s.wixobj' % source[:-4]))) - for source, rel_path in sorted((extra_wxs or {}).items()): assert source.endswith('.wxs') source = os.path.basename(source) args.append(str(build_dir / ('%s.wixobj' % source[:-4]))) args.extend( - [ - str(build_dir / 'library.wixobj'), - str(build_dir / 'mercurial.wixobj'), - ] + [str(build_dir / 'stage.wixobj'), str(build_dir / 'mercurial.wixobj'),] ) subprocess.run(args, cwd=str(source_dir), check=True) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/inno/build.py --- a/contrib/packaging/inno/build.py Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 -# build.py - Inno installer build script. -# -# Copyright 2019 Gregory Szorc -# -# This software may be used and distributed according to the terms of the -# GNU General Public License version 2 or any later version. - -# This script automates the building of the Inno MSI installer for Mercurial. 
- -# no-check-code because Python 3 native. - -import argparse -import os -import pathlib -import sys - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument( - '--python', required=True, help='path to python.exe to use' - ) - parser.add_argument('--iscc', help='path to iscc.exe to use') - parser.add_argument( - '--version', - help='Mercurial version string to use ' - '(detected from __version__.py if not defined', - ) - - args = parser.parse_args() - - if not os.path.isabs(args.python): - raise Exception('--python arg must be an absolute path') - - if args.iscc: - iscc = pathlib.Path(args.iscc) - else: - iscc = ( - pathlib.Path(os.environ['ProgramFiles(x86)']) - / 'Inno Setup 5' - / 'ISCC.exe' - ) - - here = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) - source_dir = here.parent.parent.parent - build_dir = source_dir / 'build' - - sys.path.insert(0, str(source_dir / 'contrib' / 'packaging')) - - from hgpackaging.inno import build - - build( - source_dir, - build_dir, - pathlib.Path(args.python), - iscc, - version=args.version, - ) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/inno/mercurial.iss --- a/contrib/packaging/inno/mercurial.iss Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/inno/mercurial.iss Tue Jan 21 13:14:51 2020 -0500 @@ -1,21 +1,6 @@ ; Script generated by the Inno Setup Script Wizard. ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 
-#ifndef VERSION -#define FileHandle -#define FileLine -#define VERSION = "unknown" -#if FileHandle = FileOpen(SourcePath + "\..\..\..\mercurial\__version__.py") - #expr FileLine = FileRead(FileHandle) - #expr FileLine = FileRead(FileHandle) - #define VERSION = Copy(FileLine, Pos('"', FileLine)+1, Len(FileLine)-Pos('"', FileLine)-1) -#endif -#if FileHandle - #expr FileClose(FileHandle) -#endif -#pragma message "Detected Version: " + VERSION -#endif - #ifndef ARCH #define ARCH = "x86" #endif @@ -33,68 +18,40 @@ AppVerName=Mercurial {#VERSION} OutputBaseFilename=Mercurial-{#VERSION} #endif -InfoAfterFile=contrib/win32/postinstall.txt -LicenseFile=COPYING +InfoAfterFile=../postinstall.txt +LicenseFile=Copying.txt ShowLanguageDialog=yes AppPublisher=Matt Mackall and others AppPublisherURL=https://mercurial-scm.org/ AppSupportURL=https://mercurial-scm.org/ AppUpdatesURL=https://mercurial-scm.org/ -AppID={{4B95A5F1-EF59-4B08-BED8-C891C46121B3} +{{ 'AppID={{4B95A5F1-EF59-4B08-BED8-C891C46121B3}' }} AppContact=mercurial@mercurial-scm.org DefaultDirName={pf}\Mercurial -SourceDir=..\..\.. 
+SourceDir=stage VersionInfoDescription=Mercurial distributed SCM (version {#VERSION}) VersionInfoCopyright=Copyright 2005-2019 Matt Mackall and others VersionInfoCompany=Matt Mackall and others InternalCompressLevel=max SolidCompression=true -SetupIconFile=contrib\win32\mercurial.ico +SetupIconFile=../mercurial.ico AllowNoIcons=true DefaultGroupName=Mercurial PrivilegesRequired=none ChangesEnvironment=true [Files] -Source: contrib\mercurial.el; DestDir: {app}/Contrib -Source: contrib\vim\*.*; DestDir: {app}/Contrib/Vim -Source: contrib\zsh_completion; DestDir: {app}/Contrib -Source: contrib\bash_completion; DestDir: {app}/Contrib -Source: contrib\tcsh_completion; DestDir: {app}/Contrib -Source: contrib\tcsh_completion_build.sh; DestDir: {app}/Contrib -Source: contrib\hgk; DestDir: {app}/Contrib; DestName: hgk.tcl -Source: contrib\xml.rnc; DestDir: {app}/Contrib -Source: contrib\mercurial.el; DestDir: {app}/Contrib -Source: contrib\mq.el; DestDir: {app}/Contrib -Source: contrib\hgweb.fcgi; DestDir: {app}/Contrib -Source: contrib\hgweb.wsgi; DestDir: {app}/Contrib -Source: contrib\win32\ReadMe.html; DestDir: {app}; Flags: isreadme -Source: contrib\win32\postinstall.txt; DestDir: {app}; DestName: ReleaseNotes.txt -Source: dist\hg.exe; DestDir: {app}; AfterInstall: Touch('{app}\hg.exe.local') -Source: dist\lib\*.dll; Destdir: {app}\lib -Source: dist\lib\*.pyd; Destdir: {app}\lib -Source: dist\python*.dll; Destdir: {app}; Flags: skipifsourcedoesntexist -Source: dist\msvc*.dll; DestDir: {app}; Flags: skipifsourcedoesntexist -Source: dist\Microsoft.VC*.CRT.manifest; DestDir: {app}; Flags: skipifsourcedoesntexist -Source: dist\lib\library.zip; DestDir: {app}\lib -Source: doc\*.html; DestDir: {app}\Docs -Source: doc\style.css; DestDir: {app}\Docs -Source: mercurial\help\*.txt; DestDir: {app}\help -Source: mercurial\help\internals\*.txt; DestDir: {app}\help\internals -Source: mercurial\default.d\*.rc; DestDir: {app}\default.d -Source: mercurial\locale\*.*; DestDir: 
{app}\locale; Flags: recursesubdirs createallsubdirs skipifsourcedoesntexist -Source: mercurial\templates\*.*; DestDir: {app}\Templates; Flags: recursesubdirs createallsubdirs -Source: CONTRIBUTORS; DestDir: {app}; DestName: Contributors.txt -Source: COPYING; DestDir: {app}; DestName: Copying.txt +{% for entry in package_files -%} +Source: {{ entry.source }}; DestDir: {{ entry.dest_dir }} +{%- if entry.metadata %}; {{ entry.metadata }}{% endif %} +{% endfor %} [INI] Filename: {app}\Mercurial.url; Section: InternetShortcut; Key: URL; String: https://mercurial-scm.org/ -Filename: {app}\default.d\editor.rc; Section: ui; Key: editor; String: notepad [UninstallDelete] Type: files; Name: {app}\Mercurial.url -Type: filesandordirs; Name: {app}\default.d -Type: files; Name: "{app}\hg.exe.local" +Type: filesandordirs; Name: {app}\hgrc.d [Icons] Name: {group}\Uninstall Mercurial; Filename: {uninstallexe} @@ -121,4 +78,5 @@ setArrayLength(Result, 1) Result[0] := ExpandConstant('{app}'); end; -#include "modpath.iss" + +{% include 'modpath.iss' %} diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/inno/modpath.iss --- a/contrib/packaging/inno/modpath.iss Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/inno/modpath.iss Tue Jan 21 13:14:51 2020 -0500 @@ -68,79 +68,42 @@ for d := 0 to GetArrayLength(pathdir)-1 do begin updatepath := true; - // Modify WinNT path - if UsingWinNT() = true then begin - - // Get current path, split into an array - RegQueryStringValue(regroot, regpath, 'Path', oldpath); - oldpath := oldpath + ';'; - i := 0; - - while (Pos(';', oldpath) > 0) do begin - SetArrayLength(pathArr, i+1); - pathArr[i] := Copy(oldpath, 0, Pos(';', oldpath)-1); - oldpath := Copy(oldpath, Pos(';', oldpath)+1, Length(oldpath)); - i := i + 1; + // Get current path, split into an array + RegQueryStringValue(regroot, regpath, 'Path', oldpath); + oldpath := oldpath + ';'; + i := 0; - // Check if current directory matches app dir - if pathdir[d] = pathArr[i-1] then begin - // 
if uninstalling, remove dir from path - if IsUninstaller() = true then begin - continue; - // if installing, flag that dir already exists in path - end else begin - updatepath := false; - end; - end; + while (Pos(';', oldpath) > 0) do begin + SetArrayLength(pathArr, i+1); + pathArr[i] := Copy(oldpath, 0, Pos(';', oldpath)-1); + oldpath := Copy(oldpath, Pos(';', oldpath)+1, Length(oldpath)); + i := i + 1; - // Add current directory to new path - if i = 1 then begin - newpath := pathArr[i-1]; + // Check if current directory matches app dir + if pathdir[d] = pathArr[i-1] then begin + // if uninstalling, remove dir from path + if IsUninstaller() = true then begin + continue; + // if installing, flag that dir already exists in path end else begin - newpath := newpath + ';' + pathArr[i-1]; + updatepath := false; end; end; - // Append app dir to path if not already included - if (IsUninstaller() = false) AND (updatepath = true) then - newpath := newpath + ';' + pathdir[d]; - - // Write new path - RegWriteStringValue(regroot, regpath, 'Path', newpath); - - // Modify Win9x path - end else begin - - // Convert to shortened dirname - pathdir[d] := GetShortName(pathdir[d]); - - // If autoexec.bat exists, check if app dir already exists in path - aExecFile := 'C:\AUTOEXEC.BAT'; - if FileExists(aExecFile) then begin - LoadStringsFromFile(aExecFile, aExecArr); - for i := 0 to GetArrayLength(aExecArr)-1 do begin - if IsUninstaller() = false then begin - // If app dir already exists while installing, skip add - if (Pos(pathdir[d], aExecArr[i]) > 0) then - updatepath := false; - break; - end else begin - // If app dir exists and = what we originally set, then delete at uninstall - if aExecArr[i] = 'SET PATH=%PATH%;' + pathdir[d] then - aExecArr[i] := ''; - end; - end; - end; - - // If app dir not found, or autoexec.bat didn't exist, then (create and) append to current path - if (IsUninstaller() = false) AND (updatepath = true) then begin - SaveStringToFile(aExecFile, #13#10 + 'SET 
PATH=%PATH%;' + pathdir[d], True); - - // If uninstalling, write the full autoexec out + // Add current directory to new path + if i = 1 then begin + newpath := pathArr[i-1]; end else begin - SaveStringsToFile(aExecFile, aExecArr, False); + newpath := newpath + ';' + pathArr[i-1]; end; end; + + // Append app dir to path if not already included + if (IsUninstaller() = false) AND (updatepath = true) then + newpath := newpath + ';' + pathdir[d]; + + // Write new path + RegWriteStringValue(regroot, regpath, 'Path', newpath); end; end; @@ -207,13 +170,6 @@ end; function NeedRestart(): Boolean; -var - taskname: String; begin - taskname := ModPathName; - if IsTaskSelected(taskname) and not UsingWinNT() then begin - Result := True; - end else begin - Result := False; - end; + Result := False; end; diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/inno/readme.rst --- a/contrib/packaging/inno/readme.rst Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/inno/readme.rst Tue Jan 21 13:14:51 2020 -0500 @@ -11,12 +11,12 @@ * Inno Setup (http://jrsoftware.org/isdl.php) version 5.4 or newer. Be sure to install the optional Inno Setup Preprocessor feature, which is required. -* Python 3.5+ (to run the ``build.py`` script) +* Python 3.5+ (to run the ``packaging.py`` script) Building ======== -The ``build.py`` script automates the process of producing an +The ``packaging.py`` script automates the process of producing an Inno installer. It manages fetching and configuring the non-system dependencies (such as py2exe, gettext, and various Python packages). @@ -31,11 +31,11 @@ From the prompt, change to the Mercurial source directory. e.g. ``cd c:\src\hg``. -Next, invoke ``build.py`` to produce an Inno installer. You will +Next, invoke ``packaging.py`` to produce an Inno installer. 
You will need to supply the path to the Python interpreter to use.:: - $ python3.exe contrib\packaging\inno\build.py \ - --python c:\python27\python.exe + $ python3.exe contrib\packaging\packaging.py \ + inno --python c:\python27\python.exe .. note:: @@ -49,13 +49,13 @@ and an installer placed in the ``dist`` sub-directory. The final line of output should print the name of the generated installer. -Additional options may be configured. Run ``build.py --help`` to -see a list of program flags. +Additional options may be configured. Run +``packaging.py inno --help`` to see a list of program flags. MinGW ===== It is theoretically possible to generate an installer that uses -MinGW. This isn't well tested and ``build.py`` and may properly +MinGW. This isn't well tested and ``packaging.py`` and may properly support it. See old versions of this file in version control for potentially useful hints as to how to achieve this. diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/inno/requirements.txt --- a/contrib/packaging/inno/requirements.txt Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/inno/requirements.txt Tue Jan 21 13:14:51 2020 -0500 @@ -8,65 +8,6 @@ --hash=sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50 \ --hash=sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef \ # via dulwich -cffi==1.13.1 \ - --hash=sha256:00d890313797d9fe4420506613384b43099ad7d2b905c0752dbcc3a6f14d80fa \ - --hash=sha256:0cf9e550ac6c5e57b713437e2f4ac2d7fd0cd10336525a27224f5fc1ec2ee59a \ - --hash=sha256:0ea23c9c0cdd6778146a50d867d6405693ac3b80a68829966c98dd5e1bbae400 \ - --hash=sha256:193697c2918ecdb3865acf6557cddf5076bb39f1f654975e087b67efdff83365 \ - --hash=sha256:1ae14b542bf3b35e5229439c35653d2ef7d8316c1fffb980f9b7647e544baa98 \ - --hash=sha256:1e389e069450609c6ffa37f21f40cce36f9be7643bbe5051ab1de99d5a779526 \ - --hash=sha256:263242b6ace7f9cd4ea401428d2d45066b49a700852334fd55311bde36dcda14 \ - 
--hash=sha256:33142ae9807665fa6511cfa9857132b2c3ee6ddffb012b3f0933fc11e1e830d5 \ - --hash=sha256:364f8404034ae1b232335d8c7f7b57deac566f148f7222cef78cf8ae28ef764e \ - --hash=sha256:47368f69fe6529f8f49a5d146ddee713fc9057e31d61e8b6dc86a6a5e38cecc1 \ - --hash=sha256:4895640844f17bec32943995dc8c96989226974dfeb9dd121cc45d36e0d0c434 \ - --hash=sha256:558b3afef987cf4b17abd849e7bedf64ee12b28175d564d05b628a0f9355599b \ - --hash=sha256:5ba86e1d80d458b338bda676fd9f9d68cb4e7a03819632969cf6d46b01a26730 \ - --hash=sha256:63424daa6955e6b4c70dc2755897f5be1d719eabe71b2625948b222775ed5c43 \ - --hash=sha256:6381a7d8b1ebd0bc27c3bc85bc1bfadbb6e6f756b4d4db0aa1425c3719ba26b4 \ - --hash=sha256:6381ab708158c4e1639da1f2a7679a9bbe3e5a776fc6d1fd808076f0e3145331 \ - --hash=sha256:6fd58366747debfa5e6163ada468a90788411f10c92597d3b0a912d07e580c36 \ - --hash=sha256:728ec653964655d65408949b07f9b2219df78badd601d6c49e28d604efe40599 \ - --hash=sha256:7cfcfda59ef1f95b9f729c56fe8a4041899f96b72685d36ef16a3440a0f85da8 \ - --hash=sha256:819f8d5197c2684524637f940445c06e003c4a541f9983fd30d6deaa2a5487d8 \ - --hash=sha256:825ecffd9574557590e3225560a8a9d751f6ffe4a49e3c40918c9969b93395fa \ - --hash=sha256:9009e917d8f5ef780c2626e29b6bc126f4cb2a4d43ca67aa2b40f2a5d6385e78 \ - --hash=sha256:9c77564a51d4d914ed5af096cd9843d90c45b784b511723bd46a8a9d09cf16fc \ - --hash=sha256:a19089fa74ed19c4fe96502a291cfdb89223a9705b1d73b3005df4256976142e \ - --hash=sha256:a40ed527bffa2b7ebe07acc5a3f782da072e262ca994b4f2085100b5a444bbb2 \ - --hash=sha256:bb75ba21d5716abc41af16eac1145ab2e471deedde1f22c6f99bd9f995504df0 \ - --hash=sha256:e22a00c0c81ffcecaf07c2bfb3672fa372c50e2bd1024ffee0da191c1b27fc71 \ - --hash=sha256:e55b5a746fb77f10c83e8af081979351722f6ea48facea79d470b3731c7b2891 \ - --hash=sha256:ec2fa3ee81707a5232bf2dfbd6623fdb278e070d596effc7e2d788f2ada71a05 \ - --hash=sha256:fd82eb4694be712fcae03c717ca2e0fc720657ac226b80bbb597e971fc6928c2 \ - # via cryptography -configparser==4.0.2 \ - 
--hash=sha256:254c1d9c79f60c45dfde850850883d5aaa7f19a23f13561243a050d5a7c3fe4c \ - --hash=sha256:c7d282687a5308319bf3d2e7706e575c635b0a470342641c93bea0ea3b5331df \ - # via entrypoints -cryptography==2.8 \ - --hash=sha256:02079a6addc7b5140ba0825f542c0869ff4df9a69c360e339ecead5baefa843c \ - --hash=sha256:1df22371fbf2004c6f64e927668734070a8953362cd8370ddd336774d6743595 \ - --hash=sha256:369d2346db5934345787451504853ad9d342d7f721ae82d098083e1f49a582ad \ - --hash=sha256:3cda1f0ed8747339bbdf71b9f38ca74c7b592f24f65cdb3ab3765e4b02871651 \ - --hash=sha256:44ff04138935882fef7c686878e1c8fd80a723161ad6a98da31e14b7553170c2 \ - --hash=sha256:4b1030728872c59687badcca1e225a9103440e467c17d6d1730ab3d2d64bfeff \ - --hash=sha256:58363dbd966afb4f89b3b11dfb8ff200058fbc3b947507675c19ceb46104b48d \ - --hash=sha256:6ec280fb24d27e3d97aa731e16207d58bd8ae94ef6eab97249a2afe4ba643d42 \ - --hash=sha256:7270a6c29199adc1297776937a05b59720e8a782531f1f122f2eb8467f9aab4d \ - --hash=sha256:73fd30c57fa2d0a1d7a49c561c40c2f79c7d6c374cc7750e9ac7c99176f6428e \ - --hash=sha256:7f09806ed4fbea8f51585231ba742b58cbcfbfe823ea197d8c89a5e433c7e912 \ - --hash=sha256:90df0cc93e1f8d2fba8365fb59a858f51a11a394d64dbf3ef844f783844cc793 \ - --hash=sha256:971221ed40f058f5662a604bd1ae6e4521d84e6cad0b7b170564cc34169c8f13 \ - --hash=sha256:a518c153a2b5ed6b8cc03f7ae79d5ffad7315ad4569b2d5333a13c38d64bd8d7 \ - --hash=sha256:b0de590a8b0979649ebeef8bb9f54394d3a41f66c5584fff4220901739b6b2f0 \ - --hash=sha256:b43f53f29816ba1db8525f006fa6f49292e9b029554b3eb56a189a70f2a40879 \ - --hash=sha256:d31402aad60ed889c7e57934a03477b572a03af7794fa8fb1780f21ea8f6551f \ - --hash=sha256:de96157ec73458a7f14e3d26f17f8128c959084931e8997b9e655a39c8fde9f9 \ - --hash=sha256:df6b4dca2e11865e6cfbfb708e800efb18370f5a46fd601d3755bc7f85b3a8a2 \ - --hash=sha256:ecadccc7ba52193963c0475ac9f6fa28ac01e01349a2ca48509667ef41ffd2cf \ - --hash=sha256:fb81c17e0ebe3358486cd8cc3ad78adbae58af12fc2bf2bc0bb84e8090fa5ce8 \ - # via secretstorage docutils==0.15.2 \ 
--hash=sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0 \ --hash=sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827 \ @@ -85,35 +26,16 @@ --hash=sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19 \ --hash=sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451 \ # via keyring -enum34==1.1.6 \ - --hash=sha256:2d81cbbe0e73112bdfe6ef8576f2238f2ba27dd0d55752a776c41d38b7da2850 \ - --hash=sha256:644837f692e5f550741432dd3f223bbb9852018674981b1664e5dc339387588a \ - --hash=sha256:6bd0f6ad48ec2aa117d3d141940d484deccda84d4fcd884f5c3d93c23ecd8c79 \ - --hash=sha256:8ad8c4783bf61ded74527bffb48ed9b54166685e4230386a9ed9b1279e2df5b1 \ - # via cryptography -ipaddress==1.0.23 \ - --hash=sha256:6e0f4a39e66cb5bb9a137b00276a2eff74f93b71dcbdad6f10ff7df9d3557fcc \ - --hash=sha256:b7f8e0369580bb4a24d5ba1d7cc29660a4a6987763faf1d8a8046830e020e7e2 \ - # via cryptography keyring==18.0.1 \ --hash=sha256:67d6cc0132bd77922725fae9f18366bb314fd8f95ff4d323a4df41890a96a838 \ --hash=sha256:7b29ebfcf8678c4da531b2478a912eea01e80007e5ddca9ee0c7038cb3489ec6 -pycparser==2.19 \ - --hash=sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3 \ - # via cffi pygments==2.4.2 \ --hash=sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127 \ --hash=sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297 pywin32-ctypes==0.2.0 \ --hash=sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942 \ - --hash=sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98 -secretstorage==2.3.1 \ - --hash=sha256:3af65c87765323e6f64c83575b05393f9e003431959c9395d1791d51497f29b6 \ + --hash=sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98 \ # via keyring -six==1.12.0 \ - --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c \ - --hash=sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73 \ - # via cryptography 
urllib3==1.25.6 \ --hash=sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398 \ --hash=sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86 \ diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/packaging.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/packaging.py Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# +# packaging.py - Mercurial packaging functionality +# +# Copyright 2019 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +import os +import pathlib +import subprocess +import sys +import venv + + +HERE = pathlib.Path(os.path.abspath(__file__)).parent +REQUIREMENTS_TXT = HERE / "requirements.txt" +SOURCE_DIR = HERE.parent.parent +VENV = SOURCE_DIR / "build" / "venv-packaging" + + +def bootstrap(): + venv_created = not VENV.exists() + + VENV.parent.mkdir(exist_ok=True) + + venv.create(VENV, with_pip=True) + + if os.name == "nt": + venv_bin = VENV / "Scripts" + pip = venv_bin / "pip.exe" + python = venv_bin / "python.exe" + else: + venv_bin = VENV / "bin" + pip = venv_bin / "pip" + python = venv_bin / "python" + + args = [ + str(pip), + "install", + "-r", + str(REQUIREMENTS_TXT), + "--disable-pip-version-check", + ] + + if not venv_created: + args.append("-q") + + subprocess.run(args, check=True) + + os.environ["HGPACKAGING_BOOTSTRAPPED"] = "1" + os.environ["PATH"] = "%s%s%s" % (venv_bin, os.pathsep, os.environ["PATH"]) + + subprocess.run([str(python), __file__] + sys.argv[1:], check=True) + + +def run(): + import hgpackaging.cli as cli + + # Need to strip off main Python executable. 
+ cli.main() + + +if __name__ == "__main__": + try: + if "HGPACKAGING_BOOTSTRAPPED" not in os.environ: + bootstrap() + else: + run() + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) + except KeyboardInterrupt: + sys.exit(1) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/requirements.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/requirements.txt Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,39 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes --output-file=contrib/packaging/requirements.txt contrib/packaging/requirements.txt.in +# +jinja2==2.10.3 \ + --hash=sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f \ + --hash=sha256:9fe95f19286cfefaa917656583d020be14e7859c6b0252588391e47db34527de +markupsafe==1.1.1 \ + --hash=sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473 \ + --hash=sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161 \ + --hash=sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235 \ + --hash=sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5 \ + --hash=sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff \ + --hash=sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b \ + --hash=sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1 \ + --hash=sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e \ + --hash=sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183 \ + --hash=sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66 \ + --hash=sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1 \ + --hash=sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1 \ + --hash=sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e \ + 
--hash=sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b \ + --hash=sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905 \ + --hash=sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735 \ + --hash=sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d \ + --hash=sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e \ + --hash=sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d \ + --hash=sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c \ + --hash=sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21 \ + --hash=sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2 \ + --hash=sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5 \ + --hash=sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b \ + --hash=sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6 \ + --hash=sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f \ + --hash=sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f \ + --hash=sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7 \ + # via jinja2 diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/requirements.txt.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/packaging/requirements.txt.in Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,1 @@ +jinja2 diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/build.py --- a/contrib/packaging/wix/build.py Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2019 Gregory Szorc -# -# This software may be used and distributed according to the terms of the -# GNU General Public License version 2 or any later version. - -# no-check-code because Python 3 native. 
- -"""Code to build Mercurial WiX installer.""" - -import argparse -import os -import pathlib -import sys - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - parser.add_argument('--name', help='Application name', default='Mercurial') - parser.add_argument( - '--python', help='Path to Python executable to use', required=True - ) - parser.add_argument( - '--sign-sn', - help='Subject name (or fragment thereof) of certificate ' - 'to use for signing', - ) - parser.add_argument( - '--sign-cert', help='Path to certificate to use for signing' - ) - parser.add_argument( - '--sign-password', help='Password for signing certificate' - ) - parser.add_argument( - '--sign-timestamp-url', - help='URL of timestamp server to use for signing', - ) - parser.add_argument('--version', help='Version string to use') - parser.add_argument( - '--extra-packages-script', - help=( - 'Script to execute to include extra packages in ' 'py2exe binary.' - ), - ) - parser.add_argument( - '--extra-wxs', help='CSV of path_to_wxs_file=working_dir_for_wxs_file' - ) - parser.add_argument( - '--extra-features', - help=( - 'CSV of extra feature names to include ' - 'in the installer from the extra wxs files' - ), - ) - - args = parser.parse_args() - - here = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) - source_dir = here.parent.parent.parent - - sys.path.insert(0, str(source_dir / 'contrib' / 'packaging')) - - from hgpackaging.wix import ( - build_installer, - build_signed_installer, - ) - - fn = build_installer - kwargs = { - 'source_dir': source_dir, - 'python_exe': pathlib.Path(args.python), - 'version': args.version, - } - - if not os.path.isabs(args.python): - raise Exception('--python arg must be an absolute path') - - if args.extra_packages_script: - kwargs['extra_packages_script'] = args.extra_packages_script - if args.extra_wxs: - kwargs['extra_wxs'] = dict( - thing.split("=") for thing in args.extra_wxs.split(',') - ) - if args.extra_features: - 
kwargs['extra_features'] = args.extra_features.split(',') - - if args.sign_sn or args.sign_cert: - fn = build_signed_installer - kwargs['name'] = args.name - kwargs['subject_name'] = args.sign_sn - kwargs['cert_path'] = args.sign_cert - kwargs['cert_password'] = args.sign_password - kwargs['timestamp_url'] = args.sign_timestamp_url - - fn(**kwargs) diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/contrib.wxs --- a/contrib/packaging/wix/contrib.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/dist.wxs --- a/contrib/packaging/wix/dist.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/doc.wxs --- a/contrib/packaging/wix/doc.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/guids.wxi --- a/contrib/packaging/wix/guids.wxi Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/wix/guids.wxi Tue Jan 21 13:14:51 2020 -0500 @@ -4,49 +4,9 @@ and replace 'Mercurial' in this notice with the name of your project. Component GUIDs have global namespace! 
--> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/help.wxs --- a/contrib/packaging/wix/help.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/i18n.wxs --- a/contrib/packaging/wix/i18n.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/locale.wxs --- a/contrib/packaging/wix/locale.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/mercurial.wxs --- a/contrib/packaging/wix/mercurial.wxs Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/wix/mercurial.wxs Tue Jan 21 13:14:51 2020 -0500 @@ -60,30 +60,10 @@ - + - - - - - - - - - - - - - - - - @@ -117,15 +97,12 @@ - - - - - - - - + + + + + @@ -135,14 +112,13 @@ - - + - + - + diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/readme.rst --- a/contrib/packaging/wix/readme.rst Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/wix/readme.rst Tue Jan 21 13:14:51 2020 -0500 @@ -18,12 +18,12 @@ * Python 2.7 (download from https://www.python.org/downloads/) * Microsoft Visual C++ Compiler for Python 2.7 (https://www.microsoft.com/en-us/download/details.aspx?id=44266) -* Python 3.5+ (to run the ``build.py`` script) +* Python 3.5+ (to run the ``packaging.py`` script) Building ======== -The ``build.py`` script automates the process of producing an MSI +The ``packaging.py`` script automates the process of producing an MSI installer. 
It manages fetching and configuring non-system dependencies (such as py2exe, gettext, and various Python packages). @@ -37,11 +37,11 @@ From the prompt, change to the Mercurial source directory. e.g. ``cd c:\src\hg``. -Next, invoke ``build.py`` to produce an MSI installer. You will need +Next, invoke ``packaging.py`` to produce an MSI installer. You will need to supply the path to the Python interpreter to use.:: - $ python3 contrib\packaging\wix\build.py \ - --python c:\python27\python.exe + $ python3 contrib\packaging\packaging.py \ + wix --python c:\python27\python.exe .. note:: @@ -54,8 +54,8 @@ and an installer placed in the ``dist`` sub-directory. The final line of output should print the name of the generated installer. -Additional options may be configured. Run ``build.py --help`` to see -a list of program flags. +Additional options may be configured. Run ``packaging.py wix --help`` to +see a list of program flags. Relationship to TortoiseHG ========================== diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/requirements.txt --- a/contrib/packaging/wix/requirements.txt Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/wix/requirements.txt Tue Jan 21 13:14:51 2020 -0500 @@ -1,13 +1,13 @@ -# -# This file is autogenerated by pip-compile -# To update, run: -# -# pip-compile --generate-hashes --output-file=contrib/packaging/wix/requirements.txt contrib/packaging/wix/requirements.txt.in -# -docutils==0.15.2 \ - --hash=sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0 \ - --hash=sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827 \ - --hash=sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99 -pygments==2.4.2 \ - --hash=sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127 \ - --hash=sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297 +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes 
--output-file=contrib/packaging/wix/requirements.txt contrib/packaging/wix/requirements.txt.in +# +docutils==0.15.2 \ + --hash=sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0 \ + --hash=sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827 \ + --hash=sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99 +pygments==2.4.2 \ + --hash=sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127 \ + --hash=sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297 diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/requirements.txt.in --- a/contrib/packaging/wix/requirements.txt.in Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/packaging/wix/requirements.txt.in Tue Jan 21 13:14:51 2020 -0500 @@ -1,2 +1,2 @@ -docutils -pygments +docutils +pygments diff -r 61881b170140 -r 84a0102c05c7 contrib/packaging/wix/templates.wxs --- a/contrib/packaging/wix/templates.wxs Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jandiff -r 61881b170140 -r 84a0102c05c7 contrib/perf.py --- a/contrib/perf.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/perf.py Tue Jan 21 13:14:51 2020 -0500 @@ -726,8 +726,8 @@ def clearchangelog(repo): if repo is not repo.unfiltered(): - object.__setattr__(repo, r'_clcachekey', None) - object.__setattr__(repo, r'_clcache', None) + object.__setattr__(repo, '_clcachekey', None) + object.__setattr__(repo, '_clcache', None) clearfilecache(repo.unfiltered(), 'changelog') @@ -760,7 +760,10 @@ @command( b'perfstatus', - [(b'u', b'unknown', False, b'ask status to look for unknown files')] + [ + (b'u', b'unknown', False, b'ask status to look for unknown files'), + (b'', b'dirstate', False, b'benchmark the internal dirstate call'), + ] + formatteropts, ) def perfstatus(ui, repo, **opts): @@ -776,7 +779,20 @@ # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False, # False)))) timer, fm = gettimer(ui, opts) - timer(lambda: sum(map(len, 
repo.status(unknown=opts[b'unknown'])))) + if opts[b'dirstate']: + dirstate = repo.dirstate + m = scmutil.matchall(repo) + unknown = opts[b'unknown'] + + def status_dirstate(): + s = dirstate.status( + m, subrepos=[], ignored=False, clean=False, unknown=unknown + ) + sum(map(bool, s)) + + timer(status_dirstate) + else: + timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown'])))) fm.end() @@ -804,6 +820,7 @@ if util.safehasattr(cl, b'clearcaches'): cl.clearcaches() elif util.safehasattr(cl, b'_nodecache'): + # <= hg-5.2 from mercurial.node import nullid, nullrev cl._nodecache = {nullid: nullrev} @@ -1404,13 +1421,15 @@ else: ui.statusnoi18n(b'publishing: no\n') - nodemap = repo.changelog.nodemap + has_node = getattr(repo.changelog.index, 'has_node', None) + if has_node is None: + has_node = repo.changelog.nodemap.__contains__ nonpublishroots = 0 for nhex, phase in remotephases.iteritems(): if nhex == b'publishing': # ignore data related to publish option continue node = bin(nhex) - if node in nodemap and int(phase): + if has_node(node) and int(phase): nonpublishroots += 1 ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases)) ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots) @@ -1610,7 +1629,11 @@ def setnodeget(): # probably not necessary, but for good measure clearchangelog(unfi) - nodeget[0] = makecl(unfi).nodemap.get + cl = makecl(unfi) + if util.safehasattr(cl.index, 'get_rev'): + nodeget[0] = cl.index.get_rev + else: + nodeget[0] = cl.nodemap.get def d(): get = nodeget[0] @@ -1636,13 +1659,13 @@ timer, fm = gettimer(ui, opts) def d(): - if os.name != r'nt': + if os.name != 'nt': os.system( b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0]) ) else: - os.environ[r'HGRCPATH'] = r' ' - os.system(r"%s version -q > NUL" % sys.argv[0]) + os.environ['HGRCPATH'] = r' ' + os.system("%s version -q > NUL" % sys.argv[0]) timer(d) fm.end() @@ -1828,7 +1851,7 @@ opts = _byteskwargs(opts) nullui = ui.copy() - nullui.fout 
= open(os.devnull, r'wb') + nullui.fout = open(os.devnull, 'wb') nullui.disablepager() revs = opts.get(b'rev') if not revs: @@ -1855,7 +1878,6 @@ def _displaystats(ui, opts, entries, data): - pass # use a second formatter because the data are quite different, not sure # how it flies with the templater. fm = ui.formatter(b'perf-stats', opts) @@ -2025,8 +2047,8 @@ data['p1.time'] = end - begin begin = util.timer() p2renames = copies.pathcopies(b, p2) + end = util.timer() data['p2.time'] = end - begin - end = util.timer() data['p1.renamedfiles'] = len(p1renames) data['p2.renamedfiles'] = len(p2renames) @@ -2198,9 +2220,6 @@ fm.end() if dostats: - # use a second formatter because the data are quite different, not sure - # how it flies with the templater. - fm = ui.formatter(b'perf', opts) entries = [ ('nbrevs', 'number of revision covered'), ('nbmissingfiles', 'number of missing files at head'), @@ -2576,25 +2595,38 @@ index[rev] def resolvenode(node): - nodemap = revlogio.parseindex(data, inline)[1] - # This only works for the C code. - if nodemap is None: - return + index = revlogio.parseindex(data, inline)[0] + rev = getattr(index, 'rev', None) + if rev is None: + nodemap = getattr( + revlogio.parseindex(data, inline)[0], 'nodemap', None + ) + # This only works for the C code. + if nodemap is None: + return + rev = nodemap.__getitem__ try: - nodemap[node] + rev(node) except error.RevlogError: pass def resolvenodes(nodes, count=1): - nodemap = revlogio.parseindex(data, inline)[1] - if nodemap is None: - return + index = revlogio.parseindex(data, inline)[0] + rev = getattr(index, 'rev', None) + if rev is None: + nodemap = getattr( + revlogio.parseindex(data, inline)[0], 'nodemap', None + ) + # This only works for the C code. 
+ if nodemap is None: + return + rev = nodemap.__getitem__ for i in range(count): for node in nodes: try: - nodemap[node] + rev(node) except error.RevlogError: pass diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/NEWS.rst --- a/contrib/python-zstandard/NEWS.rst Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/NEWS.rst Tue Jan 21 13:14:51 2020 -0500 @@ -43,13 +43,18 @@ * Support modifying compression parameters mid operation when supported by zstd API. * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. +* Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants. * Support ``ZSTD_p_forceAttachDict`` compression parameter. -* Support ``ZSTD_c_literalCompressionMode `` compression parameter. +* Support ``ZSTD_dictForceLoad`` dictionary compression parameter. +* Support ``ZSTD_c_targetCBlockSize`` compression parameter. +* Support ``ZSTD_c_literalCompressionMode`` compression parameter. +* Support ``ZSTD_c_srcSizeHint`` compression parameter. * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving compression parameters. * Consider exposing ``ZSTDMT_toFlushNow()``. * Expose ``ZDICT_trainFromBuffer_fastCover()``, ``ZDICT_optimizeTrainFromBuffer_fastCover``. +* Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API. * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. * Consider a ``chunker()`` API for decompression. * Consider stats for ``chunker()`` API, including finding the last consumed @@ -67,6 +72,20 @@ * API for ensuring max memory ceiling isn't exceeded. * Move off nose for testing. +0.13.0 (released 2019-12-28) +============================ + +Changes +------- + +* ``pytest-xdist`` ``pytest`` extension is now installed so tests can be + run in parallel. +* CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels + instead of a mix of ``manylinux2010`` and ``manylinux1``. +* Official support for Python 3.8 has been added. 
+* Bundled zstandard library upgraded from 1.4.3 to 1.4.4. +* Python code has been reformatted with black. + 0.12.0 (released 2019-09-15) ============================ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/README.rst --- a/contrib/python-zstandard/README.rst Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/README.rst Tue Jan 21 13:14:51 2020 -0500 @@ -20,7 +20,7 @@ Requirements ============ -This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7 +This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/c-ext/python-zstandard.h --- a/contrib/python-zstandard/c-ext/python-zstandard.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/c-ext/python-zstandard.h Tue Jan 21 13:14:51 2020 -0500 @@ -16,7 +16,7 @@ #include /* Remember to change the string in zstandard/__init__ as well */ -#define PYTHON_ZSTANDARD_VERSION "0.12.0" +#define PYTHON_ZSTANDARD_VERSION "0.13.0" typedef enum { compressorobj_flush_finish, diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/make_cffi.py --- a/contrib/python-zstandard/make_cffi.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/make_cffi.py Tue Jan 21 13:14:51 2020 -0500 @@ -16,80 +16,82 @@ HERE = os.path.abspath(os.path.dirname(__file__)) -SOURCES = ['zstd/%s' % p for p in ( - 'common/debug.c', - 'common/entropy_common.c', - 'common/error_private.c', - 'common/fse_decompress.c', - 'common/pool.c', - 'common/threading.c', - 'common/xxhash.c', - 'common/zstd_common.c', - 'compress/fse_compress.c', - 'compress/hist.c', - 'compress/huf_compress.c', - 'compress/zstd_compress.c', - 'compress/zstd_compress_literals.c', - 'compress/zstd_compress_sequences.c', - 
'compress/zstd_double_fast.c', - 'compress/zstd_fast.c', - 'compress/zstd_lazy.c', - 'compress/zstd_ldm.c', - 'compress/zstd_opt.c', - 'compress/zstdmt_compress.c', - 'decompress/huf_decompress.c', - 'decompress/zstd_ddict.c', - 'decompress/zstd_decompress.c', - 'decompress/zstd_decompress_block.c', - 'dictBuilder/cover.c', - 'dictBuilder/fastcover.c', - 'dictBuilder/divsufsort.c', - 'dictBuilder/zdict.c', -)] +SOURCES = [ + "zstd/%s" % p + for p in ( + "common/debug.c", + "common/entropy_common.c", + "common/error_private.c", + "common/fse_decompress.c", + "common/pool.c", + "common/threading.c", + "common/xxhash.c", + "common/zstd_common.c", + "compress/fse_compress.c", + "compress/hist.c", + "compress/huf_compress.c", + "compress/zstd_compress.c", + "compress/zstd_compress_literals.c", + "compress/zstd_compress_sequences.c", + "compress/zstd_double_fast.c", + "compress/zstd_fast.c", + "compress/zstd_lazy.c", + "compress/zstd_ldm.c", + "compress/zstd_opt.c", + "compress/zstdmt_compress.c", + "decompress/huf_decompress.c", + "decompress/zstd_ddict.c", + "decompress/zstd_decompress.c", + "decompress/zstd_decompress_block.c", + "dictBuilder/cover.c", + "dictBuilder/fastcover.c", + "dictBuilder/divsufsort.c", + "dictBuilder/zdict.c", + ) +] # Headers whose preprocessed output will be fed into cdef(). -HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( - ('zstd.h',), - ('dictBuilder', 'zdict.h'), -)] +HEADERS = [ + os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),) +] -INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( - 'zstd', - 'zstd/common', - 'zstd/compress', - 'zstd/decompress', - 'zstd/dictBuilder', -)] +INCLUDE_DIRS = [ + os.path.join(HERE, d) + for d in ( + "zstd", + "zstd/common", + "zstd/compress", + "zstd/decompress", + "zstd/dictBuilder", + ) +] # cffi can't parse some of the primitives in zstd.h. So we invoke the # preprocessor and feed its output into cffi. compiler = distutils.ccompiler.new_compiler() # Needed for MSVC. 
-if hasattr(compiler, 'initialize'): +if hasattr(compiler, "initialize"): compiler.initialize() # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor # manually. -if compiler.compiler_type == 'unix': - args = list(compiler.executables['compiler']) - args.extend([ - '-E', - '-DZSTD_STATIC_LINKING_ONLY', - '-DZDICT_STATIC_LINKING_ONLY', - ]) -elif compiler.compiler_type == 'msvc': +if compiler.compiler_type == "unix": + args = list(compiler.executables["compiler"]) + args.extend( + ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",] + ) +elif compiler.compiler_type == "msvc": args = [compiler.cc] - args.extend([ - '/EP', - '/DZSTD_STATIC_LINKING_ONLY', - '/DZDICT_STATIC_LINKING_ONLY', - ]) + args.extend( + ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",] + ) else: - raise Exception('unsupported compiler type: %s' % compiler.compiler_type) + raise Exception("unsupported compiler type: %s" % compiler.compiler_type) + def preprocess(path): - with open(path, 'rb') as fh: + with open(path, "rb") as fh: lines = [] it = iter(fh) @@ -104,32 +106,44 @@ # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline # #define in zstdmt_compress.h and results in a compiler warning. So drop # the inline #define. - if l.startswith((b'#include ', - b'#include "zstd.h"', - b'#define ZSTD_STATIC_LINKING_ONLY')): + if l.startswith( + ( + b"#include ", + b'#include "zstd.h"', + b"#define ZSTD_STATIC_LINKING_ONLY", + ) + ): continue + # The preprocessor environment on Windows doesn't define include + # paths, so the #include of limits.h fails. We work around this + # by removing that import and defining INT_MAX ourselves. This is + # a bit hacky. But it gets the job done. + # TODO make limits.h work on Windows so we ensure INT_MAX is + # correct. + if l.startswith(b"#include "): + l = b"#define INT_MAX 2147483647\n" + # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't # important so just filter it out. 
- if l.startswith(b'ZSTDLIB_API'): - l = l[len(b'ZSTDLIB_API '):] + if l.startswith(b"ZSTDLIB_API"): + l = l[len(b"ZSTDLIB_API ") :] lines.append(l) - fd, input_file = tempfile.mkstemp(suffix='.h') - os.write(fd, b''.join(lines)) + fd, input_file = tempfile.mkstemp(suffix=".h") + os.write(fd, b"".join(lines)) os.close(fd) try: env = dict(os.environ) - if getattr(compiler, '_paths', None): - env['PATH'] = compiler._paths - process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, - env=env) + if getattr(compiler, "_paths", None): + env["PATH"] = compiler._paths + process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env) output = process.communicate()[0] ret = process.poll() if ret: - raise Exception('preprocessor exited with error') + raise Exception("preprocessor exited with error") return output finally: @@ -141,16 +155,16 @@ for line in output.splitlines(): # CFFI's parser doesn't like __attribute__ on UNIX compilers. if line.startswith(b'__attribute__ ((visibility ("default"))) '): - line = line[len(b'__attribute__ ((visibility ("default"))) '):] + line = line[len(b'__attribute__ ((visibility ("default"))) ') :] - if line.startswith(b'__attribute__((deprecated('): + if line.startswith(b"__attribute__((deprecated("): continue - elif b'__declspec(deprecated(' in line: + elif b"__declspec(deprecated(" in line: continue lines.append(line) - return b'\n'.join(lines) + return b"\n".join(lines) ffi = cffi.FFI() @@ -159,18 +173,22 @@ # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning # when cffi uses the function. Since we statically link against zstd, even # if we use the deprecated functions it shouldn't be a huge problem. -ffi.set_source('_zstd_cffi', ''' +ffi.set_source( + "_zstd_cffi", + """ #define MIN(a,b) ((a)<(b) ? 
(a) : (b)) #define ZSTD_STATIC_LINKING_ONLY #include #define ZDICT_STATIC_LINKING_ONLY #define ZDICT_DISABLE_DEPRECATE_WARNINGS #include -''', sources=SOURCES, - include_dirs=INCLUDE_DIRS, - extra_compile_args=['-DZSTD_MULTITHREAD']) +""", + sources=SOURCES, + include_dirs=INCLUDE_DIRS, + extra_compile_args=["-DZSTD_MULTITHREAD"], +) -DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') +DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ") sources = [] @@ -181,27 +199,27 @@ # #define's are effectively erased as part of going through preprocessor. # So perform a manual pass to re-add those to the cdef source. - with open(header, 'rb') as fh: + with open(header, "rb") as fh: for line in fh: line = line.strip() m = DEFINE.match(line) if not m: continue - if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY': + if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY": continue # The parser doesn't like some constants with complex values. - if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'): + if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"): continue # The ... is magic syntax by the cdef parser to resolve the # value at compile time. - sources.append(m.group(0) + b' ...') + sources.append(m.group(0) + b" ...") -cdeflines = b'\n'.join(sources).splitlines() +cdeflines = b"\n".join(sources).splitlines() cdeflines = [l for l in cdeflines if l.strip()] -ffi.cdef(b'\n'.join(cdeflines).decode('latin1')) +ffi.cdef(b"\n".join(cdeflines).decode("latin1")) -if __name__ == '__main__': +if __name__ == "__main__": ffi.compile() diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/setup.py --- a/contrib/python-zstandard/setup.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/setup.py Tue Jan 21 13:14:51 2020 -0500 @@ -16,7 +16,7 @@ # (like memoryview). # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid # garbage collection pitfalls. 
-MINIMUM_CFFI_VERSION = '1.11' +MINIMUM_CFFI_VERSION = "1.11" try: import cffi @@ -26,9 +26,11 @@ # out the CFFI version here and reject CFFI if it is too old. cffi_version = LooseVersion(cffi.__version__) if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION): - print('CFFI 1.11 or newer required (%s found); ' - 'not building CFFI backend' % cffi_version, - file=sys.stderr) + print( + "CFFI 1.11 or newer required (%s found); " + "not building CFFI backend" % cffi_version, + file=sys.stderr, + ) cffi = None except ImportError: @@ -40,73 +42,77 @@ SYSTEM_ZSTD = False WARNINGS_AS_ERRORS = False -if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''): +if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""): WARNINGS_AS_ERRORS = True -if '--legacy' in sys.argv: +if "--legacy" in sys.argv: SUPPORT_LEGACY = True - sys.argv.remove('--legacy') + sys.argv.remove("--legacy") -if '--system-zstd' in sys.argv: +if "--system-zstd" in sys.argv: SYSTEM_ZSTD = True - sys.argv.remove('--system-zstd') + sys.argv.remove("--system-zstd") -if '--warnings-as-errors' in sys.argv: +if "--warnings-as-errors" in sys.argv: WARNINGS_AS_ERRORS = True - sys.argv.remove('--warning-as-errors') + sys.argv.remove("--warning-as-errors") # Code for obtaining the Extension instance is in its own module to # facilitate reuse in other projects. 
extensions = [ - setup_zstd.get_c_extension(name='zstd', - support_legacy=SUPPORT_LEGACY, - system_zstd=SYSTEM_ZSTD, - warnings_as_errors=WARNINGS_AS_ERRORS), + setup_zstd.get_c_extension( + name="zstd", + support_legacy=SUPPORT_LEGACY, + system_zstd=SYSTEM_ZSTD, + warnings_as_errors=WARNINGS_AS_ERRORS, + ), ] install_requires = [] if cffi: import make_cffi + extensions.append(make_cffi.ffi.distutils_extension()) - install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION) + install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION) version = None -with open('c-ext/python-zstandard.h', 'r') as fh: +with open("c-ext/python-zstandard.h", "r") as fh: for line in fh: - if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'): + if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"): continue version = line.split()[2][1:-1] break if not version: - raise Exception('could not resolve package version; ' - 'this should never happen') + raise Exception("could not resolve package version; " "this should never happen") setup( - name='zstandard', + name="zstandard", version=version, - description='Zstandard bindings for Python', - long_description=open('README.rst', 'r').read(), - url='https://github.com/indygreg/python-zstandard', - author='Gregory Szorc', - author_email='gregory.szorc@gmail.com', - license='BSD', + description="Zstandard bindings for Python", + long_description=open("README.rst", "r").read(), + url="https://github.com/indygreg/python-zstandard", + author="Gregory Szorc", + author_email="gregory.szorc@gmail.com", + license="BSD", classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: C', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI 
Approved :: BSD License", + "Programming Language :: C", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], - keywords='zstandard zstd compression', - packages=['zstandard'], + keywords="zstandard zstd compression", + packages=["zstandard"], ext_modules=extensions, - test_suite='tests', + test_suite="tests", install_requires=install_requires, + tests_require=["hypothesis"], ) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/setup_zstd.py --- a/contrib/python-zstandard/setup_zstd.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/setup_zstd.py Tue Jan 21 13:14:51 2020 -0500 @@ -10,97 +10,110 @@ from distutils.extension import Extension -zstd_sources = ['zstd/%s' % p for p in ( - 'common/debug.c', - 'common/entropy_common.c', - 'common/error_private.c', - 'common/fse_decompress.c', - 'common/pool.c', - 'common/threading.c', - 'common/xxhash.c', - 'common/zstd_common.c', - 'compress/fse_compress.c', - 'compress/hist.c', - 'compress/huf_compress.c', - 'compress/zstd_compress_literals.c', - 'compress/zstd_compress_sequences.c', - 'compress/zstd_compress.c', - 'compress/zstd_double_fast.c', - 'compress/zstd_fast.c', - 'compress/zstd_lazy.c', - 'compress/zstd_ldm.c', - 'compress/zstd_opt.c', - 'compress/zstdmt_compress.c', - 'decompress/huf_decompress.c', - 'decompress/zstd_ddict.c', - 'decompress/zstd_decompress.c', - 'decompress/zstd_decompress_block.c', - 'dictBuilder/cover.c', - 'dictBuilder/divsufsort.c', - 'dictBuilder/fastcover.c', - 'dictBuilder/zdict.c', -)] +zstd_sources = [ + "zstd/%s" % p + for p in ( + "common/debug.c", + "common/entropy_common.c", + "common/error_private.c", + "common/fse_decompress.c", + "common/pool.c", + "common/threading.c", + "common/xxhash.c", + "common/zstd_common.c", + "compress/fse_compress.c", + "compress/hist.c", + 
"compress/huf_compress.c", + "compress/zstd_compress_literals.c", + "compress/zstd_compress_sequences.c", + "compress/zstd_compress.c", + "compress/zstd_double_fast.c", + "compress/zstd_fast.c", + "compress/zstd_lazy.c", + "compress/zstd_ldm.c", + "compress/zstd_opt.c", + "compress/zstdmt_compress.c", + "decompress/huf_decompress.c", + "decompress/zstd_ddict.c", + "decompress/zstd_decompress.c", + "decompress/zstd_decompress_block.c", + "dictBuilder/cover.c", + "dictBuilder/divsufsort.c", + "dictBuilder/fastcover.c", + "dictBuilder/zdict.c", + ) +] -zstd_sources_legacy = ['zstd/%s' % p for p in ( - 'deprecated/zbuff_common.c', - 'deprecated/zbuff_compress.c', - 'deprecated/zbuff_decompress.c', - 'legacy/zstd_v01.c', - 'legacy/zstd_v02.c', - 'legacy/zstd_v03.c', - 'legacy/zstd_v04.c', - 'legacy/zstd_v05.c', - 'legacy/zstd_v06.c', - 'legacy/zstd_v07.c' -)] +zstd_sources_legacy = [ + "zstd/%s" % p + for p in ( + "deprecated/zbuff_common.c", + "deprecated/zbuff_compress.c", + "deprecated/zbuff_decompress.c", + "legacy/zstd_v01.c", + "legacy/zstd_v02.c", + "legacy/zstd_v03.c", + "legacy/zstd_v04.c", + "legacy/zstd_v05.c", + "legacy/zstd_v06.c", + "legacy/zstd_v07.c", + ) +] zstd_includes = [ - 'zstd', - 'zstd/common', - 'zstd/compress', - 'zstd/decompress', - 'zstd/dictBuilder', + "zstd", + "zstd/common", + "zstd/compress", + "zstd/decompress", + "zstd/dictBuilder", ] zstd_includes_legacy = [ - 'zstd/deprecated', - 'zstd/legacy', + "zstd/deprecated", + "zstd/legacy", ] ext_includes = [ - 'c-ext', - 'zstd/common', + "c-ext", + "zstd/common", ] ext_sources = [ - 'zstd/common/pool.c', - 'zstd/common/threading.c', - 'zstd.c', - 'c-ext/bufferutil.c', - 'c-ext/compressiondict.c', - 'c-ext/compressobj.c', - 'c-ext/compressor.c', - 'c-ext/compressoriterator.c', - 'c-ext/compressionchunker.c', - 'c-ext/compressionparams.c', - 'c-ext/compressionreader.c', - 'c-ext/compressionwriter.c', - 'c-ext/constants.c', - 'c-ext/decompressobj.c', - 'c-ext/decompressor.c', - 
'c-ext/decompressoriterator.c', - 'c-ext/decompressionreader.c', - 'c-ext/decompressionwriter.c', - 'c-ext/frameparams.c', + "zstd/common/error_private.c", + "zstd/common/pool.c", + "zstd/common/threading.c", + "zstd/common/zstd_common.c", + "zstd.c", + "c-ext/bufferutil.c", + "c-ext/compressiondict.c", + "c-ext/compressobj.c", + "c-ext/compressor.c", + "c-ext/compressoriterator.c", + "c-ext/compressionchunker.c", + "c-ext/compressionparams.c", + "c-ext/compressionreader.c", + "c-ext/compressionwriter.c", + "c-ext/constants.c", + "c-ext/decompressobj.c", + "c-ext/decompressor.c", + "c-ext/decompressoriterator.c", + "c-ext/decompressionreader.c", + "c-ext/decompressionwriter.c", + "c-ext/frameparams.c", ] zstd_depends = [ - 'c-ext/python-zstandard.h', + "c-ext/python-zstandard.h", ] -def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', - warnings_as_errors=False, root=None): +def get_c_extension( + support_legacy=False, + system_zstd=False, + name="zstd", + warnings_as_errors=False, + root=None, +): """Obtain a distutils.extension.Extension for the C extension. ``support_legacy`` controls whether to compile in legacy zstd format support. 
@@ -125,17 +138,16 @@ if not system_zstd: sources.update([os.path.join(actual_root, p) for p in zstd_sources]) if support_legacy: - sources.update([os.path.join(actual_root, p) - for p in zstd_sources_legacy]) + sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy]) sources = list(sources) include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) if not system_zstd: - include_dirs.update([os.path.join(actual_root, d) - for d in zstd_includes]) + include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes]) if support_legacy: - include_dirs.update([os.path.join(actual_root, d) - for d in zstd_includes_legacy]) + include_dirs.update( + [os.path.join(actual_root, d) for d in zstd_includes_legacy] + ) include_dirs = list(include_dirs) depends = [os.path.join(actual_root, p) for p in zstd_depends] @@ -143,41 +155,40 @@ compiler = distutils.ccompiler.new_compiler() # Needed for MSVC. - if hasattr(compiler, 'initialize'): + if hasattr(compiler, "initialize"): compiler.initialize() - if compiler.compiler_type == 'unix': - compiler_type = 'unix' - elif compiler.compiler_type == 'msvc': - compiler_type = 'msvc' - elif compiler.compiler_type == 'mingw32': - compiler_type = 'mingw32' + if compiler.compiler_type == "unix": + compiler_type = "unix" + elif compiler.compiler_type == "msvc": + compiler_type = "msvc" + elif compiler.compiler_type == "mingw32": + compiler_type = "mingw32" else: - raise Exception('unhandled compiler type: %s' % - compiler.compiler_type) + raise Exception("unhandled compiler type: %s" % compiler.compiler_type) - extra_args = ['-DZSTD_MULTITHREAD'] + extra_args = ["-DZSTD_MULTITHREAD"] if not system_zstd: - extra_args.append('-DZSTDLIB_VISIBILITY=') - extra_args.append('-DZDICTLIB_VISIBILITY=') - extra_args.append('-DZSTDERRORLIB_VISIBILITY=') + extra_args.append("-DZSTDLIB_VISIBILITY=") + extra_args.append("-DZDICTLIB_VISIBILITY=") + extra_args.append("-DZSTDERRORLIB_VISIBILITY=") - if compiler_type == 
'unix': - extra_args.append('-fvisibility=hidden') + if compiler_type == "unix": + extra_args.append("-fvisibility=hidden") if not system_zstd and support_legacy: - extra_args.append('-DZSTD_LEGACY_SUPPORT=1') + extra_args.append("-DZSTD_LEGACY_SUPPORT=1") if warnings_as_errors: - if compiler_type in ('unix', 'mingw32'): - extra_args.append('-Werror') - elif compiler_type == 'msvc': - extra_args.append('/WX') + if compiler_type in ("unix", "mingw32"): + extra_args.append("-Werror") + elif compiler_type == "msvc": + extra_args.append("/WX") else: assert False - libraries = ['zstd'] if system_zstd else [] + libraries = ["zstd"] if system_zstd else [] # Python 3.7 doesn't like absolute paths. So normalize to relative. sources = [os.path.relpath(p, root) for p in sources] @@ -185,8 +196,11 @@ depends = [os.path.relpath(p, root) for p in depends] # TODO compile with optimizations. - return Extension(name, sources, - include_dirs=include_dirs, - depends=depends, - extra_compile_args=extra_args, - libraries=libraries) + return Extension( + name, + sources, + include_dirs=include_dirs, + depends=depends, + extra_compile_args=extra_args, + libraries=libraries, + ) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/common.py --- a/contrib/python-zstandard/tests/common.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/common.py Tue Jan 21 13:14:51 2020 -0500 @@ -3,6 +3,7 @@ import io import os import types +import unittest try: import hypothesis @@ -10,39 +11,46 @@ hypothesis = None +class TestCase(unittest.TestCase): + if not getattr(unittest.TestCase, "assertRaisesRegex", False): + assertRaisesRegex = unittest.TestCase.assertRaisesRegexp + + def make_cffi(cls): """Decorator to add CFFI versions of each test method.""" # The module containing this class definition should # `import zstandard as zstd`. Otherwise things may blow up. 
mod = inspect.getmodule(cls) - if not hasattr(mod, 'zstd'): + if not hasattr(mod, "zstd"): raise Exception('test module does not contain "zstd" symbol') - if not hasattr(mod.zstd, 'backend'): - raise Exception('zstd symbol does not have "backend" attribute; did ' - 'you `import zstandard as zstd`?') + if not hasattr(mod.zstd, "backend"): + raise Exception( + 'zstd symbol does not have "backend" attribute; did ' + "you `import zstandard as zstd`?" + ) # If `import zstandard` already chose the cffi backend, there is nothing # for us to do: we only add the cffi variation if the default backend # is the C extension. - if mod.zstd.backend == 'cffi': + if mod.zstd.backend == "cffi": return cls old_env = dict(os.environ) - os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi' + os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi" try: try: - mod_info = imp.find_module('zstandard') - mod = imp.load_module('zstandard_cffi', *mod_info) + mod_info = imp.find_module("zstandard") + mod = imp.load_module("zstandard_cffi", *mod_info) except ImportError: return cls finally: os.environ.clear() os.environ.update(old_env) - if mod.backend != 'cffi': - raise Exception('got the zstandard %s backend instead of cffi' % mod.backend) + if mod.backend != "cffi": + raise Exception("got the zstandard %s backend instead of cffi" % mod.backend) # If CFFI version is available, dynamically construct test methods # that use it. @@ -52,27 +60,31 @@ if not inspect.ismethod(fn) and not inspect.isfunction(fn): continue - if not fn.__name__.startswith('test_'): + if not fn.__name__.startswith("test_"): continue - name = '%s_cffi' % fn.__name__ + name = "%s_cffi" % fn.__name__ # Replace the "zstd" symbol with the CFFI module instance. Then copy # the function object and install it in a new attribute. 
if isinstance(fn, types.FunctionType): globs = dict(fn.__globals__) - globs['zstd'] = mod - new_fn = types.FunctionType(fn.__code__, globs, name, - fn.__defaults__, fn.__closure__) + globs["zstd"] = mod + new_fn = types.FunctionType( + fn.__code__, globs, name, fn.__defaults__, fn.__closure__ + ) new_method = new_fn else: globs = dict(fn.__func__.func_globals) - globs['zstd'] = mod - new_fn = types.FunctionType(fn.__func__.func_code, globs, name, - fn.__func__.func_defaults, - fn.__func__.func_closure) - new_method = types.UnboundMethodType(new_fn, fn.im_self, - fn.im_class) + globs["zstd"] = mod + new_fn = types.FunctionType( + fn.__func__.func_code, + globs, + name, + fn.__func__.func_defaults, + fn.__func__.func_closure, + ) + new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class) setattr(cls, name, new_method) @@ -84,6 +96,7 @@ This allows us to access written data after close(). """ + def __init__(self, *args, **kwargs): super(NonClosingBytesIO, self).__init__(*args, **kwargs) self._saved_buffer = None @@ -135,7 +148,7 @@ dirs[:] = list(sorted(dirs)) for f in sorted(files): try: - with open(os.path.join(root, f), 'rb') as fh: + with open(os.path.join(root, f), "rb") as fh: data = fh.read() if data: _source_files.append(data) @@ -154,11 +167,11 @@ def generate_samples(): inputs = [ - b'foo', - b'bar', - b'abcdef', - b'sometext', - b'baz', + b"foo", + b"bar", + b"abcdef", + b"sometext", + b"baz", ] samples = [] @@ -173,13 +186,12 @@ if hypothesis: default_settings = hypothesis.settings(deadline=10000) - hypothesis.settings.register_profile('default', default_settings) + hypothesis.settings.register_profile("default", default_settings) ci_settings = hypothesis.settings(deadline=20000, max_examples=1000) - hypothesis.settings.register_profile('ci', ci_settings) + hypothesis.settings.register_profile("ci", ci_settings) expensive_settings = hypothesis.settings(deadline=None, max_examples=10000) - hypothesis.settings.register_profile('expensive', 
expensive_settings) + hypothesis.settings.register_profile("expensive", expensive_settings) - hypothesis.settings.load_profile( - os.environ.get('HYPOTHESIS_PROFILE', 'default')) + hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default")) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_buffer_util.py --- a/contrib/python-zstandard/tests/test_buffer_util.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_buffer_util.py Tue Jan 21 13:14:51 2020 -0500 @@ -3,104 +3,114 @@ import zstandard as zstd -ss = struct.Struct('=QQ') +from .common import TestCase + +ss = struct.Struct("=QQ") -class TestBufferWithSegments(unittest.TestCase): +class TestBufferWithSegments(TestCase): def test_arguments(self): - if not hasattr(zstd, 'BufferWithSegments'): - self.skipTest('BufferWithSegments not available') + if not hasattr(zstd, "BufferWithSegments"): + self.skipTest("BufferWithSegments not available") with self.assertRaises(TypeError): zstd.BufferWithSegments() with self.assertRaises(TypeError): - zstd.BufferWithSegments(b'foo') + zstd.BufferWithSegments(b"foo") # Segments data should be a multiple of 16. 
- with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'): - zstd.BufferWithSegments(b'foo', b'\x00\x00') + with self.assertRaisesRegex( + ValueError, "segments array size is not a multiple of 16" + ): + zstd.BufferWithSegments(b"foo", b"\x00\x00") def test_invalid_offset(self): - if not hasattr(zstd, 'BufferWithSegments'): - self.skipTest('BufferWithSegments not available') + if not hasattr(zstd, "BufferWithSegments"): + self.skipTest("BufferWithSegments not available") - with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'): - zstd.BufferWithSegments(b'foo', ss.pack(0, 4)) + with self.assertRaisesRegex( + ValueError, "offset within segments array references memory" + ): + zstd.BufferWithSegments(b"foo", ss.pack(0, 4)) def test_invalid_getitem(self): - if not hasattr(zstd, 'BufferWithSegments'): - self.skipTest('BufferWithSegments not available') + if not hasattr(zstd, "BufferWithSegments"): + self.skipTest("BufferWithSegments not available") - b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3)) + b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) - with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'): + with self.assertRaisesRegex(IndexError, "offset must be non-negative"): test = b[-10] - with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'): + with self.assertRaisesRegex(IndexError, "offset must be less than 1"): test = b[1] - with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'): + with self.assertRaisesRegex(IndexError, "offset must be less than 1"): test = b[2] def test_single(self): - if not hasattr(zstd, 'BufferWithSegments'): - self.skipTest('BufferWithSegments not available') + if not hasattr(zstd, "BufferWithSegments"): + self.skipTest("BufferWithSegments not available") - b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3)) + b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) self.assertEqual(len(b), 1) self.assertEqual(b.size, 3) - 
self.assertEqual(b.tobytes(), b'foo') + self.assertEqual(b.tobytes(), b"foo") self.assertEqual(len(b[0]), 3) self.assertEqual(b[0].offset, 0) - self.assertEqual(b[0].tobytes(), b'foo') + self.assertEqual(b[0].tobytes(), b"foo") def test_multiple(self): - if not hasattr(zstd, 'BufferWithSegments'): - self.skipTest('BufferWithSegments not available') + if not hasattr(zstd, "BufferWithSegments"): + self.skipTest("BufferWithSegments not available") - b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3), - ss.pack(3, 4), - ss.pack(7, 5)])) + b = zstd.BufferWithSegments( + b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)]) + ) self.assertEqual(len(b), 3) self.assertEqual(b.size, 12) - self.assertEqual(b.tobytes(), b'foofooxfooxy') + self.assertEqual(b.tobytes(), b"foofooxfooxy") - self.assertEqual(b[0].tobytes(), b'foo') - self.assertEqual(b[1].tobytes(), b'foox') - self.assertEqual(b[2].tobytes(), b'fooxy') + self.assertEqual(b[0].tobytes(), b"foo") + self.assertEqual(b[1].tobytes(), b"foox") + self.assertEqual(b[2].tobytes(), b"fooxy") -class TestBufferWithSegmentsCollection(unittest.TestCase): +class TestBufferWithSegmentsCollection(TestCase): def test_empty_constructor(self): - if not hasattr(zstd, 'BufferWithSegmentsCollection'): - self.skipTest('BufferWithSegmentsCollection not available') + if not hasattr(zstd, "BufferWithSegmentsCollection"): + self.skipTest("BufferWithSegmentsCollection not available") - with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'): + with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"): zstd.BufferWithSegmentsCollection() def test_argument_validation(self): - if not hasattr(zstd, 'BufferWithSegmentsCollection'): - self.skipTest('BufferWithSegmentsCollection not available') + if not hasattr(zstd, "BufferWithSegmentsCollection"): + self.skipTest("BufferWithSegmentsCollection not available") - with self.assertRaisesRegexp(TypeError, 'arguments must be 
BufferWithSegments'): + with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): zstd.BufferWithSegmentsCollection(None) - with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'): - zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)), - None) + with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): + zstd.BufferWithSegmentsCollection( + zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None + ) - with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'): - zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b'')) + with self.assertRaisesRegex( + ValueError, "ZstdBufferWithSegments cannot be empty" + ): + zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b"")) def test_length(self): - if not hasattr(zstd, 'BufferWithSegmentsCollection'): - self.skipTest('BufferWithSegmentsCollection not available') + if not hasattr(zstd, "BufferWithSegmentsCollection"): + self.skipTest("BufferWithSegmentsCollection not available") - b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3)) - b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3), - ss.pack(3, 3)])) + b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) + b2 = zstd.BufferWithSegments( + b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) + ) c = zstd.BufferWithSegmentsCollection(b1) self.assertEqual(len(c), 1) @@ -115,21 +125,22 @@ self.assertEqual(c.size(), 9) def test_getitem(self): - if not hasattr(zstd, 'BufferWithSegmentsCollection'): - self.skipTest('BufferWithSegmentsCollection not available') + if not hasattr(zstd, "BufferWithSegmentsCollection"): + self.skipTest("BufferWithSegmentsCollection not available") - b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3)) - b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3), - ss.pack(3, 3)])) + b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) + b2 = zstd.BufferWithSegments( + b"barbaz", 
b"".join([ss.pack(0, 3), ss.pack(3, 3)]) + ) c = zstd.BufferWithSegmentsCollection(b1, b2) - with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'): + with self.assertRaisesRegex(IndexError, "offset must be less than 3"): c[3] - with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'): + with self.assertRaisesRegex(IndexError, "offset must be less than 3"): c[4] - self.assertEqual(c[0].tobytes(), b'foo') - self.assertEqual(c[1].tobytes(), b'bar') - self.assertEqual(c[2].tobytes(), b'baz') + self.assertEqual(c[0].tobytes(), b"foo") + self.assertEqual(c[1].tobytes(), b"bar") + self.assertEqual(c[2].tobytes(), b"baz") diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_compressor.py --- a/contrib/python-zstandard/tests/test_compressor.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_compressor.py Tue Jan 21 13:14:51 2020 -0500 @@ -13,6 +13,7 @@ make_cffi, NonClosingBytesIO, OpCountingBytesIO, + TestCase, ) @@ -23,14 +24,13 @@ def multithreaded_chunk_size(level, source_size=0): - params = zstd.ZstdCompressionParameters.from_level(level, - source_size=source_size) + params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size) return 1 << (params.window_log + 2) @make_cffi -class TestCompressor(unittest.TestCase): +class TestCompressor(TestCase): def test_level_bounds(self): with self.assertRaises(ValueError): zstd.ZstdCompressor(level=23) @@ -41,11 +41,11 @@ @make_cffi -class TestCompressor_compress(unittest.TestCase): +class TestCompressor_compress(TestCase): def test_compress_empty(self): cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - result = cctx.compress(b'') - self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + result = cctx.compress(b"") + self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") params = zstd.get_frame_parameters(result) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 
self.assertEqual(params.window_size, 524288) @@ -53,21 +53,21 @@ self.assertFalse(params.has_checksum, 0) cctx = zstd.ZstdCompressor() - result = cctx.compress(b'') - self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00') + result = cctx.compress(b"") + self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00") params = zstd.get_frame_parameters(result) self.assertEqual(params.content_size, 0) def test_input_types(self): cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f' + expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f" mutable_array = bytearray(3) - mutable_array[:] = b'foo' + mutable_array[:] = b"foo" sources = [ - memoryview(b'foo'), - bytearray(b'foo'), + memoryview(b"foo"), + bytearray(b"foo"), mutable_array, ] @@ -77,43 +77,46 @@ def test_compress_large(self): chunks = [] for i in range(255): - chunks.append(struct.Struct('>B').pack(i) * 16384) + chunks.append(struct.Struct(">B").pack(i) * 16384) cctx = zstd.ZstdCompressor(level=3, write_content_size=False) - result = cctx.compress(b''.join(chunks)) + result = cctx.compress(b"".join(chunks)) self.assertEqual(len(result), 999) - self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') + self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") # This matches the test for read_to_iter() below. 
cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') - self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' - b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' - b'\x02\x09\x00\x00\x6f') + result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o") + self.assertEqual( + result, + b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00" + b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0" + b"\x02\x09\x00\x00\x6f", + ) def test_negative_level(self): cctx = zstd.ZstdCompressor(level=-4) - result = cctx.compress(b'foo' * 256) + result = cctx.compress(b"foo" * 256) def test_no_magic(self): - params = zstd.ZstdCompressionParameters.from_level( - 1, format=zstd.FORMAT_ZSTD1) + params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1) cctx = zstd.ZstdCompressor(compression_params=params) - magic = cctx.compress(b'foobar') + magic = cctx.compress(b"foobar") params = zstd.ZstdCompressionParameters.from_level( - 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) + 1, format=zstd.FORMAT_ZSTD1_MAGICLESS + ) cctx = zstd.ZstdCompressor(compression_params=params) - no_magic = cctx.compress(b'foobar') + no_magic = cctx.compress(b"foobar") - self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd') + self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd") self.assertEqual(magic[4:], no_magic) def test_write_checksum(self): cctx = zstd.ZstdCompressor(level=1) - no_checksum = cctx.compress(b'foobar') + no_checksum = cctx.compress(b"foobar") cctx = zstd.ZstdCompressor(level=1, write_checksum=True) - with_checksum = cctx.compress(b'foobar') + with_checksum = cctx.compress(b"foobar") self.assertEqual(len(with_checksum), len(no_checksum) + 4) @@ -125,9 +128,9 @@ def test_write_content_size(self): cctx = zstd.ZstdCompressor(level=1) - with_size = cctx.compress(b'foobar' * 256) + with_size = cctx.compress(b"foobar" * 256) cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - no_size = 
cctx.compress(b'foobar' * 256) + no_size = cctx.compress(b"foobar" * 256) self.assertEqual(len(with_size), len(no_size) + 1) @@ -139,17 +142,17 @@ def test_no_dict_id(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(1024, samples) cctx = zstd.ZstdCompressor(level=1, dict_data=d) - with_dict_id = cctx.compress(b'foobarfoobar') + with_dict_id = cctx.compress(b"foobarfoobar") cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) - no_dict_id = cctx.compress(b'foobarfoobar') + no_dict_id = cctx.compress(b"foobarfoobar") self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) @@ -161,23 +164,23 @@ def test_compress_dict_multiple(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) cctx = zstd.ZstdCompressor(level=1, dict_data=d) for i in range(32): - cctx.compress(b'foo bar foobar foo bar foobar') + cctx.compress(b"foo bar foobar foo bar foobar") def test_dict_precompute(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) d.precompute_compress(level=1) @@ -185,11 +188,11 @@ cctx = zstd.ZstdCompressor(level=1, dict_data=d) for i in range(32): - cctx.compress(b'foo bar foobar foo bar foobar') + cctx.compress(b"foo bar foobar foo bar foobar") def test_multithreaded(self): chunk_size = multithreaded_chunk_size(1) - source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) + source = b"".join([b"x" * chunk_size, b"y" * chunk_size]) 
cctx = zstd.ZstdCompressor(level=1, threads=2) compressed = cctx.compress(source) @@ -205,73 +208,72 @@ def test_multithreaded_dict(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(1024, samples) cctx = zstd.ZstdCompressor(dict_data=d, threads=2) - result = cctx.compress(b'foo') - params = zstd.get_frame_parameters(result); - self.assertEqual(params.content_size, 3); + result = cctx.compress(b"foo") + params = zstd.get_frame_parameters(result) + self.assertEqual(params.content_size, 3) self.assertEqual(params.dict_id, d.dict_id()) - self.assertEqual(result, - b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' - b'\x66\x6f\x6f') + self.assertEqual( + result, + b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f", + ) def test_multithreaded_compression_params(self): params = zstd.ZstdCompressionParameters.from_level(0, threads=2) cctx = zstd.ZstdCompressor(compression_params=params) - result = cctx.compress(b'foo') - params = zstd.get_frame_parameters(result); - self.assertEqual(params.content_size, 3); + result = cctx.compress(b"foo") + params = zstd.get_frame_parameters(result) + self.assertEqual(params.content_size, 3) - self.assertEqual(result, - b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') + self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f") @make_cffi -class TestCompressor_compressobj(unittest.TestCase): +class TestCompressor_compressobj(TestCase): def test_compressobj_empty(self): cctx = zstd.ZstdCompressor(level=1, write_content_size=False) cobj = cctx.compressobj() - self.assertEqual(cobj.compress(b''), b'') - self.assertEqual(cobj.flush(), - b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + self.assertEqual(cobj.compress(b""), b"") + self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") 
def test_input_types(self): - expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' + expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" cctx = zstd.ZstdCompressor(level=1, write_content_size=False) mutable_array = bytearray(3) - mutable_array[:] = b'foo' + mutable_array[:] = b"foo" sources = [ - memoryview(b'foo'), - bytearray(b'foo'), + memoryview(b"foo"), + bytearray(b"foo"), mutable_array, ] for source in sources: cobj = cctx.compressobj() - self.assertEqual(cobj.compress(source), b'') + self.assertEqual(cobj.compress(source), b"") self.assertEqual(cobj.flush(), expected) def test_compressobj_large(self): chunks = [] for i in range(255): - chunks.append(struct.Struct('>B').pack(i) * 16384) + chunks.append(struct.Struct(">B").pack(i) * 16384) cctx = zstd.ZstdCompressor(level=3) cobj = cctx.compressobj() - result = cobj.compress(b''.join(chunks)) + cobj.flush() + result = cobj.compress(b"".join(chunks)) + cobj.flush() self.assertEqual(len(result), 999) - self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') + self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") params = zstd.get_frame_parameters(result) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) @@ -282,10 +284,10 @@ def test_write_checksum(self): cctx = zstd.ZstdCompressor(level=1) cobj = cctx.compressobj() - no_checksum = cobj.compress(b'foobar') + cobj.flush() + no_checksum = cobj.compress(b"foobar") + cobj.flush() cctx = zstd.ZstdCompressor(level=1, write_checksum=True) cobj = cctx.compressobj() - with_checksum = cobj.compress(b'foobar') + cobj.flush() + with_checksum = cobj.compress(b"foobar") + cobj.flush() no_params = zstd.get_frame_parameters(no_checksum) with_params = zstd.get_frame_parameters(with_checksum) @@ -300,11 +302,11 @@ def test_write_content_size(self): cctx = zstd.ZstdCompressor(level=1) - cobj = cctx.compressobj(size=len(b'foobar' * 256)) - with_size = cobj.compress(b'foobar' * 256) + cobj.flush() + cobj = cctx.compressobj(size=len(b"foobar" * 256)) + 
with_size = cobj.compress(b"foobar" * 256) + cobj.flush() cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - cobj = cctx.compressobj(size=len(b'foobar' * 256)) - no_size = cobj.compress(b'foobar' * 256) + cobj.flush() + cobj = cctx.compressobj(size=len(b"foobar" * 256)) + no_size = cobj.compress(b"foobar" * 256) + cobj.flush() no_params = zstd.get_frame_parameters(no_size) with_params = zstd.get_frame_parameters(with_size) @@ -321,48 +323,53 @@ cctx = zstd.ZstdCompressor() cobj = cctx.compressobj() - cobj.compress(b'foo') + cobj.compress(b"foo") cobj.flush() - with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'): - cobj.compress(b'foo') + with self.assertRaisesRegex( + zstd.ZstdError, r"cannot call compress\(\) after compressor" + ): + cobj.compress(b"foo") - with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): + with self.assertRaisesRegex( + zstd.ZstdError, "compressor object already finished" + ): cobj.flush() def test_flush_block_repeated(self): cctx = zstd.ZstdCompressor(level=1) cobj = cctx.compressobj() - self.assertEqual(cobj.compress(b'foo'), b'') - self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), - b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') - self.assertEqual(cobj.compress(b'bar'), b'') + self.assertEqual(cobj.compress(b"foo"), b"") + self.assertEqual( + cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), + b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo", + ) + self.assertEqual(cobj.compress(b"bar"), b"") # 3 byte header plus content. 
- self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), - b'\x18\x00\x00bar') - self.assertEqual(cobj.flush(), b'\x01\x00\x00') + self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar") + self.assertEqual(cobj.flush(), b"\x01\x00\x00") def test_flush_empty_block(self): cctx = zstd.ZstdCompressor(write_checksum=True) cobj = cctx.compressobj() - cobj.compress(b'foobar') + cobj.compress(b"foobar") cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) # No-op if no block is active (this is internal to zstd). - self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'') + self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"") trailing = cobj.flush() # 3 bytes block header + 4 bytes frame checksum self.assertEqual(len(trailing), 7) header = trailing[0:3] - self.assertEqual(header, b'\x01\x00\x00') + self.assertEqual(header, b"\x01\x00\x00") def test_multithreaded(self): source = io.BytesIO() - source.write(b'a' * 1048576) - source.write(b'b' * 1048576) - source.write(b'c' * 1048576) + source.write(b"a" * 1048576) + source.write(b"b" * 1048576) + source.write(b"c" * 1048576) source.seek(0) cctx = zstd.ZstdCompressor(level=1, threads=2) @@ -378,9 +385,9 @@ chunks.append(cobj.flush()) - compressed = b''.join(chunks) + compressed = b"".join(chunks) - self.assertEqual(len(compressed), 295) + self.assertEqual(len(compressed), 119) def test_frame_progression(self): cctx = zstd.ZstdCompressor() @@ -389,7 +396,7 @@ cobj = cctx.compressobj() - cobj.compress(b'foobar') + cobj.compress(b"foobar") self.assertEqual(cctx.frame_progression(), (6, 0, 0)) cobj.flush() @@ -399,20 +406,20 @@ cctx = zstd.ZstdCompressor() cobj = cctx.compressobj(size=2) - with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): - cobj.compress(b'foo') + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): + cobj.compress(b"foo") # Try another operation on this instance. 
- with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): - cobj.compress(b'aa') + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): + cobj.compress(b"aa") # Try another operation on the compressor. cctx.compressobj(size=4) - cctx.compress(b'foobar') + cctx.compress(b"foobar") @make_cffi -class TestCompressor_copy_stream(unittest.TestCase): +class TestCompressor_copy_stream(TestCase): def test_no_read(self): source = object() dest = io.BytesIO() @@ -438,13 +445,12 @@ self.assertEqual(int(r), 0) self.assertEqual(w, 9) - self.assertEqual(dest.getvalue(), - b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") def test_large_data(self): source = io.BytesIO() for i in range(255): - source.write(struct.Struct('>B').pack(i) * 16384) + source.write(struct.Struct(">B").pack(i) * 16384) source.seek(0) dest = io.BytesIO() @@ -461,7 +467,7 @@ self.assertFalse(params.has_checksum) def test_write_checksum(self): - source = io.BytesIO(b'foobar') + source = io.BytesIO(b"foobar") no_checksum = io.BytesIO() cctx = zstd.ZstdCompressor(level=1) @@ -472,8 +478,7 @@ cctx = zstd.ZstdCompressor(level=1, write_checksum=True) cctx.copy_stream(source, with_checksum) - self.assertEqual(len(with_checksum.getvalue()), - len(no_checksum.getvalue()) + 4) + self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) no_params = zstd.get_frame_parameters(no_checksum.getvalue()) with_params = zstd.get_frame_parameters(with_checksum.getvalue()) @@ -485,7 +490,7 @@ self.assertTrue(with_params.has_checksum) def test_write_content_size(self): - source = io.BytesIO(b'foobar' * 256) + source = io.BytesIO(b"foobar" * 256) no_size = io.BytesIO() cctx = zstd.ZstdCompressor(level=1, write_content_size=False) @@ -497,16 +502,14 @@ cctx.copy_stream(source, with_size) # Source content size is unknown, so no content size written. 
- self.assertEqual(len(with_size.getvalue()), - len(no_size.getvalue())) + self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) source.seek(0) with_size = io.BytesIO() cctx.copy_stream(source, with_size, size=len(source.getvalue())) # We specified source size, so content size header is present. - self.assertEqual(len(with_size.getvalue()), - len(no_size.getvalue()) + 1) + self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) no_params = zstd.get_frame_parameters(no_size.getvalue()) with_params = zstd.get_frame_parameters(with_size.getvalue()) @@ -518,7 +521,7 @@ self.assertFalse(with_params.has_checksum) def test_read_write_size(self): - source = OpCountingBytesIO(b'foobarfoobar') + source = OpCountingBytesIO(b"foobarfoobar") dest = OpCountingBytesIO() cctx = zstd.ZstdCompressor() r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) @@ -530,16 +533,16 @@ def test_multithreaded(self): source = io.BytesIO() - source.write(b'a' * 1048576) - source.write(b'b' * 1048576) - source.write(b'c' * 1048576) + source.write(b"a" * 1048576) + source.write(b"b" * 1048576) + source.write(b"c" * 1048576) source.seek(0) dest = io.BytesIO() cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) r, w = cctx.copy_stream(source, dest) self.assertEqual(r, 3145728) - self.assertEqual(w, 295) + self.assertEqual(w, 111) params = zstd.get_frame_parameters(dest.getvalue()) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) @@ -559,15 +562,15 @@ def test_bad_size(self): source = io.BytesIO() - source.write(b'a' * 32768) - source.write(b'b' * 32768) + source.write(b"a" * 32768) + source.write(b"b" * 32768) source.seek(0) dest = io.BytesIO() cctx = zstd.ZstdCompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): cctx.copy_stream(source, dest, size=42) # Try another operation on this compressor. 
@@ -577,31 +580,31 @@ @make_cffi -class TestCompressor_stream_reader(unittest.TestCase): +class TestCompressor_stream_reader(TestCase): def test_context_manager(self): cctx = zstd.ZstdCompressor() - with cctx.stream_reader(b'foo') as reader: - with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): + with cctx.stream_reader(b"foo") as reader: + with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): with reader as reader2: pass def test_no_context_manager(self): cctx = zstd.ZstdCompressor() - reader = cctx.stream_reader(b'foo') + reader = cctx.stream_reader(b"foo") reader.read(4) self.assertFalse(reader.closed) reader.close() self.assertTrue(reader.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(1) def test_not_implemented(self): cctx = zstd.ZstdCompressor() - with cctx.stream_reader(b'foo' * 60) as reader: + with cctx.stream_reader(b"foo" * 60) as reader: with self.assertRaises(io.UnsupportedOperation): reader.readline() @@ -618,12 +621,12 @@ reader.writelines([]) with self.assertRaises(OSError): - reader.write(b'foo') + reader.write(b"foo") def test_constant_methods(self): cctx = zstd.ZstdCompressor() - with cctx.stream_reader(b'boo') as reader: + with cctx.stream_reader(b"boo") as reader: self.assertTrue(reader.readable()) self.assertFalse(reader.writable()) self.assertFalse(reader.seekable()) @@ -637,27 +640,29 @@ def test_read_closed(self): cctx = zstd.ZstdCompressor() - with cctx.stream_reader(b'foo' * 60) as reader: + with cctx.stream_reader(b"foo" * 60) as reader: reader.close() self.assertTrue(reader.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(10) def test_read_sizes(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") - with cctx.stream_reader(b'foo') as reader: - with 
self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): + with cctx.stream_reader(b"foo") as reader: + with self.assertRaisesRegex( + ValueError, "cannot read negative amounts less than -1" + ): reader.read(-2) - self.assertEqual(reader.read(0), b'') + self.assertEqual(reader.read(0), b"") self.assertEqual(reader.read(), foo) def test_read_buffer(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) with cctx.stream_reader(source) as reader: @@ -667,13 +672,13 @@ result = reader.read(8192) self.assertEqual(result, frame) self.assertEqual(reader.tell(), len(result)) - self.assertEqual(reader.read(), b'') + self.assertEqual(reader.read(), b"") self.assertEqual(reader.tell(), len(result)) def test_read_buffer_small_chunks(self): cctx = zstd.ZstdCompressor() - source = b'foo' * 60 + source = b"foo" * 60 chunks = [] with cctx.stream_reader(source) as reader: @@ -687,12 +692,12 @@ chunks.append(chunk) self.assertEqual(reader.tell(), sum(map(len, chunks))) - self.assertEqual(b''.join(chunks), cctx.compress(source)) + self.assertEqual(b"".join(chunks), cctx.compress(source)) def test_read_stream(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: @@ -701,13 +706,13 @@ chunk = reader.read(8192) self.assertEqual(chunk, frame) self.assertEqual(reader.tell(), len(chunk)) - self.assertEqual(reader.read(), b'') + self.assertEqual(reader.read(), b"") self.assertEqual(reader.tell(), len(chunk)) def test_read_stream_small_chunks(self): cctx = zstd.ZstdCompressor() - source = b'foo' * 60 + source = b"foo" * 60 chunks = [] with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: @@ -721,25 +726,25 @@ 
chunks.append(chunk) self.assertEqual(reader.tell(), sum(map(len, chunks))) - self.assertEqual(b''.join(chunks), cctx.compress(source)) + self.assertEqual(b"".join(chunks), cctx.compress(source)) def test_read_after_exit(self): cctx = zstd.ZstdCompressor() - with cctx.stream_reader(b'foo' * 60) as reader: + with cctx.stream_reader(b"foo" * 60) as reader: while reader.read(8192): pass - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(10) def test_bad_size(self): cctx = zstd.ZstdCompressor() - source = io.BytesIO(b'foobar') + source = io.BytesIO(b"foobar") with cctx.stream_reader(source, size=2) as reader: - with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): reader.read(10) # Try another compression operation. @@ -748,36 +753,36 @@ def test_readall(self): cctx = zstd.ZstdCompressor() - frame = cctx.compress(b'foo' * 1024) + frame = cctx.compress(b"foo" * 1024) - reader = cctx.stream_reader(b'foo' * 1024) + reader = cctx.stream_reader(b"foo" * 1024) self.assertEqual(reader.readall(), frame) def test_readinto(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") - reader = cctx.stream_reader(b'foo') + reader = cctx.stream_reader(b"foo") with self.assertRaises(Exception): - reader.readinto(b'foobar') + reader.readinto(b"foobar") # readinto() with sufficiently large destination. b = bytearray(1024) - reader = cctx.stream_reader(b'foo') + reader = cctx.stream_reader(b"foo") self.assertEqual(reader.readinto(b), len(foo)) - self.assertEqual(b[0:len(foo)], foo) + self.assertEqual(b[0 : len(foo)], foo) self.assertEqual(reader.readinto(b), 0) - self.assertEqual(b[0:len(foo)], foo) + self.assertEqual(b[0 : len(foo)], foo) # readinto() with small reads. 
b = bytearray(1024) - reader = cctx.stream_reader(b'foo', read_size=1) + reader = cctx.stream_reader(b"foo", read_size=1) self.assertEqual(reader.readinto(b), len(foo)) - self.assertEqual(b[0:len(foo)], foo) + self.assertEqual(b[0 : len(foo)], foo) # Too small destination buffer. b = bytearray(2) - reader = cctx.stream_reader(b'foo') + reader = cctx.stream_reader(b"foo") self.assertEqual(reader.readinto(b), 2) self.assertEqual(b[:], foo[0:2]) self.assertEqual(reader.readinto(b), 2) @@ -787,41 +792,41 @@ def test_readinto1(self): cctx = zstd.ZstdCompressor() - foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo'))) + foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) - reader = cctx.stream_reader(b'foo') + reader = cctx.stream_reader(b"foo") with self.assertRaises(Exception): - reader.readinto1(b'foobar') + reader.readinto1(b"foobar") b = bytearray(1024) - source = OpCountingBytesIO(b'foo') + source = OpCountingBytesIO(b"foo") reader = cctx.stream_reader(source) self.assertEqual(reader.readinto1(b), len(foo)) - self.assertEqual(b[0:len(foo)], foo) + self.assertEqual(b[0 : len(foo)], foo) self.assertEqual(source._read_count, 2) # readinto1() with small reads. 
b = bytearray(1024) - source = OpCountingBytesIO(b'foo') + source = OpCountingBytesIO(b"foo") reader = cctx.stream_reader(source, read_size=1) self.assertEqual(reader.readinto1(b), len(foo)) - self.assertEqual(b[0:len(foo)], foo) + self.assertEqual(b[0 : len(foo)], foo) self.assertEqual(source._read_count, 4) def test_read1(self): cctx = zstd.ZstdCompressor() - foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo'))) + foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) - b = OpCountingBytesIO(b'foo') + b = OpCountingBytesIO(b"foo") reader = cctx.stream_reader(b) self.assertEqual(reader.read1(), foo) self.assertEqual(b._read_count, 2) - b = OpCountingBytesIO(b'foo') + b = OpCountingBytesIO(b"foo") reader = cctx.stream_reader(b) - self.assertEqual(reader.read1(0), b'') + self.assertEqual(reader.read1(0), b"") self.assertEqual(reader.read1(2), foo[0:2]) self.assertEqual(b._read_count, 2) self.assertEqual(reader.read1(2), foo[2:4]) @@ -829,7 +834,7 @@ @make_cffi -class TestCompressor_stream_writer(unittest.TestCase): +class TestCompressor_stream_writer(TestCase): def test_io_api(self): buffer = io.BytesIO() cctx = zstd.ZstdCompressor() @@ -899,7 +904,7 @@ self.assertFalse(writer.closed) def test_fileno_file(self): - with tempfile.TemporaryFile('wb') as tf: + with tempfile.TemporaryFile("wb") as tf: cctx = zstd.ZstdCompressor() writer = cctx.stream_writer(tf) @@ -910,33 +915,35 @@ cctx = zstd.ZstdCompressor(level=1) writer = cctx.stream_writer(buffer) - writer.write(b'foo' * 1024) + writer.write(b"foo" * 1024) self.assertFalse(writer.closed) self.assertFalse(buffer.closed) writer.close() self.assertTrue(writer.closed) self.assertTrue(buffer.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): - writer.write(b'foo') + with self.assertRaisesRegex(ValueError, "stream is closed"): + writer.write(b"foo") - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): writer.flush() - with 
self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): with writer: pass - self.assertEqual(buffer.getvalue(), - b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f' - b'\x6f\x01\x00\xfa\xd3\x77\x43') + self.assertEqual( + buffer.getvalue(), + b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f" + b"\x6f\x01\x00\xfa\xd3\x77\x43", + ) # Context manager exit should close stream. buffer = io.BytesIO() writer = cctx.stream_writer(buffer) with writer: - writer.write(b'foo') + writer.write(b"foo") self.assertTrue(writer.closed) @@ -944,10 +951,10 @@ buffer = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1, write_content_size=False) with cctx.stream_writer(buffer) as compressor: - compressor.write(b'') + compressor.write(b"") result = buffer.getvalue() - self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") params = zstd.get_frame_parameters(result) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) @@ -958,11 +965,11 @@ # Test without context manager. 
buffer = io.BytesIO() compressor = cctx.stream_writer(buffer) - self.assertEqual(compressor.write(b''), 0) - self.assertEqual(buffer.getvalue(), b'') + self.assertEqual(compressor.write(b""), 0) + self.assertEqual(buffer.getvalue(), b"") self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9) result = buffer.getvalue() - self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") params = zstd.get_frame_parameters(result) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) @@ -972,18 +979,18 @@ # Test write_return_read=True compressor = cctx.stream_writer(buffer, write_return_read=True) - self.assertEqual(compressor.write(b''), 0) + self.assertEqual(compressor.write(b""), 0) def test_input_types(self): - expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' + expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" cctx = zstd.ZstdCompressor(level=1) mutable_array = bytearray(3) - mutable_array[:] = b'foo' + mutable_array[:] = b"foo" sources = [ - memoryview(b'foo'), - bytearray(b'foo'), + memoryview(b"foo"), + bytearray(b"foo"), mutable_array, ] @@ -1001,51 +1008,55 @@ buffer = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=5) with cctx.stream_writer(buffer) as compressor: - self.assertEqual(compressor.write(b'foo'), 0) - self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'x' * 8192), 0) + self.assertEqual(compressor.write(b"foo"), 0) + self.assertEqual(compressor.write(b"bar"), 0) + self.assertEqual(compressor.write(b"x" * 8192), 0) result = buffer.getvalue() - self.assertEqual(result, - b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' - b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') + self.assertEqual( + result, + b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" + b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", + ) # Test without context manager. 
buffer = io.BytesIO() compressor = cctx.stream_writer(buffer) - self.assertEqual(compressor.write(b'foo'), 0) - self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'x' * 8192), 0) + self.assertEqual(compressor.write(b"foo"), 0) + self.assertEqual(compressor.write(b"bar"), 0) + self.assertEqual(compressor.write(b"x" * 8192), 0) self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) result = buffer.getvalue() - self.assertEqual(result, - b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' - b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') + self.assertEqual( + result, + b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" + b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", + ) # Test with write_return_read=True. compressor = cctx.stream_writer(buffer, write_return_read=True) - self.assertEqual(compressor.write(b'foo'), 3) - self.assertEqual(compressor.write(b'barbiz'), 6) - self.assertEqual(compressor.write(b'x' * 8192), 8192) + self.assertEqual(compressor.write(b"foo"), 3) + self.assertEqual(compressor.write(b"barbiz"), 6) + self.assertEqual(compressor.write(b"x" * 8192), 8192) def test_dictionary(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) h = hashlib.sha1(d.as_bytes()).hexdigest() - self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e') + self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e") buffer = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=9, dict_data=d) with cctx.stream_writer(buffer) as compressor: - self.assertEqual(compressor.write(b'foo'), 0) - self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'foo' * 16384), 0) + self.assertEqual(compressor.write(b"foo"), 0) + self.assertEqual(compressor.write(b"bar"), 0) + 
self.assertEqual(compressor.write(b"foo" * 16384), 0) compressed = buffer.getvalue() @@ -1056,14 +1067,15 @@ self.assertFalse(params.has_checksum) h = hashlib.sha1(compressed).hexdigest() - self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06') + self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06") - source = b'foo' + b'bar' + (b'foo' * 16384) + source = b"foo" + b"bar" + (b"foo" * 16384) dctx = zstd.ZstdDecompressor(dict_data=d) - self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), - source) + self.assertEqual( + dctx.decompress(compressed, max_output_size=len(source)), source + ) def test_compression_params(self): params = zstd.ZstdCompressionParameters( @@ -1073,14 +1085,15 @@ min_match=5, search_log=4, target_length=10, - strategy=zstd.STRATEGY_FAST) + strategy=zstd.STRATEGY_FAST, + ) buffer = NonClosingBytesIO() cctx = zstd.ZstdCompressor(compression_params=params) with cctx.stream_writer(buffer) as compressor: - self.assertEqual(compressor.write(b'foo'), 0) - self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'foobar' * 16384), 0) + self.assertEqual(compressor.write(b"foo"), 0) + self.assertEqual(compressor.write(b"bar"), 0) + self.assertEqual(compressor.write(b"foobar" * 16384), 0) compressed = buffer.getvalue() @@ -1091,18 +1104,18 @@ self.assertFalse(params.has_checksum) h = hashlib.sha1(compressed).hexdigest() - self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b') + self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b") def test_write_checksum(self): no_checksum = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1) with cctx.stream_writer(no_checksum) as compressor: - self.assertEqual(compressor.write(b'foobar'), 0) + self.assertEqual(compressor.write(b"foobar"), 0) with_checksum = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1, write_checksum=True) with cctx.stream_writer(with_checksum) as compressor: - self.assertEqual(compressor.write(b'foobar'), 
0) + self.assertEqual(compressor.write(b"foobar"), 0) no_params = zstd.get_frame_parameters(no_checksum.getvalue()) with_params = zstd.get_frame_parameters(with_checksum.getvalue()) @@ -1113,29 +1126,27 @@ self.assertFalse(no_params.has_checksum) self.assertTrue(with_params.has_checksum) - self.assertEqual(len(with_checksum.getvalue()), - len(no_checksum.getvalue()) + 4) + self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) def test_write_content_size(self): no_size = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1, write_content_size=False) with cctx.stream_writer(no_size) as compressor: - self.assertEqual(compressor.write(b'foobar' * 256), 0) + self.assertEqual(compressor.write(b"foobar" * 256), 0) with_size = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1) with cctx.stream_writer(with_size) as compressor: - self.assertEqual(compressor.write(b'foobar' * 256), 0) + self.assertEqual(compressor.write(b"foobar" * 256), 0) # Source size is not known in streaming mode, so header not # written. - self.assertEqual(len(with_size.getvalue()), - len(no_size.getvalue())) + self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) # Declaring size will write the header. 
with_size = NonClosingBytesIO() - with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor: - self.assertEqual(compressor.write(b'foobar' * 256), 0) + with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor: + self.assertEqual(compressor.write(b"foobar" * 256), 0) no_params = zstd.get_frame_parameters(no_size.getvalue()) with_params = zstd.get_frame_parameters(with_size.getvalue()) @@ -1146,31 +1157,30 @@ self.assertFalse(no_params.has_checksum) self.assertFalse(with_params.has_checksum) - self.assertEqual(len(with_size.getvalue()), - len(no_size.getvalue()) + 1) + self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) def test_no_dict_id(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(1024, samples) with_dict_id = NonClosingBytesIO() cctx = zstd.ZstdCompressor(level=1, dict_data=d) with cctx.stream_writer(with_dict_id) as compressor: - self.assertEqual(compressor.write(b'foobarfoobar'), 0) + self.assertEqual(compressor.write(b"foobarfoobar"), 0) - self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03') + self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03") cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) no_dict_id = NonClosingBytesIO() with cctx.stream_writer(no_dict_id) as compressor: - self.assertEqual(compressor.write(b'foobarfoobar'), 0) + self.assertEqual(compressor.write(b"foobarfoobar"), 0) - self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00') + self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00") no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) @@ -1181,14 +1191,13 @@ self.assertFalse(no_params.has_checksum) self.assertFalse(with_params.has_checksum) - 
self.assertEqual(len(with_dict_id.getvalue()), - len(no_dict_id.getvalue()) + 4) + self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4) def test_memory_size(self): cctx = zstd.ZstdCompressor(level=3) buffer = io.BytesIO() with cctx.stream_writer(buffer) as compressor: - compressor.write(b'foo') + compressor.write(b"foo") size = compressor.memory_size() self.assertGreater(size, 100000) @@ -1197,9 +1206,9 @@ cctx = zstd.ZstdCompressor(level=3) dest = OpCountingBytesIO() with cctx.stream_writer(dest, write_size=1) as compressor: - self.assertEqual(compressor.write(b'foo'), 0) - self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'foobar'), 0) + self.assertEqual(compressor.write(b"foo"), 0) + self.assertEqual(compressor.write(b"bar"), 0) + self.assertEqual(compressor.write(b"foobar"), 0) self.assertEqual(len(dest.getvalue()), dest._write_count) @@ -1207,15 +1216,15 @@ cctx = zstd.ZstdCompressor(level=3) dest = OpCountingBytesIO() with cctx.stream_writer(dest) as compressor: - self.assertEqual(compressor.write(b'foo'), 0) + self.assertEqual(compressor.write(b"foo"), 0) self.assertEqual(dest._write_count, 0) self.assertEqual(compressor.flush(), 12) self.assertEqual(dest._write_count, 1) - self.assertEqual(compressor.write(b'bar'), 0) + self.assertEqual(compressor.write(b"bar"), 0) self.assertEqual(dest._write_count, 1) self.assertEqual(compressor.flush(), 6) self.assertEqual(dest._write_count, 2) - self.assertEqual(compressor.write(b'baz'), 0) + self.assertEqual(compressor.write(b"baz"), 0) self.assertEqual(dest._write_count, 3) @@ -1223,7 +1232,7 @@ cctx = zstd.ZstdCompressor(level=3, write_checksum=True) dest = OpCountingBytesIO() with cctx.stream_writer(dest) as compressor: - self.assertEqual(compressor.write(b'foobar' * 8192), 0) + self.assertEqual(compressor.write(b"foobar" * 8192), 0) count = dest._write_count offset = dest.tell() self.assertEqual(compressor.flush(), 23) @@ -1238,41 +1247,43 @@ 
self.assertEqual(len(trailing), 7) header = trailing[0:3] - self.assertEqual(header, b'\x01\x00\x00') + self.assertEqual(header, b"\x01\x00\x00") def test_flush_frame(self): cctx = zstd.ZstdCompressor(level=3) dest = OpCountingBytesIO() with cctx.stream_writer(dest) as compressor: - self.assertEqual(compressor.write(b'foobar' * 8192), 0) + self.assertEqual(compressor.write(b"foobar" * 8192), 0) self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) - compressor.write(b'biz' * 16384) + compressor.write(b"biz" * 16384) - self.assertEqual(dest.getvalue(), - # Frame 1. - b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f' - b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08' - # Frame 2. - b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a' - b'\x01\x00\xfa\x3f\x75\x37\x04') + self.assertEqual( + dest.getvalue(), + # Frame 1. + b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f" + b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08" + # Frame 2. + b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a" + b"\x01\x00\xfa\x3f\x75\x37\x04", + ) def test_bad_flush_mode(self): cctx = zstd.ZstdCompressor() dest = io.BytesIO() with cctx.stream_writer(dest) as compressor: - with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'): + with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"): compressor.flush(flush_mode=42) def test_multithreaded(self): dest = NonClosingBytesIO() cctx = zstd.ZstdCompressor(threads=2) with cctx.stream_writer(dest) as compressor: - compressor.write(b'a' * 1048576) - compressor.write(b'b' * 1048576) - compressor.write(b'c' * 1048576) + compressor.write(b"a" * 1048576) + compressor.write(b"b" * 1048576) + compressor.write(b"c" * 1048576) - self.assertEqual(len(dest.getvalue()), 295) + self.assertEqual(len(dest.getvalue()), 111) def test_tell(self): dest = io.BytesIO() @@ -1281,7 +1292,7 @@ self.assertEqual(compressor.tell(), 0) for i in range(256): - compressor.write(b'foo' * (i + 1)) + compressor.write(b"foo" * (i + 1)) 
self.assertEqual(compressor.tell(), dest.tell()) def test_bad_size(self): @@ -1289,9 +1300,9 @@ dest = io.BytesIO() - with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): with cctx.stream_writer(dest, size=2) as compressor: - compressor.write(b'foo') + compressor.write(b"foo") # Test another operation. with cctx.stream_writer(dest, size=42): @@ -1301,20 +1312,20 @@ dest = NonClosingBytesIO() cctx = zstd.ZstdCompressor() with cctx.stream_writer(dest) as compressor: - with tarfile.open('tf', mode='w|', fileobj=compressor) as tf: - tf.add(__file__, 'test_compressor.py') + with tarfile.open("tf", mode="w|", fileobj=compressor) as tf: + tf.add(__file__, "test_compressor.py") dest = io.BytesIO(dest.getvalue()) dctx = zstd.ZstdDecompressor() with dctx.stream_reader(dest) as reader: - with tarfile.open(mode='r|', fileobj=reader) as tf: + with tarfile.open(mode="r|", fileobj=reader) as tf: for member in tf: - self.assertEqual(member.name, 'test_compressor.py') + self.assertEqual(member.name, "test_compressor.py") @make_cffi -class TestCompressor_read_to_iter(unittest.TestCase): +class TestCompressor_read_to_iter(TestCase): def test_type_validation(self): cctx = zstd.ZstdCompressor() @@ -1323,10 +1334,10 @@ pass # Buffer protocol works. - for chunk in cctx.read_to_iter(b'foobar'): + for chunk in cctx.read_to_iter(b"foobar"): pass - with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): + with self.assertRaisesRegex(ValueError, "must pass an object with a read"): for chunk in cctx.read_to_iter(True): pass @@ -1337,22 +1348,22 @@ it = cctx.read_to_iter(source) chunks = list(it) self.assertEqual(len(chunks), 1) - compressed = b''.join(chunks) - self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + compressed = b"".join(chunks) + self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") # And again with the buffer protocol. 
- it = cctx.read_to_iter(b'') + it = cctx.read_to_iter(b"") chunks = list(it) self.assertEqual(len(chunks), 1) - compressed2 = b''.join(chunks) + compressed2 = b"".join(chunks) self.assertEqual(compressed2, compressed) def test_read_large(self): cctx = zstd.ZstdCompressor(level=1, write_content_size=False) source = io.BytesIO() - source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) - source.write(b'o') + source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) + source.write(b"o") source.seek(0) # Creating an iterator should not perform any compression until @@ -1380,9 +1391,9 @@ next(it) # We should get the same output as the one-shot compression mechanism. - self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) + self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) - params = zstd.get_frame_parameters(b''.join(chunks)) + params = zstd.get_frame_parameters(b"".join(chunks)) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 262144) self.assertEqual(params.dict_id, 0) @@ -1393,16 +1404,16 @@ chunks = list(it) self.assertEqual(len(chunks), 2) - params = zstd.get_frame_parameters(b''.join(chunks)) + params = zstd.get_frame_parameters(b"".join(chunks)) self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) - #self.assertEqual(params.window_size, 262144) + # self.assertEqual(params.window_size, 262144) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) - self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) + self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) def test_read_write_size(self): - source = OpCountingBytesIO(b'foobarfoobar') + source = OpCountingBytesIO(b"foobarfoobar") cctx = zstd.ZstdCompressor(level=3) for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): self.assertEqual(len(chunk), 1) @@ -1411,42 +1422,42 @@ def test_multithreaded(self): source = io.BytesIO() - source.write(b'a' * 
1048576) - source.write(b'b' * 1048576) - source.write(b'c' * 1048576) + source.write(b"a" * 1048576) + source.write(b"b" * 1048576) + source.write(b"c" * 1048576) source.seek(0) cctx = zstd.ZstdCompressor(threads=2) - compressed = b''.join(cctx.read_to_iter(source)) - self.assertEqual(len(compressed), 295) + compressed = b"".join(cctx.read_to_iter(source)) + self.assertEqual(len(compressed), 111) def test_bad_size(self): cctx = zstd.ZstdCompressor() - source = io.BytesIO(b'a' * 42) + source = io.BytesIO(b"a" * 42) - with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): - b''.join(cctx.read_to_iter(source, size=2)) + with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): + b"".join(cctx.read_to_iter(source, size=2)) # Test another operation on errored compressor. - b''.join(cctx.read_to_iter(source)) + b"".join(cctx.read_to_iter(source)) @make_cffi -class TestCompressor_chunker(unittest.TestCase): +class TestCompressor_chunker(TestCase): def test_empty(self): cctx = zstd.ZstdCompressor(write_content_size=False) chunker = cctx.chunker() - it = chunker.compress(b'') + it = chunker.compress(b"") with self.assertRaises(StopIteration): next(it) it = chunker.finish() - self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00') + self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00") with self.assertRaises(StopIteration): next(it) @@ -1455,21 +1466,23 @@ cctx = zstd.ZstdCompressor() chunker = cctx.chunker() - it = chunker.compress(b'foobar') + it = chunker.compress(b"foobar") with self.assertRaises(StopIteration): next(it) - it = chunker.compress(b'baz' * 30) + it = chunker.compress(b"baz" * 30) with self.assertRaises(StopIteration): next(it) it = chunker.finish() - self.assertEqual(next(it), - b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f' - b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') + self.assertEqual( + next(it), + b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f" + 
b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e", + ) with self.assertRaises(StopIteration): next(it) @@ -1478,57 +1491,60 @@ cctx = zstd.ZstdCompressor() chunker = cctx.chunker(size=1024) - it = chunker.compress(b'x' * 1000) + it = chunker.compress(b"x" * 1000) with self.assertRaises(StopIteration): next(it) - it = chunker.compress(b'y' * 24) + it = chunker.compress(b"y" * 24) with self.assertRaises(StopIteration): next(it) chunks = list(chunker.finish()) - self.assertEqual(chunks, [ - b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' - b'\xa0\x16\xe3\x2b\x80\x05' - ]) + self.assertEqual( + chunks, + [ + b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00" + b"\xa0\x16\xe3\x2b\x80\x05" + ], + ) dctx = zstd.ZstdDecompressor() - self.assertEqual(dctx.decompress(b''.join(chunks)), - (b'x' * 1000) + (b'y' * 24)) + self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24)) def test_small_chunk_size(self): cctx = zstd.ZstdCompressor() chunker = cctx.chunker(chunk_size=1) - chunks = list(chunker.compress(b'foo' * 1024)) + chunks = list(chunker.compress(b"foo" * 1024)) self.assertEqual(chunks, []) chunks = list(chunker.finish()) self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) self.assertEqual( - b''.join(chunks), - b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00' - b'\xfa\xd3\x77\x43') + b"".join(chunks), + b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00" + b"\xfa\xd3\x77\x43", + ) dctx = zstd.ZstdDecompressor() - self.assertEqual(dctx.decompress(b''.join(chunks), - max_output_size=10000), - b'foo' * 1024) + self.assertEqual( + dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024 + ) def test_input_types(self): cctx = zstd.ZstdCompressor() mutable_array = bytearray(3) - mutable_array[:] = b'foo' + mutable_array[:] = b"foo" sources = [ - memoryview(b'foo'), - bytearray(b'foo'), + memoryview(b"foo"), + bytearray(b"foo"), mutable_array, ] @@ -1536,28 +1552,32 
@@ chunker = cctx.chunker() self.assertEqual(list(chunker.compress(source)), []) - self.assertEqual(list(chunker.finish()), [ - b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f' - ]) + self.assertEqual( + list(chunker.finish()), + [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"], + ) def test_flush(self): cctx = zstd.ZstdCompressor() chunker = cctx.chunker() - self.assertEqual(list(chunker.compress(b'foo' * 1024)), []) - self.assertEqual(list(chunker.compress(b'bar' * 1024)), []) + self.assertEqual(list(chunker.compress(b"foo" * 1024)), []) + self.assertEqual(list(chunker.compress(b"bar" * 1024)), []) chunks1 = list(chunker.flush()) - self.assertEqual(chunks1, [ - b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' - b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' - ]) + self.assertEqual( + chunks1, + [ + b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72" + b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02" + ], + ) self.assertEqual(list(chunker.flush()), []) self.assertEqual(list(chunker.flush()), []) - self.assertEqual(list(chunker.compress(b'baz' * 1024)), []) + self.assertEqual(list(chunker.compress(b"baz" * 1024)), []) chunks2 = list(chunker.flush()) self.assertEqual(len(chunks2), 1) @@ -1567,53 +1587,56 @@ dctx = zstd.ZstdDecompressor() - self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), - max_output_size=10000), - (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) + self.assertEqual( + dctx.decompress( + b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000 + ), + (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024), + ) def test_compress_after_finish(self): cctx = zstd.ZstdCompressor() chunker = cctx.chunker() - list(chunker.compress(b'foo')) + list(chunker.compress(b"foo")) list(chunker.finish()) - with self.assertRaisesRegexp( - zstd.ZstdError, - r'cannot call compress\(\) after compression finished'): - list(chunker.compress(b'foo')) + with self.assertRaisesRegex( + zstd.ZstdError, r"cannot call 
compress\(\) after compression finished" + ): + list(chunker.compress(b"foo")) def test_flush_after_finish(self): cctx = zstd.ZstdCompressor() chunker = cctx.chunker() - list(chunker.compress(b'foo')) + list(chunker.compress(b"foo")) list(chunker.finish()) - with self.assertRaisesRegexp( - zstd.ZstdError, - r'cannot call flush\(\) after compression finished'): + with self.assertRaisesRegex( + zstd.ZstdError, r"cannot call flush\(\) after compression finished" + ): list(chunker.flush()) def test_finish_after_finish(self): cctx = zstd.ZstdCompressor() chunker = cctx.chunker() - list(chunker.compress(b'foo')) + list(chunker.compress(b"foo")) list(chunker.finish()) - with self.assertRaisesRegexp( - zstd.ZstdError, - r'cannot call finish\(\) after compression finished'): + with self.assertRaisesRegex( + zstd.ZstdError, r"cannot call finish\(\) after compression finished" + ): list(chunker.finish()) -class TestCompressor_multi_compress_to_buffer(unittest.TestCase): +class TestCompressor_multi_compress_to_buffer(TestCase): def test_invalid_inputs(self): cctx = zstd.ZstdCompressor() - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") with self.assertRaises(TypeError): cctx.multi_compress_to_buffer(True) @@ -1621,28 +1644,28 @@ with self.assertRaises(TypeError): cctx.multi_compress_to_buffer((1, 2)) - with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'): - cctx.multi_compress_to_buffer([u'foo']) + with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): + cctx.multi_compress_to_buffer([u"foo"]) def test_empty_input(self): cctx = zstd.ZstdCompressor() - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") - 
with self.assertRaisesRegexp(ValueError, 'no source elements found'): + with self.assertRaisesRegex(ValueError, "no source elements found"): cctx.multi_compress_to_buffer([]) - with self.assertRaisesRegexp(ValueError, 'source elements are empty'): - cctx.multi_compress_to_buffer([b'', b'', b'']) + with self.assertRaisesRegex(ValueError, "source elements are empty"): + cctx.multi_compress_to_buffer([b"", b"", b""]) def test_list_input(self): cctx = zstd.ZstdCompressor(write_checksum=True) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") - original = [b'foo' * 12, b'bar' * 6] + original = [b"foo" * 12, b"bar" * 6] frames = [cctx.compress(c) for c in original] b = cctx.multi_compress_to_buffer(original) @@ -1657,15 +1680,16 @@ def test_buffer_with_segments_input(self): cctx = zstd.ZstdCompressor(write_checksum=True) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") - original = [b'foo' * 4, b'bar' * 6] + original = [b"foo" * 4, b"bar" * 6] frames = [cctx.compress(c) for c in original] - offsets = struct.pack('=QQQQ', 0, len(original[0]), - len(original[0]), len(original[1])) - segments = zstd.BufferWithSegments(b''.join(original), offsets) + offsets = struct.pack( + "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) + ) + segments = zstd.BufferWithSegments(b"".join(original), offsets) result = cctx.multi_compress_to_buffer(segments) @@ -1678,28 +1702,39 @@ def test_buffer_with_segments_collection_input(self): cctx = zstd.ZstdCompressor(write_checksum=True) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + 
self.skipTest("multi_compress_to_buffer not available") original = [ - b'foo1', - b'foo2' * 2, - b'foo3' * 3, - b'foo4' * 4, - b'foo5' * 5, + b"foo1", + b"foo2" * 2, + b"foo3" * 3, + b"foo4" * 4, + b"foo5" * 5, ] frames = [cctx.compress(c) for c in original] - b = b''.join([original[0], original[1]]) - b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ', - 0, len(original[0]), - len(original[0]), len(original[1]))) - b = b''.join([original[2], original[3], original[4]]) - b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', - 0, len(original[2]), - len(original[2]), len(original[3]), - len(original[2]) + len(original[3]), len(original[4]))) + b = b"".join([original[0], original[1]]) + b1 = zstd.BufferWithSegments( + b, + struct.pack( + "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) + ), + ) + b = b"".join([original[2], original[3], original[4]]) + b2 = zstd.BufferWithSegments( + b, + struct.pack( + "=QQQQQQ", + 0, + len(original[2]), + len(original[2]), + len(original[3]), + len(original[2]) + len(original[3]), + len(original[4]), + ), + ) c = zstd.BufferWithSegmentsCollection(b1, b2) @@ -1714,16 +1749,16 @@ # threads argument will cause multi-threaded ZSTD APIs to be used, which will # make output different. 
refcctx = zstd.ZstdCompressor(write_checksum=True) - reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] + reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)] cctx = zstd.ZstdCompressor(write_checksum=True) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") frames = [] - frames.extend(b'x' * 64 for i in range(256)) - frames.extend(b'y' * 64 for i in range(256)) + frames.extend(b"x" * 64 for i in range(256)) + frames.extend(b"y" * 64 for i in range(256)) result = cctx.multi_compress_to_buffer(frames, threads=-1) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_compressor_fuzzing.py --- a/contrib/python-zstandard/tests/test_compressor_fuzzing.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_compressor_fuzzing.py Tue Jan 21 13:14:51 2020 -0500 @@ -6,28 +6,31 @@ import hypothesis import hypothesis.strategies as strategies except ImportError: - raise unittest.SkipTest('hypothesis not available') + raise unittest.SkipTest("hypothesis not available") import zstandard as zstd -from . 
common import ( +from .common import ( make_cffi, NonClosingBytesIO, random_input_data, + TestCase, ) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_stream_reader_fuzzing(unittest.TestCase): +class TestCompressor_stream_reader_fuzzing(TestCase): @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_stream_source_read(self, original, level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_stream_source_read(self, original, level, source_read_size, read_size): if read_size == 0: read_size = -1 @@ -35,8 +38,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: chunk = reader.read(read_size) @@ -45,16 +49,18 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - 
level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_buffer_source_read(self, original, level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_buffer_source_read(self, original, level, source_read_size, read_size): if read_size == 0: read_size = -1 @@ -62,8 +68,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: chunk = reader.read(read_size) @@ -72,22 +79,30 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_stream_source_read_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_stream_source_read_variance( + self, original, 
level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(-1, 16384)) @@ -97,23 +112,31 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_buffer_source_read_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_buffer_source_read_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(-1, 16384)) @@ -123,22 +146,25 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - 
suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_stream_source_readinto(self, original, level, - source_read_size, read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_stream_source_readinto(self, original, level, source_read_size, read_size): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: b = bytearray(read_size) @@ -149,23 +175,26 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_buffer_source_readinto(self, original, level, - source_read_size, read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + 
level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_buffer_source_readinto(self, original, level, source_read_size, read_size): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: b = bytearray(read_size) @@ -176,22 +205,30 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_stream_source_readinto_variance(self, original, level, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_stream_source_readinto_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: 
read_size = read_sizes.draw(strategies.integers(1, 16384)) @@ -203,23 +240,31 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_buffer_source_readinto_variance(self, original, level, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_buffer_source_readinto_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(1, 16384)) @@ -231,16 +276,18 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_stream_source_read1(self, original, 
level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_stream_source_read1(self, original, level, source_read_size, read_size): if read_size == 0: read_size = -1 @@ -248,8 +295,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: chunk = reader.read1(read_size) @@ -258,16 +306,18 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_buffer_source_read1(self, original, level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_buffer_source_read1(self, original, level, source_read_size, read_size): if read_size == 0: read_size = -1 @@ -275,8 +325,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) 
- with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: chunk = reader.read1(read_size) @@ -285,22 +336,30 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_stream_source_read1_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_stream_source_read1_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(-1, 16384)) @@ -310,23 +369,31 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, 
max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_buffer_source_read1_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_buffer_source_read1_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(-1, 16384)) @@ -336,17 +403,20 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), ref_frame) - + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_stream_source_readinto1(self, original, level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_stream_source_readinto1( + self, original, 
level, source_read_size, read_size + ): if read_size == 0: read_size = -1 @@ -354,8 +424,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: b = bytearray(read_size) @@ -366,16 +437,20 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) - def test_buffer_source_readinto1(self, original, level, source_read_size, - read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), + ) + def test_buffer_source_readinto1( + self, original, level, source_read_size, read_size + ): if read_size == 0: read_size = -1 @@ -383,8 +458,9 @@ ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: b = bytearray(read_size) @@ -395,22 +471,30 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) 
@hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_stream_source_readinto1_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_stream_source_readinto1_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(io.BytesIO(original), size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + io.BytesIO(original), size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(1, 16384)) @@ -422,23 +506,31 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - source_read_size=strategies.integers(1, 16384), - read_sizes=strategies.data()) - def test_buffer_source_readinto1_variance(self, original, level, source_read_size, - read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + 
original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + source_read_size=strategies.integers(1, 16384), + read_sizes=strategies.data(), + ) + def test_buffer_source_readinto1_variance( + self, original, level, source_read_size, read_sizes + ): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) - with cctx.stream_reader(original, size=len(original), - read_size=source_read_size) as reader: + with cctx.stream_reader( + original, size=len(original), read_size=source_read_size + ) as reader: chunks = [] while True: read_size = read_sizes.draw(strategies.integers(1, 16384)) @@ -450,35 +542,40 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), ref_frame) - + self.assertEqual(b"".join(chunks), ref_frame) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_stream_writer_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - write_size=strategies.integers(min_value=1, max_value=1048576)) +class TestCompressor_stream_writer_fuzzing(TestCase): + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + write_size=strategies.integers(min_value=1, max_value=1048576), + ) def test_write_size_variance(self, original, level, write_size): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) cctx = zstd.ZstdCompressor(level=level) b = NonClosingBytesIO() - with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: + with cctx.stream_writer( + b, size=len(original), write_size=write_size + ) as compressor: compressor.write(original) self.assertEqual(b.getvalue(), 
ref_frame) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_copy_stream_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - read_size=strategies.integers(min_value=1, max_value=1048576), - write_size=strategies.integers(min_value=1, max_value=1048576)) +class TestCompressor_copy_stream_fuzzing(TestCase): + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + read_size=strategies.integers(min_value=1, max_value=1048576), + write_size=strategies.integers(min_value=1, max_value=1048576), + ) def test_read_write_size_variance(self, original, level, read_size, write_size): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) @@ -487,20 +584,27 @@ source = io.BytesIO(original) dest = io.BytesIO() - cctx.copy_stream(source, dest, size=len(original), read_size=read_size, - write_size=write_size) + cctx.copy_stream( + source, dest, size=len(original), read_size=read_size, write_size=write_size + ) self.assertEqual(dest.getvalue(), ref_frame) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_compressobj_fuzzing(unittest.TestCase): +class TestCompressor_compressobj_fuzzing(TestCase): @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - chunk_sizes=strategies.data()) + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + 
original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + chunk_sizes=strategies.data(), + ) def test_random_input_sizes(self, original, level, chunk_sizes): refctx = zstd.ZstdCompressor(level=level) ref_frame = refctx.compress(original) @@ -512,7 +616,7 @@ i = 0 while True: chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) - source = original[i:i + chunk_size] + source = original[i : i + chunk_size] if not source: break @@ -521,14 +625,20 @@ chunks.append(cobj.flush()) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - chunk_sizes=strategies.data(), - flushes=strategies.data()) + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + chunk_sizes=strategies.data(), + flushes=strategies.data(), + ) def test_flush_block(self, original, level, chunk_sizes, flushes): cctx = zstd.ZstdCompressor(level=level) cobj = cctx.compressobj() @@ -541,7 +651,7 @@ i = 0 while True: input_size = chunk_sizes.draw(strategies.integers(1, 4096)) - source = original[i:i + input_size] + source = original[i : i + input_size] if not source: break @@ -558,24 +668,28 @@ compressed_chunks.append(chunk) decompressed_chunks.append(dobj.decompress(chunk)) - self.assertEqual(b''.join(decompressed_chunks), original[0:i]) + self.assertEqual(b"".join(decompressed_chunks), original[0:i]) chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) compressed_chunks.append(chunk) decompressed_chunks.append(dobj.decompress(chunk)) - self.assertEqual(dctx.decompress(b''.join(compressed_chunks), - 
max_output_size=len(original)), - original) - self.assertEqual(b''.join(decompressed_chunks), original) + self.assertEqual( + dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), + original, + ) + self.assertEqual(b"".join(decompressed_chunks), original) + -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_read_to_iter_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - read_size=strategies.integers(min_value=1, max_value=4096), - write_size=strategies.integers(min_value=1, max_value=4096)) +class TestCompressor_read_to_iter_fuzzing(TestCase): + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + read_size=strategies.integers(min_value=1, max_value=4096), + write_size=strategies.integers(min_value=1, max_value=4096), + ) def test_read_write_size_variance(self, original, level, read_size, write_size): refcctx = zstd.ZstdCompressor(level=level) ref_frame = refcctx.compress(original) @@ -583,32 +697,35 @@ source = io.BytesIO(original) cctx = zstd.ZstdCompressor(level=level) - chunks = list(cctx.read_to_iter(source, size=len(original), - read_size=read_size, - write_size=write_size)) + chunks = list( + cctx.read_to_iter( + source, size=len(original), read_size=read_size, write_size=write_size + ) + ) - self.assertEqual(b''.join(chunks), ref_frame) + self.assertEqual(b"".join(chunks), ref_frame) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') -class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), - min_size=1, max_size=1024), - threads=strategies.integers(min_value=1, 
max_value=8), - use_dict=strategies.booleans()) +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") +class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase): + @hypothesis.given( + original=strategies.lists( + strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 + ), + threads=strategies.integers(min_value=1, max_value=8), + use_dict=strategies.booleans(), + ) def test_data_equivalence(self, original, threads, use_dict): kwargs = {} # Use a content dictionary because it is cheap to create. if use_dict: - kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) + kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) - cctx = zstd.ZstdCompressor(level=1, - write_checksum=True, - **kwargs) + cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") result = cctx.multi_compress_to_buffer(original, threads=-1) @@ -624,17 +741,21 @@ self.assertEqual(dctx.decompress(frame), original[i]) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestCompressor_chunker_fuzzing(unittest.TestCase): +class TestCompressor_chunker_fuzzing(TestCase): @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - chunk_size=strategies.integers( - min_value=1, - max_value=32 * 1048576), - input_sizes=strategies.data()) + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + 
level=strategies.integers(min_value=1, max_value=5), + chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), + input_sizes=strategies.data(), + ) def test_random_input_sizes(self, original, level, chunk_size, input_sizes): cctx = zstd.ZstdCompressor(level=level) chunker = cctx.chunker(chunk_size=chunk_size) @@ -643,7 +764,7 @@ i = 0 while True: input_size = input_sizes.draw(strategies.integers(1, 4096)) - source = original[i:i + input_size] + source = original[i : i + input_size] if not source: break @@ -654,23 +775,26 @@ dctx = zstd.ZstdDecompressor() - self.assertEqual(dctx.decompress(b''.join(chunks), - max_output_size=len(original)), - original) + self.assertEqual( + dctx.decompress(b"".join(chunks), max_output_size=len(original)), original + ) self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - chunk_size=strategies.integers( - min_value=1, - max_value=32 * 1048576), - input_sizes=strategies.data(), - flushes=strategies.data()) - def test_flush_block(self, original, level, chunk_size, input_sizes, - flushes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), + input_sizes=strategies.data(), + flushes=strategies.data(), + ) + def test_flush_block(self, original, level, chunk_size, input_sizes, flushes): cctx = zstd.ZstdCompressor(level=level) chunker = cctx.chunker(chunk_size=chunk_size) @@ -682,7 +806,7 @@ i = 0 while True: input_size = input_sizes.draw(strategies.integers(1, 4096)) - source = original[i:i + input_size] + source = original[i : 
i + input_size] if not source: break @@ -690,22 +814,23 @@ chunks = list(chunker.compress(source)) compressed_chunks.extend(chunks) - decompressed_chunks.append(dobj.decompress(b''.join(chunks))) + decompressed_chunks.append(dobj.decompress(b"".join(chunks))) if not flushes.draw(strategies.booleans()): continue chunks = list(chunker.flush()) compressed_chunks.extend(chunks) - decompressed_chunks.append(dobj.decompress(b''.join(chunks))) + decompressed_chunks.append(dobj.decompress(b"".join(chunks))) - self.assertEqual(b''.join(decompressed_chunks), original[0:i]) + self.assertEqual(b"".join(decompressed_chunks), original[0:i]) chunks = list(chunker.finish()) compressed_chunks.extend(chunks) - decompressed_chunks.append(dobj.decompress(b''.join(chunks))) + decompressed_chunks.append(dobj.decompress(b"".join(chunks))) - self.assertEqual(dctx.decompress(b''.join(compressed_chunks), - max_output_size=len(original)), - original) - self.assertEqual(b''.join(decompressed_chunks), original) \ No newline at end of file + self.assertEqual( + dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), + original, + ) + self.assertEqual(b"".join(decompressed_chunks), original) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_data_structures.py --- a/contrib/python-zstandard/tests/test_data_structures.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_data_structures.py Tue Jan 21 13:14:51 2020 -0500 @@ -3,29 +3,34 @@ import zstandard as zstd -from . 
common import ( +from .common import ( make_cffi, + TestCase, ) @make_cffi -class TestCompressionParameters(unittest.TestCase): +class TestCompressionParameters(TestCase): def test_bounds(self): - zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN, - chain_log=zstd.CHAINLOG_MIN, - hash_log=zstd.HASHLOG_MIN, - search_log=zstd.SEARCHLOG_MIN, - min_match=zstd.MINMATCH_MIN + 1, - target_length=zstd.TARGETLENGTH_MIN, - strategy=zstd.STRATEGY_FAST) + zstd.ZstdCompressionParameters( + window_log=zstd.WINDOWLOG_MIN, + chain_log=zstd.CHAINLOG_MIN, + hash_log=zstd.HASHLOG_MIN, + search_log=zstd.SEARCHLOG_MIN, + min_match=zstd.MINMATCH_MIN + 1, + target_length=zstd.TARGETLENGTH_MIN, + strategy=zstd.STRATEGY_FAST, + ) - zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX, - chain_log=zstd.CHAINLOG_MAX, - hash_log=zstd.HASHLOG_MAX, - search_log=zstd.SEARCHLOG_MAX, - min_match=zstd.MINMATCH_MAX - 1, - target_length=zstd.TARGETLENGTH_MAX, - strategy=zstd.STRATEGY_BTULTRA2) + zstd.ZstdCompressionParameters( + window_log=zstd.WINDOWLOG_MAX, + chain_log=zstd.CHAINLOG_MAX, + hash_log=zstd.HASHLOG_MAX, + search_log=zstd.SEARCHLOG_MAX, + min_match=zstd.MINMATCH_MAX - 1, + target_length=zstd.TARGETLENGTH_MAX, + strategy=zstd.STRATEGY_BTULTRA2, + ) def test_from_level(self): p = zstd.ZstdCompressionParameters.from_level(1) @@ -37,13 +42,15 @@ self.assertEqual(p.window_log, 19) def test_members(self): - p = zstd.ZstdCompressionParameters(window_log=10, - chain_log=6, - hash_log=7, - search_log=4, - min_match=5, - target_length=8, - strategy=1) + p = zstd.ZstdCompressionParameters( + window_log=10, + chain_log=6, + hash_log=7, + search_log=4, + min_match=5, + target_length=8, + strategy=1, + ) self.assertEqual(p.window_log, 10) self.assertEqual(p.chain_log, 6) self.assertEqual(p.hash_log, 7) @@ -58,8 +65,7 @@ p = zstd.ZstdCompressionParameters(threads=4) self.assertEqual(p.threads, 4) - p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, - overlap_log=6) + p 
= zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6) self.assertEqual(p.threads, 2) self.assertEqual(p.job_size, 1048576) self.assertEqual(p.overlap_log, 6) @@ -91,20 +97,25 @@ self.assertEqual(p.ldm_hash_rate_log, 8) def test_estimated_compression_context_size(self): - p = zstd.ZstdCompressionParameters(window_log=20, - chain_log=16, - hash_log=17, - search_log=1, - min_match=5, - target_length=16, - strategy=zstd.STRATEGY_DFAST) + p = zstd.ZstdCompressionParameters( + window_log=20, + chain_log=16, + hash_log=17, + search_log=1, + min_match=5, + target_length=16, + strategy=zstd.STRATEGY_DFAST, + ) # 32-bit has slightly different values from 64-bit. - self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, - delta=250) + self.assertAlmostEqual( + p.estimated_compression_context_size(), 1294464, delta=400 + ) def test_strategy(self): - with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'): + with self.assertRaisesRegex( + ValueError, "cannot specify both compression_strategy" + ): zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) p = zstd.ZstdCompressionParameters(strategy=2) @@ -114,7 +125,9 @@ self.assertEqual(p.compression_strategy, 3) def test_ldm_hash_rate_log(self): - with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'): + with self.assertRaisesRegex( + ValueError, "cannot specify both ldm_hash_rate_log" + ): zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) @@ -124,7 +137,7 @@ self.assertEqual(p.ldm_hash_every_log, 16) def test_overlap_log(self): - with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'): + with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"): zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) p = zstd.ZstdCompressionParameters(overlap_log=2) @@ -137,7 +150,7 @@ @make_cffi -class 
TestFrameParameters(unittest.TestCase): +class TestFrameParameters(TestCase): def test_invalid_type(self): with self.assertRaises(TypeError): zstd.get_frame_parameters(None) @@ -145,71 +158,71 @@ # Python 3 doesn't appear to convert unicode to Py_buffer. if sys.version_info[0] >= 3: with self.assertRaises(TypeError): - zstd.get_frame_parameters(u'foobarbaz') + zstd.get_frame_parameters(u"foobarbaz") else: # CPython will convert unicode to Py_buffer. But CFFI won't. - if zstd.backend == 'cffi': + if zstd.backend == "cffi": with self.assertRaises(TypeError): - zstd.get_frame_parameters(u'foobarbaz') + zstd.get_frame_parameters(u"foobarbaz") else: with self.assertRaises(zstd.ZstdError): - zstd.get_frame_parameters(u'foobarbaz') + zstd.get_frame_parameters(u"foobarbaz") def test_invalid_input_sizes(self): - with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): - zstd.get_frame_parameters(b'') + with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): + zstd.get_frame_parameters(b"") - with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): + with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): zstd.get_frame_parameters(zstd.FRAME_HEADER) def test_invalid_frame(self): - with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): - zstd.get_frame_parameters(b'foobarbaz') + with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): + zstd.get_frame_parameters(b"foobarbaz") def test_attributes(self): - params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00") self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1024) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. 
- params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff") self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1024) self.assertEqual(params.dict_id, 255) self.assertFalse(params.has_checksum) # Lowest 3rd bit indicates if checksum is present. - params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00") self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1024) self.assertEqual(params.dict_id, 0) self.assertTrue(params.has_checksum) # Upper 2 bits indicate content size. - params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00") self.assertEqual(params.content_size, 511) self.assertEqual(params.window_size, 1024) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) # Window descriptor is 2nd byte after frame header. - params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40") self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 262144) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) # Set multiple things. 
- params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') + params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00") self.assertEqual(params.content_size, 272) self.assertEqual(params.window_size, 262144) self.assertEqual(params.dict_id, 15) self.assertTrue(params.has_checksum) def test_input_types(self): - v = zstd.FRAME_HEADER + b'\x00\x00' + v = zstd.FRAME_HEADER + b"\x00\x00" mutable_array = bytearray(len(v)) mutable_array[:] = v diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_data_structures_fuzzing.py --- a/contrib/python-zstandard/tests/test_data_structures_fuzzing.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_data_structures_fuzzing.py Tue Jan 21 13:14:51 2020 -0500 @@ -7,70 +7,99 @@ import hypothesis import hypothesis.strategies as strategies except ImportError: - raise unittest.SkipTest('hypothesis not available') + raise unittest.SkipTest("hypothesis not available") import zstandard as zstd from .common import ( make_cffi, + TestCase, +) + + +s_windowlog = strategies.integers( + min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX +) +s_chainlog = strategies.integers( + min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX +) +s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX) +s_searchlog = strategies.integers( + min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX +) +s_minmatch = strategies.integers( + min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX +) +s_targetlength = strategies.integers( + min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX +) +s_strategy = strategies.sampled_from( + ( + zstd.STRATEGY_FAST, + zstd.STRATEGY_DFAST, + zstd.STRATEGY_GREEDY, + zstd.STRATEGY_LAZY, + zstd.STRATEGY_LAZY2, + zstd.STRATEGY_BTLAZY2, + zstd.STRATEGY_BTOPT, + zstd.STRATEGY_BTULTRA, + zstd.STRATEGY_BTULTRA2, + ) ) -s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, - 
max_value=zstd.WINDOWLOG_MAX) -s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, - max_value=zstd.CHAINLOG_MAX) -s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, - max_value=zstd.HASHLOG_MAX) -s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, - max_value=zstd.SEARCHLOG_MAX) -s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN, - max_value=zstd.MINMATCH_MAX) -s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, - max_value=zstd.TARGETLENGTH_MAX) -s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, - zstd.STRATEGY_DFAST, - zstd.STRATEGY_GREEDY, - zstd.STRATEGY_LAZY, - zstd.STRATEGY_LAZY2, - zstd.STRATEGY_BTLAZY2, - zstd.STRATEGY_BTOPT, - zstd.STRATEGY_BTULTRA, - zstd.STRATEGY_BTULTRA2)) - +@make_cffi +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") +class TestCompressionParametersHypothesis(TestCase): + @hypothesis.given( + s_windowlog, + s_chainlog, + s_hashlog, + s_searchlog, + s_minmatch, + s_targetlength, + s_strategy, + ) + def test_valid_init( + self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy + ): + zstd.ZstdCompressionParameters( + window_log=windowlog, + chain_log=chainlog, + hash_log=hashlog, + search_log=searchlog, + min_match=minmatch, + target_length=targetlength, + strategy=strategy, + ) -@make_cffi -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') -class TestCompressionParametersHypothesis(unittest.TestCase): - @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, - s_minmatch, s_targetlength, s_strategy) - def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, - minmatch, targetlength, strategy): - zstd.ZstdCompressionParameters(window_log=windowlog, - chain_log=chainlog, - hash_log=hashlog, - search_log=searchlog, - min_match=minmatch, - target_length=targetlength, - strategy=strategy) - - @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, - 
s_minmatch, s_targetlength, s_strategy) - def test_estimated_compression_context_size(self, windowlog, chainlog, - hashlog, searchlog, - minmatch, targetlength, - strategy): - if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): + @hypothesis.given( + s_windowlog, + s_chainlog, + s_hashlog, + s_searchlog, + s_minmatch, + s_targetlength, + s_strategy, + ) + def test_estimated_compression_context_size( + self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy + ): + if minmatch == zstd.MINMATCH_MIN and strategy in ( + zstd.STRATEGY_FAST, + zstd.STRATEGY_GREEDY, + ): minmatch += 1 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST: minmatch -= 1 - p = zstd.ZstdCompressionParameters(window_log=windowlog, - chain_log=chainlog, - hash_log=hashlog, - search_log=searchlog, - min_match=minmatch, - target_length=targetlength, - strategy=strategy) + p = zstd.ZstdCompressionParameters( + window_log=windowlog, + chain_log=chainlog, + hash_log=hashlog, + search_log=searchlog, + min_match=minmatch, + target_length=targetlength, + strategy=strategy, + ) size = p.estimated_compression_context_size() - diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_decompressor.py --- a/contrib/python-zstandard/tests/test_decompressor.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_decompressor.py Tue Jan 21 13:14:51 2020 -0500 @@ -13,6 +13,7 @@ make_cffi, NonClosingBytesIO, OpCountingBytesIO, + TestCase, ) @@ -23,62 +24,67 @@ @make_cffi -class TestFrameHeaderSize(unittest.TestCase): +class TestFrameHeaderSize(TestCase): def test_empty(self): - with self.assertRaisesRegexp( - zstd.ZstdError, 'could not determine frame header size: Src size ' - 'is incorrect'): - zstd.frame_header_size(b'') + with self.assertRaisesRegex( + zstd.ZstdError, + "could not determine frame header size: Src size " "is incorrect", + ): + zstd.frame_header_size(b"") def test_too_small(self): 
- with self.assertRaisesRegexp( - zstd.ZstdError, 'could not determine frame header size: Src size ' - 'is incorrect'): - zstd.frame_header_size(b'foob') + with self.assertRaisesRegex( + zstd.ZstdError, + "could not determine frame header size: Src size " "is incorrect", + ): + zstd.frame_header_size(b"foob") def test_basic(self): # It doesn't matter that it isn't a valid frame. - self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6) + self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6) @make_cffi -class TestFrameContentSize(unittest.TestCase): +class TestFrameContentSize(TestCase): def test_empty(self): - with self.assertRaisesRegexp(zstd.ZstdError, - 'error when determining content size'): - zstd.frame_content_size(b'') + with self.assertRaisesRegex( + zstd.ZstdError, "error when determining content size" + ): + zstd.frame_content_size(b"") def test_too_small(self): - with self.assertRaisesRegexp(zstd.ZstdError, - 'error when determining content size'): - zstd.frame_content_size(b'foob') + with self.assertRaisesRegex( + zstd.ZstdError, "error when determining content size" + ): + zstd.frame_content_size(b"foob") def test_bad_frame(self): - with self.assertRaisesRegexp(zstd.ZstdError, - 'error when determining content size'): - zstd.frame_content_size(b'invalid frame header') + with self.assertRaisesRegex( + zstd.ZstdError, "error when determining content size" + ): + zstd.frame_content_size(b"invalid frame header") def test_unknown(self): cctx = zstd.ZstdCompressor(write_content_size=False) - frame = cctx.compress(b'foobar') + frame = cctx.compress(b"foobar") self.assertEqual(zstd.frame_content_size(frame), -1) def test_empty(self): cctx = zstd.ZstdCompressor() - frame = cctx.compress(b'') + frame = cctx.compress(b"") self.assertEqual(zstd.frame_content_size(frame), 0) def test_basic(self): cctx = zstd.ZstdCompressor() - frame = cctx.compress(b'foobar') + frame = cctx.compress(b"foobar") 
self.assertEqual(zstd.frame_content_size(frame), 6) @make_cffi -class TestDecompressor(unittest.TestCase): +class TestDecompressor(TestCase): def test_memory_size(self): dctx = zstd.ZstdDecompressor() @@ -86,22 +92,26 @@ @make_cffi -class TestDecompressor_decompress(unittest.TestCase): +class TestDecompressor_decompress(TestCase): def test_empty_input(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): - dctx.decompress(b'') + with self.assertRaisesRegex( + zstd.ZstdError, "error determining content size from frame header" + ): + dctx.decompress(b"") def test_invalid_input(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): - dctx.decompress(b'foobar') + with self.assertRaisesRegex( + zstd.ZstdError, "error determining content size from frame header" + ): + dctx.decompress(b"foobar") def test_input_types(self): cctx = zstd.ZstdCompressor(level=1) - compressed = cctx.compress(b'foo') + compressed = cctx.compress(b"foo") mutable_array = bytearray(len(compressed)) mutable_array[:] = compressed @@ -114,36 +124,38 @@ dctx = zstd.ZstdDecompressor() for source in sources: - self.assertEqual(dctx.decompress(source), b'foo') + self.assertEqual(dctx.decompress(source), b"foo") def test_no_content_size_in_frame(self): cctx = zstd.ZstdCompressor(write_content_size=False) - compressed = cctx.compress(b'foobar') + compressed = cctx.compress(b"foobar") dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): + with self.assertRaisesRegex( + zstd.ZstdError, "could not determine content size in frame header" + ): dctx.decompress(compressed) def test_content_size_present(self): cctx = zstd.ZstdCompressor() - compressed = cctx.compress(b'foobar') + compressed = cctx.compress(b"foobar") dctx = zstd.ZstdDecompressor() decompressed = 
dctx.decompress(compressed) - self.assertEqual(decompressed, b'foobar') + self.assertEqual(decompressed, b"foobar") def test_empty_roundtrip(self): cctx = zstd.ZstdCompressor() - compressed = cctx.compress(b'') + compressed = cctx.compress(b"") dctx = zstd.ZstdDecompressor() decompressed = dctx.decompress(compressed) - self.assertEqual(decompressed, b'') + self.assertEqual(decompressed, b"") def test_max_output_size(self): cctx = zstd.ZstdCompressor(write_content_size=False) - source = b'foobar' * 256 + source = b"foobar" * 256 compressed = cctx.compress(source) dctx = zstd.ZstdDecompressor() @@ -152,8 +164,9 @@ self.assertEqual(decompressed, source) # Input size - 1 fails - with self.assertRaisesRegexp(zstd.ZstdError, - 'decompression error: did not decompress full frame'): + with self.assertRaisesRegex( + zstd.ZstdError, "decompression error: did not decompress full frame" + ): dctx.decompress(compressed, max_output_size=len(source) - 1) # Input size + 1 works @@ -166,24 +179,24 @@ def test_stupidly_large_output_buffer(self): cctx = zstd.ZstdCompressor(write_content_size=False) - compressed = cctx.compress(b'foobar' * 256) + compressed = cctx.compress(b"foobar" * 256) dctx = zstd.ZstdDecompressor() # Will get OverflowError on some Python distributions that can't # handle really large integers. 
with self.assertRaises((MemoryError, OverflowError)): - dctx.decompress(compressed, max_output_size=2**62) + dctx.decompress(compressed, max_output_size=2 ** 62) def test_dictionary(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) - orig = b'foobar' * 16384 + orig = b"foobar" * 16384 cctx = zstd.ZstdCompressor(level=1, dict_data=d) compressed = cctx.compress(orig) @@ -195,13 +208,13 @@ def test_dictionary_multiple(self): samples = [] for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) - sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) + sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192) compressed = [] cctx = zstd.ZstdCompressor(level=1, dict_data=d) for source in sources: @@ -213,7 +226,7 @@ self.assertEqual(decompressed, sources[i]) def test_max_window_size(self): - with open(__file__, 'rb') as fh: + with open(__file__, "rb") as fh: source = fh.read() # If we write a content size, the decompressor engages single pass @@ -221,15 +234,16 @@ cctx = zstd.ZstdCompressor(write_content_size=False) frame = cctx.compress(source) - dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN) + dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN) - with self.assertRaisesRegexp( - zstd.ZstdError, 'decompression error: Frame requires too much memory'): + with self.assertRaisesRegex( + zstd.ZstdError, "decompression error: Frame requires too much memory" + ): dctx.decompress(frame, max_output_size=len(source)) @make_cffi -class TestDecompressor_copy_stream(unittest.TestCase): +class TestDecompressor_copy_stream(TestCase): def 
test_no_read(self): source = object() dest = io.BytesIO() @@ -256,12 +270,12 @@ self.assertEqual(r, 0) self.assertEqual(w, 0) - self.assertEqual(dest.getvalue(), b'') + self.assertEqual(dest.getvalue(), b"") def test_large_data(self): source = io.BytesIO() for i in range(255): - source.write(struct.Struct('>B').pack(i) * 16384) + source.write(struct.Struct(">B").pack(i) * 16384) source.seek(0) compressed = io.BytesIO() @@ -277,33 +291,32 @@ self.assertEqual(w, len(source.getvalue())) def test_read_write_size(self): - source = OpCountingBytesIO(zstd.ZstdCompressor().compress( - b'foobarfoobar')) + source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) dest = OpCountingBytesIO() dctx = zstd.ZstdDecompressor() r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) self.assertEqual(r, len(source.getvalue())) - self.assertEqual(w, len(b'foobarfoobar')) + self.assertEqual(w, len(b"foobarfoobar")) self.assertEqual(source._read_count, len(source.getvalue()) + 1) self.assertEqual(dest._write_count, len(dest.getvalue())) @make_cffi -class TestDecompressor_stream_reader(unittest.TestCase): +class TestDecompressor_stream_reader(TestCase): def test_context_manager(self): dctx = zstd.ZstdDecompressor() - with dctx.stream_reader(b'foo') as reader: - with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): + with dctx.stream_reader(b"foo") as reader: + with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): with reader as reader2: pass def test_not_implemented(self): dctx = zstd.ZstdDecompressor() - with dctx.stream_reader(b'foo') as reader: + with dctx.stream_reader(b"foo") as reader: with self.assertRaises(io.UnsupportedOperation): reader.readline() @@ -317,7 +330,7 @@ next(reader) with self.assertRaises(io.UnsupportedOperation): - reader.write(b'foo') + reader.write(b"foo") with self.assertRaises(io.UnsupportedOperation): reader.writelines([]) @@ -325,7 +338,7 @@ def test_constant_methods(self): dctx = 
zstd.ZstdDecompressor() - with dctx.stream_reader(b'foo') as reader: + with dctx.stream_reader(b"foo") as reader: self.assertFalse(reader.closed) self.assertTrue(reader.readable()) self.assertFalse(reader.writable()) @@ -340,29 +353,31 @@ def test_read_closed(self): dctx = zstd.ZstdDecompressor() - with dctx.stream_reader(b'foo') as reader: + with dctx.stream_reader(b"foo") as reader: reader.close() self.assertTrue(reader.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(1) def test_read_sizes(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") dctx = zstd.ZstdDecompressor() with dctx.stream_reader(foo) as reader: - with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): + with self.assertRaisesRegex( + ValueError, "cannot read negative amounts less than -1" + ): reader.read(-2) - self.assertEqual(reader.read(0), b'') - self.assertEqual(reader.read(), b'foo') + self.assertEqual(reader.read(0), b"") + self.assertEqual(reader.read(), b"foo") def test_read_buffer(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() @@ -376,14 +391,14 @@ self.assertEqual(reader.tell(), len(source)) # Read after EOF should return empty bytes. 
- self.assertEqual(reader.read(1), b'') + self.assertEqual(reader.read(1), b"") self.assertEqual(reader.tell(), len(result)) self.assertTrue(reader.closed) def test_read_buffer_small_chunks(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() @@ -398,11 +413,11 @@ chunks.append(chunk) self.assertEqual(reader.tell(), sum(map(len, chunks))) - self.assertEqual(b''.join(chunks), source) + self.assertEqual(b"".join(chunks), source) def test_read_stream(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() @@ -412,7 +427,7 @@ chunk = reader.read(8192) self.assertEqual(chunk, source) self.assertEqual(reader.tell(), len(source)) - self.assertEqual(reader.read(1), b'') + self.assertEqual(reader.read(1), b"") self.assertEqual(reader.tell(), len(source)) self.assertFalse(reader.closed) @@ -420,7 +435,7 @@ def test_read_stream_small_chunks(self): cctx = zstd.ZstdCompressor() - source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() @@ -435,11 +450,11 @@ chunks.append(chunk) self.assertEqual(reader.tell(), sum(map(len, chunks))) - self.assertEqual(b''.join(chunks), source) + self.assertEqual(b"".join(chunks), source) def test_read_after_exit(self): cctx = zstd.ZstdCompressor() - frame = cctx.compress(b'foo' * 60) + frame = cctx.compress(b"foo" * 60) dctx = zstd.ZstdDecompressor() @@ -449,45 +464,46 @@ self.assertTrue(reader.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(10) def test_illegal_seeks(self): cctx = zstd.ZstdCompressor() 
- frame = cctx.compress(b'foo' * 60) + frame = cctx.compress(b"foo" * 60) dctx = zstd.ZstdDecompressor() with dctx.stream_reader(frame) as reader: - with self.assertRaisesRegexp(ValueError, - 'cannot seek to negative position'): + with self.assertRaisesRegex(ValueError, "cannot seek to negative position"): reader.seek(-1, os.SEEK_SET) reader.read(1) - with self.assertRaisesRegexp( - ValueError, 'cannot seek zstd decompression stream backwards'): + with self.assertRaisesRegex( + ValueError, "cannot seek zstd decompression stream backwards" + ): reader.seek(0, os.SEEK_SET) - with self.assertRaisesRegexp( - ValueError, 'cannot seek zstd decompression stream backwards'): + with self.assertRaisesRegex( + ValueError, "cannot seek zstd decompression stream backwards" + ): reader.seek(-1, os.SEEK_CUR) - with self.assertRaisesRegexp( - ValueError, - 'zstd decompression streams cannot be seeked with SEEK_END'): + with self.assertRaisesRegex( + ValueError, "zstd decompression streams cannot be seeked with SEEK_END" + ): reader.seek(0, os.SEEK_END) reader.close() - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.seek(4, os.SEEK_SET) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.seek(0) def test_seek(self): - source = b'foobar' * 60 + source = b"foobar" * 60 cctx = zstd.ZstdCompressor() frame = cctx.compress(source) @@ -495,32 +511,32 @@ with dctx.stream_reader(frame) as reader: reader.seek(3) - self.assertEqual(reader.read(3), b'bar') + self.assertEqual(reader.read(3), b"bar") reader.seek(4, os.SEEK_CUR) - self.assertEqual(reader.read(2), b'ar') + self.assertEqual(reader.read(2), b"ar") def test_no_context_manager(self): - source = b'foobar' * 60 + source = b"foobar" * 60 cctx = zstd.ZstdCompressor() frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(frame) - 
self.assertEqual(reader.read(6), b'foobar') - self.assertEqual(reader.read(18), b'foobar' * 3) + self.assertEqual(reader.read(6), b"foobar") + self.assertEqual(reader.read(18), b"foobar" * 3) self.assertFalse(reader.closed) # Calling close prevents subsequent use. reader.close() self.assertTrue(reader.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(6) def test_read_after_error(self): - source = io.BytesIO(b'') + source = io.BytesIO(b"") dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(source) @@ -529,7 +545,7 @@ reader.read(0) with reader: - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): reader.read(100) def test_partial_read(self): @@ -553,87 +569,87 @@ cctx = zstd.ZstdCompressor() source = io.BytesIO() writer = cctx.stream_writer(source) - writer.write(b'foo') + writer.write(b"foo") writer.flush(zstd.FLUSH_FRAME) - writer.write(b'bar') + writer.write(b"bar") writer.flush(zstd.FLUSH_FRAME) dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(source.getvalue()) - self.assertEqual(reader.read(2), b'fo') - self.assertEqual(reader.read(2), b'o') - self.assertEqual(reader.read(2), b'ba') - self.assertEqual(reader.read(2), b'r') + self.assertEqual(reader.read(2), b"fo") + self.assertEqual(reader.read(2), b"o") + self.assertEqual(reader.read(2), b"ba") + self.assertEqual(reader.read(2), b"r") source.seek(0) reader = dctx.stream_reader(source) - self.assertEqual(reader.read(2), b'fo') - self.assertEqual(reader.read(2), b'o') - self.assertEqual(reader.read(2), b'ba') - self.assertEqual(reader.read(2), b'r') + self.assertEqual(reader.read(2), b"fo") + self.assertEqual(reader.read(2), b"o") + self.assertEqual(reader.read(2), b"ba") + self.assertEqual(reader.read(2), b"r") reader = dctx.stream_reader(source.getvalue()) - self.assertEqual(reader.read(3), b'foo') - 
self.assertEqual(reader.read(3), b'bar') + self.assertEqual(reader.read(3), b"foo") + self.assertEqual(reader.read(3), b"bar") source.seek(0) reader = dctx.stream_reader(source) - self.assertEqual(reader.read(3), b'foo') - self.assertEqual(reader.read(3), b'bar') + self.assertEqual(reader.read(3), b"foo") + self.assertEqual(reader.read(3), b"bar") reader = dctx.stream_reader(source.getvalue()) - self.assertEqual(reader.read(4), b'foo') - self.assertEqual(reader.read(4), b'bar') + self.assertEqual(reader.read(4), b"foo") + self.assertEqual(reader.read(4), b"bar") source.seek(0) reader = dctx.stream_reader(source) - self.assertEqual(reader.read(4), b'foo') - self.assertEqual(reader.read(4), b'bar') + self.assertEqual(reader.read(4), b"foo") + self.assertEqual(reader.read(4), b"bar") reader = dctx.stream_reader(source.getvalue()) - self.assertEqual(reader.read(128), b'foo') - self.assertEqual(reader.read(128), b'bar') + self.assertEqual(reader.read(128), b"foo") + self.assertEqual(reader.read(128), b"bar") source.seek(0) reader = dctx.stream_reader(source) - self.assertEqual(reader.read(128), b'foo') - self.assertEqual(reader.read(128), b'bar') + self.assertEqual(reader.read(128), b"foo") + self.assertEqual(reader.read(128), b"bar") # Now tests for reads spanning frames. 
reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) - self.assertEqual(reader.read(3), b'foo') - self.assertEqual(reader.read(3), b'bar') + self.assertEqual(reader.read(3), b"foo") + self.assertEqual(reader.read(3), b"bar") source.seek(0) reader = dctx.stream_reader(source, read_across_frames=True) - self.assertEqual(reader.read(3), b'foo') - self.assertEqual(reader.read(3), b'bar') + self.assertEqual(reader.read(3), b"foo") + self.assertEqual(reader.read(3), b"bar") reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) - self.assertEqual(reader.read(6), b'foobar') + self.assertEqual(reader.read(6), b"foobar") source.seek(0) reader = dctx.stream_reader(source, read_across_frames=True) - self.assertEqual(reader.read(6), b'foobar') + self.assertEqual(reader.read(6), b"foobar") reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) - self.assertEqual(reader.read(7), b'foobar') + self.assertEqual(reader.read(7), b"foobar") source.seek(0) reader = dctx.stream_reader(source, read_across_frames=True) - self.assertEqual(reader.read(7), b'foobar') + self.assertEqual(reader.read(7), b"foobar") reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) - self.assertEqual(reader.read(128), b'foobar') + self.assertEqual(reader.read(128), b"foobar") source.seek(0) reader = dctx.stream_reader(source, read_across_frames=True) - self.assertEqual(reader.read(128), b'foobar') + self.assertEqual(reader.read(128), b"foobar") def test_readinto(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") dctx = zstd.ZstdDecompressor() @@ -641,116 +657,116 @@ # The exact exception varies based on the backend. reader = dctx.stream_reader(foo) with self.assertRaises(Exception): - reader.readinto(b'foobar') + reader.readinto(b"foobar") # readinto() with sufficiently large destination. 
b = bytearray(1024) reader = dctx.stream_reader(foo) self.assertEqual(reader.readinto(b), 3) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") self.assertEqual(reader.readinto(b), 0) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") # readinto() with small reads. b = bytearray(1024) reader = dctx.stream_reader(foo, read_size=1) self.assertEqual(reader.readinto(b), 3) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") # Too small destination buffer. b = bytearray(2) reader = dctx.stream_reader(foo) self.assertEqual(reader.readinto(b), 2) - self.assertEqual(b[:], b'fo') + self.assertEqual(b[:], b"fo") def test_readinto1(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(foo) with self.assertRaises(Exception): - reader.readinto1(b'foobar') + reader.readinto1(b"foobar") # Sufficiently large destination. b = bytearray(1024) reader = dctx.stream_reader(foo) self.assertEqual(reader.readinto1(b), 3) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") self.assertEqual(reader.readinto1(b), 0) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") # readinto() with small reads. b = bytearray(1024) reader = dctx.stream_reader(foo, read_size=1) self.assertEqual(reader.readinto1(b), 3) - self.assertEqual(b[0:3], b'foo') + self.assertEqual(b[0:3], b"foo") # Too small destination buffer. 
b = bytearray(2) reader = dctx.stream_reader(foo) self.assertEqual(reader.readinto1(b), 2) - self.assertEqual(b[:], b'fo') + self.assertEqual(b[:], b"fo") def test_readall(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(foo) - self.assertEqual(reader.readall(), b'foo') + self.assertEqual(reader.readall(), b"foo") def test_read1(self): cctx = zstd.ZstdCompressor() - foo = cctx.compress(b'foo') + foo = cctx.compress(b"foo") dctx = zstd.ZstdDecompressor() b = OpCountingBytesIO(foo) reader = dctx.stream_reader(b) - self.assertEqual(reader.read1(), b'foo') + self.assertEqual(reader.read1(), b"foo") self.assertEqual(b._read_count, 1) b = OpCountingBytesIO(foo) reader = dctx.stream_reader(b) - self.assertEqual(reader.read1(0), b'') - self.assertEqual(reader.read1(2), b'fo') + self.assertEqual(reader.read1(0), b"") + self.assertEqual(reader.read1(2), b"fo") self.assertEqual(b._read_count, 1) - self.assertEqual(reader.read1(1), b'o') + self.assertEqual(reader.read1(1), b"o") self.assertEqual(b._read_count, 1) - self.assertEqual(reader.read1(1), b'') + self.assertEqual(reader.read1(1), b"") self.assertEqual(b._read_count, 2) def test_read_lines(self): cctx = zstd.ZstdCompressor() - source = b'\n'.join(('line %d' % i).encode('ascii') for i in range(1024)) + source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024)) frame = cctx.compress(source) dctx = zstd.ZstdDecompressor() reader = dctx.stream_reader(frame) - tr = io.TextIOWrapper(reader, encoding='utf-8') + tr = io.TextIOWrapper(reader, encoding="utf-8") lines = [] for line in tr: - lines.append(line.encode('utf-8')) + lines.append(line.encode("utf-8")) self.assertEqual(len(lines), 1024) - self.assertEqual(b''.join(lines), source) + self.assertEqual(b"".join(lines), source) reader = dctx.stream_reader(frame) - tr = io.TextIOWrapper(reader, encoding='utf-8') + tr = io.TextIOWrapper(reader, 
encoding="utf-8") lines = tr.readlines() self.assertEqual(len(lines), 1024) - self.assertEqual(''.join(lines).encode('utf-8'), source) + self.assertEqual("".join(lines).encode("utf-8"), source) reader = dctx.stream_reader(frame) - tr = io.TextIOWrapper(reader, encoding='utf-8') + tr = io.TextIOWrapper(reader, encoding="utf-8") lines = [] while True: @@ -758,26 +774,26 @@ if not line: break - lines.append(line.encode('utf-8')) + lines.append(line.encode("utf-8")) self.assertEqual(len(lines), 1024) - self.assertEqual(b''.join(lines), source) + self.assertEqual(b"".join(lines), source) @make_cffi -class TestDecompressor_decompressobj(unittest.TestCase): +class TestDecompressor_decompressobj(TestCase): def test_simple(self): - data = zstd.ZstdCompressor(level=1).compress(b'foobar') + data = zstd.ZstdCompressor(level=1).compress(b"foobar") dctx = zstd.ZstdDecompressor() dobj = dctx.decompressobj() - self.assertEqual(dobj.decompress(data), b'foobar') + self.assertEqual(dobj.decompress(data), b"foobar") self.assertIsNone(dobj.flush()) self.assertIsNone(dobj.flush(10)) self.assertIsNone(dobj.flush(length=100)) def test_input_types(self): - compressed = zstd.ZstdCompressor(level=1).compress(b'foo') + compressed = zstd.ZstdCompressor(level=1).compress(b"foo") dctx = zstd.ZstdDecompressor() @@ -795,28 +811,28 @@ self.assertIsNone(dobj.flush()) self.assertIsNone(dobj.flush(10)) self.assertIsNone(dobj.flush(length=100)) - self.assertEqual(dobj.decompress(source), b'foo') + self.assertEqual(dobj.decompress(source), b"foo") self.assertIsNone(dobj.flush()) def test_reuse(self): - data = zstd.ZstdCompressor(level=1).compress(b'foobar') + data = zstd.ZstdCompressor(level=1).compress(b"foobar") dctx = zstd.ZstdDecompressor() dobj = dctx.decompressobj() dobj.decompress(data) - with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): + with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"): dobj.decompress(data) self.assertIsNone(dobj.flush()) def 
test_bad_write_size(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(ValueError, 'write_size must be positive'): + with self.assertRaisesRegex(ValueError, "write_size must be positive"): dctx.decompressobj(write_size=0) def test_write_size(self): - source = b'foo' * 64 + b'bar' * 128 + source = b"foo" * 64 + b"bar" * 128 data = zstd.ZstdCompressor(level=1).compress(source) dctx = zstd.ZstdDecompressor() @@ -836,7 +852,7 @@ @make_cffi -class TestDecompressor_stream_writer(unittest.TestCase): +class TestDecompressor_stream_writer(TestCase): def test_io_api(self): buffer = io.BytesIO() dctx = zstd.ZstdDecompressor() @@ -908,14 +924,14 @@ writer.fileno() def test_fileno_file(self): - with tempfile.TemporaryFile('wb') as tf: + with tempfile.TemporaryFile("wb") as tf: dctx = zstd.ZstdDecompressor() writer = dctx.stream_writer(tf) self.assertEqual(writer.fileno(), tf.fileno()) def test_close(self): - foo = zstd.ZstdCompressor().compress(b'foo') + foo = zstd.ZstdCompressor().compress(b"foo") buffer = NonClosingBytesIO() dctx = zstd.ZstdDecompressor() @@ -928,17 +944,17 @@ self.assertTrue(writer.closed) self.assertTrue(buffer.closed) - with self.assertRaisesRegexp(ValueError, 'stream is closed'): - writer.write(b'') + with self.assertRaisesRegex(ValueError, "stream is closed"): + writer.write(b"") - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): writer.flush() - with self.assertRaisesRegexp(ValueError, 'stream is closed'): + with self.assertRaisesRegex(ValueError, "stream is closed"): with writer: pass - self.assertEqual(buffer.getvalue(), b'foo') + self.assertEqual(buffer.getvalue(), b"foo") # Context manager exit should close stream. 
buffer = NonClosingBytesIO() @@ -948,7 +964,7 @@ writer.write(foo) self.assertTrue(writer.closed) - self.assertEqual(buffer.getvalue(), b'foo') + self.assertEqual(buffer.getvalue(), b"foo") def test_flush(self): buffer = OpCountingBytesIO() @@ -962,12 +978,12 @@ def test_empty_roundtrip(self): cctx = zstd.ZstdCompressor() - empty = cctx.compress(b'') - self.assertEqual(decompress_via_writer(empty), b'') + empty = cctx.compress(b"") + self.assertEqual(decompress_via_writer(empty), b"") def test_input_types(self): cctx = zstd.ZstdCompressor(level=1) - compressed = cctx.compress(b'foo') + compressed = cctx.compress(b"foo") mutable_array = bytearray(len(compressed)) mutable_array[:] = compressed @@ -984,25 +1000,25 @@ decompressor = dctx.stream_writer(buffer) decompressor.write(source) - self.assertEqual(buffer.getvalue(), b'foo') + self.assertEqual(buffer.getvalue(), b"foo") buffer = NonClosingBytesIO() with dctx.stream_writer(buffer) as decompressor: self.assertEqual(decompressor.write(source), 3) - self.assertEqual(buffer.getvalue(), b'foo') + self.assertEqual(buffer.getvalue(), b"foo") buffer = io.BytesIO() writer = dctx.stream_writer(buffer, write_return_read=True) self.assertEqual(writer.write(source), len(source)) - self.assertEqual(buffer.getvalue(), b'foo') + self.assertEqual(buffer.getvalue(), b"foo") def test_large_roundtrip(self): chunks = [] for i in range(255): - chunks.append(struct.Struct('>B').pack(i) * 16384) - orig = b''.join(chunks) + chunks.append(struct.Struct(">B").pack(i) * 16384) + orig = b"".join(chunks) cctx = zstd.ZstdCompressor() compressed = cctx.compress(orig) @@ -1012,9 +1028,9 @@ chunks = [] for i in range(255): for j in range(255): - chunks.append(struct.Struct('>B').pack(j) * i) + chunks.append(struct.Struct(">B").pack(j) * i) - orig = b''.join(chunks) + orig = b"".join(chunks) cctx = zstd.ZstdCompressor() compressed = cctx.compress(orig) @@ -1042,13 +1058,13 @@ def test_dictionary(self): samples = [] for i in range(128): - 
samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - samples.append(b'foobar' * 64) + samples.append(b"foo" * 64) + samples.append(b"bar" * 64) + samples.append(b"foobar" * 64) d = zstd.train_dictionary(8192, samples) - orig = b'foobar' * 16384 + orig = b"foobar" * 16384 buffer = NonClosingBytesIO() cctx = zstd.ZstdCompressor(dict_data=d) with cctx.stream_writer(buffer) as compressor: @@ -1083,22 +1099,22 @@ self.assertGreater(size, 100000) def test_write_size(self): - source = zstd.ZstdCompressor().compress(b'foobarfoobar') + source = zstd.ZstdCompressor().compress(b"foobarfoobar") dest = OpCountingBytesIO() dctx = zstd.ZstdDecompressor() with dctx.stream_writer(dest, write_size=1) as decompressor: - s = struct.Struct('>B') + s = struct.Struct(">B") for c in source: if not isinstance(c, str): c = s.pack(c) decompressor.write(c) - self.assertEqual(dest.getvalue(), b'foobarfoobar') + self.assertEqual(dest.getvalue(), b"foobarfoobar") self.assertEqual(dest._write_count, len(dest.getvalue())) @make_cffi -class TestDecompressor_read_to_iter(unittest.TestCase): +class TestDecompressor_read_to_iter(TestCase): def test_type_validation(self): dctx = zstd.ZstdDecompressor() @@ -1106,10 +1122,10 @@ dctx.read_to_iter(io.BytesIO()) # Buffer protocol works. 
- dctx.read_to_iter(b'foobar') + dctx.read_to_iter(b"foobar") - with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): - b''.join(dctx.read_to_iter(True)) + with self.assertRaisesRegex(ValueError, "must pass an object with a read"): + b"".join(dctx.read_to_iter(True)) def test_empty_input(self): dctx = zstd.ZstdDecompressor() @@ -1120,25 +1136,25 @@ with self.assertRaises(StopIteration): next(it) - it = dctx.read_to_iter(b'') + it = dctx.read_to_iter(b"") with self.assertRaises(StopIteration): next(it) def test_invalid_input(self): dctx = zstd.ZstdDecompressor() - source = io.BytesIO(b'foobar') + source = io.BytesIO(b"foobar") it = dctx.read_to_iter(source) - with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): + with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): next(it) - it = dctx.read_to_iter(b'foobar') - with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): + it = dctx.read_to_iter(b"foobar") + with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): next(it) def test_empty_roundtrip(self): cctx = zstd.ZstdCompressor(level=1, write_content_size=False) - empty = cctx.compress(b'') + empty = cctx.compress(b"") source = io.BytesIO(empty) source.seek(0) @@ -1157,24 +1173,28 @@ def test_skip_bytes_too_large(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): - b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) + with self.assertRaisesRegex( + ValueError, "skip_bytes must be smaller than read_size" + ): + b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1)) - with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): - b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) + with self.assertRaisesRegex( + ValueError, "skip_bytes larger than first input chunk" + ): + b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10)) def test_skip_bytes(self): cctx = 
zstd.ZstdCompressor(write_content_size=False) - compressed = cctx.compress(b'foobar') + compressed = cctx.compress(b"foobar") dctx = zstd.ZstdDecompressor() - output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3)) - self.assertEqual(output, b'foobar') + output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3)) + self.assertEqual(output, b"foobar") def test_large_output(self): source = io.BytesIO() - source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) - source.write(b'o') + source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) + source.write(b"o") source.seek(0) cctx = zstd.ZstdCompressor(level=1) @@ -1191,7 +1211,7 @@ with self.assertRaises(StopIteration): next(it) - decompressed = b''.join(chunks) + decompressed = b"".join(chunks) self.assertEqual(decompressed, source.getvalue()) # And again with buffer protocol. @@ -1203,12 +1223,12 @@ with self.assertRaises(StopIteration): next(it) - decompressed = b''.join(chunks) + decompressed = b"".join(chunks) self.assertEqual(decompressed, source.getvalue()) - @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') + @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") def test_large_input(self): - bytes = list(struct.Struct('>B').pack(i) for i in range(256)) + bytes = list(struct.Struct(">B").pack(i) for i in range(256)) compressed = NonClosingBytesIO() input_size = 0 cctx = zstd.ZstdCompressor(level=1) @@ -1217,14 +1237,18 @@ compressor.write(random.choice(bytes)) input_size += 1 - have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE + have_compressed = ( + len(compressed.getvalue()) + > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE + ) have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 if have_compressed and have_raw: break compressed = io.BytesIO(compressed.getvalue()) - self.assertGreater(len(compressed.getvalue()), - zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) + 
self.assertGreater( + len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE + ) dctx = zstd.ZstdDecompressor() it = dctx.read_to_iter(compressed) @@ -1237,7 +1261,7 @@ with self.assertRaises(StopIteration): next(it) - decompressed = b''.join(chunks) + decompressed = b"".join(chunks) self.assertEqual(len(decompressed), input_size) # And again with buffer protocol. @@ -1251,7 +1275,7 @@ with self.assertRaises(StopIteration): next(it) - decompressed = b''.join(chunks) + decompressed = b"".join(chunks) self.assertEqual(len(decompressed), input_size) def test_interesting(self): @@ -1263,22 +1287,23 @@ compressed = NonClosingBytesIO() with cctx.stream_writer(compressed) as compressor: for i in range(256): - chunk = b'\0' * 1024 + chunk = b"\0" * 1024 compressor.write(chunk) source.write(chunk) dctx = zstd.ZstdDecompressor() - simple = dctx.decompress(compressed.getvalue(), - max_output_size=len(source.getvalue())) + simple = dctx.decompress( + compressed.getvalue(), max_output_size=len(source.getvalue()) + ) self.assertEqual(simple, source.getvalue()) compressed = io.BytesIO(compressed.getvalue()) - streamed = b''.join(dctx.read_to_iter(compressed)) + streamed = b"".join(dctx.read_to_iter(compressed)) self.assertEqual(streamed, source.getvalue()) def test_read_write_size(self): - source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) + source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) dctx = zstd.ZstdDecompressor() for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): self.assertEqual(len(chunk), 1) @@ -1287,97 +1312,110 @@ def test_magic_less(self): params = zstd.CompressionParameters.from_level( - 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) + 1, format=zstd.FORMAT_ZSTD1_MAGICLESS + ) cctx = zstd.ZstdCompressor(compression_params=params) - frame = cctx.compress(b'foobar') + frame = cctx.compress(b"foobar") - self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd') + self.assertNotEqual(frame[0:4], 
b"\x28\xb5\x2f\xfd") dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp( - zstd.ZstdError, 'error determining content size from frame header'): + with self.assertRaisesRegex( + zstd.ZstdError, "error determining content size from frame header" + ): dctx.decompress(frame) dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) - res = b''.join(dctx.read_to_iter(frame)) - self.assertEqual(res, b'foobar') + res = b"".join(dctx.read_to_iter(frame)) + self.assertEqual(res, b"foobar") @make_cffi -class TestDecompressor_content_dict_chain(unittest.TestCase): +class TestDecompressor_content_dict_chain(TestCase): def test_bad_inputs_simple(self): dctx = zstd.ZstdDecompressor() with self.assertRaises(TypeError): - dctx.decompress_content_dict_chain(b'foo') + dctx.decompress_content_dict_chain(b"foo") with self.assertRaises(TypeError): - dctx.decompress_content_dict_chain((b'foo', b'bar')) + dctx.decompress_content_dict_chain((b"foo", b"bar")) - with self.assertRaisesRegexp(ValueError, 'empty input chain'): + with self.assertRaisesRegex(ValueError, "empty input chain"): dctx.decompress_content_dict_chain([]) - with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): - dctx.decompress_content_dict_chain([u'foo']) + with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): + dctx.decompress_content_dict_chain([u"foo"]) - with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): + with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): dctx.decompress_content_dict_chain([True]) - with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): + with self.assertRaisesRegex( + ValueError, "chunk 0 is too small to contain a zstd frame" + ): dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) - with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): - dctx.decompress_content_dict_chain([b'foo' * 8]) + with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd 
frame"): + dctx.decompress_content_dict_chain([b"foo" * 8]) - no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) + no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) - with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): + with self.assertRaisesRegex( + ValueError, "chunk 0 missing content size in frame" + ): dctx.decompress_content_dict_chain([no_size]) # Corrupt first frame. - frame = zstd.ZstdCompressor().compress(b'foo' * 64) + frame = zstd.ZstdCompressor().compress(b"foo" * 64) frame = frame[0:12] + frame[15:] - with self.assertRaisesRegexp(zstd.ZstdError, - 'chunk 0 did not decompress full frame'): + with self.assertRaisesRegex( + zstd.ZstdError, "chunk 0 did not decompress full frame" + ): dctx.decompress_content_dict_chain([frame]) def test_bad_subsequent_input(self): - initial = zstd.ZstdCompressor().compress(b'foo' * 64) + initial = zstd.ZstdCompressor().compress(b"foo" * 64) dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): - dctx.decompress_content_dict_chain([initial, u'foo']) + with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): + dctx.decompress_content_dict_chain([initial, u"foo"]) - with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): + with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): dctx.decompress_content_dict_chain([initial, None]) - with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): + with self.assertRaisesRegex( + ValueError, "chunk 1 is too small to contain a zstd frame" + ): dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) - with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): - dctx.decompress_content_dict_chain([initial, b'foo' * 8]) + with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"): + dctx.decompress_content_dict_chain([initial, b"foo" * 8]) - no_size = 
zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) + no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) - with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): + with self.assertRaisesRegex( + ValueError, "chunk 1 missing content size in frame" + ): dctx.decompress_content_dict_chain([initial, no_size]) # Corrupt second frame. - cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) - frame = cctx.compress(b'bar' * 64) + cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64)) + frame = cctx.compress(b"bar" * 64) frame = frame[0:12] + frame[15:] - with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): + with self.assertRaisesRegex( + zstd.ZstdError, "chunk 1 did not decompress full frame" + ): dctx.decompress_content_dict_chain([initial, frame]) def test_simple(self): original = [ - b'foo' * 64, - b'foobar' * 64, - b'baz' * 64, - b'foobaz' * 64, - b'foobarbaz' * 64, + b"foo" * 64, + b"foobar" * 64, + b"baz" * 64, + b"foobaz" * 64, + b"foobarbaz" * 64, ] chunks = [] @@ -1396,12 +1434,12 @@ # TODO enable for CFFI -class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase): +class TestDecompressor_multi_decompress_to_buffer(TestCase): def test_invalid_inputs(self): dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") with self.assertRaises(TypeError): dctx.multi_decompress_to_buffer(True) @@ -1409,22 +1447,24 @@ with self.assertRaises(TypeError): dctx.multi_decompress_to_buffer((1, 2)) - with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'): - dctx.multi_decompress_to_buffer([u'foo']) + with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): + 
dctx.multi_decompress_to_buffer([u"foo"]) - with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): - dctx.multi_decompress_to_buffer([b'foobarbaz']) + with self.assertRaisesRegex( + ValueError, "could not determine decompressed size of item 0" + ): + dctx.multi_decompress_to_buffer([b"foobarbaz"]) def test_list_input(self): cctx = zstd.ZstdCompressor() - original = [b'foo' * 4, b'bar' * 6] + original = [b"foo" * 4, b"bar" * 6] frames = [cctx.compress(d) for d in original] dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") result = dctx.multi_decompress_to_buffer(frames) @@ -1442,14 +1482,14 @@ def test_list_input_frame_sizes(self): cctx = zstd.ZstdCompressor() - original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] + original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] frames = [cctx.compress(d) for d in original] - sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) + sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) @@ -1462,16 +1502,18 @@ def test_buffer_with_segments_input(self): cctx = zstd.ZstdCompressor() - original = [b'foo' * 4, b'bar' * 6] + original = [b"foo" * 4, b"bar" * 6] frames = [cctx.compress(d) for d in original] dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not 
available") - segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) - b = zstd.BufferWithSegments(b''.join(frames), segments) + segments = struct.pack( + "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]) + ) + b = zstd.BufferWithSegments(b"".join(frames), segments) result = dctx.multi_decompress_to_buffer(b) @@ -1483,19 +1525,25 @@ def test_buffer_with_segments_sizes(self): cctx = zstd.ZstdCompressor(write_content_size=False) - original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] + original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] frames = [cctx.compress(d) for d in original] - sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) + sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") - segments = struct.pack('=QQQQQQ', 0, len(frames[0]), - len(frames[0]), len(frames[1]), - len(frames[0]) + len(frames[1]), len(frames[2])) - b = zstd.BufferWithSegments(b''.join(frames), segments) + segments = struct.pack( + "=QQQQQQ", + 0, + len(frames[0]), + len(frames[0]), + len(frames[1]), + len(frames[0]) + len(frames[1]), + len(frames[2]), + ) + b = zstd.BufferWithSegments(b"".join(frames), segments) result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) @@ -1509,15 +1557,15 @@ cctx = zstd.ZstdCompressor() original = [ - b'foo0' * 2, - b'foo1' * 3, - b'foo2' * 4, - b'foo3' * 5, - b'foo4' * 6, + b"foo0" * 2, + b"foo1" * 3, + b"foo2" * 4, + b"foo3" * 5, + b"foo4" * 6, ] - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") frames = cctx.multi_compress_to_buffer(original) @@ -1532,16 
+1580,24 @@ self.assertEqual(data, decompressed[i].tobytes()) # And a manual mode. - b = b''.join([frames[0].tobytes(), frames[1].tobytes()]) - b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ', - 0, len(frames[0]), - len(frames[0]), len(frames[1]))) + b = b"".join([frames[0].tobytes(), frames[1].tobytes()]) + b1 = zstd.BufferWithSegments( + b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])) + ) - b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) - b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', - 0, len(frames[2]), - len(frames[2]), len(frames[3]), - len(frames[2]) + len(frames[3]), len(frames[4]))) + b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) + b2 = zstd.BufferWithSegments( + b, + struct.pack( + "=QQQQQQ", + 0, + len(frames[2]), + len(frames[2]), + len(frames[3]), + len(frames[2]) + len(frames[3]), + len(frames[4]), + ), + ) c = zstd.BufferWithSegmentsCollection(b1, b2) @@ -1560,8 +1616,8 @@ dctx = zstd.ZstdDecompressor(dict_data=d) - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") result = dctx.multi_decompress_to_buffer(frames) @@ -1571,41 +1627,44 @@ cctx = zstd.ZstdCompressor() frames = [] - frames.extend(cctx.compress(b'x' * 64) for i in range(256)) - frames.extend(cctx.compress(b'y' * 64) for i in range(256)) + frames.extend(cctx.compress(b"x" * 64) for i in range(256)) + frames.extend(cctx.compress(b"y" * 64) for i in range(256)) dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") result = dctx.multi_decompress_to_buffer(frames, threads=-1) self.assertEqual(len(result), len(frames)) 
self.assertEqual(result.size(), 2 * 64 * 256) - self.assertEqual(result[0].tobytes(), b'x' * 64) - self.assertEqual(result[256].tobytes(), b'y' * 64) + self.assertEqual(result[0].tobytes(), b"x" * 64) + self.assertEqual(result[256].tobytes(), b"y" * 64) def test_item_failure(self): cctx = zstd.ZstdCompressor() - frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] + frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)] - frames[1] = frames[1][0:15] + b'extra' + frames[1][15:] + frames[1] = frames[1][0:15] + b"extra" + frames[1][15:] dctx = zstd.ZstdDecompressor() - if not hasattr(dctx, 'multi_decompress_to_buffer'): - self.skipTest('multi_decompress_to_buffer not available') + if not hasattr(dctx, "multi_decompress_to_buffer"): + self.skipTest("multi_decompress_to_buffer not available") - with self.assertRaisesRegexp(zstd.ZstdError, - 'error decompressing item 1: (' - 'Corrupted block|' - 'Destination buffer is too small)'): + with self.assertRaisesRegex( + zstd.ZstdError, + "error decompressing item 1: (" + "Corrupted block|" + "Destination buffer is too small)", + ): dctx.multi_decompress_to_buffer(frames) - with self.assertRaisesRegexp(zstd.ZstdError, - 'error decompressing item 1: (' - 'Corrupted block|' - 'Destination buffer is too small)'): + with self.assertRaisesRegex( + zstd.ZstdError, + "error decompressing item 1: (" + "Corrupted block|" + "Destination buffer is too small)", + ): dctx.multi_decompress_to_buffer(frames, threads=2) - diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_decompressor_fuzzing.py --- a/contrib/python-zstandard/tests/test_decompressor_fuzzing.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_decompressor_fuzzing.py Tue Jan 21 13:14:51 2020 -0500 @@ -6,29 +6,37 @@ import hypothesis import hypothesis.strategies as strategies except ImportError: - raise unittest.SkipTest('hypothesis not available') + raise unittest.SkipTest("hypothesis not available") import 
zstandard as zstd -from . common import ( +from .common import ( make_cffi, NonClosingBytesIO, random_input_data, + TestCase, ) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestDecompressor_stream_reader_fuzzing(unittest.TestCase): +class TestDecompressor_stream_reader_fuzzing(TestCase): @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_sizes=strategies.data()) - def test_stream_source_read_variance(self, original, level, streaming, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_sizes=strategies.data(), + ) + def test_stream_source_read_variance( + self, original, level, streaming, source_read_size, read_sizes + ): cctx = zstd.ZstdCompressor(level=level) if streaming: @@ -53,18 +61,22 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) # Similar to above except we have a constant read() size. 
@hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_size=strategies.integers(-1, 131072)) - def test_stream_source_read_size(self, original, level, streaming, - source_read_size, read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_size=strategies.integers(-1, 131072), + ) + def test_stream_source_read_size( + self, original, level, streaming, source_read_size, read_size + ): if read_size == 0: read_size = 1 @@ -91,17 +103,24 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_sizes=strategies.data()) - def test_buffer_source_read_variance(self, original, level, streaming, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_sizes=strategies.data(), + ) + def test_buffer_source_read_variance( + self, original, level, streaming, source_read_size, read_sizes + ): cctx 
= zstd.ZstdCompressor(level=level) if streaming: @@ -125,18 +144,22 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) # Similar to above except we have a constant read() size. @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_size=strategies.integers(-1, 131072)) - def test_buffer_source_constant_read_size(self, original, level, streaming, - source_read_size, read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_size=strategies.integers(-1, 131072), + ) + def test_buffer_source_constant_read_size( + self, original, level, streaming, source_read_size, read_size + ): if read_size == 0: read_size = -1 @@ -162,16 +185,18 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576)) - def test_stream_source_readall(self, original, level, streaming, - source_read_size): + suppress_health_check=[hypothesis.HealthCheck.large_base_example] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + 
source_read_size=strategies.integers(1, 1048576), + ) + def test_stream_source_readall(self, original, level, streaming, source_read_size): cctx = zstd.ZstdCompressor(level=level) if streaming: @@ -190,14 +215,21 @@ self.assertEqual(data, original) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_sizes=strategies.data()) - def test_stream_source_read1_variance(self, original, level, streaming, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_sizes=strategies.data(), + ) + def test_stream_source_read1_variance( + self, original, level, streaming, source_read_size, read_sizes + ): cctx = zstd.ZstdCompressor(level=level) if streaming: @@ -222,17 +254,24 @@ chunks.append(chunk) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - streaming=strategies.booleans(), - source_read_size=strategies.integers(1, 1048576), - read_sizes=strategies.data()) - def test_stream_source_readinto1_variance(self, original, level, streaming, - source_read_size, read_sizes): + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + 
original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + streaming=strategies.booleans(), + source_read_size=strategies.integers(1, 1048576), + read_sizes=strategies.data(), + ) + def test_stream_source_readinto1_variance( + self, original, level, streaming, source_read_size, read_sizes + ): cctx = zstd.ZstdCompressor(level=level) if streaming: @@ -259,18 +298,24 @@ chunks.append(bytes(b[0:count])) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) @hypothesis.given( original=strategies.sampled_from(random_input_data()), level=strategies.integers(min_value=1, max_value=5), source_read_size=strategies.integers(1, 1048576), seek_amounts=strategies.data(), - read_sizes=strategies.data()) - def test_relative_seeks(self, original, level, source_read_size, seek_amounts, - read_sizes): + read_sizes=strategies.data(), + ) + def test_relative_seeks( + self, original, level, source_read_size, seek_amounts, read_sizes + ): cctx = zstd.ZstdCompressor(level=level) frame = cctx.compress(original) @@ -288,18 +333,24 @@ if not chunk: break - self.assertEqual(original[offset:offset + len(chunk)], chunk) + self.assertEqual(original[offset : offset + len(chunk)], chunk) @hypothesis.settings( - suppress_health_check=[hypothesis.HealthCheck.large_base_example]) + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) @hypothesis.given( originals=strategies.data(), frame_count=strategies.integers(min_value=2, max_value=10), level=strategies.integers(min_value=1, max_value=5), source_read_size=strategies.integers(1, 1048576), - read_sizes=strategies.data()) - def test_multiple_frames(self, originals, frame_count, level, - 
source_read_size, read_sizes): + read_sizes=strategies.data(), + ) + def test_multiple_frames( + self, originals, frame_count, level, source_read_size, read_sizes + ): cctx = zstd.ZstdCompressor(level=level) source = io.BytesIO() @@ -314,8 +365,9 @@ dctx = zstd.ZstdDecompressor() buffer.seek(0) - reader = dctx.stream_reader(buffer, read_size=source_read_size, - read_across_frames=True) + reader = dctx.stream_reader( + buffer, read_size=source_read_size, read_across_frames=True + ) chunks = [] @@ -328,16 +380,24 @@ chunks.append(chunk) - self.assertEqual(source.getvalue(), b''.join(chunks)) + self.assertEqual(source.getvalue(), b"".join(chunks)) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestDecompressor_stream_writer_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - write_size=strategies.integers(min_value=1, max_value=8192), - input_sizes=strategies.data()) +class TestDecompressor_stream_writer_fuzzing(TestCase): + @hypothesis.settings( + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + write_size=strategies.integers(min_value=1, max_value=8192), + input_sizes=strategies.data(), + ) def test_write_size_variance(self, original, level, write_size, input_sizes): cctx = zstd.ZstdCompressor(level=level) frame = cctx.compress(original) @@ -358,13 +418,21 @@ self.assertEqual(dest.getvalue(), original) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class 
TestDecompressor_copy_stream_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - read_size=strategies.integers(min_value=1, max_value=8192), - write_size=strategies.integers(min_value=1, max_value=8192)) +class TestDecompressor_copy_stream_fuzzing(TestCase): + @hypothesis.settings( + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + read_size=strategies.integers(min_value=1, max_value=8192), + write_size=strategies.integers(min_value=1, max_value=8192), + ) def test_read_write_size_variance(self, original, level, read_size, write_size): cctx = zstd.ZstdCompressor(level=level) frame = cctx.compress(original) @@ -378,12 +446,20 @@ self.assertEqual(dest.getvalue(), original) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - chunk_sizes=strategies.data()) +class TestDecompressor_decompressobj_fuzzing(TestCase): + @hypothesis.settings( + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + chunk_sizes=strategies.data(), + ) def test_random_input_sizes(self, original, level, chunk_sizes): cctx = zstd.ZstdCompressor(level=level) frame = cctx.compress(original) @@ -402,13 +478,22 @@ chunks.append(dobj.decompress(chunk)) - self.assertEqual(b''.join(chunks), original) 
+ self.assertEqual(b"".join(chunks), original) - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - write_size=strategies.integers(min_value=1, - max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), - chunk_sizes=strategies.data()) + @hypothesis.settings( + suppress_health_check=[ + hypothesis.HealthCheck.large_base_example, + hypothesis.HealthCheck.too_slow, + ] + ) + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + write_size=strategies.integers( + min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE + ), + chunk_sizes=strategies.data(), + ) def test_random_output_sizes(self, original, level, write_size, chunk_sizes): cctx = zstd.ZstdCompressor(level=level) frame = cctx.compress(original) @@ -427,16 +512,18 @@ chunks.append(dobj.decompress(chunk)) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") @make_cffi -class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.sampled_from(random_input_data()), - level=strategies.integers(min_value=1, max_value=5), - read_size=strategies.integers(min_value=1, max_value=4096), - write_size=strategies.integers(min_value=1, max_value=4096)) +class TestDecompressor_read_to_iter_fuzzing(TestCase): + @hypothesis.given( + original=strategies.sampled_from(random_input_data()), + level=strategies.integers(min_value=1, max_value=5), + read_size=strategies.integers(min_value=1, max_value=4096), + write_size=strategies.integers(min_value=1, max_value=4096), + ) def test_read_write_size_variance(self, original, level, read_size, write_size): cctx = zstd.ZstdCompressor(level=level) frame = 
cctx.compress(original) @@ -444,29 +531,33 @@ source = io.BytesIO(frame) dctx = zstd.ZstdDecompressor() - chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size)) + chunks = list( + dctx.read_to_iter(source, read_size=read_size, write_size=write_size) + ) - self.assertEqual(b''.join(chunks), original) + self.assertEqual(b"".join(chunks), original) -@unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') -class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase): - @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), - min_size=1, max_size=1024), - threads=strategies.integers(min_value=1, max_value=8), - use_dict=strategies.booleans()) +@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") +class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase): + @hypothesis.given( + original=strategies.lists( + strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 + ), + threads=strategies.integers(min_value=1, max_value=8), + use_dict=strategies.booleans(), + ) def test_data_equivalence(self, original, threads, use_dict): kwargs = {} if use_dict: - kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) + kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) - cctx = zstd.ZstdCompressor(level=1, - write_content_size=True, - write_checksum=True, - **kwargs) + cctx = zstd.ZstdCompressor( + level=1, write_content_size=True, write_checksum=True, **kwargs + ) - if not hasattr(cctx, 'multi_compress_to_buffer'): - self.skipTest('multi_compress_to_buffer not available') + if not hasattr(cctx, "multi_compress_to_buffer"): + self.skipTest("multi_compress_to_buffer not available") frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_estimate_sizes.py --- a/contrib/python-zstandard/tests/test_estimate_sizes.py Thu Jan 09 14:19:20 
2020 -0500 +++ b/contrib/python-zstandard/tests/test_estimate_sizes.py Tue Jan 21 13:14:51 2020 -0500 @@ -2,14 +2,14 @@ import zstandard as zstd -from . common import ( +from .common import ( make_cffi, + TestCase, ) @make_cffi -class TestSizes(unittest.TestCase): +class TestSizes(TestCase): def test_decompression_size(self): size = zstd.estimate_decompression_context_size() self.assertGreater(size, 100000) - diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_module_attributes.py --- a/contrib/python-zstandard/tests/test_module_attributes.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_module_attributes.py Tue Jan 21 13:14:51 2020 -0500 @@ -4,65 +4,66 @@ import zstandard as zstd -from . common import ( +from .common import ( make_cffi, + TestCase, ) @make_cffi -class TestModuleAttributes(unittest.TestCase): +class TestModuleAttributes(TestCase): def test_version(self): - self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3)) + self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4)) - self.assertEqual(zstd.__version__, '0.12.0') + self.assertEqual(zstd.__version__, "0.13.0") def test_constants(self): self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) - self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd') + self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd") def test_hasattr(self): attrs = ( - 'CONTENTSIZE_UNKNOWN', - 'CONTENTSIZE_ERROR', - 'COMPRESSION_RECOMMENDED_INPUT_SIZE', - 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', - 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', - 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE', - 'MAGIC_NUMBER', - 'FLUSH_BLOCK', - 'FLUSH_FRAME', - 'BLOCKSIZELOG_MAX', - 'BLOCKSIZE_MAX', - 'WINDOWLOG_MIN', - 'WINDOWLOG_MAX', - 'CHAINLOG_MIN', - 'CHAINLOG_MAX', - 'HASHLOG_MIN', - 'HASHLOG_MAX', - 'HASHLOG3_MAX', - 'MINMATCH_MIN', - 'MINMATCH_MAX', - 'SEARCHLOG_MIN', - 'SEARCHLOG_MAX', - 'SEARCHLENGTH_MIN', - 'SEARCHLENGTH_MAX', - 'TARGETLENGTH_MIN', - 'TARGETLENGTH_MAX', - 'LDM_MINMATCH_MIN', - 'LDM_MINMATCH_MAX', - 
'LDM_BUCKETSIZELOG_MAX', - 'STRATEGY_FAST', - 'STRATEGY_DFAST', - 'STRATEGY_GREEDY', - 'STRATEGY_LAZY', - 'STRATEGY_LAZY2', - 'STRATEGY_BTLAZY2', - 'STRATEGY_BTOPT', - 'STRATEGY_BTULTRA', - 'STRATEGY_BTULTRA2', - 'DICT_TYPE_AUTO', - 'DICT_TYPE_RAWCONTENT', - 'DICT_TYPE_FULLDICT', + "CONTENTSIZE_UNKNOWN", + "CONTENTSIZE_ERROR", + "COMPRESSION_RECOMMENDED_INPUT_SIZE", + "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", + "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", + "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", + "MAGIC_NUMBER", + "FLUSH_BLOCK", + "FLUSH_FRAME", + "BLOCKSIZELOG_MAX", + "BLOCKSIZE_MAX", + "WINDOWLOG_MIN", + "WINDOWLOG_MAX", + "CHAINLOG_MIN", + "CHAINLOG_MAX", + "HASHLOG_MIN", + "HASHLOG_MAX", + "HASHLOG3_MAX", + "MINMATCH_MIN", + "MINMATCH_MAX", + "SEARCHLOG_MIN", + "SEARCHLOG_MAX", + "SEARCHLENGTH_MIN", + "SEARCHLENGTH_MAX", + "TARGETLENGTH_MIN", + "TARGETLENGTH_MAX", + "LDM_MINMATCH_MIN", + "LDM_MINMATCH_MAX", + "LDM_BUCKETSIZELOG_MAX", + "STRATEGY_FAST", + "STRATEGY_DFAST", + "STRATEGY_GREEDY", + "STRATEGY_LAZY", + "STRATEGY_LAZY2", + "STRATEGY_BTLAZY2", + "STRATEGY_BTOPT", + "STRATEGY_BTULTRA", + "STRATEGY_BTULTRA2", + "DICT_TYPE_AUTO", + "DICT_TYPE_RAWCONTENT", + "DICT_TYPE_FULLDICT", ) for a in attrs: diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/tests/test_train_dictionary.py --- a/contrib/python-zstandard/tests/test_train_dictionary.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/tests/test_train_dictionary.py Tue Jan 21 13:14:51 2020 -0500 @@ -4,10 +4,11 @@ import zstandard as zstd -from . 
common import ( +from .common import ( generate_samples, make_cffi, random_input_data, + TestCase, ) if sys.version_info[0] >= 3: @@ -17,24 +18,24 @@ @make_cffi -class TestTrainDictionary(unittest.TestCase): +class TestTrainDictionary(TestCase): def test_no_args(self): with self.assertRaises(TypeError): zstd.train_dictionary() def test_bad_args(self): with self.assertRaises(TypeError): - zstd.train_dictionary(8192, u'foo') + zstd.train_dictionary(8192, u"foo") with self.assertRaises(ValueError): - zstd.train_dictionary(8192, [u'foo']) + zstd.train_dictionary(8192, [u"foo"]) def test_no_params(self): d = zstd.train_dictionary(8192, random_input_data()) self.assertIsInstance(d.dict_id(), int_type) # The dictionary ID may be different across platforms. - expected = b'\x37\xa4\x30\xec' + struct.pack('= 1.12. dest_buffer = ffi.from_buffer(b) - ffi.memmove(b, b'', 0) - out_buffer = ffi.new('ZSTD_outBuffer *') + ffi.memmove(b, b"", 0) + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dest_buffer out_buffer.size = len(dest_buffer) out_buffer.pos = 0 @@ -1060,15 +1095,14 @@ # EOF. old_pos = out_buffer.pos - zresult = lib.ZSTD_compressStream2(self._compressor._cctx, - out_buffer, self._in_buffer, - lib.ZSTD_e_end) + zresult = lib.ZSTD_compressStream2( + self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end + ) self._bytes_compressed += out_buffer.pos - old_pos if lib.ZSTD_isError(zresult): - raise ZstdError('error ending compression stream: %s', - _zstd_error(zresult)) + raise ZstdError("error ending compression stream: %s", _zstd_error(zresult)) if zresult == 0: self._finished_output = True @@ -1077,16 +1111,16 @@ def readinto1(self, b): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if self._finished_output: return 0 # TODO use writable=True once we require CFFI >= 1.12. 
dest_buffer = ffi.from_buffer(b) - ffi.memmove(b, b'', 0) - - out_buffer = ffi.new('ZSTD_outBuffer *') + ffi.memmove(b, b"", 0) + + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dest_buffer out_buffer.size = len(dest_buffer) out_buffer.pos = 0 @@ -1107,15 +1141,16 @@ # EOF. old_pos = out_buffer.pos - zresult = lib.ZSTD_compressStream2(self._compressor._cctx, - out_buffer, self._in_buffer, - lib.ZSTD_e_end) + zresult = lib.ZSTD_compressStream2( + self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end + ) self._bytes_compressed += out_buffer.pos - old_pos if lib.ZSTD_isError(zresult): - raise ZstdError('error ending compression stream: %s' % - _zstd_error(zresult)) + raise ZstdError( + "error ending compression stream: %s" % _zstd_error(zresult) + ) if zresult == 0: self._finished_output = True @@ -1124,29 +1159,35 @@ class ZstdCompressor(object): - def __init__(self, level=3, dict_data=None, compression_params=None, - write_checksum=None, write_content_size=None, - write_dict_id=None, threads=0): + def __init__( + self, + level=3, + dict_data=None, + compression_params=None, + write_checksum=None, + write_content_size=None, + write_dict_id=None, + threads=0, + ): if level > lib.ZSTD_maxCLevel(): - raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel()) + raise ValueError("level must be less than %d" % lib.ZSTD_maxCLevel()) if threads < 0: threads = _cpu_count() if compression_params and write_checksum is not None: - raise ValueError('cannot define compression_params and ' - 'write_checksum') + raise ValueError("cannot define compression_params and " "write_checksum") if compression_params and write_content_size is not None: - raise ValueError('cannot define compression_params and ' - 'write_content_size') + raise ValueError( + "cannot define compression_params and " "write_content_size" + ) if compression_params and write_dict_id is not None: - raise ValueError('cannot define compression_params and ' - 'write_dict_id') + raise 
ValueError("cannot define compression_params and " "write_dict_id") if compression_params and threads: - raise ValueError('cannot define compression_params and threads') + raise ValueError("cannot define compression_params and threads") if compression_params: self._params = _make_cctx_params(compression_params) @@ -1160,27 +1201,24 @@ self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams) - _set_compression_parameter(self._params, - lib.ZSTD_c_compressionLevel, - level) + _set_compression_parameter(self._params, lib.ZSTD_c_compressionLevel, level) _set_compression_parameter( self._params, lib.ZSTD_c_contentSizeFlag, - write_content_size if write_content_size is not None else 1) - - _set_compression_parameter(self._params, - lib.ZSTD_c_checksumFlag, - 1 if write_checksum else 0) - - _set_compression_parameter(self._params, - lib.ZSTD_c_dictIDFlag, - 1 if write_dict_id else 0) + write_content_size if write_content_size is not None else 1, + ) + + _set_compression_parameter( + self._params, lib.ZSTD_c_checksumFlag, 1 if write_checksum else 0 + ) + + _set_compression_parameter( + self._params, lib.ZSTD_c_dictIDFlag, 1 if write_dict_id else 0 + ) if threads: - _set_compression_parameter(self._params, - lib.ZSTD_c_nbWorkers, - threads) + _set_compression_parameter(self._params, lib.ZSTD_c_nbWorkers, threads) cctx = lib.ZSTD_createCCtx() if cctx == ffi.NULL: @@ -1194,15 +1232,16 @@ try: self._setup_cctx() finally: - self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx, - size=lib.ZSTD_sizeof_CCtx(cctx)) + self._cctx = ffi.gc( + cctx, lib.ZSTD_freeCCtx, size=lib.ZSTD_sizeof_CCtx(cctx) + ) def _setup_cctx(self): - zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx, - self._params) + zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx, self._params) if lib.ZSTD_isError(zresult): - raise ZstdError('could not set compression parameters: %s' % - _zstd_error(zresult)) + raise ZstdError( + "could not set compression parameters: %s" % _zstd_error(zresult) + ) 
dict_data = self._dict_data @@ -1211,12 +1250,17 @@ zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict) else: zresult = lib.ZSTD_CCtx_loadDictionary_advanced( - self._cctx, dict_data.as_bytes(), len(dict_data), - lib.ZSTD_dlm_byRef, dict_data._dict_type) + self._cctx, + dict_data.as_bytes(), + len(dict_data), + lib.ZSTD_dlm_byRef, + dict_data._dict_type, + ) if lib.ZSTD_isError(zresult): - raise ZstdError('could not load compression dictionary: %s' % - _zstd_error(zresult)) + raise ZstdError( + "could not load compression dictionary: %s" % _zstd_error(zresult) + ) def memory_size(self): return lib.ZSTD_sizeof_CCtx(self._cctx) @@ -1227,15 +1271,14 @@ data_buffer = ffi.from_buffer(data) dest_size = lib.ZSTD_compressBound(len(data_buffer)) - out = new_nonzero('char[]', dest_size) + out = new_nonzero("char[]", dest_size) zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer)) if lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) - - out_buffer = ffi.new('ZSTD_outBuffer *') - in_buffer = ffi.new('ZSTD_inBuffer *') + raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) + + out_buffer = ffi.new("ZSTD_outBuffer *") + in_buffer = ffi.new("ZSTD_inBuffer *") out_buffer.dst = out out_buffer.size = dest_size @@ -1245,16 +1288,14 @@ in_buffer.size = len(data_buffer) in_buffer.pos = 0 - zresult = lib.ZSTD_compressStream2(self._cctx, - out_buffer, - in_buffer, - lib.ZSTD_e_end) + zresult = lib.ZSTD_compressStream2( + self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end + ) if lib.ZSTD_isError(zresult): - raise ZstdError('cannot compress: %s' % - _zstd_error(zresult)) + raise ZstdError("cannot compress: %s" % _zstd_error(zresult)) elif zresult: - raise ZstdError('unexpected partial frame flush') + raise ZstdError("unexpected partial frame flush") return ffi.buffer(out, out_buffer.pos)[:] @@ -1266,12 +1307,11 @@ zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) if 
lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) + raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) cobj = ZstdCompressionObj() - cobj._out = ffi.new('ZSTD_outBuffer *') - cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE) + cobj._out = ffi.new("ZSTD_outBuffer *") + cobj._dst_buffer = ffi.new("char[]", COMPRESSION_RECOMMENDED_OUTPUT_SIZE) cobj._out.dst = cobj._dst_buffer cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE cobj._out.pos = 0 @@ -1288,19 +1328,23 @@ zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) if lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) + raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) return ZstdCompressionChunker(self, chunk_size=chunk_size) - def copy_stream(self, ifh, ofh, size=-1, - read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, - write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): - - if not hasattr(ifh, 'read'): - raise ValueError('first argument must have a read() method') - if not hasattr(ofh, 'write'): - raise ValueError('second argument must have a write() method') + def copy_stream( + self, + ifh, + ofh, + size=-1, + read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, + write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, + ): + + if not hasattr(ifh, "read"): + raise ValueError("first argument must have a read() method") + if not hasattr(ofh, "write"): + raise ValueError("second argument must have a write() method") lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) @@ -1309,13 +1353,12 @@ zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) if lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) - - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') - - dst_buffer = ffi.new('char[]', write_size) + raise ZstdError("error setting source size: %s" % 
_zstd_error(zresult)) + + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") + + dst_buffer = ffi.new("char[]", write_size) out_buffer.dst = dst_buffer out_buffer.size = write_size out_buffer.pos = 0 @@ -1334,13 +1377,11 @@ in_buffer.pos = 0 while in_buffer.pos < in_buffer.size: - zresult = lib.ZSTD_compressStream2(self._cctx, - out_buffer, - in_buffer, - lib.ZSTD_e_continue) + zresult = lib.ZSTD_compressStream2( + self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue + ) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd compress error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) if out_buffer.pos: ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) @@ -1349,13 +1390,13 @@ # We've finished reading. Flush the compressor. while True: - zresult = lib.ZSTD_compressStream2(self._cctx, - out_buffer, - in_buffer, - lib.ZSTD_e_end) + zresult = lib.ZSTD_compressStream2( + self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end + ) if lib.ZSTD_isError(zresult): - raise ZstdError('error ending compression stream: %s' % - _zstd_error(zresult)) + raise ZstdError( + "error ending compression stream: %s" % _zstd_error(zresult) + ) if out_buffer.pos: ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) @@ -1367,8 +1408,9 @@ return total_read, total_write - def stream_reader(self, source, size=-1, - read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE): + def stream_reader( + self, source, size=-1, read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE + ): lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) try: @@ -1381,40 +1423,48 @@ zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) if lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) + raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) return ZstdCompressionReader(self, source, read_size) - def stream_writer(self, writer, size=-1, - 
write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, - write_return_read=False): - - if not hasattr(writer, 'write'): - raise ValueError('must pass an object with a write() method') + def stream_writer( + self, + writer, + size=-1, + write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, + write_return_read=False, + ): + + if not hasattr(writer, "write"): + raise ValueError("must pass an object with a write() method") lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) if size < 0: size = lib.ZSTD_CONTENTSIZE_UNKNOWN - return ZstdCompressionWriter(self, writer, size, write_size, - write_return_read) + return ZstdCompressionWriter(self, writer, size, write_size, write_return_read) write_to = stream_writer - def read_to_iter(self, reader, size=-1, - read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, - write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): - if hasattr(reader, 'read'): + def read_to_iter( + self, + reader, + size=-1, + read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, + write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, + ): + if hasattr(reader, "read"): have_read = True - elif hasattr(reader, '__getitem__'): + elif hasattr(reader, "__getitem__"): have_read = False buffer_offset = 0 size = len(reader) else: - raise ValueError('must pass an object with a read() method or ' - 'conforms to buffer protocol') + raise ValueError( + "must pass an object with a read() method or " + "conforms to buffer protocol" + ) lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) @@ -1423,17 +1473,16 @@ zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) if lib.ZSTD_isError(zresult): - raise ZstdError('error setting source size: %s' % - _zstd_error(zresult)) - - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') + raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) + + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") in_buffer.src = ffi.NULL in_buffer.size = 0 in_buffer.pos = 0 - dst_buffer = 
ffi.new('char[]', write_size) + dst_buffer = ffi.new("char[]", write_size) out_buffer.dst = dst_buffer out_buffer.size = write_size out_buffer.pos = 0 @@ -1449,7 +1498,7 @@ else: remaining = len(reader) - buffer_offset slice_size = min(remaining, read_size) - read_result = reader[buffer_offset:buffer_offset + slice_size] + read_result = reader[buffer_offset : buffer_offset + slice_size] buffer_offset += slice_size # No new input data. Break out of the read loop. @@ -1464,11 +1513,11 @@ in_buffer.pos = 0 while in_buffer.pos < in_buffer.size: - zresult = lib.ZSTD_compressStream2(self._cctx, out_buffer, in_buffer, - lib.ZSTD_e_continue) + zresult = lib.ZSTD_compressStream2( + self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue + ) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd compress error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) if out_buffer.pos: data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] @@ -1484,13 +1533,13 @@ # remains. 
while True: assert out_buffer.pos == 0 - zresult = lib.ZSTD_compressStream2(self._cctx, - out_buffer, - in_buffer, - lib.ZSTD_e_end) + zresult = lib.ZSTD_compressStream2( + self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end + ) if lib.ZSTD_isError(zresult): - raise ZstdError('error ending compression stream: %s' % - _zstd_error(zresult)) + raise ZstdError( + "error ending compression stream: %s" % _zstd_error(zresult) + ) if out_buffer.pos: data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] @@ -1522,7 +1571,7 @@ size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer)) if size == lib.ZSTD_CONTENTSIZE_ERROR: - raise ZstdError('error when determining content size') + raise ZstdError("error when determining content size") elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN: return -1 else: @@ -1534,24 +1583,23 @@ zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer)) if lib.ZSTD_isError(zresult): - raise ZstdError('could not determine frame header size: %s' % - _zstd_error(zresult)) + raise ZstdError( + "could not determine frame header size: %s" % _zstd_error(zresult) + ) return zresult def get_frame_parameters(data): - params = ffi.new('ZSTD_frameHeader *') + params = ffi.new("ZSTD_frameHeader *") data_buffer = ffi.from_buffer(data) zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer)) if lib.ZSTD_isError(zresult): - raise ZstdError('cannot get frame parameters: %s' % - _zstd_error(zresult)) + raise ZstdError("cannot get frame parameters: %s" % _zstd_error(zresult)) if zresult: - raise ZstdError('not enough data for frame parameters; need %d bytes' % - zresult) + raise ZstdError("not enough data for frame parameters; need %d bytes" % zresult) return FrameParameters(params[0]) @@ -1563,10 +1611,10 @@ self.k = k self.d = d - if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT, - DICT_TYPE_FULLDICT): - raise ValueError('invalid dictionary load mode: %d; must use ' - 'DICT_TYPE_* constants') + if dict_type not in (DICT_TYPE_AUTO, 
DICT_TYPE_RAWCONTENT, DICT_TYPE_FULLDICT): + raise ValueError( + "invalid dictionary load mode: %d; must use " "DICT_TYPE_* constants" + ) self._dict_type = dict_type self._cdict = None @@ -1582,16 +1630,15 @@ def precompute_compress(self, level=0, compression_params=None): if level and compression_params: - raise ValueError('must only specify one of level or ' - 'compression_params') + raise ValueError("must only specify one of level or " "compression_params") if not level and not compression_params: - raise ValueError('must specify one of level or compression_params') + raise ValueError("must specify one of level or compression_params") if level: cparams = lib.ZSTD_getCParams(level, 0, len(self._data)) else: - cparams = ffi.new('ZSTD_compressionParameters') + cparams = ffi.new("ZSTD_compressionParameters") cparams.chainLog = compression_params.chain_log cparams.hashLog = compression_params.hash_log cparams.minMatch = compression_params.min_match @@ -1600,59 +1647,75 @@ cparams.targetLength = compression_params.target_length cparams.windowLog = compression_params.window_log - cdict = lib.ZSTD_createCDict_advanced(self._data, len(self._data), - lib.ZSTD_dlm_byRef, - self._dict_type, - cparams, - lib.ZSTD_defaultCMem) + cdict = lib.ZSTD_createCDict_advanced( + self._data, + len(self._data), + lib.ZSTD_dlm_byRef, + self._dict_type, + cparams, + lib.ZSTD_defaultCMem, + ) if cdict == ffi.NULL: - raise ZstdError('unable to precompute dictionary') - - self._cdict = ffi.gc(cdict, lib.ZSTD_freeCDict, - size=lib.ZSTD_sizeof_CDict(cdict)) + raise ZstdError("unable to precompute dictionary") + + self._cdict = ffi.gc( + cdict, lib.ZSTD_freeCDict, size=lib.ZSTD_sizeof_CDict(cdict) + ) @property def _ddict(self): - ddict = lib.ZSTD_createDDict_advanced(self._data, len(self._data), - lib.ZSTD_dlm_byRef, - self._dict_type, - lib.ZSTD_defaultCMem) + ddict = lib.ZSTD_createDDict_advanced( + self._data, + len(self._data), + lib.ZSTD_dlm_byRef, + self._dict_type, + 
lib.ZSTD_defaultCMem, + ) if ddict == ffi.NULL: - raise ZstdError('could not create decompression dict') - - ddict = ffi.gc(ddict, lib.ZSTD_freeDDict, - size=lib.ZSTD_sizeof_DDict(ddict)) - self.__dict__['_ddict'] = ddict + raise ZstdError("could not create decompression dict") + + ddict = ffi.gc(ddict, lib.ZSTD_freeDDict, size=lib.ZSTD_sizeof_DDict(ddict)) + self.__dict__["_ddict"] = ddict return ddict -def train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0, - level=0, steps=0, threads=0): + +def train_dictionary( + dict_size, + samples, + k=0, + d=0, + notifications=0, + dict_id=0, + level=0, + steps=0, + threads=0, +): if not isinstance(samples, list): - raise TypeError('samples must be a list') + raise TypeError("samples must be a list") if threads < 0: threads = _cpu_count() total_size = sum(map(len, samples)) - samples_buffer = new_nonzero('char[]', total_size) - sample_sizes = new_nonzero('size_t[]', len(samples)) + samples_buffer = new_nonzero("char[]", total_size) + sample_sizes = new_nonzero("size_t[]", len(samples)) offset = 0 for i, sample in enumerate(samples): if not isinstance(sample, bytes_type): - raise ValueError('samples must be bytes') + raise ValueError("samples must be bytes") l = len(sample) ffi.memmove(samples_buffer + offset, sample, l) offset += l sample_sizes[i] = l - dict_data = new_nonzero('char[]', dict_size) - - dparams = ffi.new('ZDICT_cover_params_t *')[0] + dict_data = new_nonzero("char[]", dict_size) + + dparams = ffi.new("ZDICT_cover_params_t *")[0] dparams.k = k dparams.d = d dparams.steps = steps @@ -1661,34 +1724,51 @@ dparams.zParams.dictID = dict_id dparams.zParams.compressionLevel = level - if (not dparams.k and not dparams.d and not dparams.steps - and not dparams.nbThreads and not dparams.zParams.notificationLevel + if ( + not dparams.k + and not dparams.d + and not dparams.steps + and not dparams.nbThreads + and not dparams.zParams.notificationLevel and not dparams.zParams.dictID - and not 
dparams.zParams.compressionLevel): + and not dparams.zParams.compressionLevel + ): zresult = lib.ZDICT_trainFromBuffer( - ffi.addressof(dict_data), dict_size, + ffi.addressof(dict_data), + dict_size, ffi.addressof(samples_buffer), - ffi.addressof(sample_sizes, 0), len(samples)) + ffi.addressof(sample_sizes, 0), + len(samples), + ) elif dparams.steps or dparams.nbThreads: zresult = lib.ZDICT_optimizeTrainFromBuffer_cover( - ffi.addressof(dict_data), dict_size, + ffi.addressof(dict_data), + dict_size, ffi.addressof(samples_buffer), - ffi.addressof(sample_sizes, 0), len(samples), - ffi.addressof(dparams)) + ffi.addressof(sample_sizes, 0), + len(samples), + ffi.addressof(dparams), + ) else: zresult = lib.ZDICT_trainFromBuffer_cover( - ffi.addressof(dict_data), dict_size, + ffi.addressof(dict_data), + dict_size, ffi.addressof(samples_buffer), - ffi.addressof(sample_sizes, 0), len(samples), - dparams) + ffi.addressof(sample_sizes, 0), + len(samples), + dparams, + ) if lib.ZDICT_isError(zresult): - msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode('utf-8') - raise ZstdError('cannot train dict: %s' % msg) - - return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:], - dict_type=DICT_TYPE_FULLDICT, - k=dparams.k, d=dparams.d) + msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode("utf-8") + raise ZstdError("cannot train dict: %s" % msg) + + return ZstdCompressionDict( + ffi.buffer(dict_data, zresult)[:], + dict_type=DICT_TYPE_FULLDICT, + k=dparams.k, + d=dparams.d, + ) class ZstdDecompressionObj(object): @@ -1699,21 +1779,21 @@ def decompress(self, data): if self._finished: - raise ZstdError('cannot use a decompressobj multiple times') - - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') + raise ZstdError("cannot use a decompressobj multiple times") + + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") data_buffer = ffi.from_buffer(data) if len(data_buffer) == 0: - return b'' + return b"" 
in_buffer.src = data_buffer in_buffer.size = len(data_buffer) in_buffer.pos = 0 - dst_buffer = ffi.new('char[]', self._write_size) + dst_buffer = ffi.new("char[]", self._write_size) out_buffer.dst = dst_buffer out_buffer.size = len(dst_buffer) out_buffer.pos = 0 @@ -1721,11 +1801,11 @@ chunks = [] while True: - zresult = lib.ZSTD_decompressStream(self._decompressor._dctx, - out_buffer, in_buffer) + zresult = lib.ZSTD_decompressStream( + self._decompressor._dctx, out_buffer, in_buffer + ) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd decompressor error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd decompressor error: %s" % _zstd_error(zresult)) if zresult == 0: self._finished = True @@ -1734,13 +1814,14 @@ if out_buffer.pos: chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) - if (zresult == 0 or - (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)): + if zresult == 0 or ( + in_buffer.pos == in_buffer.size and out_buffer.pos == 0 + ): break out_buffer.pos = 0 - return b''.join(chunks) + return b"".join(chunks) def flush(self, length=0): pass @@ -1757,13 +1838,13 @@ self._bytes_decompressed = 0 self._finished_input = False self._finished_output = False - self._in_buffer = ffi.new('ZSTD_inBuffer *') + self._in_buffer = ffi.new("ZSTD_inBuffer *") # Holds a ref to self._in_buffer.src. self._source_buffer = None def __enter__(self): if self._entered: - raise ValueError('cannot __enter__ multiple times') + raise ValueError("cannot __enter__ multiple times") self._entered = True return self @@ -1824,7 +1905,7 @@ chunks.append(chunk) - return b''.join(chunks) + return b"".join(chunks) def __iter__(self): raise io.UnsupportedOperation() @@ -1844,7 +1925,7 @@ return # Else populate the input buffer from our source. - if hasattr(self._source, 'read'): + if hasattr(self._source, "read"): data = self._source.read(self._read_size) if not data: @@ -1866,8 +1947,9 @@ Returns True if data in output buffer should be emitted. 
""" - zresult = lib.ZSTD_decompressStream(self._decompressor._dctx, - out_buffer, self._in_buffer) + zresult = lib.ZSTD_decompressStream( + self._decompressor._dctx, out_buffer, self._in_buffer + ) if self._in_buffer.pos == self._in_buffer.size: self._in_buffer.src = ffi.NULL @@ -1875,38 +1957,39 @@ self._in_buffer.size = 0 self._source_buffer = None - if not hasattr(self._source, 'read'): + if not hasattr(self._source, "read"): self._finished_input = True if lib.ZSTD_isError(zresult): - raise ZstdError('zstd decompress error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) # Emit data if there is data AND either: # a) output buffer is full (read amount is satisfied) # b) we're at end of a frame and not in frame spanning mode - return (out_buffer.pos and - (out_buffer.pos == out_buffer.size or - zresult == 0 and not self._read_across_frames)) + return out_buffer.pos and ( + out_buffer.pos == out_buffer.size + or zresult == 0 + and not self._read_across_frames + ) def read(self, size=-1): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if size < -1: - raise ValueError('cannot read negative amounts less than -1') + raise ValueError("cannot read negative amounts less than -1") if size == -1: # This is recursive. But it gets the job done. return self.readall() if self._finished_output or size == 0: - return b'' + return b"" # We /could/ call into readinto() here. But that introduces more # overhead. - dst_buffer = ffi.new('char[]', size) - out_buffer = ffi.new('ZSTD_outBuffer *') + dst_buffer = ffi.new("char[]", size) + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dst_buffer out_buffer.size = size out_buffer.pos = 0 @@ -1927,15 +2010,15 @@ def readinto(self, b): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if self._finished_output: return 0 # TODO use writable=True once we require CFFI >= 1.12. 
dest_buffer = ffi.from_buffer(b) - ffi.memmove(b, b'', 0) - out_buffer = ffi.new('ZSTD_outBuffer *') + ffi.memmove(b, b"", 0) + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dest_buffer out_buffer.size = len(dest_buffer) out_buffer.pos = 0 @@ -1956,20 +2039,20 @@ def read1(self, size=-1): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if size < -1: - raise ValueError('cannot read negative amounts less than -1') + raise ValueError("cannot read negative amounts less than -1") if self._finished_output or size == 0: - return b'' + return b"" # -1 returns arbitrary number of bytes. if size == -1: size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE - dst_buffer = ffi.new('char[]', size) - out_buffer = ffi.new('ZSTD_outBuffer *') + dst_buffer = ffi.new("char[]", size) + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dst_buffer out_buffer.size = size out_buffer.pos = 0 @@ -1990,16 +2073,16 @@ def readinto1(self, b): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if self._finished_output: return 0 # TODO use writable=True once we require CFFI >= 1.12. 
dest_buffer = ffi.from_buffer(b) - ffi.memmove(b, b'', 0) - - out_buffer = ffi.new('ZSTD_outBuffer *') + ffi.memmove(b, b"", 0) + + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = dest_buffer out_buffer.size = len(dest_buffer) out_buffer.pos = 0 @@ -2016,33 +2099,31 @@ def seek(self, pos, whence=os.SEEK_SET): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") read_amount = 0 if whence == os.SEEK_SET: if pos < 0: - raise ValueError('cannot seek to negative position with SEEK_SET') + raise ValueError("cannot seek to negative position with SEEK_SET") if pos < self._bytes_decompressed: - raise ValueError('cannot seek zstd decompression stream ' - 'backwards') + raise ValueError("cannot seek zstd decompression stream " "backwards") read_amount = pos - self._bytes_decompressed elif whence == os.SEEK_CUR: if pos < 0: - raise ValueError('cannot seek zstd decompression stream ' - 'backwards') + raise ValueError("cannot seek zstd decompression stream " "backwards") read_amount = pos elif whence == os.SEEK_END: - raise ValueError('zstd decompression streams cannot be seeked ' - 'with SEEK_END') + raise ValueError( + "zstd decompression streams cannot be seeked " "with SEEK_END" + ) while read_amount: - result = self.read(min(read_amount, - DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)) + result = self.read(min(read_amount, DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)) if not result: break @@ -2051,6 +2132,7 @@ return self._bytes_decompressed + class ZstdDecompressionWriter(object): def __init__(self, decompressor, writer, write_size, write_return_read): decompressor._ensure_dctx() @@ -2064,10 +2146,10 @@ def __enter__(self): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") if self._entered: - raise ZstdError('cannot __enter__ multiple times') + raise ZstdError("cannot __enter__ multiple times") self._entered = True @@ -2089,7 +2171,7 @@ finally: self._closed = True - f = 
getattr(self._writer, 'close', None) + f = getattr(self._writer, "close", None) if f: f() @@ -2098,17 +2180,17 @@ return self._closed def fileno(self): - f = getattr(self._writer, 'fileno', None) + f = getattr(self._writer, "fileno", None) if f: return f() else: - raise OSError('fileno not available on underlying writer') + raise OSError("fileno not available on underlying writer") def flush(self): if self._closed: - raise ValueError('stream is closed') - - f = getattr(self._writer, 'flush', None) + raise ValueError("stream is closed") + + f = getattr(self._writer, "flush", None) if f: return f() @@ -2153,19 +2235,19 @@ def write(self, data): if self._closed: - raise ValueError('stream is closed') + raise ValueError("stream is closed") total_write = 0 - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") data_buffer = ffi.from_buffer(data) in_buffer.src = data_buffer in_buffer.size = len(data_buffer) in_buffer.pos = 0 - dst_buffer = ffi.new('char[]', self._write_size) + dst_buffer = ffi.new("char[]", self._write_size) out_buffer.dst = dst_buffer out_buffer.size = len(dst_buffer) out_buffer.pos = 0 @@ -2175,8 +2257,7 @@ while in_buffer.pos < in_buffer.size: zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd decompress error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) if out_buffer.pos: self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) @@ -2206,8 +2287,9 @@ try: self._ensure_dctx() finally: - self._dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx, - size=lib.ZSTD_sizeof_DCtx(dctx)) + self._dctx = ffi.gc( + dctx, lib.ZSTD_freeDCtx, size=lib.ZSTD_sizeof_DCtx(dctx) + ) def memory_size(self): return lib.ZSTD_sizeof_DCtx(self._dctx) @@ -2220,85 +2302,96 @@ output_size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer)) if 
output_size == lib.ZSTD_CONTENTSIZE_ERROR: - raise ZstdError('error determining content size from frame header') + raise ZstdError("error determining content size from frame header") elif output_size == 0: - return b'' + return b"" elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN: if not max_output_size: - raise ZstdError('could not determine content size in frame header') - - result_buffer = ffi.new('char[]', max_output_size) + raise ZstdError("could not determine content size in frame header") + + result_buffer = ffi.new("char[]", max_output_size) result_size = max_output_size output_size = 0 else: - result_buffer = ffi.new('char[]', output_size) + result_buffer = ffi.new("char[]", output_size) result_size = output_size - out_buffer = ffi.new('ZSTD_outBuffer *') + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = result_buffer out_buffer.size = result_size out_buffer.pos = 0 - in_buffer = ffi.new('ZSTD_inBuffer *') + in_buffer = ffi.new("ZSTD_inBuffer *") in_buffer.src = data_buffer in_buffer.size = len(data_buffer) in_buffer.pos = 0 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('decompression error: %s' % - _zstd_error(zresult)) + raise ZstdError("decompression error: %s" % _zstd_error(zresult)) elif zresult: - raise ZstdError('decompression error: did not decompress full frame') + raise ZstdError("decompression error: did not decompress full frame") elif output_size and out_buffer.pos != output_size: - raise ZstdError('decompression error: decompressed %d bytes; expected %d' % - (zresult, output_size)) + raise ZstdError( + "decompression error: decompressed %d bytes; expected %d" + % (zresult, output_size) + ) return ffi.buffer(result_buffer, out_buffer.pos)[:] - def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, - read_across_frames=False): + def stream_reader( + self, + source, + read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, + 
read_across_frames=False, + ): self._ensure_dctx() return ZstdDecompressionReader(self, source, read_size, read_across_frames) def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): if write_size < 1: - raise ValueError('write_size must be positive') + raise ValueError("write_size must be positive") self._ensure_dctx() return ZstdDecompressionObj(self, write_size=write_size) - def read_to_iter(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, - write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, - skip_bytes=0): + def read_to_iter( + self, + reader, + read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, + write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, + skip_bytes=0, + ): if skip_bytes >= read_size: - raise ValueError('skip_bytes must be smaller than read_size') - - if hasattr(reader, 'read'): + raise ValueError("skip_bytes must be smaller than read_size") + + if hasattr(reader, "read"): have_read = True - elif hasattr(reader, '__getitem__'): + elif hasattr(reader, "__getitem__"): have_read = False buffer_offset = 0 size = len(reader) else: - raise ValueError('must pass an object with a read() method or ' - 'conforms to buffer protocol') + raise ValueError( + "must pass an object with a read() method or " + "conforms to buffer protocol" + ) if skip_bytes: if have_read: reader.read(skip_bytes) else: if skip_bytes > size: - raise ValueError('skip_bytes larger than first input chunk') + raise ValueError("skip_bytes larger than first input chunk") buffer_offset = skip_bytes self._ensure_dctx() - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') - - dst_buffer = ffi.new('char[]', write_size) + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") + + dst_buffer = ffi.new("char[]", write_size) out_buffer.dst = dst_buffer out_buffer.size = len(dst_buffer) out_buffer.pos = 0 @@ -2311,7 +2404,7 @@ else: remaining = size - buffer_offset slice_size = min(remaining, read_size) - read_result 
= reader[buffer_offset:buffer_offset + slice_size] + read_result = reader[buffer_offset : buffer_offset + slice_size] buffer_offset += slice_size # No new input. Break out of read loop. @@ -2330,8 +2423,7 @@ zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd decompress error: %s' % - _zstd_error(zresult)) + raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) if out_buffer.pos: data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] @@ -2348,30 +2440,37 @@ read_from = read_to_iter - def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, - write_return_read=False): - if not hasattr(writer, 'write'): - raise ValueError('must pass an object with a write() method') - - return ZstdDecompressionWriter(self, writer, write_size, - write_return_read) + def stream_writer( + self, + writer, + write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, + write_return_read=False, + ): + if not hasattr(writer, "write"): + raise ValueError("must pass an object with a write() method") + + return ZstdDecompressionWriter(self, writer, write_size, write_return_read) write_to = stream_writer - def copy_stream(self, ifh, ofh, - read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, - write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): - if not hasattr(ifh, 'read'): - raise ValueError('first argument must have a read() method') - if not hasattr(ofh, 'write'): - raise ValueError('second argument must have a write() method') + def copy_stream( + self, + ifh, + ofh, + read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, + write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, + ): + if not hasattr(ifh, "read"): + raise ValueError("first argument must have a read() method") + if not hasattr(ofh, "write"): + raise ValueError("second argument must have a write() method") self._ensure_dctx() - in_buffer = ffi.new('ZSTD_inBuffer *') - out_buffer = ffi.new('ZSTD_outBuffer *') - - dst_buffer = ffi.new('char[]', 
write_size) + in_buffer = ffi.new("ZSTD_inBuffer *") + out_buffer = ffi.new("ZSTD_outBuffer *") + + dst_buffer = ffi.new("char[]", write_size) out_buffer.dst = dst_buffer out_buffer.size = write_size out_buffer.pos = 0 @@ -2394,8 +2493,9 @@ while in_buffer.pos < in_buffer.size: zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('zstd decompressor error: %s' % - _zstd_error(zresult)) + raise ZstdError( + "zstd decompressor error: %s" % _zstd_error(zresult) + ) if out_buffer.pos: ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) @@ -2408,48 +2508,47 @@ def decompress_content_dict_chain(self, frames): if not isinstance(frames, list): - raise TypeError('argument must be a list') + raise TypeError("argument must be a list") if not frames: - raise ValueError('empty input chain') + raise ValueError("empty input chain") # First chunk should not be using a dictionary. We handle it specially. chunk = frames[0] if not isinstance(chunk, bytes_type): - raise ValueError('chunk 0 must be bytes') + raise ValueError("chunk 0 must be bytes") # All chunks should be zstd frames and should have content size set. 
chunk_buffer = ffi.from_buffer(chunk) - params = ffi.new('ZSTD_frameHeader *') + params = ffi.new("ZSTD_frameHeader *") zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer)) if lib.ZSTD_isError(zresult): - raise ValueError('chunk 0 is not a valid zstd frame') + raise ValueError("chunk 0 is not a valid zstd frame") elif zresult: - raise ValueError('chunk 0 is too small to contain a zstd frame') + raise ValueError("chunk 0 is too small to contain a zstd frame") if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN: - raise ValueError('chunk 0 missing content size in frame') + raise ValueError("chunk 0 missing content size in frame") self._ensure_dctx(load_dict=False) - last_buffer = ffi.new('char[]', params.frameContentSize) - - out_buffer = ffi.new('ZSTD_outBuffer *') + last_buffer = ffi.new("char[]", params.frameContentSize) + + out_buffer = ffi.new("ZSTD_outBuffer *") out_buffer.dst = last_buffer out_buffer.size = len(last_buffer) out_buffer.pos = 0 - in_buffer = ffi.new('ZSTD_inBuffer *') + in_buffer = ffi.new("ZSTD_inBuffer *") in_buffer.src = chunk_buffer in_buffer.size = len(chunk_buffer) in_buffer.pos = 0 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('could not decompress chunk 0: %s' % - _zstd_error(zresult)) + raise ZstdError("could not decompress chunk 0: %s" % _zstd_error(zresult)) elif zresult: - raise ZstdError('chunk 0 did not decompress full frame') + raise ZstdError("chunk 0 did not decompress full frame") # Special case of chain length of 1 if len(frames) == 1: @@ -2459,19 +2558,19 @@ while i < len(frames): chunk = frames[i] if not isinstance(chunk, bytes_type): - raise ValueError('chunk %d must be bytes' % i) + raise ValueError("chunk %d must be bytes" % i) chunk_buffer = ffi.from_buffer(chunk) zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer)) if lib.ZSTD_isError(zresult): - raise ValueError('chunk %d is not a valid zstd 
frame' % i) + raise ValueError("chunk %d is not a valid zstd frame" % i) elif zresult: - raise ValueError('chunk %d is too small to contain a zstd frame' % i) + raise ValueError("chunk %d is too small to contain a zstd frame" % i) if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN: - raise ValueError('chunk %d missing content size in frame' % i) - - dest_buffer = ffi.new('char[]', params.frameContentSize) + raise ValueError("chunk %d missing content size in frame" % i) + + dest_buffer = ffi.new("char[]", params.frameContentSize) out_buffer.dst = dest_buffer out_buffer.size = len(dest_buffer) @@ -2483,10 +2582,11 @@ zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) if lib.ZSTD_isError(zresult): - raise ZstdError('could not decompress chunk %d: %s' % - _zstd_error(zresult)) + raise ZstdError( + "could not decompress chunk %d: %s" % _zstd_error(zresult) + ) elif zresult: - raise ZstdError('chunk %d did not decompress full frame' % i) + raise ZstdError("chunk %d did not decompress full frame" % i) last_buffer = dest_buffer i += 1 @@ -2497,19 +2597,19 @@ lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only) if self._max_window_size: - zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx, - self._max_window_size) + zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx, self._max_window_size) if lib.ZSTD_isError(zresult): - raise ZstdError('unable to set max window size: %s' % - _zstd_error(zresult)) + raise ZstdError( + "unable to set max window size: %s" % _zstd_error(zresult) + ) zresult = lib.ZSTD_DCtx_setFormat(self._dctx, self._format) if lib.ZSTD_isError(zresult): - raise ZstdError('unable to set decoding format: %s' % - _zstd_error(zresult)) + raise ZstdError("unable to set decoding format: %s" % _zstd_error(zresult)) if self._dict_data and load_dict: zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict) if lib.ZSTD_isError(zresult): - raise ZstdError('unable to reference prepared dictionary: %s' % - 
_zstd_error(zresult)) + raise ZstdError( + "unable to reference prepared dictionary: %s" % _zstd_error(zresult) + ) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd.c --- a/contrib/python-zstandard/zstd.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd.c Tue Jan 21 13:14:51 2020 -0500 @@ -210,7 +210,7 @@ We detect this mismatch here and refuse to load the module if this scenario is detected. */ - if (ZSTD_VERSION_NUMBER != 10403 || ZSTD_versionNumber() != 10403) { + if (ZSTD_VERSION_NUMBER != 10404 || ZSTD_versionNumber() != 10404) { PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); return; } diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/bitstream.h --- a/contrib/python-zstandard/zstd/common/bitstream.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/bitstream.h Tue Jan 21 13:14:51 2020 -0500 @@ -164,7 +164,7 @@ _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */ @@ -244,9 +244,9 @@ { size_t const nbBytes = bitC->bitPos >> 3; assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; - assert(bitC->ptr <= bitC->endPtr); bitC->bitPos &= 7; bitC->bitContainer >>= nbBytes*8; } @@ -260,6 +260,7 @@ { size_t const nbBytes = bitC->bitPos >> 3; assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/compiler.h --- 
a/contrib/python-zstandard/zstd/common/compiler.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/compiler.h Tue Jan 21 13:14:51 2020 -0500 @@ -61,6 +61,13 @@ # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR #endif +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + /* force no inlining */ #ifdef _MSC_VER # define FORCE_NOINLINE static __declspec(noinline) @@ -127,9 +134,14 @@ } \ } -/* vectorization */ +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ #if !defined(__clang__) && defined(__GNUC__) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif #else # define DONT_VECTORIZE #endif diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/fse.h --- a/contrib/python-zstandard/zstd/common/fse.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/fse.h Tue Jan 21 13:14:51 2020 -0500 @@ -308,7 +308,7 @@ *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/fse_decompress.c --- a/contrib/python-zstandard/zstd/common/fse_decompress.c Thu Jan 09 14:19:20 2020 -0500 +++ 
b/contrib/python-zstandard/zstd/common/fse_decompress.c Tue Jan 21 13:14:51 2020 -0500 @@ -52,7 +52,9 @@ #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ /* check and forward error code */ +#ifndef CHECK_F #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } +#endif /* ************************************************************** diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/mem.h --- a/contrib/python-zstandard/zstd/common/mem.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/mem.h Tue Jan 21 13:14:51 2020 -0500 @@ -47,6 +47,79 @@ #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } +/* detects whether we are being compiled under msan */ +#if defined (__has_feature) +# if __has_feature(memory_sanitizer) +# define MEMORY_SANITIZER 1 +# endif +#endif + +#if defined (MEMORY_SANITIZER) +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ + +#include /* intptr_t */ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. 
*/ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#if defined (__has_feature) +# if __has_feature(address_sanitizer) +# define ADDRESS_SANITIZER 1 +# endif +#elif defined(__SANITIZE_ADDRESS__) +# define ADDRESS_SANITIZER 1 +#endif + +#if defined (ADDRESS_SANITIZER) +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
*/ +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#endif + /*-************************************************************** * Basic Types diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/pool.c --- a/contrib/python-zstandard/zstd/common/pool.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/pool.c Tue Jan 21 13:14:51 2020 -0500 @@ -127,9 +127,13 @@ ctx->queueTail = 0; ctx->numThreadsBusy = 0; ctx->queueEmpty = 1; - (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); - (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); - (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } ctx->shutdown = 0; /* Allocate space for the thread handles */ ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/threading.c --- a/contrib/python-zstandard/zstd/common/threading.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/threading.c Tue Jan 21 13:14:51 2020 -0500 @@ -14,6 +14,8 @@ * This file will hold wrapper for systems, which do not support pthreads */ +#include "threading.h" + /* create fake symbol to avoid empty translation unit warning */ int g_ZSTD_threading_useless_symbol; @@ -28,7 +30,6 @@ /* === Dependencies === */ #include #include -#include "threading.h" /* === Implementation === */ @@ -73,3 +74,47 @@ } #endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#include + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; 
+ return pthread_mutex_init(*mutex, attr); +} + +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + free(*cond); + return ret; + } +} + +#endif diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/threading.h --- a/contrib/python-zstandard/zstd/common/threading.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/threading.h Tue Jan 21 13:14:51 2020 -0500 @@ -13,6 +13,8 @@ #ifndef THREADING_H_938743 #define THREADING_H_938743 +#include "debug.h" + #if defined (__cplusplus) extern "C" { #endif @@ -75,10 +77,12 @@ */ -#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ /* === POSIX Systems === */ # include +#if DEBUGLEVEL < 1 + #define ZSTD_pthread_mutex_t pthread_mutex_t #define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) #define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) @@ -96,6 +100,33 @@ #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) #define ZSTD_pthread_join(a, b) pthread_join((a),(b)) +#else /* DEBUGLEVEL >= 1 */ + +/* Debug implementation of threading. + * In this implementation we use pointers for mutexes and condition variables. + * This way, if we forget to init/destroy them the program will crash or ASAN + * will report leaks. 
+ */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#endif + #else /* ZSTD_MULTITHREAD not defined */ /* No multithreading support */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/common/zstd_internal.h --- a/contrib/python-zstandard/zstd/common/zstd_internal.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/common/zstd_internal.h Tue Jan 21 13:14:51 2020 -0500 @@ -197,79 +197,56 @@ static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -#define WILDCOPY_OVERLENGTH 8 -#define VECLEN 16 +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 typedef enum { ZSTD_no_overlap, - ZSTD_overlap_src_before_dst, + ZSTD_overlap_src_before_dst /* ZSTD_overlap_dst_before_src, */ } ZSTD_overlap_e; /*! 
ZSTD_wildcopy() : - * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ + * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { - COPY16(op, ip); - } - while (op < oend); - } -} - -/*! 
ZSTD_wildcopy_16min() : - * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ -MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) -{ - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - assert(length >= 8); - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - - if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first two COPY16() calls because the copy length is + * almost certain to be short, so the branches have different + * probabilities. + * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. + * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. 
+ */ COPY16(op, ip); - } - while (op < oend); + COPY16(op, ip); + if (op >= oend) return; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); } } -MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ -{ - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = (BYTE*)dstEnd; - do - COPY8(op, ip) - while (op < oend); -} - /*-******************************************* * Private declarations @@ -323,7 +300,7 @@ _BitScanReverse(&r, val); return (unsigned)r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); + return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress.c --- a/contrib/python-zstandard/zstd/compress/zstd_compress.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress.c Tue Jan 21 13:14:51 2020 -0500 @@ -42,15 +42,15 @@ * Context memory management ***************************************/ struct ZSTD_CDict_s { - void* dictBuffer; const void* dictContent; size_t dictContentSize; - void* workspace; - size_t workspaceSize; + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; ZSTD_matchState_t matchState; ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -84,23 +84,26 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) { - ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ if ((size_t)workspace & 7) 
return NULL; /* must be 8-aligned */ - memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) { + return NULL; + } + memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); cctx->staticSize = workspaceSize; - cctx->workSpace = (void*)(cctx+1); - cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; - assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; - cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; - { - void* const ptr = cctx->blockState.nextCBlock + 1; - cctx->entropyWorkspace = (U32*)ptr; - } + if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object( + &cctx->workspace, HUF_WORKSPACE_SIZE); cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); return cctx; } @@ -128,11 +131,11 @@ { assert(cctx != NULL); assert(cctx->staticSize == 0); - ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; ZSTD_clearAllDicts(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); } size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) @@ -140,8 +143,13 @@ if (cctx==NULL) 
return 0; /* support free on NULL */ RETURN_ERROR_IF(cctx->staticSize, memory_allocation, "not compatible with static CCtx"); - ZSTD_freeCCtxContent(cctx); - ZSTD_free(cctx, cctx->customMem); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_free(cctx, cctx->customMem); + } + } return 0; } @@ -160,7 +168,9 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) { if (cctx==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*cctx) + cctx->workSpaceSize + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + ZSTD_sizeof_localDict(cctx->localDict) + ZSTD_sizeof_mtctx(cctx); } @@ -229,23 +239,23 @@ RETURN_ERROR_IF(!cctxParams, GENERIC); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); memset(cctxParams, 0, sizeof(*cctxParams)); + assert(!ZSTD_checkCParams(params.cParams)); cctxParams->cParams = params.cParams; cctxParams->fParams = params.fParams; cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - assert(!ZSTD_checkCParams(params.cParams)); return 0; } /* ZSTD_assignParamsToCCtxParams() : * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - ZSTD_CCtx_params cctxParams, ZSTD_parameters params) + const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { - ZSTD_CCtx_params ret = cctxParams; + ZSTD_CCtx_params ret = *cctxParams; + assert(!ZSTD_checkCParams(params.cParams)); ret.cParams = params.cParams; ret.fParams = params.fParams; ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - assert(!ZSTD_checkCParams(params.cParams)); return ret; } @@ -378,7 +388,7 @@ case ZSTD_c_forceAttachDict: ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); bounds.lowerBound = ZSTD_dictDefaultAttach; - 
bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; case ZSTD_c_literalCompressionMode: @@ -392,6 +402,11 @@ bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; return bounds; + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -448,6 +463,7 @@ case ZSTD_c_forceAttachDict: case ZSTD_c_literalCompressionMode: case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: default: return 0; } @@ -494,6 +510,7 @@ case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: break; default: RETURN_ERROR(parameter_unsupported); @@ -517,33 +534,33 @@ if (value) { /* 0 : does not change current level */ CCtxParams->compressionLevel = value; } - if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; return 0; /* return type (size_t) cannot represent negative values */ } case ZSTD_c_windowLog : if (value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_windowLog, value); - CCtxParams->cParams.windowLog = value; + CCtxParams->cParams.windowLog = (U32)value; return CCtxParams->cParams.windowLog; case ZSTD_c_hashLog : if (value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_hashLog, value); - CCtxParams->cParams.hashLog = value; + CCtxParams->cParams.hashLog = (U32)value; return CCtxParams->cParams.hashLog; case ZSTD_c_chainLog : if (value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_chainLog, value); - CCtxParams->cParams.chainLog = value; + CCtxParams->cParams.chainLog = (U32)value; return CCtxParams->cParams.chainLog; case ZSTD_c_searchLog : if 
(value!=0) /* 0 => use default */ BOUNDCHECK(ZSTD_c_searchLog, value); - CCtxParams->cParams.searchLog = value; - return value; + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; case ZSTD_c_minMatch : if (value!=0) /* 0 => use default */ @@ -674,6 +691,12 @@ CCtxParams->targetCBlockSize = value; return CCtxParams->targetCBlockSize; + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -779,6 +802,9 @@ case ZSTD_c_targetCBlockSize : *value = (int)CCtxParams->targetCBlockSize; break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1029,7 +1055,11 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; @@ -1049,10 +1079,19 @@ size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? 
MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<strategy >= ZSTD_btopt)) ? optPotentialSpace : 0; @@ -1069,20 +1108,23 @@ size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); U32 const divider = (cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; - size_t const entropySpace = HUF_WORKSPACE_SIZE; - size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); - size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); + size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + matchStateSize + ldmSpace + ldmSeqSpace; - - DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); - DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); - return sizeof(ZSTD_CCtx) + neededSpace; + size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); + + DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace); + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return cctxSpace + neededSpace; } } @@ -1118,7 +1160,8 @@ size_t const blockSize = 
MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - size_t const streamingSize = inBuffSize + outBuffSize; + size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) + + ZSTD_cwksp_alloc_size(outBuffSize); return CCtxSize + streamingSize; } @@ -1186,17 +1229,6 @@ return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ } - - -static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) -{ - return (cParams1.hashLog == cParams2.hashLog) - & (cParams1.chainLog == cParams2.chainLog) - & (cParams1.strategy == cParams2.strategy) /* opt parser space */ - & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */ -} - static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, ZSTD_compressionParameters cParams2) { @@ -1211,71 +1243,6 @@ assert(cParams1.strategy == cParams2.strategy); } -/** The parameters are equivalent if ldm is not enabled in both sets or - * all the parameters are equivalent. */ -static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, - ldmParams_t ldmParams2) -{ - return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || - (ldmParams1.enableLdm == ldmParams2.enableLdm && - ldmParams1.hashLog == ldmParams2.hashLog && - ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && - ldmParams1.minMatchLength == ldmParams2.minMatchLength && - ldmParams1.hashRateLog == ldmParams2.hashRateLog); -} - -typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; - -/* ZSTD_sufficientBuff() : - * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . 
- * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ -static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1, - size_t maxNbLit1, - ZSTD_buffered_policy_e buffPol2, - ZSTD_compressionParameters cParams2, - U64 pledgedSrcSize) -{ - size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); - size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); - size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4); - size_t const maxNbLit2 = blockSize2; - size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; - DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u", - (U32)neededBufferSize2, (U32)bufferSize1); - DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u", - (U32)maxNbSeq2, (U32)maxNbSeq1); - DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u", - (U32)maxNbLit2, (U32)maxNbLit1); - return (maxNbLit2 <= maxNbLit1) - & (maxNbSeq2 <= maxNbSeq1) - & (neededBufferSize2 <= bufferSize1); -} - -/** Equivalence for resetCCtx purposes */ -static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, - ZSTD_CCtx_params params2, - size_t buffSize1, - size_t maxNbSeq1, size_t maxNbLit1, - ZSTD_buffered_policy_e buffPol2, - U64 pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); - if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) { - DEBUGLOG(4, "ZSTD_equivalentCParams() == 0"); - return 0; - } - if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) { - DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0"); - return 0; - } - if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2, - params2.cParams, pledgedSrcSize)) { - DEBUGLOG(4, "ZSTD_sufficientBuff() == 0"); - return 0; - } - return 1; -} - static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; @@ -1301,87 +1268,104 @@ 
ms->dictMatchState = NULL; } -/*! ZSTD_continueCCtx() : - * reuse CCtx without reset (note : requires no dictionary) */ -static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) -{ - size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); - - cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ - cctx->appliedParams = params; - cctx->blockState.matchState.cParams = params.cParams; - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; - cctx->consumedSrcSize = 0; - cctx->producedCSize = 0; - if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) - cctx->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); - cctx->stage = ZSTDcs_init; - cctx->dictID = 0; - if (params.ldmParams.enableLdm) - ZSTD_window_clear(&cctx->ldmState.window); - ZSTD_referenceExternalSequences(cctx, NULL, 0); - ZSTD_invalidateMatchState(&cctx->blockState.matchState); - ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); - XXH64_reset(&cctx->xxhState, 0); - return 0; -} - -typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; - -typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; - -static void* +/** + * Indicates whether this compression proceeds directly from user-provided + * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or + * whether the context needs to buffer the input/output (ZSTDb_buffered). 
+ */ +typedef enum { + ZSTDb_not_buffered, + ZSTDb_buffered +} ZSTD_buffered_policy_e; + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + +static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, - void* ptr, + ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, - ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) { size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - - assert(((size_t)ptr & 3) == 0); + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; + + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + if (forceResetIndex == ZSTDirp_reset) { + memset(&ms->window, 0, sizeof(ms->window)); + ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ + ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_cwksp_mark_tables_dirty(ws); + } ms->hashLog3 = hashLog3; - memset(&ms->window, 0, sizeof(ms->window)); - ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ - ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_invalidateMatchState(ms); + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + /* opt parser space */ if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { DEBUGLOG(4, "reserving optimal parser space"); - ms->opt.litFreq = (unsigned*)ptr; - ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); - ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); - ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); - ptr = ms->opt.offCodeFreq + (MaxOff+1); - ms->opt.matchTable = (ZSTD_match_t*)ptr; - ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; -
ms->opt.priceTable = (ZSTD_optimal_t*)ptr; - ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); + ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } - /* table Space */ - DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ - ms->hashTable = (U32*)(ptr); - ms->chainTable = ms->hashTable + hSize; - ms->hashTable3 = ms->chainTable + chainSize; - ptr = ms->hashTable3 + h3Size; - ms->cParams = *cParams; - assert(((size_t)ptr & 3) == 0); - return ptr; + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + return 0; } /* ZSTD_indexTooCloseToMax() : @@ -1397,13 +1381,6 @@ return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); } -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ - /*!
ZSTD_resetCCtx_internal() : note : `params` are assumed fully validated at this stage */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, @@ -1412,30 +1389,12 @@ ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { + ZSTD_cwksp* const ws = &zc->workspace; DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - if (crp == ZSTDcrp_continue) { - if (ZSTD_equivalentParams(zc->appliedParams, params, - zc->inBuffSize, - zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, - zbuff, pledgedSrcSize) ) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); - zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ - if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { - DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", - zc->appliedParams.cParams.windowLog, zc->blockSize); - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - /* prefer a reset, faster than a rescale */ - ZSTD_reset_matchState(&zc->blockState.matchState, - zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, - &params.cParams, - crp, ZSTD_resetTarget_CCtx); - } - return ZSTD_continueCCtx(zc, params, pledgedSrcSize); - } } } - DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); + zc->isFirstBlock = 1; if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ @@ -1449,58 +1408,74 @@ size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (params.cParams.minMatch==3) ?
3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - void* ptr; /* used to partition workSpace */ - - /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const entropySpace = HUF_WORKSPACE_SIZE; - size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); - size_t const bufferSpace = buffInSize + buffOutSize; + + ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; + + if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { + needsIndexReset = ZSTDirp_reset; + } + + ZSTD_cwksp_bump_oversized_duration(ws, 0); + + /* Check if workspace is large enough, alloc a new one if needed */ + { size_t const cctxSpace = zc->staticSize ?
ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); - size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); - - size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + - ldmSeqSpace + matchStateSize + tokenSpace + - bufferSpace; - - int const workSpaceTooSmall = zc->workSpaceSize < neededSpace; - int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace; - int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION); - zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0; + size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", neededSpace>>10, matchStateSize>>10, bufferSpace>>10); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - if (workSpaceTooSmall || workSpaceWasteful) { - DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", - zc->workSpaceSize >> 10, + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, neededSpace >> 10); RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); - zc->workSpaceSize = 0; - ZSTD_free(zc->workSpace, 
zc->customMem); - zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); - zc->workSpaceSize = neededSpace; - zc->workSpaceOversizedDuration = 0; - + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem)); + + DEBUGLOG(5, "reserving object space"); /* Statically sized space. * entropyWorkspace never moves, * though prev/next block swap places */ - assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ - assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; - zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; - ptr = zc->blockState.nextCBlock + 1; - zc->entropyWorkspace = (U32*)ptr; + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); } } + ZSTD_cwksp_clear(ws); + /* init params */ zc->appliedParams = params; zc->blockState.matchState.cParams = params.cParams; @@ -1519,58 +1494,58 @@ ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - ptr = ZSTD_reset_matchState(&zc->blockState.matchState, - zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, - ¶ms.cParams, - crp, ZSTD_resetTarget_CCtx); + /* ZSTD_wildcopy() is used to 
copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); + memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx)); /* ldm hash table */ - /* initialize bucketOffsets table later for pointer alignment */ if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; - memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - zc->ldmState.hashTable = (ldmEntry_t*)ptr; - ptr = zc->ldmState.hashTable + ldmHSize; - zc->ldmSequences = (rawSeq*)ptr; - ptr = zc->ldmSequences + maxNbLdmSeq; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); - } - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - - /* sequences storage */ - zc->seqStore.maxNbSeq = maxNbSeq; - zc->seqStore.sequencesStart = (seqDef*)ptr; - ptr = zc->seqStore.sequencesStart + maxNbSeq; - zc->seqStore.llCode = (BYTE*) ptr; - zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; - zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
- */ - zc->seqStore.maxNbLit = blockSize; - ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH; - - /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { - size_t const ldmBucketSize = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); - memset(ptr, 0, ldmBucketSize); - zc->ldmState.bucketOffsets = (BYTE*)ptr; - ptr = zc->ldmState.bucketOffsets + ldmBucketSize; ZSTD_window_clear(&zc->ldmState.window); } - ZSTD_referenceExternalSequences(zc, NULL, 0); - - /* buffers */ - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ptr; - zc->outBuffSize = buffOutSize; - zc->outBuff = zc->inBuff + buffInSize; + + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); return 0; } @@ -1604,15 +1579,15 @@ }; static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize) { size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; return ( pledgedSrcSize <= cutoff || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params.attachDictPref == ZSTD_dictForceAttach ) - && params.attachDictPref != ZSTD_dictForceCopy - && !params.forceWindow; /* dictMatchState isn't correctly + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow; /* dictMatchState isn't correctly * handled in _enforceMaxDist */ } @@ -1630,8 +1605,8 @@ * has its own tables. */ params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_continue, zbuff); + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff)); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); } @@ -1679,30 +1654,36 @@ /* Copy only compression parameters related to tables. 
*/ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_noMemset, zbuff); + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff)); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); } + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + /* copy tables */ { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - size_t const tableSpace = (chainSize + hSize) * sizeof(U32); - assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); - assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); - memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + + memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); } /* Zero the hashTable3, since the cdict never fills it */ - { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; assert(cdict->matchState.hashLog3 == 0); memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); } + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + /* copy dictionary offsets */ { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; @@ -1724,7 +1705,7 @@ * in-place. We decide here which strategy to use. */ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { @@ -1734,10 +1715,10 @@ if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { return ZSTD_resetCCtx_byAttachingCDict( - cctx, cdict, params, pledgedSrcSize, zbuff); + cctx, cdict, *params, pledgedSrcSize, zbuff); } else { return ZSTD_resetCCtx_byCopyingCDict( - cctx, cdict, params, pledgedSrcSize, zbuff); + cctx, cdict, *params, pledgedSrcSize, zbuff); } } @@ -1763,7 +1744,7 @@ params.cParams = srcCCtx->appliedParams.cParams; params.fParams = fParams; ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, - ZSTDcrp_noMemset, zbuff); + ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); @@ -1771,16 +1752,27 @@ assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); } + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + /* copy tables */ { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 
0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; - size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); - memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + + memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); } + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + /* copy dictionary offsets */ { const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; @@ -1831,6 +1823,20 @@ int rowNb; assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ assert(size < (1U<<31)); /* can be casted to int */ + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. 
This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. */ + __msan_unpoison(table, size * sizeof(U32)); +#endif + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { int column; for (column=0; columncParams.windowLog > STREAM_ACCUMULATOR_MIN; @@ -1971,7 +1977,7 @@ ZSTD_disableLiteralsCompression(cctxParams), op, dstCapacity, literals, litSize, - workspace, wkspSize, + entropyWorkspace, entropyWkspSize, bmi2); FORWARD_IF_ERROR(cSize); assert(cSize <= dstCapacity); @@ -1981,12 +1987,17 @@ /* Sequences Header */ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, dstSize_tooSmall); - if (nbSeq < 0x7F) + if (nbSeq < 128) { *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } assert(op <= oend); if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ @@ -2002,7 +2013,7 @@ ZSTD_seqToCodes(seqStorePtr); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building LL table"); nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, @@ -2012,10 +2023,14 @@ ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t 
const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (LLtype == set_compressed) lastNCount = op; @@ -2024,7 +2039,8 @@ } } /* build CTable for Offsets */ { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); @@ -2035,10 +2051,14 @@ OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (Offtype == set_compressed) lastNCount = op; @@ -2047,7 +2067,8 @@ } } /* build CTable for MatchLengths */ { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, @@ -2056,10 +2077,14 @@ ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, ML_defaultNorm, 
ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (MLtype == set_compressed) lastNCount = op; @@ -2107,13 +2132,13 @@ const ZSTD_CCtx_params* cctxParams, void* dst, size_t dstCapacity, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { size_t const cSize = ZSTD_compressSequences_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, - workspace, wkspSize, bmi2); + entropyWorkspace, entropyWkspSize, bmi2); if (cSize == 0) return 0; /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
@@ -2264,11 +2289,99 @@ return ZSTDbss_compress; } +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqs = seqStore->sequencesStart; + size_t seqsSize = seqStore->sequences - seqs; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; size_t position; int repIdx; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = outSeqs[i].offset; + repIdx = (unsigned int)i - outSeqs[i].offset; + + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + --repIdx; + } else { + repIdx = (unsigned int)i - 1; + } + ++outSeqs[i].rep; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? 
outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; + if (outSeqs[i].rep == 4) { + --outSeqs[i].offset; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = (unsigned int)position; + position += outSeqs[i].matchLength; + } + zc->seqCollector.seqIndex += seqsSize; +} + +size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +/* Returns true if the given block is a RLE block */ +static int ZSTD_isRLE(const BYTE *ip, size_t length) { + size_t i; + if (length < 2) return 1; + for (i = 1; i < length; ++i) { + if (ip[0] != ip[i]) return 0; + } + return 1; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize) + const void* src, size_t srcSize, U32 frame) { + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
+ */ + const U32 rleMaxLength = 25; size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@ -2278,6 +2391,11 @@ if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } } + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + /* encode sequences and literals */ cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, @@ -2287,8 +2405,21 @@ zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + out: - if (!ZSTD_isError(cSize) && cSize != 0) { + if (!ZSTD_isError(cSize) && cSize > 1) { /* confirm repcodes and entropy tables when emitting a compressed block */ ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; zc->blockState.prevCBlock = zc->blockState.nextCBlock; @@ -2305,7 +2436,11 @@ } -static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) { if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { U32 const maxDist = (U32)1 << params->cParams.windowLog; @@ -2314,7 +2449,9 @@ ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); 
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; else ms->nextToUpdate -= correction; /* invalidate dictionaries on overflow correction */ @@ -2323,7 +2460,6 @@ } } - /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2357,7 +2493,8 @@ "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; - ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ @@ -2365,15 +2502,16 @@ { size_t cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize); + ip, blockSize, 1 /* frame */); FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader24); + const U32 cBlockHeader = cSize == 1 ? 
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); cSize += ZSTD_blockHeaderSize; } @@ -2383,6 +2521,7 @@ op += cSize; assert(dstCapacity >= cSize); dstCapacity -= cSize; + cctx->isFirstBlock = 0; DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } @@ -2393,25 +2532,25 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) { BYTE* const op = (BYTE*)dst; U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params.fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params.cParams.windowLog; - U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = params.fParams.contentSizeFlag ? + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? 
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); size_t pos=0; - assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", - !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); - - if (params.format == ZSTD_f_zstd1) { + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + + if (params->format == ZSTD_f_zstd1) { MEM_writeLE32(dst, ZSTD_MAGICNUMBER); pos = 4; } @@ -2477,7 +2616,7 @@ "missing init (ZSTD_compressBegin)"); if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); FORWARD_IF_ERROR(fhSize); assert(fhSize <= dstCapacity); @@ -2497,13 +2636,15 @@ if (!frame) { /* overflow check and correction for block mode */ - ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); } DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); { size_t const cSize = frame ? 
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); FORWARD_IF_ERROR(cSize); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); @@ -2550,6 +2691,7 @@ * @return : 0, or an error code */ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* src, size_t srcSize, ZSTD_dictTableLoadMethod_e dtlm) @@ -2570,7 +2712,7 @@ size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); const BYTE* const ichunk = ip + chunk; - ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); switch(params->cParams.strategy) { @@ -2629,10 +2771,11 @@ /*! ZSTD_loadZstdDictionary() : * @return : dictID, or an error code * assumptions : magic number supposed already checked - * dictSize supposed > 8 + * dictSize supposed >= 8 */ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, ZSTD_dictTableLoadMethod_e dtlm, @@ -2645,7 +2788,7 @@ size_t dictID; ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1< 8); + assert(dictSize >= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); dictPtr += 4; /* skip magic number */ @@ -2728,7 +2871,8 @@ bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, ws, params, dictPtr, dictContentSize, dtlm)); return dictID; } } @@ -2738,6 +2882,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + 
ZSTD_cwksp* ws, const ZSTD_CCtx_params* params, const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, @@ -2745,27 +2890,35 @@ void* workspace) { DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); - if ((dict==NULL) || (dictSize<=8)) return 0; + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + return 0; + } ZSTD_reset_compressedBlockState(bs); /* dict restricted modes */ if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm); if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictContentType == ZSTD_dct_auto) { DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent( + ms, ws, params, dict, dictSize, dtlm); } RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); assert(0); /* impossible */ } /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); } +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) + /*! 
ZSTD_compressBegin_internal() : * @return : 0, or an error code */ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, @@ -2773,23 +2926,34 @@ ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, U64 pledgedSrcSize, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - if (cdict && cdict->dictContentSize>0) { + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_continue, zbuff) ); - { size_t const dictID = ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) ); + { size_t const dictID = cdict ? 
+ ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, + dictContentType, dtlm, cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->workspace, params, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; @@ -2802,12 +2966,12 @@ ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) ); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -2822,21 +2986,21 @@ ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, - cctxParams, pledgedSrcSize); + &cctxParams, pledgedSrcSize); } size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); DEBUGLOG(4, "ZSTD_compressBegin_usingDict 
(dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) @@ -2859,7 +3023,7 @@ /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; op += fhSize; @@ -2920,13 +3084,13 @@ ZSTD_parameters params) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); DEBUGLOG(4, "ZSTD_compress_internal"); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - cctxParams); + &cctxParams); } size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, @@ -2950,7 +3114,7 @@ void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_CCtx_params params) + const ZSTD_CCtx_params* params) { DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, @@ -2966,9 +3130,9 @@ int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? 
dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, @@ -3003,8 +3167,11 @@ ZSTD_dictLoadMethod_e dictLoadMethod) { DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); - return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) @@ -3017,7 +3184,9 @@ { if (cdict==NULL) return 0; /* support sizeof on NULL */ DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); - return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 
0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); } static size_t ZSTD_initCDict_internal( @@ -3031,28 +3200,29 @@ assert(!ZSTD_checkCParams(cParams)); cdict->matchState.cParams = cParams; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { - cdict->dictBuffer = NULL; cdict->dictContent = dictBuffer; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); - cdict->dictBuffer = internalBuffer; + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation); cdict->dictContent = internalBuffer; - RETURN_ERROR_IF(!internalBuffer, memory_allocation); memcpy(internalBuffer, dictBuffer, dictSize); } cdict->dictContentSize = dictSize; + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + /* Reset the state to no dictionary */ ZSTD_reset_compressedBlockState(&cdict->cBlockState); - { void* const end = ZSTD_reset_matchState(&cdict->matchState, - (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, - &cParams, - ZSTDcrp_continue, ZSTD_resetTarget_CDict); - assert(end == (char*)cdict->workspace + cdict->workspaceSize); - (void)end; - } + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + &cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict)); /* (Maybe) load the dictionary - * Skips loading the dictionary if it is <= 8 bytes. + * Skips loading the dictionary if it is < 8 bytes. 
*/ { ZSTD_CCtx_params params; memset(¶ms, 0, sizeof(params)); @@ -3060,9 +3230,9 @@ params.fParams.contentSizeFlag = 1; params.cParams = cParams; { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, ¶ms, - cdict->dictContent, cdict->dictContentSize, - dictContentType, ZSTD_dtlm_full, cdict->workspace); + &cdict->cBlockState, &cdict->matchState, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= (size_t)(U32)-1); cdict->dictID = (U32)dictID; @@ -3080,18 +3250,29 @@ DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); - size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); void* const workspace = ZSTD_malloc(workspaceSize, customMem); - - if (!cdict || !workspace) { - ZSTD_free(cdict, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { ZSTD_free(workspace, customMem); return NULL; } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; - cdict->workspace = workspace; - cdict->workspaceSize = workspaceSize; + cdict->compressionLevel = 0; /* signals advanced API usage */ + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, @@ -3107,9 +3288,12 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = compressionLevel == 0 ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; } ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) @@ -3124,9 +3308,11 @@ { if (cdict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = cdict->customMem; - ZSTD_free(cdict->workspace, cMem); - ZSTD_free(cdict->dictBuffer, cMem); - ZSTD_free(cdict, cMem); + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_free(cdict, cMem); + } return 0; } } @@ -3152,28 +3338,30 @@ ZSTD_compressionParameters cParams) { size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); - size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) - + HUF_WORKSPACE_SIZE + matchStateSize; - ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; - void* ptr; + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; - if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(cdict+1, dict, dictSize); - dict = cdict+1; - ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; - } else { - ptr = cdict+1; - } - cdict->workspace = ptr; - cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, - ZSTD_dlm_byRef, dictContentType, + dictLoadMethod, dictContentType, cParams) )) return NULL; @@ -3195,7 +3383,15 @@ DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ZSTD_getCParamsFromCDict(cdict); + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) + && (params.attachDictPref != ZSTD_dictForceLoad) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); /* Increase window log to fit the entire dictionary and source if the * source size is known. Limit the increase to 19, which is the * window log for compression level 1 with the largest source size. 
@@ -3209,7 +3405,7 @@ return ZSTD_compressBegin_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, cdict, - params, pledgedSrcSize, + ¶ms, pledgedSrcSize, ZSTDb_not_buffered); } } @@ -3300,7 +3496,7 @@ FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, ZSTD_dtlm_fast, cdict, - params, pledgedSrcSize, + ¶ms, pledgedSrcSize, ZSTDb_buffered) ); cctx->inToCompress = 0; @@ -3334,13 +3530,14 @@ * Assumption 2 : either dict, or cdict, is defined, not both */ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_internal"); FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - zcs->requestedParams = params; + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); @@ -3379,7 +3576,7 @@ /* ZSTD_initCStream_advanced() : * pledgedSrcSize must be exact. * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. 
*/ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pss) @@ -3393,7 +3590,7 @@ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); return 0; } @@ -3643,7 +3840,7 @@ if (cctx->mtctx == NULL) { DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", params.nbWorkers); - cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); } /* mt compression */ @@ -3771,8 +3968,8 @@ { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ @@ -3796,8 +3993,8 @@ /* W, C, H, S, L, T, strat */ { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ { 18, 18, 18, 3, 5, 2, 
ZSTD_greedy }, /* level 5.*/ { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ @@ -3823,8 +4020,8 @@ { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ @@ -3849,7 +4046,7 @@ { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h --- a/contrib/python-zstandard/zstd/compress/zstd_compress_internal.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress_internal.h Tue Jan 21 13:14:51 2020 -0500 @@ -19,6 +19,7 @@ * Dependencies ***************************************/ #include "zstd_internal.h" +#include "zstd_cwksp.h" #ifdef ZSTD_MULTITHREAD # include "zstdmt_compress.h" #endif @@ -192,6 +193,13 @@ size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + struct ZSTD_CCtx_params_s { ZSTD_format_e format; ZSTD_compressionParameters cParams; @@ -203,6 +211,9 @@ size_t targetCBlockSize; /* Tries to fit compressed block size to be around 
targetCBlockSize. * No target when targetCBlockSize == 0. * There is no guarantee on compressed block size */ + int srcSizeHint; /* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; ZSTD_literalCompressionMode_e literalCompressionMode; @@ -228,9 +239,7 @@ ZSTD_CCtx_params appliedParams; U32 dictID; - int workSpaceOversizedDuration; - void* workSpace; - size_t workSpaceSize; + ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ size_t blockSize; unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ unsigned long long consumedSrcSize; @@ -238,6 +247,8 @@ XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + SeqCollector seqCollector; + int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ @@ -337,26 +348,57 @@ return (srcSize >> minlog) + 2; } +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. + */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + /*! ZSTD_storeSeq() : - * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). 
* `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. */ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) { + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) static const BYTE* g_start = NULL; if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); /* copy Literals */ assert(seqStorePtr->maxNbLit <= 128 KB); assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } seqStorePtr->lit += litLength; /* literal Length */ @@ -368,7 +410,7 @@ seqStorePtr->sequences[0].litLength = (U16)litLength; /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; + seqStorePtr->sequences[0].offset = offCode + 1; /* match Length */ if (mlBase>0xFFFF) { @@ -910,7 +952,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); void ZSTD_resetSeqStore(seqStore_t* ssPtr); @@ -925,7 +967,7 @@ ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); /* ZSTD_compress_advanced_internal() : @@ -934,7 +976,7 @@ void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_CCtx_params params); + const ZSTD_CCtx_params* params); /* ZSTD_writeLastEmptyBlock() : diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress_literals.c --- a/contrib/python-zstandard/zstd/compress/zstd_compress_literals.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress_literals.c Tue Jan 21 13:14:51 2020 -0500 @@ -70,7 +70,7 @@ ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, const int bmi2) { size_t const minGain = ZSTD_minGain(srcSize, strategy); @@ -99,10 +99,15 @@ { HUF_repeat 
repeat = prevHuf->repeatMode; int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) - : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + 255, 11, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + 255, 11, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ hType = set_repeat; diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h --- a/contrib/python-zstandard/zstd/compress/zstd_compress_literals.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress_literals.h Tue Jan 21 13:14:51 2020 -0500 @@ -23,7 +23,7 @@ ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, const int bmi2); #endif /* ZSTD_COMPRESS_LITERALS_H */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c --- a/contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c Tue Jan 21 13:14:51 2020 -0500 @@ -222,7 +222,7 @@ const BYTE* codeTable, size_t nbSeq, const S16* defaultNorm, U32 defaultNormLog, 
U32 defaultMax, const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize) + void* entropyWorkspace, size_t entropyWorkspaceSize) { BYTE* op = (BYTE*)dst; const BYTE* const oend = op + dstCapacity; @@ -238,7 +238,7 @@ memcpy(nextCTable, prevCTable, prevCTableSize); return 0; case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -252,7 +252,7 @@ FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize)); return NCountSize; } } diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h --- a/contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h Tue Jan 21 13:14:51 2020 -0500 @@ -35,7 +35,7 @@ const BYTE* codeTable, size_t nbSeq, const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize); + void* entropyWorkspace, size_t entropyWorkspaceSize); size_t ZSTD_encodeSequences( void* dst, size_t dstCapacity, diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_cwksp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/contrib/python-zstandard/zstd/compress/zstd_cwksp.h Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. 
+ */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. 
+ * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_free{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. 
+ * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + int allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. 
+ */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + __asan_unpoison_memory_region(alloc, bytes); +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. 
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + __asan_unpoison_memory_region(alloc, bytes); +#endif + + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE 
unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + __asan_unpoison_memory_region(alloc, bytes); +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. 
+ */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + +#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). 
+ */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_malloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation); + ZSTD_cwksp_init(ws, workspace, size); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_free(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before its used again). 
+ */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_double_fast.c --- a/contrib/python-zstandard/zstd/compress/zstd_double_fast.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_double_fast.c Tue Jan 21 13:14:51 2020 -0500 @@ -148,7 +148,7 @@ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); goto _match_stored; } @@ -157,7 +157,7 @@ && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); goto _match_stored; } @@ -247,7 +247,7 @@ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: /* match found */ @@ -278,7 +278,7 @@ const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; @@ -297,7 +297,7 @@ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ @@ -411,7 +411,7 @@ const BYTE* repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; @@ -422,7 +422,7 @@ while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -447,7 +447,7 @@ } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; @@ -479,7 +479,7 @@ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_fast.c --- a/contrib/python-zstandard/zstd/compress/zstd_fast.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_fast.c Tue Jan 21 13:14:51 2020 -0500 @@ -8,7 +8,7 @@ * You may select, at your option, one of the above-listed licenses. */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ #include "zstd_fast.h" @@ -43,8 +43,8 @@ } -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_fast_generic( +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls) @@ -74,8 +74,7 @@ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; - { - U32 const maxRep = (U32)(ip0 - prefixStart); + { U32 const maxRep = (U32)(ip0 - prefixStart); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -118,8 +117,7 @@ match0 = match1; goto _offset; } - { - size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; assert(step >= 2); ip0 += step; ip1 += step; @@ -138,7 +136,7 @@ _match: /* Requires: ip0, match0, offcode */ /* Count the forward length */ mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; - 
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); /* match found */ ip0 += mLength; anchor = ip0; @@ -150,16 +148,15 @@ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - while ( (ip0 <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { + while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */ + && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { /* store sequence */ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); ip0 += rLength; ip1 = ip0 + 1; - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); anchor = ip0; continue; /* faster when present (confirmed on gcc-8) ... (?) */ } @@ -179,8 +176,7 @@ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState == NULL); switch(mls) { @@ -265,7 +261,7 @@ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -285,7 +281,7 @@ } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -300,7 +296,7 @@ && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ @@ -325,7 +321,7 @@ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -348,8 +344,7 @@ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState != NULL); switch(mls) { @@ -408,16 +403,17 @@ const U32 repIndex = current + 1 - offset_1; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - size_t mLength; hashTable[h] = current; /* update hash table */ assert(offset_1 <= current +1); /* check repIndex */ if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; } else { if ( (matchIndex < dictStartIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { @@ -427,19 +423,15 @@ } { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; - U32 offset; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + U32 const offset = current - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; } } - /* found a match : store it */ - ip += mLength; - anchor = ip; - if (ip <= ilimit) { /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; @@ -448,13 +440,13 @@ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -476,8 +468,7 @@ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; switch(mls) { default: /* includes case 3 */ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_lazy.c --- a/contrib/python-zstandard/zstd/compress/zstd_lazy.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_lazy.c Tue Jan 21 13:14:51 2020 -0500 @@ -810,7 +810,7 @@ /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -828,7 +828,7 @@ const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? 
dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; @@ -843,7 +843,7 @@ /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) */ @@ -1051,7 +1051,7 @@ /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -1066,7 +1066,7 @@ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) 
*/ diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_ldm.c --- a/contrib/python-zstandard/zstd/compress/zstd_ldm.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_ldm.c Tue Jan 21 13:14:51 2020 -0500 @@ -49,9 +49,9 @@ { size_t const ldmHSize = ((size_t)1) << params.hashLog; size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); - size_t const ldmBucketSize = - ((size_t)1) << (params.hashLog - ldmBucketSizeLog); - size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); return params.enableLdm ? totalSize : 0; } @@ -583,7 +583,7 @@ rep[i] = rep[i-1]; rep[0] = sequence.offset; /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, sequence.offset + ZSTD_REP_MOVE, sequence.matchLength - MINMATCH); ip += sequence.matchLength; diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstd_opt.c --- a/contrib/python-zstandard/zstd/compress/zstd_opt.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstd_opt.c Tue Jan 21 13:14:51 2020 -0500 @@ -1098,7 +1098,7 @@ assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); anchor += advance; ip = anchor; } } diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/compress/zstdmt_compress.c --- a/contrib/python-zstandard/zstd/compress/zstdmt_compress.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/compress/zstdmt_compress.c Tue Jan 21 13:14:51 2020 -0500 @@ -668,7 +668,7 @@ /* init */ if 
(job->cdict) { - size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); assert(job->firstJob); /* only allowed for first job */ if (ZSTD_isError(initError)) JOB_ERROR(initError); } else { /* srcStart points at reloaded section */ @@ -680,7 +680,7 @@ job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ ZSTD_dtlm_fast, NULL, /*cdict*/ - jobParams, pledgedSrcSize); + &jobParams, pledgedSrcSize); if (ZSTD_isError(initError)) JOB_ERROR(initError); } } @@ -927,12 +927,18 @@ unsigned jobID; DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); - mtctx->jobs[jobID].dstBuff = g_nullBuffer; - mtctx->jobs[jobID].cSize = 0; + + /* Clear the job description, but keep the mutex/cond */ + memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; } - memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); mtctx->inBuff.buffer = g_nullBuffer; mtctx->inBuff.filled = 0; mtctx->allJobsCompleted = 1; @@ -1028,9 +1034,9 @@ /* Sets parameters relevant to the compression job, * initializing others to default values. 
*/ -static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) +static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params) { - ZSTD_CCtx_params jobParams = params; + ZSTD_CCtx_params jobParams = *params; /* Clear parameters related to multithreading */ jobParams.forceWindow = 0; jobParams.nbWorkers = 0; @@ -1151,16 +1157,16 @@ /* ===== Multi-threaded compression ===== */ /* ------------------------------------------ */ -static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) { unsigned jobLog; - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. */ - jobLog = MAX(21, params.cParams.chainLog + 4); + jobLog = MAX(21, params->cParams.chainLog + 4); } else { - jobLog = MAX(20, params.cParams.windowLog + 2); + jobLog = MAX(20, params->cParams.windowLog + 2); } return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); } @@ -1193,27 +1199,27 @@ return ovlog; } -static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) { - int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy); - int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog); + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); assert(0 <= overlapRLog && overlapRLog <= 8); - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. 
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ - ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog; } assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); - DEBUGLOG(4, "overlapLog : %i", params.overlapLog); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog); return (ovLog==0) ? 0 : (size_t)1 << ovLog; } static unsigned -ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) +ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers) { assert(nbWorkers>0); { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); @@ -1236,9 +1242,9 @@ const ZSTD_CDict* cdict, ZSTD_CCtx_params params) { - ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); - size_t const overlapSize = ZSTDMT_computeOverlapSize(params); - unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); + ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(¶ms); + size_t const overlapSize = ZSTDMT_computeOverlapSize(¶ms); + unsigned const nbJobs = ZSTDMT_computeNbJobs(¶ms, srcSize, params.nbWorkers); size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? 
proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ const char* const srcStart = (const char*)src; @@ -1256,7 +1262,7 @@ ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams); } assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ @@ -1404,12 +1410,12 @@ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ if (mtctx->singleBlockingThread) { - ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); + ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(¶ms); DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); assert(singleThreadParams.nbWorkers == 0); return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], dict, dictSize, cdict, - singleThreadParams, pledgedSrcSize); + &singleThreadParams, pledgedSrcSize); } DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); @@ -1435,11 +1441,11 @@ mtctx->cdict = cdict; } - mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params); + mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(¶ms); DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10)); mtctx->targetSectionSize = params.jobSize; if (mtctx->targetSectionSize == 0) { - mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(¶ms); } 
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/decompress/huf_decompress.c --- a/contrib/python-zstandard/zstd/decompress/huf_decompress.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/decompress/huf_decompress.c Tue Jan 21 13:14:51 2020 -0500 @@ -61,7 +61,9 @@ * Error Management ****************************************************************/ #define HUF_isError ERR_isError +#ifndef CHECK_F #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } +#endif /* ************************************************************** diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/decompress/zstd_decompress.c --- a/contrib/python-zstandard/zstd/decompress/zstd_decompress.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/decompress/zstd_decompress.c Tue Jan 21 13:14:51 2020 -0500 @@ -88,10 +88,7 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) { - size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? 
- ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE : - ZSTD_FRAMEHEADERSIZE_PREFIX; - ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); return startingInputLength; @@ -376,7 +373,7 @@ { unsigned long long totalDstSize = 0; - while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { U32 const magicNumber = MEM_readLE32(src); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { @@ -629,11 +626,12 @@ /* check */ RETURN_ERROR_IF( - remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, srcSize_wrong); /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, srcSize_wrong); @@ -714,7 +712,7 @@ dictSize = ZSTD_DDict_dictSize(ddict); } - while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) { @@ -1098,7 +1096,7 @@ size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, dictionary_corrupted); entropy->rep[i] = rep; } } @@ -1267,7 +1265,7 @@ { RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); ZSTD_clearDict(dctx); - if 
(dict && dictSize >= 8) { + if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); dctx->ddict = dctx->ddictLocal; @@ -1300,14 +1298,14 @@ /* ZSTD_initDStream_usingDict() : - * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. + * return : expected size, aka ZSTD_startingInputLength(). * this function cannot fail */ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { DEBUGLOG(4, "ZSTD_initDStream_usingDict"); FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); - return ZSTD_FRAMEHEADERSIZE_PREFIX; + return ZSTD_startingInputLength(zds->format); } /* note : this variant can't fail */ @@ -1324,16 +1322,16 @@ { FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); - return ZSTD_FRAMEHEADERSIZE_PREFIX; + return ZSTD_startingInputLength(dctx->format); } /* ZSTD_resetDStream() : - * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX. + * return : expected size, aka ZSTD_startingInputLength(). 
* this function cannot fail */ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); - return ZSTD_FRAMEHEADERSIZE_PREFIX; + return ZSTD_startingInputLength(dctx->format); } @@ -1564,7 +1562,7 @@ zds->lhSize += remainingInput; } input->pos = input->size; - return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ } assert(ip != NULL); memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c --- a/contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c Tue Jan 21 13:14:51 2020 -0500 @@ -573,38 +573,118 @@ size_t pos; } seqState_t; +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. 
+ * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} -/* ZSTD_execSequenceLast7(): - * exceptional case : decompress a match starting within last 7 bytes of output buffer. - * requires more careful checks, to ensure there is no overflow. - * performance does not matter though. - * note : this case is supposed to be never generated "naturally" by reference encoder, - * since in most cases it needs at least 8 bytes to look for a match. - * but it's allowed by the specification. */ +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. 
+ */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). 
+ */ FORCE_NOINLINE -size_t ZSTD_execSequenceLast7(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* check */ - RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); + /* bounds checks */ + assert(oLitEnd < oMatchEnd); + RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); /* copy literals */ - while (op < oLitEnd) *op++ = *(*litPtr)++; + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); - match = dictEnd - (base-match); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; @@ -614,13 +694,12 @@ memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; - match = base; + match 
= prefixStart; } } - while (op < oMatchEnd) *op++ = *match++; + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); return sequenceLength; } - HINT_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, @@ -634,20 +713,29 @@ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* check */ - RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + /* Errors and uncommon cases handled here. */ + assert(oLitEnd < oMatchEnd); + if (iLitEnd > litLimit || oMatchEnd > oend_w) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); - /* copy Literals */ - if (sequence.litLength > 8) - ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - else - ZSTD_copy8(op, *litPtr); + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. 
+ */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (sequence.litLength > 16) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ - /* copy Match */ + /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); @@ -662,123 +750,33 @@ op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } } } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - - -HINT_INLINE -size_t ZSTD_execSequenceLong(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + 
sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = sequence.match; - - /* check */ - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); - if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); - /* copy Literals */ - if (sequence.litLength > 8) - ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - else - ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ - - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (sequence.offset >= WILDCOPY_VECLEN) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. 
+ */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } - } } - assert(op <= oend_w); - assert(sequence.matchLength >= MINMATCH); + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ + /* If the match length is > 8 bytes, then continue with the wildcopy. 
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); } return sequenceLength; } @@ -1098,7 +1096,7 @@ /* decode and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb= 201402) /* C++14 or greater */ # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API -# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__) +# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) # elif defined(__GNUC__) && (__GNUC__ >= 3) # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/dictBuilder/cover.c --- a/contrib/python-zstandard/zstd/dictBuilder/cover.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/dictBuilder/cover.c Tue Jan 21 13:14:51 2020 -0500 @@ -638,8 +638,8 @@ "compared to the source size %u! " "size(source)/size(dictionary) = %f, but it should be >= " "10! This may lead to a subpar dictionary! We recommend " - "training on sources at least 10x, and up to 100x the " - "size of the dictionary!\n", (U32)maxDictSize, + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! 
\n", (U32)maxDictSize, (U32)nbDmers, ratio); } diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/dictBuilder/zdict.c --- a/contrib/python-zstandard/zstd/dictBuilder/zdict.c Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/dictBuilder/zdict.c Tue Jan 21 13:14:51 2020 -0500 @@ -571,7 +571,7 @@ unsigned const prime1 = 2654435761U; unsigned const prime2 = 2246822519U; unsigned acc = prime1; - size_t p=0;; + size_t p=0; for (p=0; p> 21); diff -r 61881b170140 -r 84a0102c05c7 contrib/python-zstandard/zstd/zstd.h --- a/contrib/python-zstandard/zstd/zstd.h Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python-zstandard/zstd/zstd.h Tue Jan 21 13:14:51 2020 -0500 @@ -15,6 +15,7 @@ #define ZSTD_H_235446 /* ====== Dependency ======*/ +#include /* INT_MAX */ #include /* size_t */ @@ -71,7 +72,7 @@ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 3 +#define ZSTD_VERSION_RELEASE 4 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -196,9 +197,13 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx - * The function will compress at requested compression level, - * ignoring any other parameter */ + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -233,7 +238,7 @@ * using ZSTD_CCtx_set*() functions. 
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx() + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * @@ -261,18 +266,26 @@ /* compression parameters * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() - * for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). * Default level is ZSTD_CLEVEL_DEFAULT==3. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level sets all default values of other compression parameters */ + * Note 2 : setting a level resets all other compression parameters to default */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. 
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. * Special: value 0 means "use default windowLog". * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such window size at decompression stage if using streaming. */ + * requires explicitly allowing such size at streaming decompression stage. */ ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. * Resulting memory usage is (1 << (hashLog+2)). * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. @@ -283,13 +296,13 @@ * Resulting memory usage is (1 << (chainLog+2)). * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. * Larger tables result in better and slower compression. - * This parameter is useless when using "fast" strategy. + * This parameter is useless for "fast" strategy. * It's still useful when using "dfast" strategy, * in which case it defines a secondary probe table. * Special: value 0 means "use default chainLog". */ ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. * More attempts result in better and slower compression. - * This parameter is useless when using "fast" and "dFast" strategies. + * This parameter is useless for "fast" and "dFast" strategies. * Special: value 0 means "use default searchLog". */ ZSTD_c_minMatch=105, /* Minimum size of searched matches. * Note that Zstandard can still find matches of smaller size, @@ -344,7 +357,7 @@ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) * Content size must be known at the beginning of compression. 
* This is automatically the case when using ZSTD_compress2(), - * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ @@ -363,7 +376,7 @@ * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced */ + * The minimum size is automatically and transparently enforced. */ ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. * It helps preserve compression ratio, while each job is compressed in parallel. @@ -386,6 +399,7 @@ * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -396,6 +410,7 @@ ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004 } ZSTD_cParameter; typedef struct { @@ -793,12 +808,17 @@ typedef struct ZSTD_CDict_s ZSTD_CDict; /*! ZSTD_createCDict() : - * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. 
- * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); @@ -925,7 +945,7 @@ * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. * It's a CPU consuming operation, with non-negligible impact on latency. * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent). 
+ * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); @@ -969,7 +989,7 @@ * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. * Prefix buffer must remain unmodified up to the end of frame, * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. * A full dictionary is more costly, as it requires building tables. @@ -1014,8 +1034,8 @@ * Some of them might be removed in the future (especially when redundant with existing stable functions) * ***************************************************************************************/ -#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN 6 +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 
6 : 2) #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ #define ZSTD_SKIPPABLEHEADERSIZE 8 @@ -1063,6 +1083,8 @@ /* Advanced parameter bounds */ #define ZSTD_TARGETCBLOCKSIZE_MIN 64 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_SRCSIZEHINT_MIN 0 +#define ZSTD_SRCSIZEHINT_MAX INT_MAX /* internal */ #define ZSTD_HASHLOG3_MAX 17 @@ -1073,6 +1095,24 @@ typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; typedef struct { + unsigned int matchPos; /* Match pos in dst */ + /* If seqDef.offset > 3, then this is seqDef.offset - 3 + * If seqDef.offset < 3, then this is the corresponding repeat offset + * But if seqDef.offset < 3 and litLength == 0, this is the + * repeat offset before the corresponding repeat offset + * And if seqDef.offset == 3 and litLength == 0, this is the + * most recent repeat offset - 1 + */ + unsigned int offset; + unsigned int litLength; /* Literal length */ + unsigned int matchLength; /* Match length */ + /* 0 when seq not rep and seqDef.offset otherwise + * when litLength == 0 this will be <= 4, otherwise <= 3 like normal + */ + unsigned int rep; +} ZSTD_Sequence; + +typedef struct { unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ unsigned hashLog; /**< dispatch table : larger == faster, more memory */ @@ -1101,21 +1141,12 @@ typedef enum { ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ } ZSTD_dictLoadMethod_e; typedef enum { - /* Opened question : should we have a format ZSTD_f_auto ? - * Today, it would mean exactly the same as ZSTD_f_zstd1. 
- * But, in the future, should several formats become supported, - * on the compression side, it would mean "default format". - * On the decompression side, it would mean "automatic format detection", - * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames". - * Since meaning is a little different, another option could be to define different enums for compression and decompression. - * This question could be kept for later, when there are actually multiple formats to support, - * but there is also the question of pinning enum values, and pinning value `0` is especially important */ ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number. + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. * Useful to save 4 bytes per generated frame. * Decoder cannot recognise automatically this format, requiring this instruction. */ } ZSTD_format_e; @@ -1126,7 +1157,7 @@ * to evolve and should be considered only in the context of extremely * advanced performance tuning. * - * Zstd currently supports the use of a CDict in two ways: + * Zstd currently supports the use of a CDict in three ways: * * - The contents of the CDict can be copied into the working context. This * means that the compression can search both the dictionary and input @@ -1142,6 +1173,12 @@ * working context's tables can be reused). For small inputs, this can be * faster than copying the CDict's tables. * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. 
+ * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that * Zstd is making poor choices, it is possible to override that choice with @@ -1150,6 +1187,7 @@ ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ } ZSTD_dictAttachPref_e; typedef enum { @@ -1158,7 +1196,7 @@ * levels will be compressed. */ ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ } ZSTD_literalCompressionMode_e; @@ -1210,20 +1248,38 @@ * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +/*! ZSTD_getSequences() : + * Extract sequences from the sequence store + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * @return : number of sequences extracted + */ +ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + /*************************************** * Memory management ***************************************/ /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. - * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. 
- * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CCtx size estimation is only correct for single-threaded compression. */ + * These functions make it possible to estimate memory usage of a future + * {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a budget large enough for any + * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), + * this estimate does not include space for a window buffer, so this estimate + * is guaranteed to be enough for single-shot compressions, but not streaming + * compressions. It will however assume the input may be arbitrarily large, + * which is the worst case. If srcSize is known to always be small, + * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with + * ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with + * ZSTD_CCtxParams_setParameter(). + * + * Note: only single-threaded compression is supported. This function will + * return an error code if ZSTD_c_nbWorkers is >= 1. */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); @@ -1334,7 +1390,8 @@ * Create a digested dictionary for compression * Dictionary content is just referenced, not duplicated. * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. 
*/ + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); /*! ZSTD_getCParams() : @@ -1361,7 +1418,9 @@ ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); /*! ZSTD_compress_advanced() : - * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */ + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1369,7 +1428,9 @@ ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : - * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */ + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1441,6 +1502,12 @@ * There is no guarantee on compressed block size (default:0) */ #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1613,8 +1680,13 @@ * pledgedSrcSize must be correct. If it is not known at init time, use * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + /**! ZSTD_initCStream_usingDict() : * This function is deprecated, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); @@ -1623,42 +1695,66 @@ * * Creates of an internal CDict (incompatible with static CCtx), except if * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + /**! 
ZSTD_initCStream_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); * - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + /**! ZSTD_initCStream_usingCDict() : * This function is deprecated, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, cdict); * * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + /**! 
ZSTD_initCStream_usingCDict_advanced() : - * This function is deprecated, and is approximately equivalent to: + * This function is DEPRECATED, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_refCDict(zcs, cdict); * * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. * pledgedSrcSize must be correct. If srcSize is not known at init time, use * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); /*! ZSTD_resetCStream() : * This function is deprecated, and is equivalent to: @@ -1673,6 +1769,7 @@ * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. 
* @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); @@ -1718,8 +1815,10 @@ * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); * * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + /** * This function is deprecated, and is equivalent to: * @@ -1727,14 +1826,17 @@ * ZSTD_DCtx_refDDict(zds, ddict); * * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + /** * This function is deprecated, and is equivalent to: * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); @@ -1908,7 +2010,7 @@ /*! Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. 
A few rules to respect : diff -r 61881b170140 -r 84a0102c05c7 contrib/python3-ratchet.py --- a/contrib/python3-ratchet.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/python3-ratchet.py Tue Jan 21 13:14:51 2020 -0500 @@ -60,7 +60,7 @@ ) p.add_argument( '-j', - default=os.sysconf(r'SC_NPROCESSORS_ONLN'), + default=os.sysconf('SC_NPROCESSORS_ONLN'), type=int, help='Number of parallel tests to run.', ) diff -r 61881b170140 -r 84a0102c05c7 contrib/relnotes --- a/contrib/relnotes Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/relnotes Tue Jan 21 13:14:51 2020 -0500 @@ -98,6 +98,7 @@ (r"shelve|unshelve", "extensions"), ] + def wikify(desc): desc = desc.replace("(issue", "(Bts:issue") desc = re.sub(r"\b([0-9a-f]{12})\b", r"Cset:\1", desc) @@ -107,6 +108,7 @@ desc = re.sub(r"\b(\S*__\S*)\b", r"`\1`", desc) return desc + def main(): desc = "example: %(prog)s 4.7.2 --stoprev 4.8rc0" ap = argparse.ArgumentParser(description=desc) @@ -200,5 +202,6 @@ for d in sorted(apis): print(" * %s" % d) + if __name__ == "__main__": main() diff -r 61881b170140 -r 84a0102c05c7 contrib/simplemerge --- a/contrib/simplemerge Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/simplemerge Tue Jan 21 13:14:51 2020 -0500 @@ -5,6 +5,7 @@ import sys import hgdemandimport + hgdemandimport.enable() from mercurial.i18n import _ @@ -16,44 +17,54 @@ simplemerge, ui as uimod, ) -from mercurial.utils import ( - procutil, - stringutil -) +from mercurial.utils import procutil, stringutil -options = [(b'L', b'label', [], _(b'labels to use on conflict markers')), - (b'a', b'text', None, _(b'treat all files as text')), - (b'p', b'print', None, - _(b'print results instead of overwriting LOCAL')), - (b'', b'no-minimal', None, _(b'no effect (DEPRECATED)')), - (b'h', b'help', None, _(b'display help and exit')), - (b'q', b'quiet', None, _(b'suppress output'))] +options = [ + (b'L', b'label', [], _(b'labels to use on conflict markers')), + (b'a', b'text', None, _(b'treat all files as text')), + (b'p', b'print', None, 
_(b'print results instead of overwriting LOCAL')), + (b'', b'no-minimal', None, _(b'no effect (DEPRECATED)')), + (b'h', b'help', None, _(b'display help and exit')), + (b'q', b'quiet', None, _(b'suppress output')), +] -usage = _(b'''simplemerge [OPTS] LOCAL BASE OTHER +usage = _( + b'''simplemerge [OPTS] LOCAL BASE OTHER Simple three-way file merge utility with a minimal feature set. Apply to LOCAL the changes necessary to go from BASE to OTHER. By default, LOCAL is overwritten with the results of this operation. -''') +''' +) + class ParseError(Exception): """Exception raised on errors in parsing the command line.""" + def showhelp(): pycompat.stdout.write(usage) pycompat.stdout.write(b'\noptions:\n') out_opts = [] for shortopt, longopt, default, desc in options: - out_opts.append((b'%2s%s' % (shortopt and b'-%s' % shortopt, - longopt and b' --%s' % longopt), - b'%s' % desc)) + out_opts.append( + ( + b'%2s%s' + % ( + shortopt and b'-%s' % shortopt, + longopt and b' --%s' % longopt, + ), + b'%s' % desc, + ) + ) opts_len = max([len(opt[0]) for opt in out_opts]) for first, second in out_opts: pycompat.stdout.write(b' %-*s %s\n' % (opts_len, first, second)) + try: for fp in (sys.stdin, pycompat.stdout, sys.stderr): procutil.setbinary(fp) @@ -68,13 +79,17 @@ showhelp() sys.exit(0) if len(args) != 3: - raise ParseError(_(b'wrong number of arguments').decode('utf8')) + raise ParseError(_(b'wrong number of arguments').decode('utf8')) local, base, other = args - sys.exit(simplemerge.simplemerge(uimod.ui.load(), - context.arbitraryfilectx(local), - context.arbitraryfilectx(base), - context.arbitraryfilectx(other), - **pycompat.strkwargs(opts))) + sys.exit( + simplemerge.simplemerge( + uimod.ui.load(), + context.arbitraryfilectx(local), + context.arbitraryfilectx(base), + context.arbitraryfilectx(other), + **pycompat.strkwargs(opts) + ) + ) except ParseError as e: e = stringutil.forcebytestr(e) pycompat.stdout.write(b"%s: %s\n" % (sys.argv[0].encode('utf8'), e)) diff -r 
61881b170140 -r 84a0102c05c7 contrib/testparseutil.py --- a/contrib/testparseutil.py Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/testparseutil.py Tue Jan 21 13:14:51 2020 -0500 @@ -80,7 +80,7 @@ #################### -class embeddedmatcher(object): +class embeddedmatcher(object): # pytype: disable=ignored-metaclass """Base class to detect embedded code fragments in *.t test script """ @@ -331,9 +331,9 @@ ) self._fileres = [ # "cat > NAME << LIMIT" case - re.compile(r' \$ \s*cat' + namepat + heredoclimitpat), + re.compile(r' {2}\$ \s*cat' + namepat + heredoclimitpat), # "cat << LIMIT > NAME" case - re.compile(r' \$ \s*cat' + heredoclimitpat + namepat), + re.compile(r' {2}\$ \s*cat' + heredoclimitpat + namepat), ] def startsat(self, line): @@ -426,7 +426,7 @@ """ _prefix = ' >>> ' - _prefixre = re.compile(r' (>>>|\.\.\.) ') + _prefixre = re.compile(r' {2}(>>>|\.\.\.) ') # If a line matches against not _prefixre but _outputre, that line # is "an expected output line" (= not a part of code fragment). @@ -436,7 +436,7 @@ # run-tests.py. But "directive line inside inline python code" # should be rejected by Mercurial reviewers. Therefore, this # regexp does not matche against such directive lines. 
- _outputre = re.compile(r' $| [^$]') + _outputre = re.compile(r' {2}$| {2}[^$]') def __init__(self): super(pydoctestmatcher, self).__init__("doctest style python code") @@ -509,7 +509,7 @@ _prefix = ' > ' _startre = re.compile( - r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat + r' {2}\$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat ) def __init__(self): diff -r 61881b170140 -r 84a0102c05c7 contrib/undumprevlog --- a/contrib/undumprevlog Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/undumprevlog Tue Jan 21 13:14:51 2020 -0500 @@ -14,16 +14,15 @@ transaction, vfs as vfsmod, ) -from mercurial.utils import ( - procutil, -) +from mercurial.utils import procutil for fp in (sys.stdin, sys.stdout, sys.stderr): procutil.setbinary(fp) opener = vfsmod.vfs(b'.', False) -tr = transaction.transaction(sys.stderr.write, opener, {b'store': opener}, - b"undump.journal") +tr = transaction.transaction( + sys.stderr.write, opener, {b'store': opener}, b"undump.journal" +) while True: l = sys.stdin.readline() if not l: @@ -42,9 +41,9 @@ p2 = node.bin(p[1]) elif l.startswith("length:"): length = int(l[8:-1]) - sys.stdin.readline() # start marker + sys.stdin.readline() # start marker d = encoding.strtolocal(sys.stdin.read(length)) - sys.stdin.readline() # end marker + sys.stdin.readline() # end marker r.addrevision(d, tr, lr, p1, p2) tr.close() diff -r 61881b170140 -r 84a0102c05c7 contrib/vagrant/Vagrantfile --- a/contrib/vagrant/Vagrantfile Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/vagrant/Vagrantfile Tue Jan 21 13:14:51 2020 -0500 @@ -1,8 +1,8 @@ # -*- mode: ruby -*- Vagrant.configure('2') do |config| - # Debian 8.1 x86_64 without configuration management software - config.vm.box = "debian/jessie64" + # Debian 10.1 x86_64 without configuration management software + config.vm.box = "debian/buster64" config.vm.hostname = "tests" config.vm.define "tests" do |conf| diff -r 61881b170140 -r 84a0102c05c7 contrib/win32/ReadMe.html --- a/contrib/win32/ReadMe.html Thu Jan 09 
14:19:20 2020 -0500 +++ b/contrib/win32/ReadMe.html Tue Jan 21 13:14:51 2020 -0500 @@ -140,8 +140,7 @@

- Mercurial is Copyright 2005-2019 Matt Mackall and others. See - the Contributors.txt file for a list of contributors. + Mercurial is Copyright 2005-2019 Matt Mackall and others.

diff -r 61881b170140 -r 84a0102c05c7 contrib/win32/mercurial.ini --- a/contrib/win32/mercurial.ini Thu Jan 09 14:19:20 2020 -0500 +++ b/contrib/win32/mercurial.ini Tue Jan 21 13:14:51 2020 -0500 @@ -16,7 +16,7 @@ [ui] ; editor used to enter commit logs, etc. Most text editors will work. -editor = notepad +; editor = notepad ; show changed files and be a bit more verbose if True ; verbose = True ; colorize commands output diff -r 61881b170140 -r 84a0102c05c7 doc/Makefile --- a/doc/Makefile Thu Jan 09 14:19:20 2020 -0500 +++ b/doc/Makefile Tue Jan 21 13:14:51 2020 -0500 @@ -1,8 +1,8 @@ -SOURCES=$(notdir $(wildcard ../mercurial/help/*.[0-9].txt)) +SOURCES=$(notdir $(wildcard ../mercurial/helptext/*.[0-9].txt)) MAN=$(SOURCES:%.txt=%) HTML=$(SOURCES:%.txt=%.html) GENDOC=gendoc.py ../mercurial/commands.py ../mercurial/help.py \ - ../mercurial/help/*.txt ../hgext/*.py ../hgext/*/__init__.py + ../mercurial/helptext/*.txt ../hgext/*.py ../hgext/*/__init__.py PREFIX=/usr/local MANDIR=$(PREFIX)/share/man INSTALL=install -c -m 644 diff -r 61881b170140 -r 84a0102c05c7 doc/docchecker --- a/doc/docchecker Thu Jan 09 14:19:20 2020 -0500 +++ b/doc/docchecker Tue Jan 21 13:14:51 2020 -0500 @@ -15,6 +15,7 @@ try: import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY) except ImportError: @@ -25,14 +26,18 @@ leadingline = re.compile(br'(^\s*)(\S.*)$') checks = [ - (br""":hg:`[^`]*'[^`]*`""", - b"""warning: please avoid nesting ' in :hg:`...`"""), - (br'\w:hg:`', - b'warning: please have a space before :hg:'), - (br"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""", - b'''warning: please use " instead of ' for hg ... "..."'''), + ( + br""":hg:`[^`]*'[^`]*`""", + b"""warning: please avoid nesting ' in :hg:`...`""", + ), + (br'\w:hg:`', b'warning: please have a space before :hg:'), + ( + br"""(?:[^a-z][^'.])hg ([^,;"`]*'(?!hg)){2}""", + b'''warning: please use " instead of ' for hg ... 
"..."''', + ), ] + def check(line): messages = [] for match, msg in checks: @@ -43,6 +48,7 @@ for msg in messages: stdout.write(b'%s\n' % msg) + def work(file): (llead, lline) = (b'', b'') @@ -55,8 +61,8 @@ continue lead, line = match.group(1), match.group(2) - if (lead == llead): - if (lline != b''): + if lead == llead: + if lline != b'': lline += b' ' + line else: lline = line @@ -65,6 +71,7 @@ (llead, lline) = (lead, line) check(lline) + def main(): for f in sys.argv[1:]: try: @@ -73,4 +80,5 @@ except BaseException as e: sys.stdout.write(r"failed to process %s: %s\n" % (f, e)) + main() diff -r 61881b170140 -r 84a0102c05c7 doc/gendoc.py --- a/doc/gendoc.py Thu Jan 09 14:19:20 2020 -0500 +++ b/doc/gendoc.py Tue Jan 21 13:14:51 2020 -0500 @@ -20,17 +20,13 @@ # This script is executed during installs and may not have C extensions # available. Relax C module requirements. -os.environ[r'HGMODULEPOLICY'] = r'allow' +os.environ['HGMODULEPOLICY'] = 'allow' # import from the live mercurial repo -sys.path.insert(0, r"..") +sys.path.insert(0, "..") from mercurial import demandimport demandimport.enable() -# Load util so that the locale path is set by i18n.setdatapath() before -# calling _(). 
-from mercurial import util -util.datapath from mercurial import ( commands, encoding, diff -r 61881b170140 -r 84a0102c05c7 doc/runrst --- a/doc/runrst Thu Jan 09 14:19:20 2020 -0500 +++ b/doc/runrst Tue Jan 21 13:14:51 2020 -0500 @@ -15,20 +15,25 @@ from __future__ import absolute_import import sys + try: import docutils.core as core import docutils.nodes as nodes import docutils.utils as utils import docutils.parsers.rst.roles as roles except ImportError: - sys.stderr.write("abort: couldn't generate documentation: docutils " - "module is missing\n") - sys.stderr.write("please install python-docutils or see " - "http://docutils.sourceforge.net/\n") + sys.stderr.write( + "abort: couldn't generate documentation: docutils " + "module is missing\n" + ) + sys.stderr.write( + "please install python-docutils or see " + "http://docutils.sourceforge.net/\n" + ) sys.exit(-1) -def role_hg(name, rawtext, text, lineno, inliner, - options={}, content=[]): + +def role_hg(name, rawtext, text, lineno, inliner, options=None, content=None): text = "hg " + utils.unescape(text) linktext = nodes.literal(rawtext, text) parts = text.split() @@ -47,10 +52,10 @@ refuri = "hg.1.html#%s" % args[1] else: refuri = "hg.1.html#%s" % args[0] - node = nodes.reference(rawtext, '', linktext, - refuri=refuri) + node = nodes.reference(rawtext, '', linktext, refuri=refuri) return [node], [] + roles.register_local_role("hg", role_hg) if __name__ == "__main__": diff -r 61881b170140 -r 84a0102c05c7 hg --- a/hg Thu Jan 09 14:19:20 2020 -0500 +++ b/hg Tue Jan 21 13:14:51 2020 -0500 @@ -15,22 +15,29 @@ if libdir != '@' 'LIBDIR' '@': if not os.path.isabs(libdir): - libdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), - libdir) + libdir = os.path.join( + os.path.dirname(os.path.realpath(__file__)), libdir + ) libdir = os.path.abspath(libdir) sys.path.insert(0, libdir) from hgdemandimport import tracing + with tracing.log('hg script'): # enable importing on demand to reduce startup time try: if 
sys.version_info[0] < 3 or sys.version_info >= (3, 6): - import hgdemandimport; hgdemandimport.enable() + import hgdemandimport + + hgdemandimport.enable() except ImportError: - sys.stderr.write("abort: couldn't find mercurial libraries in [%s]\n" % - ' '.join(sys.path)) + sys.stderr.write( + "abort: couldn't find mercurial libraries in [%s]\n" + % ' '.join(sys.path) + ) sys.stderr.write("(check your install and PYTHONPATH)\n") sys.exit(-1) from mercurial import dispatch + dispatch.run() diff -r 61881b170140 -r 84a0102c05c7 hgdemandimport/demandimportpy2.py --- a/hgdemandimport/demandimportpy2.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgdemandimport/demandimportpy2.py Tue Jan 21 13:14:51 2020 -0500 @@ -70,9 +70,9 @@ head = name after = [] object.__setattr__( - self, r"_data", (head, globals, locals, after, level, set()) + self, "_data", (head, globals, locals, after, level, set()) ) - object.__setattr__(self, r"_module", None) + object.__setattr__(self, "_module", None) def _extend(self, name): """add to the list of submodules to load""" @@ -135,15 +135,15 @@ if locals: if locals.get(head) is self: locals[head] = mod - elif locals.get(head + r'mod') is self: - locals[head + r'mod'] = mod + elif locals.get(head + 'mod') is self: + locals[head + 'mod'] = mod for modname in modrefs: modref = sys.modules.get(modname, None) if modref and getattr(modref, head, None) is self: setattr(modref, head, mod) - object.__setattr__(self, r"_module", mod) + object.__setattr__(self, "_module", mod) def __repr__(self): if self._module: @@ -303,18 +303,18 @@ def enable(): - "enable global demand-loading of modules" + """enable global demand-loading of modules""" builtins.__import__ = _demandimport def disable(): - "disable global demand-loading of modules" + """disable global demand-loading of modules""" builtins.__import__ = _origimport @contextmanager def deactivated(): - "context manager for disabling demandimport in 'with' blocks" + """context manager for disabling demandimport in 
'with' blocks""" demandenabled = isenabled() if demandenabled: disable() diff -r 61881b170140 -r 84a0102c05c7 hgdemandimport/demandimportpy3.py --- a/hgdemandimport/demandimportpy3.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgdemandimport/demandimportpy3.py Tue Jan 21 13:14:51 2020 -0500 @@ -27,8 +27,6 @@ from __future__ import absolute_import import contextlib -import importlib.abc -import importlib.machinery import importlib.util import sys @@ -36,6 +34,12 @@ _deactivated = False +# Python 3.5's LazyLoader doesn't work for some reason. +# https://bugs.python.org/issue26186 is a known issue with extension +# importing. But it appears to not have a meaningful effect with +# Mercurial. +_supported = sys.version_info[0:2] >= (3, 6) + class _lazyloaderex(importlib.util.LazyLoader): """This is a LazyLoader except it also follows the _deactivated global and @@ -51,29 +55,61 @@ super().exec_module(module) -# This is 3.6+ because with Python 3.5 it isn't possible to lazily load -# extensions. See the discussion in https://bugs.python.org/issue26186 for more. -if sys.version_info[0:2] >= (3, 6): - _extensions_loader = _lazyloaderex.factory( - importlib.machinery.ExtensionFileLoader - ) -else: - _extensions_loader = importlib.machinery.ExtensionFileLoader +class LazyFinder(object): + """A wrapper around a ``MetaPathFinder`` that makes loaders lazy. + + ``sys.meta_path`` finders have their ``find_spec()`` called to locate a + module. This returns a ``ModuleSpec`` if found or ``None``. The + ``ModuleSpec`` has a ``loader`` attribute, which is called to actually + load a module. + + Our class wraps an existing finder and overloads its ``find_spec()`` to + replace the ``loader`` with our lazy loader proxy. + + We have to use __getattribute__ to proxy the instance because some meta + path finders don't support monkeypatching. 
+ """ + + __slots__ = ("_finder",) + + def __init__(self, finder): + object.__setattr__(self, "_finder", finder) + + def __repr__(self): + return "" % object.__getattribute__(self, "_finder") + + # __bool__ is canonical Python 3. But check-code insists on __nonzero__ being + # defined via `def`. + def __nonzero__(self): + return bool(object.__getattribute__(self, "_finder")) -_bytecode_loader = _lazyloaderex.factory( - importlib.machinery.SourcelessFileLoader -) -_source_loader = _lazyloaderex.factory(importlib.machinery.SourceFileLoader) + __bool__ = __nonzero__ + + def __getattribute__(self, name): + if name in ("_finder", "find_spec"): + return object.__getattribute__(self, name) + return getattr(object.__getattribute__(self, "_finder"), name) + + def __delattr__(self, name): + return delattr(object.__getattribute__(self, "_finder")) + + def __setattr__(self, name, value): + return setattr(object.__getattribute__(self, "_finder"), name, value) -def _makefinder(path): - return importlib.machinery.FileFinder( - path, - # This is the order in which loaders are passed in in core Python. - (_extensions_loader, importlib.machinery.EXTENSION_SUFFIXES), - (_source_loader, importlib.machinery.SOURCE_SUFFIXES), - (_bytecode_loader, importlib.machinery.BYTECODE_SUFFIXES), - ) + def find_spec(self, *args, **kwargs): + finder = object.__getattribute__(self, "_finder") + spec = finder.find_spec(*args, **kwargs) + + # Lazy loader requires exec_module(). 
+ if ( + spec is not None + and spec.loader is not None + and getattr(spec.loader, "exec_module") + ): + spec.loader = _lazyloaderex(spec.loader) + + return spec ignores = set() @@ -85,19 +121,30 @@ def isenabled(): - return _makefinder in sys.path_hooks and not _deactivated + return not _deactivated and any( + isinstance(finder, LazyFinder) for finder in sys.meta_path + ) def disable(): - try: - while True: - sys.path_hooks.remove(_makefinder) - except ValueError: - pass + new_finders = [] + for finder in sys.meta_path: + new_finders.append( + finder._finder if isinstance(finder, LazyFinder) else finder + ) + sys.meta_path[:] = new_finders def enable(): - sys.path_hooks.insert(0, _makefinder) + if not _supported: + return + + new_finders = [] + for finder in sys.meta_path: + new_finders.append( + LazyFinder(finder) if not isinstance(finder, LazyFinder) else finder + ) + sys.meta_path[:] = new_finders @contextlib.contextmanager diff -r 61881b170140 -r 84a0102c05c7 hgext/absorb.py --- a/hgext/absorb.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/absorb.py Tue Jan 21 13:14:51 2020 -0500 @@ -511,7 +511,7 @@ if not editedtext: raise error.Abort(_(b'empty editor text')) # parse edited result - contents = [b'' for i in self.fctxs] + contents = [b''] * len(self.fctxs) leftpadpos = 4 colonpos = leftpadpos + len(visiblefctxs) + 1 for l in mdiff.splitnewlines(editedtext): diff -r 61881b170140 -r 84a0102c05c7 hgext/acl.py --- a/hgext/acl.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/acl.py Tue Jan 21 13:14:51 2020 -0500 @@ -369,8 +369,8 @@ return user = None - if source == b'serve' and r'url' in kwargs: - url = kwargs[r'url'].split(b':') + if source == b'serve' and 'url' in kwargs: + url = kwargs['url'].split(b':') if url[0] == b'remote' and url[1].startswith(b'http'): user = urlreq.unquote(url[3]) @@ -386,9 +386,9 @@ def _pkhook(ui, repo, hooktype, node, source, user, **kwargs): - if kwargs[r'namespace'] == b'bookmarks': - bookmark = kwargs[r'key'] - ctx = kwargs[r'new'] + if 
kwargs['namespace'] == b'bookmarks': + bookmark = kwargs['key'] + ctx = kwargs['new'] allowbookmarks = buildmatch(ui, None, user, b'acl.allow.bookmarks') denybookmarks = buildmatch(ui, None, user, b'acl.deny.bookmarks') diff -r 61881b170140 -r 84a0102c05c7 hgext/beautifygraph.py --- a/hgext/beautifygraph.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/beautifygraph.py Tue Jan 21 13:14:51 2020 -0500 @@ -94,7 +94,7 @@ ui.warn(_(b'beautifygraph: unsupported encoding, UTF-8 required\n')) return - if r'A' in encoding._wide: + if 'A' in encoding._wide: ui.warn( _( b'beautifygraph: unsupported terminal settings, ' diff -r 61881b170140 -r 84a0102c05c7 hgext/blackbox.py --- a/hgext/blackbox.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/blackbox.py Tue Jan 21 13:14:51 2020 -0500 @@ -201,7 +201,7 @@ if not repo.vfs.exists(b'blackbox.log'): return - limit = opts.get(r'limit') + limit = opts.get('limit') fp = repo.vfs(b'blackbox.log', b'r') lines = fp.read().split(b'\n') diff -r 61881b170140 -r 84a0102c05c7 hgext/bookflow.py --- a/hgext/bookflow.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/bookflow.py Tue Jan 21 13:14:51 2020 -0500 @@ -101,7 +101,7 @@ def commands_branch(orig, ui, repo, label=None, **opts): - if label and not opts.get(r'clean') and not opts.get(r'rev'): + if label and not opts.get('clean') and not opts.get('rev'): raise error.Abort( _( b"creating named branches is disabled and you should use bookmarks" diff -r 61881b170140 -r 84a0102c05c7 hgext/bugzilla.py --- a/hgext/bugzilla.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/bugzilla.py Tue Jan 21 13:14:51 2020 -0500 @@ -612,7 +612,7 @@ self.ui.warn(_(b"Bugzilla/MySQL cannot update bug state\n")) (user, userid) = self.get_bugzilla_user(committer) - now = time.strftime(r'%Y-%m-%d %H:%M:%S') + now = time.strftime('%Y-%m-%d %H:%M:%S') self.run( '''insert into longdescs (bug_id, who, bug_when, thetext) @@ -1099,7 +1099,6 @@ the given changeset in their comments. 
''' start = 0 - hours = 0.0 bugs = {} bugmatch = self.bug_re.search(ctx.description(), start) fixmatch = self.fix_re.search(ctx.description(), start) diff -r 61881b170140 -r 84a0102c05c7 hgext/censor.py --- a/hgext/censor.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/censor.py Tue Jan 21 13:14:51 2020 -0500 @@ -23,6 +23,9 @@ ``hg update``, must be capable of tolerating censored data to continue to function in a meaningful way. Such commands only tolerate censored file revisions if they are allowed by the "censor.policy=ignore" config option. + +A few informative commands such as ``hg grep`` will unconditionally +ignore censored data and merely report that it was encountered. """ from __future__ import absolute_import diff -r 61881b170140 -r 84a0102c05c7 hgext/churn.py --- a/hgext/churn.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/churn.py Tue Jan 21 13:14:51 2020 -0500 @@ -197,7 +197,7 @@ return s + b" " * (l - encoding.colwidth(s)) amap = {} - aliases = opts.get(r'aliases') + aliases = opts.get('aliases') if not aliases and os.path.exists(repo.wjoin(b'.hgchurn')): aliases = repo.wjoin(b'.hgchurn') if aliases: @@ -215,7 +215,7 @@ if not rate: return - if opts.get(r'sort'): + if opts.get('sort'): rate.sort() else: rate.sort(key=lambda x: (-sum(x[1]), x)) @@ -228,7 +228,7 @@ ui.debug(b"assuming %i character terminal\n" % ttywidth) width = ttywidth - maxname - 2 - 2 - 2 - if opts.get(r'diffstat'): + if opts.get('diffstat'): width -= 15 def format(name, diffstat): diff -r 61881b170140 -r 84a0102c05c7 hgext/commitextras.py --- a/hgext/commitextras.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/commitextras.py Tue Jan 21 13:14:51 2020 -0500 @@ -58,7 +58,7 @@ class repoextra(repo.__class__): def commit(self, *innerpats, **inneropts): - extras = opts.get(r'extra') + extras = opts.get('extra') for raw in extras: if b'=' not in raw: msg = _( @@ -82,7 +82,7 @@ b"manually" ) raise error.Abort(msg % k) - inneropts[r'extra'][k] = v + inneropts['extra'][k] = v return 
super(repoextra, self).commit(*innerpats, **inneropts) repo.__class__ = repoextra diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/common.py --- a/hgext/convert/common.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/common.py Tue Jan 21 13:14:51 2020 -0500 @@ -57,7 +57,7 @@ def shlexer(data=None, filepath=None, wordchars=None, whitespace=None): if data is None: if pycompat.ispy3: - data = open(filepath, b'r', encoding=r'latin1') + data = open(filepath, b'r', encoding='latin1') else: data = open(filepath, b'r') else: @@ -493,7 +493,7 @@ # POSIX requires at least 4096 bytes for ARG_MAX argmax = 4096 try: - argmax = os.sysconf(r"SC_ARG_MAX") + argmax = os.sysconf("SC_ARG_MAX") except (AttributeError, ValueError): pass diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/convcmd.py --- a/hgext/convert/convcmd.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/convcmd.py Tue Jan 21 13:14:51 2020 -0500 @@ -56,6 +56,36 @@ orig_encoding = b'ascii' +def readauthormap(ui, authorfile, authors=None): + if authors is None: + authors = {} + with open(authorfile, b'rb') as afile: + for line in afile: + + line = line.strip() + if not line or line.startswith(b'#'): + continue + + try: + srcauthor, dstauthor = line.split(b'=', 1) + except ValueError: + msg = _(b'ignoring bad line in author map file %s: %s\n') + ui.warn(msg % (authorfile, line.rstrip())) + continue + + srcauthor = srcauthor.strip() + dstauthor = dstauthor.strip() + if authors.get(srcauthor) in (None, dstauthor): + msg = _(b'mapping author %s to %s\n') + ui.debug(msg % (srcauthor, dstauthor)) + authors[srcauthor] = dstauthor + continue + + m = _(b'overriding mapping for author %s, was %s, will be %s\n') + ui.status(m % (srcauthor, authors[srcauthor], dstauthor)) + return authors + + def recode(s): if isinstance(s, pycompat.unicode): return s.encode(pycompat.sysstr(orig_encoding), 'replace') @@ -448,32 +478,7 @@ ofile.close() def readauthormap(self, authorfile): - afile = open(authorfile, b'rb') - for line in 
afile: - - line = line.strip() - if not line or line.startswith(b'#'): - continue - - try: - srcauthor, dstauthor = line.split(b'=', 1) - except ValueError: - msg = _(b'ignoring bad line in author map file %s: %s\n') - self.ui.warn(msg % (authorfile, line.rstrip())) - continue - - srcauthor = srcauthor.strip() - dstauthor = dstauthor.strip() - if self.authors.get(srcauthor) in (None, dstauthor): - msg = _(b'mapping author %s to %s\n') - self.ui.debug(msg % (srcauthor, dstauthor)) - self.authors[srcauthor] = dstauthor - continue - - m = _(b'overriding mapping for author %s, was %s, will be %s\n') - self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor)) - - afile.close() + self.authors = readauthormap(self.ui, authorfile, self.authors) def cachecommit(self, rev): commit = self.source.getcommit(rev) diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/cvs.py --- a/hgext/convert/cvs.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/cvs.py Tue Jan 21 13:14:51 2020 -0500 @@ -144,9 +144,7 @@ if root.startswith(b":pserver:"): root = root[9:] - m = re.match( - r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)', root - ) + m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:/]*)(?::(\d*))?(.*)', root) if m: conntype = b"pserver" user, passw, serv, port, root = m.groups() diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/cvsps.py --- a/hgext/convert/cvsps.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/cvsps.py Tue Jan 21 13:14:51 2020 -0500 @@ -54,10 +54,8 @@ self.__dict__.update(entries) def __repr__(self): - items = ( - r"%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__) - ) - return r"%s(%s)" % (type(self).__name__, r", ".join(items)) + items = ("%s=%r" % (k, self.__dict__[k]) for k in sorted(self.__dict__)) + return "%s(%s)" % (type(self).__name__, ", ".join(items)) class logerror(Exception): @@ -112,7 +110,7 @@ _scache = {} def scache(s): - b"return a shared version of a string" + """return a shared version of a string""" return 
_scache.setdefault(s, s) ui.status(_(b'collecting CVS rlog\n')) @@ -713,7 +711,7 @@ # Sort files in each changeset def entitycompare(l, r): - b'Mimic cvsps sorting order' + """Mimic cvsps sorting order""" l = l.file.split(b'/') r = r.file.split(b'/') nl = len(l) diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/gnuarch.py --- a/hgext/convert/gnuarch.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/gnuarch.py Tue Jan 21 13:14:51 2020 -0500 @@ -302,25 +302,25 @@ # Commit date self.changes[rev].date = dateutil.datestr( - dateutil.strdate(catlog[r'Standard-date'], b'%Y-%m-%d %H:%M:%S') + dateutil.strdate(catlog['Standard-date'], b'%Y-%m-%d %H:%M:%S') ) # Commit author - self.changes[rev].author = self.recode(catlog[r'Creator']) + self.changes[rev].author = self.recode(catlog['Creator']) # Commit description self.changes[rev].summary = b'\n\n'.join( ( - self.recode(catlog[r'Summary']), + self.recode(catlog['Summary']), self.recode(catlog.get_payload()), ) ) self.changes[rev].summary = self.recode(self.changes[rev].summary) # Commit revision origin when dealing with a branch or tag - if r'Continuation-of' in catlog: + if 'Continuation-of' in catlog: self.changes[rev].continuationof = self.recode( - catlog[r'Continuation-of'] + catlog['Continuation-of'] ) except Exception: raise error.Abort(_(b'could not parse cat-log of %s') % rev) diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/monotone.py --- a/hgext/convert/monotone.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/monotone.py Tue Jan 21 13:14:51 2020 -0500 @@ -96,7 +96,7 @@ return self.mtnrunsingle(*args, **kwargs) def mtnrunsingle(self, *args, **kwargs): - kwargs[r'd'] = self.path + kwargs['d'] = self.path return self.run0(b'automate', *args, **kwargs) def mtnrunstdio(self, *args, **kwargs): @@ -239,7 +239,7 @@ # key "test@selenic.com" # mtn >= 0.45: # key [ff58a7ffb771907c4ff68995eada1c4da068d328] - certlist = re.split(br'\n\n key ["\[]', certlist) + certlist = re.split(br'\n\n {6}key ["\[]', certlist) 
for e in certlist: m = self.cert_re.match(e) if m: diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/p4.py --- a/hgext/convert/p4.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/p4.py Tue Jan 21 13:14:51 2020 -0500 @@ -24,7 +24,7 @@ def loaditer(f): - b"Yield the dictionary objects generated by p4" + """Yield the dictionary objects generated by p4""" try: while True: d = marshal.load(f) @@ -105,7 +105,7 @@ self.revmap = revmap def _parse_view(self, path): - b"Read changes affecting the path" + """Read changes affecting the path""" cmd = b'p4 -G changes -s submitted %s' % procutil.shellquote(path) stdout = procutil.popen(cmd, mode=b'rb') p4changes = {} @@ -116,7 +116,7 @@ return p4changes def _parse(self, ui, path): - b"Prepare list of P4 filenames and revisions to import" + """Prepare list of P4 filenames and revisions to import""" p4changes = {} changeset = {} files_map = {} diff -r 61881b170140 -r 84a0102c05c7 hgext/convert/subversion.py --- a/hgext/convert/subversion.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/convert/subversion.py Tue Jan 21 13:14:51 2020 -0500 @@ -643,7 +643,7 @@ if not re.match( r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-' r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]' - r'{12,12}(.*)\@[0-9]+$', + r'{12,12}(.*)@[0-9]+$', revstr, ): raise error.Abort( @@ -1303,7 +1303,7 @@ self.wc = os.path.realpath(path) self.run0(b'update') else: - if not re.search(br'^(file|http|https|svn|svn\+ssh)\://', path): + if not re.search(br'^(file|http|https|svn|svn\+ssh)://', path): path = os.path.realpath(path) if os.path.isdir(os.path.dirname(path)): if not os.path.exists( @@ -1359,11 +1359,11 @@ m = set() output = self.run0(b'ls', recursive=True, xml=True) doc = xml.dom.minidom.parseString(output) - for e in doc.getElementsByTagName(r'entry'): + for e in doc.getElementsByTagName('entry'): for n in e.childNodes: - if n.nodeType != n.ELEMENT_NODE or n.tagName != r'name': + if n.nodeType != n.ELEMENT_NODE or n.tagName != 'name': continue - name = r''.join( + name = 
''.join( c.data for c in n.childNodes if c.nodeType == c.TEXT_NODE ) # Entries are compared with names coming from @@ -1502,7 +1502,7 @@ self.setexec = [] fd, messagefile = pycompat.mkstemp(prefix=b'hg-convert-') - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(util.tonativeeol(commit.desc)) fp.close() try: diff -r 61881b170140 -r 84a0102c05c7 hgext/extdiff.py --- a/hgext/extdiff.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/extdiff.py Tue Jan 21 13:14:51 2020 -0500 @@ -271,7 +271,7 @@ path1a = os.path.join(tmproot, dir1a, commonfile) label1a = commonfile + rev1a if not os.path.isfile(path1a): - path1a = os.devnull + path1a = pycompat.osdevnull path1b = b'' label1b = b'' @@ -279,7 +279,7 @@ path1b = os.path.join(tmproot, dir1b, commonfile) label1b = commonfile + rev1b if not os.path.isfile(path1b): - path1b = os.devnull + path1b = pycompat.osdevnull path2 = os.path.join(dir2root, dir2, commonfile) label2 = commonfile + rev2 @@ -401,13 +401,14 @@ if node2 is None: raise error.Abort(_(b'--patch requires two revisions')) else: - mod_a, add_a, rem_a = map( - set, repo.status(node1a, node2, matcher, listsubrepos=subrepos)[:3] - ) + st = repo.status(node1a, node2, matcher, listsubrepos=subrepos) + mod_a, add_a, rem_a = set(st.modified), set(st.added), set(st.removed) if do3way: - mod_b, add_b, rem_b = map( - set, - repo.status(node1b, node2, matcher, listsubrepos=subrepos)[:3], + stb = repo.status(node1b, node2, matcher, listsubrepos=subrepos) + mod_b, add_b, rem_b = ( + set(stb.modified), + set(stb.added), + set(stb.removed), ) else: mod_b, add_b, rem_b = set(), set(), set() @@ -467,12 +468,12 @@ dir1a = os.path.join(tmproot, dir1a, common_file) label1a = common_file + rev1a if not os.path.isfile(dir1a): - dir1a = os.devnull + dir1a = pycompat.osdevnull if do3way: dir1b = os.path.join(tmproot, dir1b, common_file) label1b = common_file + rev1b if not os.path.isfile(dir1b): - dir1b = os.devnull + dir1b = pycompat.osdevnull dir2 = os.path.join(dir2root, 
dir2, common_file) label2 = common_file + rev2 else: @@ -655,7 +656,7 @@ # in an unknown encoding anyway), but avoid double separators on # Windows docpath = stringutil.escapestr(path).replace(b'\\\\', b'\\') - self.__doc__ %= {r'path': pycompat.sysstr(stringutil.uirepr(docpath))} + self.__doc__ %= {'path': pycompat.sysstr(stringutil.uirepr(docpath))} self._cmdline = cmdline self._isgui = isgui diff -r 61881b170140 -r 84a0102c05c7 hgext/fastannotate/commands.py --- a/hgext/fastannotate/commands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fastannotate/commands.py Tue Jan 21 13:14:51 2020 -0500 @@ -82,7 +82,7 @@ fastannotatecommandargs = { - r'options': [ + 'options': [ (b'r', b'rev', b'.', _(b'annotate the specified revision'), _(b'REV')), (b'u', b'user', None, _(b'list the author (long with -v)')), (b'f', b'file', None, _(b'list the filename')), @@ -133,8 +133,8 @@ + commands.diffwsopts + commands.walkopts + commands.formatteropts, - r'synopsis': _(b'[-r REV] [-f] [-a] [-u] [-d] [-n] [-c] [-l] FILE...'), - r'inferrepo': True, + 'synopsis': _(b'[-r REV] [-f] [-a] [-u] [-d] [-n] [-c] [-l] FILE...'), + 'inferrepo': True, } @@ -257,7 +257,7 @@ _newopts = set() _knownopts = { opt[1].replace(b'-', b'_') - for opt in (fastannotatecommandargs[r'options'] + commands.globalopts) + for opt in (fastannotatecommandargs['options'] + commands.globalopts) } @@ -269,10 +269,10 @@ # treat the file as text (skip the isbinary check) if ui.configbool(b'fastannotate', b'forcetext'): - opts[r'text'] = True + opts['text'] = True # check if we need to do prefetch (client-side) - rev = opts.get(r'rev') + rev = opts.get('rev') if util.safehasattr(repo, 'prefetchfastannotate') and rev is not None: paths = list(_matchpaths(repo, rev, pats, pycompat.byteskwargs(opts))) repo.prefetchfastannotate(paths) diff -r 61881b170140 -r 84a0102c05c7 hgext/fastannotate/context.py --- a/hgext/fastannotate/context.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fastannotate/context.py Tue Jan 21 13:14:51 2020 
-0500 @@ -9,7 +9,6 @@ import collections import contextlib -import hashlib import os from mercurial.i18n import _ @@ -28,7 +27,10 @@ scmutil, util, ) -from mercurial.utils import stringutil +from mercurial.utils import ( + hashutil, + stringutil, +) from . import ( error as faerror, @@ -148,7 +150,7 @@ diffoptstr = stringutil.pprint( sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults) ) - return node.hex(hashlib.sha1(diffoptstr).digest())[:6] + return node.hex(hashutil.sha1(diffoptstr).digest())[:6] _defaultdiffopthash = hashdiffopts(mdiff.defaultopts) diff -r 61881b170140 -r 84a0102c05c7 hgext/fastannotate/support.py --- a/hgext/fastannotate/support.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fastannotate/support.py Tue Jan 21 13:14:51 2020 -0500 @@ -74,7 +74,6 @@ may raise Exception, and always return line numbers. """ master = _getmaster(fctx) - annotated = contents = None with context.fctxannotatecontext(fctx, follow, diffopts) as ac: try: diff -r 61881b170140 -r 84a0102c05c7 hgext/fix.py --- a/hgext/fix.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fix.py Tue Jan 21 13:14:51 2020 -0500 @@ -144,9 +144,9 @@ match as matchmod, mdiff, merge, - obsolete, pycompat, registrar, + rewriteutil, scmutil, util, worker, @@ -249,9 +249,8 @@ override this default behavior, though it is not usually desirable to do so. 
""" opts = pycompat.byteskwargs(opts) + cmdutil.check_at_most_one_arg(opts, b'all', b'rev') if opts[b'all']: - if opts[b'rev']: - raise error.Abort(_(b'cannot specify both "--rev" and "--all"')) opts[b'rev'] = [b'not public() and not obsolete()'] opts[b'working_dir'] = True with repo.wlock(), repo.lock(), repo.transaction(b'fix'): @@ -404,7 +403,7 @@ checkfixablectx(ui, repo, repo[rev]) if revs: cmdutil.checkunfinished(repo) - checknodescendants(repo, revs) + rewriteutil.precheck(repo, revs, b'fix') if opts.get(b'working_dir'): revs.add(wdirrev) if list(merge.mergestate.read(repo).unresolved()): @@ -416,22 +415,8 @@ return revs -def checknodescendants(repo, revs): - if not obsolete.isenabled(repo, obsolete.allowunstableopt) and repo.revs( - b'(%ld::) - (%ld)', revs, revs - ): - raise error.Abort( - _(b'can only fix a changeset together with all its descendants') - ) - - def checkfixablectx(ui, repo, ctx): """Aborts if the revision shouldn't be replaced with a fixed one.""" - if not ctx.mutable(): - raise error.Abort( - b'can\'t fix immutable changeset %s' - % (scmutil.formatchangeid(ctx),) - ) if ctx.obsolete(): # It would be better to actually check if the revision has a successor. 
allowdivergence = ui.configbool( @@ -681,7 +666,7 @@ if rev is None: ui.warn(_(b'wdir'), label=b'evolve.rev') else: - ui.warn((str(rev)), label=b'evolve.rev') + ui.warn(b'%d' % rev, label=b'evolve.rev') ui.warn(b'] %s: %s\n' % (fixername, line)) @@ -745,36 +730,38 @@ ): return - def filectxfn(repo, memctx, path): - if path not in ctx: - return None - fctx = ctx[path] - copysource = fctx.copysource() - return context.memfilectx( - repo, - memctx, - path=fctx.path(), - data=filedata.get(path, fctx.data()), - islink=fctx.islink(), - isexec=fctx.isexec(), - copysource=copysource, - ) - extra = ctx.extra().copy() extra[b'fix_source'] = ctx.hex() - memctx = context.memctx( + wctx = context.overlayworkingctx(repo) + wctx.setbase(repo[newp1node]) + merge.update( repo, - parents=(newp1node, newp2node), + ctx.rev(), + branchmerge=False, + force=True, + ancestor=p1rev, + mergeancestor=False, + wc=wctx, + ) + copies.graftcopies(wctx, ctx, ctx.p1()) + + for path in filedata.keys(): + fctx = ctx[path] + copysource = fctx.copysource() + wctx.write(path, filedata[path], flags=fctx.flags()) + if copysource: + wctx.markcopied(path, copysource) + + memctx = wctx.tomemctx( text=ctx.description(), - files=set(ctx.files()) | set(filedata.keys()), - filectxfn=filectxfn, - user=ctx.user(), + branch=ctx.branch(), + extra=extra, date=ctx.date(), - extra=extra, - branch=ctx.branch(), - editor=None, + parents=(newp1node, newp2node), + user=ctx.user(), ) + sucnode = memctx.commit() prenode = ctx.node() if prenode == sucnode: diff -r 61881b170140 -r 84a0102c05c7 hgext/fsmonitor/__init__.py --- a/hgext/fsmonitor/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fsmonitor/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -108,7 +108,6 @@ from __future__ import absolute_import import codecs -import hashlib import os import stat import sys @@ -132,7 +131,10 @@ util, ) from mercurial import match as matchmod -from mercurial.utils import stringutil +from mercurial.utils import ( + hashutil, + 
stringutil, +) from . import ( pywatchman, @@ -235,7 +237,7 @@ copy. """ - sha1 = hashlib.sha1() + sha1 = hashutil.sha1() sha1.update(pycompat.byterepr(ignore)) return pycompat.sysbytes(sha1.hexdigest()) diff -r 61881b170140 -r 84a0102c05c7 hgext/fsmonitor/pywatchman/capabilities.py --- a/hgext/fsmonitor/pywatchman/capabilities.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fsmonitor/pywatchman/capabilities.py Tue Jan 21 13:14:51 2020 -0500 @@ -29,8 +29,6 @@ # no unicode literals from __future__ import absolute_import, division, print_function -import re - def parse_version(vstr): res = 0 @@ -64,7 +62,7 @@ vers["capabilities"] = {} for name in opts["optional"]: vers["capabilities"][name] = check(parsed_version, name) - failed = False # noqa: F841 T25377293 Grandfathered in + for name in opts["required"]: have = check(parsed_version, name) vers["capabilities"][name] = have diff -r 61881b170140 -r 84a0102c05c7 hgext/fsmonitor/pywatchman/pybser.py --- a/hgext/fsmonitor/pywatchman/pybser.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fsmonitor/pywatchman/pybser.py Tue Jan 21 13:14:51 2020 -0500 @@ -506,7 +506,6 @@ def _pdu_info_helper(buf): - bser_version = -1 if buf[0:2] == EMPTY_HEADER[0:2]: bser_version = 1 bser_capabilities = 0 diff -r 61881b170140 -r 84a0102c05c7 hgext/fsmonitor/watchmanclient.py --- a/hgext/fsmonitor/watchmanclient.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/fsmonitor/watchmanclient.py Tue Jan 21 13:14:51 2020 -0500 @@ -105,11 +105,11 @@ ) return self._watchmanclient.query(*watchmanargs) except pywatchman.CommandError as ex: - if b'unable to resolve root' in ex.msg: + if 'unable to resolve root' in ex.msg: raise WatchmanNoRoot( self._root, stringutil.forcebytestr(ex.msg) ) - raise Unavailable(ex.msg) + raise Unavailable(stringutil.forcebytestr(ex.msg)) except pywatchman.WatchmanError as ex: raise Unavailable(stringutil.forcebytestr(ex)) diff -r 61881b170140 -r 84a0102c05c7 hgext/githelp.py --- a/hgext/githelp.py Thu Jan 09 14:19:20 2020 -0500 +++ 
b/hgext/githelp.py Tue Jan 21 13:14:51 2020 -0500 @@ -90,11 +90,11 @@ args = fancyopts.fancyopts(list(args), cmdoptions, opts, True) break except getopt.GetoptError as ex: - if r"requires argument" in ex.msg: + if "requires argument" in ex.msg: raise - if (r'--' + ex.opt) in ex.msg: + if ('--' + ex.opt) in ex.msg: flag = b'--' + pycompat.bytestr(ex.opt) - elif (r'-' + ex.opt) in ex.msg: + elif ('-' + ex.opt) in ex.msg: flag = b'-' + pycompat.bytestr(ex.opt) else: raise error.Abort( @@ -209,7 +209,7 @@ def am(ui, repo, *args, **kwargs): cmdoptions = [] - args, opts = parseoptions(ui, cmdoptions, args) + parseoptions(ui, cmdoptions, args) cmd = Command(b'import') ui.status(bytes(cmd), b"\n") @@ -1139,7 +1139,7 @@ def svndcommit(ui, repo, *args, **kwargs): cmdoptions = [] - args, opts = parseoptions(ui, cmdoptions, args) + parseoptions(ui, cmdoptions, args) cmd = Command(b'push') @@ -1148,7 +1148,7 @@ def svnfetch(ui, repo, *args, **kwargs): cmdoptions = [] - args, opts = parseoptions(ui, cmdoptions, args) + parseoptions(ui, cmdoptions, args) cmd = Command(b'pull') cmd.append(b'default-push') @@ -1173,7 +1173,7 @@ cmdoptions = [ (b'l', b'local', None, b''), ] - args, opts = parseoptions(ui, cmdoptions, args) + parseoptions(ui, cmdoptions, args) pullcmd = Command(b'pull') pullcmd.append(b'default-push') diff -r 61881b170140 -r 84a0102c05c7 hgext/gpg.py --- a/hgext/gpg.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/gpg.py Tue Jan 21 13:14:51 2020 -0500 @@ -69,11 +69,11 @@ try: # create temporary files fd, sigfile = pycompat.mkstemp(prefix=b"hg-gpg-", suffix=b".sig") - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(sig) fp.close() fd, datafile = pycompat.mkstemp(prefix=b"hg-gpg-", suffix=b".txt") - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(data) fp.close() gpgcmd = ( @@ -121,7 +121,7 @@ def newgpg(ui, **opts): """create a new gpg instance""" gpgpath = ui.config(b"gpg", b"cmd") - gpgkey = opts.get(r'key') + gpgkey = opts.get('key') if not 
gpgkey: gpgkey = ui.config(b"gpg", b"key") return gpg(gpgpath, gpgkey) diff -r 61881b170140 -r 84a0102c05c7 hgext/graphlog.py --- a/hgext/graphlog.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/graphlog.py Tue Jan 21 13:14:51 2020 -0500 @@ -121,5 +121,5 @@ This is an alias to :hg:`log -G`. """ - opts[r'graph'] = True + opts['graph'] = True return commands.log(ui, repo, *pats, **opts) diff -r 61881b170140 -r 84a0102c05c7 hgext/hgk.py --- a/hgext/hgk.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/hgk.py Tue Jan 21 13:14:51 2020 -0500 @@ -92,21 +92,21 @@ mmap = repo[node1].manifest() mmap2 = repo[node2].manifest() m = scmutil.match(repo[node1], files) - modified, added, removed = repo.status(node1, node2, m)[:3] + st = repo.status(node1, node2, m) empty = short(nullid) - for f in modified: + for f in st.modified: # TODO get file permissions ui.writenoi18n( b":100664 100664 %s %s M\t%s\t%s\n" % (short(mmap[f]), short(mmap2[f]), f, f) ) - for f in added: + for f in st.added: ui.writenoi18n( b":000000 100664 %s %s N\t%s\t%s\n" % (empty, short(mmap2[f]), f, f) ) - for f in removed: + for f in st.removed: ui.writenoi18n( b":100664 000000 %s %s D\t%s\t%s\n" % (short(mmap[f]), empty, f, f) @@ -115,7 +115,7 @@ ## while True: - if opts[r'stdin']: + if opts['stdin']: line = ui.fin.readline() if not line: break @@ -131,8 +131,8 @@ else: node2 = node1 node1 = repo.changelog.parents(node1)[0] - if opts[r'patch']: - if opts[r'pretty']: + if opts['patch']: + if opts['pretty']: catcommit(ui, repo, node2, b"") m = scmutil.match(repo[node1], files) diffopts = patch.difffeatureopts(ui) @@ -142,7 +142,7 @@ ui.write(chunk) else: __difftree(repo, node1, node2, files=files) - if not opts[r'stdin']: + if not opts['stdin']: break @@ -201,7 +201,7 @@ # strings # prefix = b"" - if opts[r'stdin']: + if opts['stdin']: line = ui.fin.readline() if not line: return @@ -218,7 +218,7 @@ return 1 n = repo.lookup(r) catcommit(ui, repo, n, prefix) - if opts[r'stdin']: + if opts['stdin']: line = 
ui.fin.readline() if not line: break @@ -363,7 +363,7 @@ else: full = None copy = [x for x in revs] - revtree(ui, copy, repo, full, opts[r'max_count'], opts[r'parents']) + revtree(ui, copy, repo, full, opts['max_count'], opts[r'parents']) @command( @@ -373,7 +373,7 @@ helpcategory=command.CATEGORY_CHANGE_NAVIGATION, ) def view(ui, repo, *etc, **opts): - b"start interactive history viewer" + """start interactive history viewer""" opts = pycompat.byteskwargs(opts) os.chdir(repo.root) optstr = b' '.join( diff -r 61881b170140 -r 84a0102c05c7 hgext/highlight/__init__.py --- a/hgext/highlight/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/highlight/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -52,7 +52,7 @@ filenameonly = web.configbool(b'web', b'highlightonlymatchfilename', False) ctx = fctx.changectx() - m = ctx.matchfileset(expr) + m = ctx.matchfileset(fctx.repo().root, expr) if m(fctx.path()): highlight.pygmentize( field, fctx, style, tmpl, guessfilenameonly=filenameonly diff -r 61881b170140 -r 84a0102c05c7 hgext/histedit.py --- a/hgext/histedit.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/histedit.py Tue Jan 21 13:14:51 2020 -0500 @@ -230,6 +230,7 @@ pycompat, registrar, repair, + rewriteutil, scmutil, state as statemod, util, @@ -307,7 +308,7 @@ if len(a.verbs): v = b', '.join(sorted(a.verbs, key=lambda v: len(v))) actions.append(b" %s = %s" % (v, lines[0])) - actions.extend([b' %s' for l in lines[1:]]) + actions.extend([b' %s'] * (len(lines) - 1)) for v in ( sorted(primaryactions) @@ -624,9 +625,9 @@ def commitfunc(**kwargs): overrides = {(b'phases', b'new-commit'): phasemin} with repo.ui.configoverride(overrides, b'histedit'): - extra = kwargs.get(r'extra', {}).copy() + extra = kwargs.get('extra', {}).copy() extra[b'histedit_source'] = src.hex() - kwargs[r'extra'] = extra + kwargs['extra'] = extra return repo.commit(**kwargs) return commitfunc @@ -1056,6 +1057,7 @@ COLOR_HELP, COLOR_SELECTED, COLOR_OK, COLOR_WARN, COLOR_CURRENT = 1, 2, 3, 4, 5 
COLOR_DIFF_ADD_LINE, COLOR_DIFF_DEL_LINE, COLOR_DIFF_OFFSET = 6, 7, 8 +COLOR_ROLL, COLOR_ROLL_CURRENT, COLOR_ROLL_SELECTED = 9, 10, 11 E_QUIT, E_HISTEDIT = 1, 2 E_PAGEDOWN, E_PAGEUP, E_LINEUP, E_LINEDOWN, E_RESIZE = 3, 4, 5, 6, 7 @@ -1119,32 +1121,42 @@ self.conflicts = [] def __bytes__(self): - # Some actions ('fold' and 'roll') combine a patch with a previous one. - # Add a marker showing which patch they apply to, and also omit the - # description for 'roll' (since it will get discarded). Example display: + # Example display of several histeditrules: # # #10 pick 316392:06a16c25c053 add option to skip tests - # #11 ^roll 316393:71313c964cc5 + # #11 ^roll 316393:71313c964cc5 oops a fixup commit # #12 pick 316394:ab31f3973b0d include mfbt for mozilla-config.h # #13 ^fold 316395:14ce5803f4c3 fix warnings # # The carets point to the changeset being folded into ("roll this # changeset into the changeset above"). + return b'%s%s' % (self.prefix, self.desc) + + __str__ = encoding.strmethod(__bytes__) + + @property + def prefix(self): + # Some actions ('fold' and 'roll') combine a patch with a + # previous one. Add a marker showing which patch they apply + # to. action = ACTION_LABELS.get(self.action, self.action) + h = self.ctx.hex()[0:12] r = self.ctx.rev() - desc = self.ctx.description().splitlines()[0].strip() - if self.action == b'roll': - desc = b'' - return b"#%s %s %d:%s %s" % ( + + return b"#%s %s %d:%s " % ( (b'%d' % self.origpos).ljust(2), action.ljust(6), r, h, - desc, ) - __str__ = encoding.strmethod(__bytes__) + @property + def desc(self): + # This is split off from the prefix property so that we can + # separately make the description for 'roll' red (since it + # will get discarded). 
+ return self.ctx.description().splitlines()[0].strip() def checkconflicts(self, other): if other.pos > self.pos and other.origpos <= self.origpos: @@ -1382,6 +1394,11 @@ curses.init_pair(COLOR_DIFF_ADD_LINE, curses.COLOR_GREEN, -1) curses.init_pair(COLOR_DIFF_DEL_LINE, curses.COLOR_RED, -1) curses.init_pair(COLOR_DIFF_OFFSET, curses.COLOR_MAGENTA, -1) + curses.init_pair(COLOR_ROLL, curses.COLOR_RED, -1) + curses.init_pair( + COLOR_ROLL_CURRENT, curses.COLOR_BLACK, curses.COLOR_MAGENTA + ) + curses.init_pair(COLOR_ROLL_SELECTED, curses.COLOR_RED, curses.COLOR_WHITE) # don't display the cursor try: @@ -1483,9 +1500,12 @@ rulesscr.addstr(y, 0, b" ", curses.color_pair(COLOR_WARN)) else: rulesscr.addstr(y, 0, b" ", curses.COLOR_BLACK) + if y + start == selected: + rollcolor = COLOR_ROLL_SELECTED addln(rulesscr, y, 2, rule, curses.color_pair(COLOR_SELECTED)) elif y + start == pos: + rollcolor = COLOR_ROLL_CURRENT addln( rulesscr, y, @@ -1494,7 +1514,17 @@ curses.color_pair(COLOR_CURRENT) | curses.A_BOLD, ) else: + rollcolor = COLOR_ROLL addln(rulesscr, y, 2, rule) + + if rule.action == b'roll': + rulesscr.addstr( + y, + 2 + len(rule.prefix), + rule.desc, + curses.color_pair(rollcolor), + ) + rulesscr.noutrefresh() def renderstring(win, state, output, diffcolors=False): @@ -1674,7 +1704,7 @@ # Curses requires setting the locale or it will default to the C # locale. This sets the locale to the user's default system # locale. 
- locale.setlocale(locale.LC_ALL, r'') + locale.setlocale(locale.LC_ALL, '') rc = curses.wrapper(functools.partial(_chisteditmain, repo, ctxs)) curses.echo() curses.endwin() @@ -2046,11 +2076,11 @@ mapping[n] = () # remove entries about unknown nodes - nodemap = repo.unfiltered().changelog.nodemap + has_node = repo.unfiltered().changelog.index.has_node mapping = { k: v for k, v in mapping.items() - if k in nodemap and all(n in nodemap for n in v) + if has_node(k) and all(has_node(n) for n in v) } scmutil.cleanupnodes(repo, mapping, b'histedit') hf = fm.hexfunc @@ -2277,23 +2307,9 @@ When keep is false, the specified set can't have children.""" revs = repo.revs(b'%n::%n', old, new) if revs and not keep: - if not obsolete.isenabled( - repo, obsolete.allowunstableopt - ) and repo.revs(b'(%ld::) - (%ld)', revs, revs): - raise error.Abort( - _( - b'can only histedit a changeset together ' - b'with all its descendants' - ) - ) + rewriteutil.precheck(repo, revs, b'edit') if repo.revs(b'(%ld) and merge()', revs): raise error.Abort(_(b'cannot edit history that contains merges')) - root = repo[revs.first()] # list is already sorted by repo.revs() - if not root.mutable(): - raise error.Abort( - _(b'cannot edit public changeset: %s') % root, - hint=_(b"see 'hg help phases' for details"), - ) return pycompat.maplist(repo.changelog.node, revs) @@ -2447,7 +2463,7 @@ return oldreplacements unfi = repo.unfiltered() - nm = unfi.changelog.nodemap + get_rev = unfi.changelog.index.get_rev obsstore = repo.obsstore newreplacements = list(oldreplacements) oldsuccs = [r[1] for r in oldreplacements] @@ -2458,7 +2474,7 @@ succstocheck = list(seensuccs) while succstocheck: n = succstocheck.pop() - missing = nm.get(n) is None + missing = get_rev(n) is None markers = obsstore.successors.get(n, ()) if missing and not markers: # dead end, mark it as such @@ -2517,9 +2533,9 @@ del final[n] # we expect all changes involved in final to exist in the repo # turn `final` into list (topologically 
sorted) - nm = state.repo.changelog.nodemap + get_rev = state.repo.changelog.index.get_rev for prec, succs in final.items(): - final[prec] = sorted(succs, key=nm.get) + final[prec] = sorted(succs, key=get_rev) # computed topmost element (necessary for bookmark) if new: @@ -2565,8 +2581,8 @@ repo = repo.unfiltered() # Find all nodes that need to be stripped # (we use %lr instead of %ln to silently ignore unknown items) - nm = repo.changelog.nodemap - nodes = sorted(n for n in nodes if n in nm) + has_node = repo.changelog.index.has_node + nodes = sorted(n for n in nodes if has_node(n)) roots = [c.node() for c in repo.set(b"roots(%ln)", nodes)] if roots: backup = not nobackup diff -r 61881b170140 -r 84a0102c05c7 hgext/infinitepush/__init__.py --- a/hgext/infinitepush/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/infinitepush/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -195,7 +195,7 @@ revsetpredicate = registrar.revsetpredicate() templatekeyword = registrar.templatekeyword() _scratchbranchmatcher = lambda x: False -_maybehash = re.compile(r'^[a-f0-9]+$').search +_maybehash = re.compile('^[a-f0-9]+$').search def _buildexternalbundlestore(ui): @@ -548,7 +548,7 @@ allbundlestocleanup = [] try: for head in heads: - if head not in repo.changelog.nodemap: + if not repo.changelog.index.has_node(head): if head not in nodestobundle: newbundlefile = common.downloadbundle(repo, head) bundlepath = b"bundle:%s+%s" % (repo.root, newbundlefile) @@ -1031,7 +1031,7 @@ fd, bundlefile = pycompat.mkstemp() try: try: - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(buf.read()) finally: fp.close() @@ -1122,7 +1122,7 @@ fd, bundlefile = pycompat.mkstemp() try: try: - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(buf.read()) finally: fp.close() @@ -1254,7 +1254,7 @@ fd, bundlefile = pycompat.mkstemp() try: try: - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(buf.read()) finally: fp.close() diff -r 61881b170140 -r 84a0102c05c7 
hgext/infinitepush/common.py --- a/hgext/infinitepush/common.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/infinitepush/common.py Tue Jan 21 13:14:51 2020 -0500 @@ -37,7 +37,7 @@ fd, bundlefile = pycompat.mkstemp() try: # guards bundlefile try: # guards fp - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(data) finally: fp.close() diff -r 61881b170140 -r 84a0102c05c7 hgext/infinitepush/store.py --- a/hgext/infinitepush/store.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/infinitepush/store.py Tue Jan 21 13:14:51 2020 -0500 @@ -6,7 +6,6 @@ from __future__ import absolute_import import abc -import hashlib import os import subprocess import tempfile @@ -16,7 +15,10 @@ node, pycompat, ) -from mercurial.utils import procutil +from mercurial.utils import ( + hashutil, + procutil, +) NamedTemporaryFile = tempfile.NamedTemporaryFile @@ -29,7 +31,7 @@ pass -class abstractbundlestore(object): +class abstractbundlestore(object): # pytype: disable=ignored-metaclass """Defines the interface for bundle stores. A bundle store is an entity that stores raw bundle data. It is a simple @@ -87,7 +89,7 @@ return os.path.join(self._dirpath(filename), filename) def write(self, data): - filename = node.hex(hashlib.sha1(data).digest()) + filename = node.hex(hashutil.sha1(data).digest()) dirpath = self._dirpath(filename) if not os.path.exists(dirpath): diff -r 61881b170140 -r 84a0102c05c7 hgext/journal.py --- a/hgext/journal.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/journal.py Tue Jan 21 13:14:51 2020 -0500 @@ -149,7 +149,7 @@ Note that by default entries go from most recent to oldest. 
""" - order = kwargs.pop(r'order', max) + order = kwargs.pop('order', max) iterables = [iter(it) for it in iterables] # this tracks still active iterables; iterables are deleted as they are # exhausted, which is why this is a dictionary and why each entry also @@ -214,8 +214,8 @@ class journalentry( collections.namedtuple( - r'journalentry', - r'timestamp user command namespace name oldhashes newhashes', + 'journalentry', + 'timestamp user command namespace name oldhashes newhashes', ) ): """Individual journal entry diff -r 61881b170140 -r 84a0102c05c7 hgext/keyword.py --- a/hgext/keyword.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/keyword.py Tue Jan 21 13:14:51 2020 -0500 @@ -482,16 +482,16 @@ ui.setconfig(b'keywordset', b'svn', svn, b'keyword') uikwmaps = ui.configitems(b'keywordmaps') - if args or opts.get(r'rcfile'): + if args or opts.get('rcfile'): ui.status(_(b'\n\tconfiguration using custom keyword template maps\n')) if uikwmaps: ui.status(_(b'\textending current template maps\n')) - if opts.get(r'default') or not uikwmaps: + if opts.get('default') or not uikwmaps: if svn: ui.status(_(b'\toverriding default svn keywordset\n')) else: ui.status(_(b'\toverriding default cvs keywordset\n')) - if opts.get(r'rcfile'): + if opts.get('rcfile'): ui.readconfig(opts.get(b'rcfile')) if args: # simulate hgrc parsing @@ -499,7 +499,7 @@ repo.vfs.write(b'hgrc', rcmaps) ui.readconfig(repo.vfs.join(b'hgrc')) kwmaps = dict(ui.configitems(b'keywordmaps')) - elif opts.get(r'default'): + elif opts.get('default'): if svn: ui.status(_(b'\n\tconfiguration using default svn keywordset\n')) else: diff -r 61881b170140 -r 84a0102c05c7 hgext/largefiles/lfcommands.py --- a/hgext/largefiles/lfcommands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/largefiles/lfcommands.py Tue Jan 21 13:14:51 2020 -0500 @@ -10,7 +10,6 @@ from __future__ import absolute_import import errno -import hashlib import os import shutil @@ -29,6 +28,7 @@ scmutil, util, ) +from mercurial.utils import hashutil from 
..convert import ( convcmd, @@ -273,7 +273,7 @@ ) # largefile was modified, update standins - m = hashlib.sha1(b'') + m = hashutil.sha1(b'') m.update(ctx[f].data()) hash = node.hex(m.digest()) if f not in lfiletohash or lfiletohash[f] != hash: @@ -648,7 +648,7 @@ """ repo.lfpullsource = source - revs = opts.get(r'rev', []) + revs = opts.get('rev', []) if not revs: raise error.Abort(_(b'no revisions specified')) revs = scmutil.revrange(repo, revs) diff -r 61881b170140 -r 84a0102c05c7 hgext/largefiles/lfutil.py --- a/hgext/largefiles/lfutil.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/largefiles/lfutil.py Tue Jan 21 13:14:51 2020 -0500 @@ -9,8 +9,8 @@ '''largefiles utility code: must not import other modules in this package.''' from __future__ import absolute_import +import contextlib import copy -import hashlib import os import stat @@ -31,6 +31,7 @@ util, vfs as vfsmod, ) +from mercurial.utils import hashutil shortname = b'.hglf' shortnameslash = shortname + b'/' @@ -39,6 +40,16 @@ # -- Private worker functions ------------------------------------------ +@contextlib.contextmanager +def lfstatus(repo, value=True): + oldvalue = getattr(repo, 'lfstatus', False) + repo.lfstatus = value + try: + yield + finally: + repo.lfstatus = oldvalue + + def getminsize(ui, assumelfiles, opt, default=10): lfsize = opt if not lfsize and assumelfiles: @@ -421,7 +432,7 @@ def copyandhash(instream, outfile): '''Read bytes from instream (iterable) and write them to outfile, computing the SHA-1 hash of the data along the way. 
Return the hash.''' - hasher = hashlib.sha1(b'') + hasher = hashutil.sha1(b'') for data in instream: hasher.update(data) outfile.write(data) @@ -461,7 +472,7 @@ def hexsha1(fileobj): """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like object data""" - h = hashlib.sha1() + h = hashutil.sha1() for chunk in util.filechunkiter(fileobj): h.update(chunk) return hex(h.digest()) @@ -580,12 +591,8 @@ progress.update(i) parents = [p for p in repo[n].parents() if p != node.nullid] - oldlfstatus = repo.lfstatus - repo.lfstatus = False - try: + with lfstatus(repo, value=False): ctx = repo[n] - finally: - repo.lfstatus = oldlfstatus files = set(ctx.files()) if len(parents) == 2: diff -r 61881b170140 -r 84a0102c05c7 hgext/largefiles/overrides.py --- a/hgext/largefiles/overrides.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/largefiles/overrides.py Tue Jan 21 13:14:51 2020 -0500 @@ -9,7 +9,6 @@ '''Overridden Mercurial commands and functions for the largefiles extension''' from __future__ import absolute_import -import contextlib import copy import os @@ -50,6 +49,8 @@ eh = exthelper.exthelper() +lfstatus = lfutil.lfstatus + # -- Utility functions: commonly/repeatedly needed functionality --------------- @@ -84,9 +85,9 @@ def addlargefiles(ui, repo, isaddremove, matcher, uipathfn, **opts): - large = opts.get(r'large') + large = opts.get('large') lfsize = lfutil.getminsize( - ui, lfutil.islfilesrepo(repo), opts.get(r'lfsize') + ui, lfutil.islfilesrepo(repo), opts.get('lfsize') ) lfmatcher = None @@ -131,7 +132,7 @@ # Need to lock, otherwise there could be a race condition between # when standins are created and added to the repo. 
with repo.wlock(): - if not opts.get(r'dry_run'): + if not opts.get('dry_run'): standins = [] lfdirstate = lfutil.openlfdirstate(ui, repo) for f in lfnames: @@ -158,18 +159,8 @@ return added, bad -@contextlib.contextmanager -def lfstatus(repo): - oldvalue = getattr(repo, 'lfstatus', False) - repo.lfstatus = True - try: - yield - finally: - repo.lfstatus = oldvalue - - def removelargefiles(ui, repo, isaddremove, matcher, uipathfn, dryrun, **opts): - after = opts.get(r'after') + after = opts.get('after') m = composelargefilematcher(matcher, repo[None].manifest()) with lfstatus(repo): s = repo.status(match=m, clean=not isaddremove) @@ -269,7 +260,7 @@ ], ) def overrideadd(orig, ui, repo, *pats, **opts): - if opts.get(r'normal') and opts.get(r'large'): + if opts.get('normal') and opts.get('large'): raise error.Abort(_(b'--normal cannot be used with --large')) return orig(ui, repo, *pats, **opts) @@ -277,7 +268,7 @@ @eh.wrapfunction(cmdutil, b'add') def cmdutiladd(orig, ui, repo, matcher, prefix, uipathfn, explicitonly, **opts): # The --normal flag short circuits this override - if opts.get(r'normal'): + if opts.get('normal'): return orig(ui, repo, matcher, prefix, uipathfn, explicitonly, **opts) ladded, lbad = addlargefiles(ui, repo, False, matcher, uipathfn, **opts) @@ -477,9 +468,9 @@ ], ) def overrideverify(orig, ui, repo, *pats, **opts): - large = opts.pop(r'large', False) - all = opts.pop(r'lfa', False) - contents = opts.pop(r'lfc', False) + large = opts.pop('large', False) + all = opts.pop('lfa', False) + contents = opts.pop('lfc', False) result = orig(ui, repo, *pats, **opts) if large or all or contents: @@ -492,7 +483,7 @@ opts=[(b'', b'large', None, _(b'display largefiles dirstate'))], ) def overridedebugstate(orig, ui, repo, *pats, **opts): - large = opts.pop(r'large', False) + large = opts.pop('large', False) if large: class fakerepo(object): @@ -975,8 +966,8 @@ repo.lfpullsource = source result = orig(ui, repo, source, **opts) revspostpull = len(repo) - 
lfrevs = opts.get(r'lfrev', []) - if opts.get(r'all_largefiles'): + lfrevs = opts.get('lfrev', []) + if opts.get('all_largefiles'): lfrevs.append(b'pulled()') if lfrevs and revspostpull > revsprepull: numcached = 0 @@ -1006,9 +997,9 @@ ) def overridepush(orig, ui, repo, *args, **kwargs): """Override push command and store --lfrev parameters in opargs""" - lfrevs = kwargs.pop(r'lfrev', None) + lfrevs = kwargs.pop('lfrev', None) if lfrevs: - opargs = kwargs.setdefault(r'opargs', {}) + opargs = kwargs.setdefault('opargs', {}) opargs[b'lfrevs'] = scmutil.revrange(repo, lfrevs) return orig(ui, repo, *args, **kwargs) @@ -1016,7 +1007,7 @@ @eh.wrapfunction(exchange, b'pushoperation') def exchangepushoperation(orig, *args, **kwargs): """Override pushoperation constructor and store lfrevs parameter""" - lfrevs = kwargs.pop(r'lfrevs', None) + lfrevs = kwargs.pop('lfrevs', None) pushop = orig(*args, **kwargs) pushop.lfrevs = lfrevs return pushop @@ -1064,7 +1055,7 @@ d = dest if d is None: d = hg.defaultdest(source) - if opts.get(r'all_largefiles') and not hg.islocal(d): + if opts.get('all_largefiles') and not hg.islocal(d): raise error.Abort( _(b'--all-largefiles is incompatible with non-local destination %s') % d @@ -1104,7 +1095,7 @@ if not util.safehasattr(repo, b'_largefilesenabled'): return orig(ui, repo, **opts) - resuming = opts.get(r'continue') + resuming = opts.get('continue') repo._lfcommithooks.append(lfutil.automatedcommithook(resuming)) repo._lfstatuswriters.append(lambda *msg, **opts: None) try: @@ -1613,7 +1604,7 @@ @eh.wrapcommand(b'transplant', extension=b'transplant') def overridetransplant(orig, ui, repo, *revs, **opts): - resuming = opts.get(r'continue') + resuming = opts.get('continue') repo._lfcommithooks.append(lfutil.automatedcommithook(resuming)) repo._lfstatuswriters.append(lambda *msg, **opts: None) try: @@ -1698,7 +1689,7 @@ @eh.wrapfunction(merge, b'update') def mergeupdate(orig, repo, node, branchmerge, force, *args, **kwargs): - matcher = 
kwargs.get(r'matcher', None) + matcher = kwargs.get('matcher', None) # note if this is a partial update partial = matcher and not matcher.always() with repo.wlock(): @@ -1758,7 +1749,7 @@ # Make sure the merge runs on disk, not in-memory. largefiles is not a # good candidate for in-memory merge (large files, custom dirstate, # matcher usage). - kwargs[r'wc'] = repo[None] + kwargs['wc'] = repo[None] result = orig(repo, node, branchmerge, force, *args, **kwargs) newstandins = lfutil.getstandinsstate(repo) diff -r 61881b170140 -r 84a0102c05c7 hgext/largefiles/proto.py --- a/hgext/largefiles/proto.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/largefiles/proto.py Tue Jan 21 13:14:51 2020 -0500 @@ -116,7 +116,7 @@ b'putlfile', data=fd, sha=sha, - headers={r'content-type': r'application/mercurial-0.1'}, + headers={'content-type': 'application/mercurial-0.1'}, ) try: d, output = res.split(b'\n', 1) @@ -206,7 +206,7 @@ if cmd == b'heads' and self.capable(b'largefiles'): cmd = b'lheads' if cmd == b'batch' and self.capable(b'largefiles'): - args[r'cmds'] = args[r'cmds'].replace(b'heads ', b'lheads ') + args['cmds'] = args[r'cmds'].replace(b'heads ', b'lheads ') return ssholdcallstream(self, cmd, **args) @@ -217,5 +217,5 @@ if cmd == b'heads' and self.capable(b'largefiles'): cmd = b'lheads' if cmd == b'batch' and self.capable(b'largefiles'): - args[r'cmds'] = headsre.sub(b'lheads', args[r'cmds']) + args['cmds'] = headsre.sub(b'lheads', args['cmds']) return httpoldcallstream(self, cmd, **args) diff -r 61881b170140 -r 84a0102c05c7 hgext/largefiles/reposetup.py --- a/hgext/largefiles/reposetup.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/largefiles/reposetup.py Tue Jan 21 13:14:51 2020 -0500 @@ -15,9 +15,11 @@ from mercurial import ( error, + extensions, localrepo, match as matchmod, scmutil, + util, ) from . 
import ( @@ -38,9 +40,6 @@ lfstatus = False - def status_nolfiles(self, *args, **kwargs): - return super(lfilesrepo, self).status(*args, **kwargs) - # When lfstatus is set, return a context that gives the names # of largefiles instead of their corresponding standins and # identifies the largefiles as always binary, regardless of @@ -49,45 +48,46 @@ ctx = super(lfilesrepo, self).__getitem__(changeid) if self.lfstatus: - class lfilesctx(ctx.__class__): - def files(self): - filenames = super(lfilesctx, self).files() - return [lfutil.splitstandin(f) or f for f in filenames] + def files(orig): + filenames = orig() + return [lfutil.splitstandin(f) or f for f in filenames] - def manifest(self): - man1 = super(lfilesctx, self).manifest() + extensions.wrapfunction(ctx, 'files', files) + + def manifest(orig): + man1 = orig() - class lfilesmanifest(man1.__class__): - def __contains__(self, filename): - orig = super(lfilesmanifest, self).__contains__ - return orig(filename) or orig( - lfutil.standin(filename) - ) + class lfilesmanifest(man1.__class__): + def __contains__(self, filename): + orig = super(lfilesmanifest, self).__contains__ + return orig(filename) or orig( + lfutil.standin(filename) + ) - man1.__class__ = lfilesmanifest - return man1 + man1.__class__ = lfilesmanifest + return man1 - def filectx(self, path, fileid=None, filelog=None): - orig = super(lfilesctx, self).filectx - try: - if filelog is not None: - result = orig(path, fileid, filelog) - else: - result = orig(path, fileid) - except error.LookupError: - # Adding a null character will cause Mercurial to - # identify this as a binary file. 
- if filelog is not None: - result = orig( - lfutil.standin(path), fileid, filelog - ) - else: - result = orig(lfutil.standin(path), fileid) - olddata = result.data - result.data = lambda: olddata() + b'\0' - return result + extensions.wrapfunction(ctx, 'manifest', manifest) - ctx.__class__ = lfilesctx + def filectx(orig, path, fileid=None, filelog=None): + try: + if filelog is not None: + result = orig(path, fileid, filelog) + else: + result = orig(path, fileid) + except error.LookupError: + # Adding a null character will cause Mercurial to + # identify this as a binary file. + if filelog is not None: + result = orig(lfutil.standin(path), fileid, filelog) + else: + result = orig(lfutil.standin(path), fileid) + olddata = result.data + result.data = lambda: olddata() + b'\0' + return result + + extensions.wrapfunction(ctx, 'filectx', filectx) + return ctx # Figure out the status of big files and insert them into the @@ -130,14 +130,15 @@ if match is None: match = matchmod.always() - wlock = None try: - try: - # updating the dirstate is optional - # so we don't wait on the lock - wlock = self.wlock(False) - except error.LockError: - pass + # updating the dirstate is optional + # so we don't wait on the lock + wlock = self.wlock(False) + gotlock = True + except error.LockError: + wlock = util.nullcontextmanager() + gotlock = False + with wlock: # First check if paths or patterns were specified on the # command line. 
If there were, and they don't match any @@ -308,13 +309,9 @@ for items in result ] - if wlock: + if gotlock: lfdirstate.write() - finally: - if wlock: - wlock.release() - self.lfstatus = True return scmutil.status(*result) @@ -360,20 +357,6 @@ ) return result - def push(self, remote, force=False, revs=None, newbranch=False): - if remote.local(): - missing = set(self.requirements) - remote.local().supported - if missing: - msg = _( - b"required features are not" - b" supported in the destination:" - b" %s" - ) % (b', '.join(sorted(missing))) - raise error.Abort(msg) - return super(lfilesrepo, self).push( - remote, force=force, revs=revs, newbranch=newbranch - ) - # TODO: _subdirlfs should be moved into "lfutil.py", because # it is referred only from "lfutil.updatestandinsbymatch" def _subdirlfs(self, files, lfiles): diff -r 61881b170140 -r 84a0102c05c7 hgext/lfs/__init__.py --- a/hgext/lfs/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/lfs/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -241,12 +241,12 @@ if b'lfs' in repo.requirements: return 0 - last = kwargs.get(r'node_last') + last = kwargs.get('node_last') _bin = node.bin if last: - s = repo.set(b'%n:%n', _bin(kwargs[r'node']), _bin(last)) + s = repo.set(b'%n:%n', _bin(kwargs['node']), _bin(last)) else: - s = repo.set(b'%n', _bin(kwargs[r'node'])) + s = repo.set(b'%n', _bin(kwargs['node'])) match = repo._storenarrowmatch for ctx in s: # TODO: is there a way to just walk the files in the commit? 
@@ -399,6 +399,28 @@ ) def debuglfsupload(ui, repo, **opts): """upload lfs blobs added by the working copy parent or given revisions""" - revs = opts.get(r'rev', []) + revs = opts.get('rev', []) pointers = wrapper.extractpointers(repo, scmutil.revrange(repo, revs)) wrapper.uploadblobs(repo, pointers) + + +@eh.wrapcommand( + b'verify', + opts=[(b'', b'no-lfs', None, _(b'skip missing lfs blob content'))], +) +def verify(orig, ui, repo, **opts): + skipflags = repo.ui.configint(b'verify', b'skipflags') + no_lfs = opts.pop('no_lfs') + + if skipflags: + # --lfs overrides the config bit, if set. + if no_lfs is False: + skipflags &= ~repository.REVISION_FLAG_EXTSTORED + else: + skipflags = 0 + + if no_lfs is True: + skipflags |= repository.REVISION_FLAG_EXTSTORED + + with ui.configoverride({(b'verify', b'skipflags'): skipflags}): + return orig(ui, repo, **opts) diff -r 61881b170140 -r 84a0102c05c7 hgext/lfs/blobstore.py --- a/hgext/lfs/blobstore.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/lfs/blobstore.py Tue Jan 21 13:14:51 2020 -0500 @@ -155,15 +155,29 @@ return self.vfs(oid, b'rb') - def download(self, oid, src): + def download(self, oid, src, content_length): """Read the blob from the remote source in chunks, verify the content, and write to this local blobstore.""" sha256 = hashlib.sha256() + size = 0 with self.vfs(oid, b'wb', atomictemp=True) as fp: for chunk in util.filechunkiter(src, size=1048576): fp.write(chunk) sha256.update(chunk) + size += len(chunk) + + # If the server advertised a length longer than what we actually + # received, then we should expect that the server crashed while + # producing the response (but the server has no way of telling us + # that), and we really don't need to try to write the response to + # the localstore, because it's not going to match the expected. 
+ if content_length is not None and int(content_length) != size: + msg = ( + b"Response length (%s) does not match Content-Length " + b"header (%d): likely server-side crash" + ) + raise LfsRemoteError(_(msg) % (size, int(content_length))) realoid = node.hex(sha256.digest()) if realoid != oid: @@ -280,11 +294,11 @@ """Enforces that any authentication performed is HTTP Basic Authentication. No authentication is also acceptable. """ - authreq = headers.get(r'www-authenticate', None) + authreq = headers.get('www-authenticate', None) if authreq: scheme = authreq.split()[0] - if scheme.lower() != r'basic': + if scheme.lower() != 'basic': msg = _(b'the server must support Basic Authentication') raise util.urlerr.httperror( req.get_full_url(), @@ -324,18 +338,18 @@ See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md """ objects = [ - {r'oid': pycompat.strurl(p.oid()), r'size': p.size()} + {'oid': pycompat.strurl(p.oid()), 'size': p.size()} for p in pointers ] requestdata = pycompat.bytesurl( json.dumps( - {r'objects': objects, r'operation': pycompat.strurl(action),} + {'objects': objects, 'operation': pycompat.strurl(action),} ) ) url = b'%s/objects/batch' % self.baseurl batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata) - batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json') - batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json') + batchreq.add_header('Accept', 'application/vnd.git-lfs+json') + batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json') try: with contextlib.closing(self.urlopener.open(batchreq)) as rsp: rawjson = rsp.read() @@ -376,9 +390,9 @@ headers = pycompat.bytestr(rsp.info()).strip() self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines()))) - if r'objects' in response: - response[r'objects'] = sorted( - response[r'objects'], key=lambda p: p[r'oid'] + if 'objects' in response: + response['objects'] = sorted( + response['objects'], key=lambda p: p['oid'] ) self.ui.debug( 
b'%s\n' @@ -386,7 +400,7 @@ json.dumps( response, indent=2, - separators=(r'', r': '), + separators=('', ': '), sort_keys=True, ) ) @@ -483,33 +497,36 @@ ) request.data = filewithprogress(localstore.open(oid), None) request.get_method = lambda: r'PUT' - request.add_header(r'Content-Type', r'application/octet-stream') - request.add_header(r'Content-Length', len(request.data)) + request.add_header('Content-Type', 'application/octet-stream') + request.add_header('Content-Length', len(request.data)) for k, v in headers: request.add_header(pycompat.strurl(k), pycompat.strurl(v)) - response = b'' try: - with contextlib.closing(self.urlopener.open(request)) as req: + with contextlib.closing(self.urlopener.open(request)) as res: + contentlength = res.info().get(b"content-length") ui = self.ui # Shorten debug lines if self.ui.debugflag: - ui.debug(b'Status: %d\n' % req.status) + ui.debug(b'Status: %d\n' % res.status) # lfs-test-server and hg serve return headers in different # order - headers = pycompat.bytestr(req.info()).strip() + headers = pycompat.bytestr(res.info()).strip() ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines()))) if action == b'download': # If downloading blobs, store downloaded data to local # blobstore - localstore.download(oid, req) + localstore.download(oid, res, contentlength) else: + blocks = [] while True: - data = req.read(1048576) + data = res.read(1048576) if not data: break - response += data + blocks.append(data) + + response = b"".join(blocks) if response: ui.debug(b'lfs %s response: %s' % (action, response)) except util.urlerr.httperror as ex: @@ -588,7 +605,9 @@ else: oids = transfer(sorted(objects, key=lambda o: o.get(b'oid'))) - with self.ui.makeprogress(topic, total=total) as progress: + with self.ui.makeprogress( + topic, unit=_(b"bytes"), total=total + ) as progress: progress.update(0) processed = 0 blobs = 0 @@ -635,7 +654,7 @@ def readbatch(self, pointers, tostore): for p in _deduplicate(pointers): with self.vfs(p.oid(), 
b'rb') as fp: - tostore.download(p.oid(), fp) + tostore.download(p.oid(), fp, None) class _nullremote(object): diff -r 61881b170140 -r 84a0102c05c7 hgext/lfs/wireprotolfsserver.py --- a/hgext/lfs/wireprotolfsserver.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/lfs/wireprotolfsserver.py Tue Jan 21 13:14:51 2020 -0500 @@ -136,7 +136,7 @@ lfsreq = pycompat.json_loads(req.bodyfh.read()) # If no transfer handlers are explicitly requested, 'basic' is assumed. - if r'basic' not in lfsreq.get(r'transfers', [r'basic']): + if 'basic' not in lfsreq.get('transfers', ['basic']): _sethttperror( res, HTTP_BAD_REQUEST, @@ -144,7 +144,7 @@ ) return True - operation = lfsreq.get(r'operation') + operation = lfsreq.get('operation') operation = pycompat.bytestr(operation) if operation not in (b'upload', b'download'): @@ -160,13 +160,13 @@ objects = [ p for p in _batchresponseobjects( - req, lfsreq.get(r'objects', []), operation, localstore + req, lfsreq.get('objects', []), operation, localstore ) ] rsp = { - r'transfer': r'basic', - r'objects': objects, + 'transfer': 'basic', + 'objects': objects, } res.status = hgwebcommon.statusmessage(HTTP_OK) @@ -206,12 +206,12 @@ for obj in objects: # Convert unicode to ASCII to create a filesystem path - soid = obj.get(r'oid') - oid = soid.encode(r'ascii') + soid = obj.get('oid') + oid = soid.encode('ascii') rsp = { - r'oid': soid, - r'size': obj.get(r'size'), # XXX: should this check the local size? - # r'authenticated': True, + 'oid': soid, + 'size': obj.get('size'), # XXX: should this check the local size? + # 'authenticated': True, } exists = True @@ -234,9 +234,9 @@ if inst.errno != errno.ENOENT: _logexception(req) - rsp[r'error'] = { - r'code': 500, - r'message': inst.strerror or r'Internal Server Server', + rsp['error'] = { + 'code': 500, + 'message': inst.strerror or 'Internal Server Server', } yield rsp continue @@ -247,17 +247,17 @@ # IFF they already exist locally. 
if action == b'download': if not exists: - rsp[r'error'] = { - r'code': 404, - r'message': r"The object does not exist", + rsp['error'] = { + 'code': 404, + 'message': "The object does not exist", } yield rsp continue elif not verifies: - rsp[r'error'] = { - r'code': 422, # XXX: is this the right code? - r'message': r"The object is corrupt", + rsp['error'] = { + 'code': 422, # XXX: is this the right code? + 'message': "The object is corrupt", } yield rsp continue @@ -272,23 +272,23 @@ # The spec doesn't mention the Accept header here, but avoid # a gratuitous deviation from lfs-test-server in the test # output. - hdr = {r'Accept': r'application/vnd.git-lfs'} + hdr = {'Accept': 'application/vnd.git-lfs'} auth = req.headers.get(b'Authorization', b'') if auth.startswith(b'Basic '): - hdr[r'Authorization'] = pycompat.strurl(auth) + hdr['Authorization'] = pycompat.strurl(auth) return hdr - rsp[r'actions'] = { - r'%s' + rsp['actions'] = { + '%s' % pycompat.strurl(action): { - r'href': pycompat.strurl( + 'href': pycompat.strurl( b'%s%s/.hg/lfs/objects/%s' % (req.baseurl, req.apppath, oid) ), # datetime.isoformat() doesn't include the 'Z' suffix - r"expires_at": expiresat.strftime(r'%Y-%m-%dT%H:%M:%SZ'), - r'header': _buildheader(), + "expires_at": expiresat.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'header': _buildheader(), } } @@ -327,7 +327,7 @@ statusmessage = hgwebcommon.statusmessage try: - localstore.download(oid, req.bodyfh) + localstore.download(oid, req.bodyfh, req.headers[b'Content-Length']) res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED) except blobstore.LfsCorruptionError: _logexception(req) diff -r 61881b170140 -r 84a0102c05c7 hgext/lfs/wrapper.py --- a/hgext/lfs/wrapper.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/lfs/wrapper.py Tue Jan 21 13:14:51 2020 -0500 @@ -151,12 +151,12 @@ if node is None: # both None - likely working copy content where node is not ready return False - rev = rlog._revlog.rev(node) + rev = rlog.rev(node) else: - node = 
rlog._revlog.node(rev) + node = rlog.node(rev) if node == nullid: return False - flags = rlog._revlog.flags(rev) + flags = rlog.flags(rev) return bool(flags & revlog.REVIDX_EXTSTORED) @@ -203,7 +203,7 @@ # Wrapping may also be applied by remotefilelog def filelogrenamed(orig, self, node): - if _islfs(self, node): + if _islfs(self._revlog, node): rawtext = self._revlog.rawdata(node) if not rawtext: return False @@ -217,7 +217,7 @@ # Wrapping may also be applied by remotefilelog def filelogsize(orig, self, rev): - if _islfs(self, rev=rev): + if _islfs(self._revlog, rev=rev): # fast path: use lfs metadata to answer size rawtext = self._revlog.rawdata(rev) metadata = pointer.deserialize(rawtext) @@ -225,6 +225,25 @@ return orig(self, rev) +@eh.wrapfunction(revlog, b'_verify_revision') +def _verify_revision(orig, rl, skipflags, state, node): + if _islfs(rl, node=node): + rawtext = rl.rawdata(node) + metadata = pointer.deserialize(rawtext) + + # Don't skip blobs that are stored locally, as local verification is + # relatively cheap and there's no other way to verify the raw data in + # the revlog. + if rl.opener.lfslocalblobstore.has(metadata.oid()): + skipflags &= ~revlog.REVIDX_EXTSTORED + elif skipflags & revlog.REVIDX_EXTSTORED: + # The wrapped method will set `skipread`, but there's enough local + # info to check renames. 
+ state[b'safe_renamed'].add(node) + + orig(rl, skipflags, state, node) + + @eh.wrapfunction(context.basefilectx, b'cmp') def filectxcmp(orig, self, fctx): """returns True if text is different than fctx""" @@ -248,7 +267,7 @@ def filectxislfs(self): - return _islfs(self.filelog(), self.filenode()) + return _islfs(self.filelog()._revlog, self.filenode()) @eh.wrapfunction(cmdutil, b'_updatecatformatter') @@ -459,7 +478,7 @@ else: return None fctx = _ctx[f] - if not _islfs(fctx.filelog(), fctx.filenode()): + if not _islfs(fctx.filelog()._revlog, fctx.filenode()): return None try: p = pointer.deserialize(fctx.rawdata()) diff -r 61881b170140 -r 84a0102c05c7 hgext/mq.py --- a/hgext/mq.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/mq.py Tue Jan 21 13:14:51 2020 -0500 @@ -68,6 +68,7 @@ import os import re import shutil +import sys from mercurial.i18n import _ from mercurial.node import ( bin, @@ -490,7 +491,7 @@ def __init__(self, ui, baseui, path, patchdir=None): self.basepath = path try: - with open(os.path.join(path, b'patches.queue'), r'rb') as fh: + with open(os.path.join(path, b'patches.queue'), 'rb') as fh: cur = fh.read().rstrip() if not cur: @@ -1251,16 +1252,19 @@ return None, None def putsubstate2changes(self, substatestate, changes): - for files in changes[:3]: - if b'.hgsubstate' in files: - return # already listed up + if isinstance(changes, list): + mar = changes[:3] + else: + mar = (changes.modified, changes.added, changes.removed) + if any((b'.hgsubstate' in files for files in mar)): + return # already listed up # not yet listed up if substatestate in b'a?': - changes[1].append(b'.hgsubstate') + mar[1].append(b'.hgsubstate') elif substatestate in b'r': - changes[2].append(b'.hgsubstate') + mar[2].append(b'.hgsubstate') else: # modified - changes[0].append(b'.hgsubstate') + mar[0].append(b'.hgsubstate') def checklocalchanges(self, repo, force=False, refresh=True): excsuffix = b'' @@ -1377,8 +1381,9 @@ else: changes = self.checklocalchanges(repo, force=True) 
commitfiles = list(inclsubs) - for files in changes[:3]: - commitfiles.extend(files) + commitfiles.extend(changes.modified) + commitfiles.extend(changes.added) + commitfiles.extend(changes.removed) match = scmutil.matchfiles(repo, commitfiles) if len(repo[None].parents()) > 1: raise error.Abort(_(b'cannot manage merge changesets')) @@ -1818,7 +1823,8 @@ if update: qp = self.qparents(repo, rev) ctx = repo[qp] - m, a, r, d = repo.status(qp, b'.')[:4] + st = repo.status(qp, b'.') + m, a, r, d = st.modified, st.added, st.removed, st.deleted if d: raise error.Abort(_(b"deletions found between repo revs")) @@ -1910,10 +1916,11 @@ # and then commit. # # this should really read: - # mm, dd, aa = repo.status(top, patchparent)[:3] + # st = repo.status(top, patchparent) # but we do it backwards to take advantage of manifest/changelog # caching against the next repo.status call - mm, aa, dd = repo.status(patchparent, top)[:3] + st = repo.status(patchparent, top) + mm, aa, dd = st.modified, st.added, st.removed ctx = repo[top] aaa = aa[:] match1 = scmutil.match(repo[None], pats, opts) @@ -1927,7 +1934,8 @@ match1 = scmutil.match(repo[None], opts=opts) else: match = scmutil.matchall(repo) - m, a, r, d = repo.status(match=match)[:4] + stb = repo.status(match=match) + m, a, r, d = stb.modified, stb.added, stb.removed, stb.deleted mm = set(mm) aa = set(aa) dd = set(dd) @@ -1966,7 +1974,8 @@ # create 'match' that includes the files to be recommitted. # apply match1 via repo.status to ensure correct case handling. 
- cm, ca, cr, cd = repo.status(patchparent, match=match1)[:4] + st = repo.status(patchparent, match=match1) + cm, ca, cr, cd = st.modified, st.added, st.removed, st.deleted allmatches = set(cm + ca + cr + cd) refreshchanges = [x.intersection(allmatches) for x in (mm, aa, dd)] @@ -2248,7 +2257,6 @@ def restore(self, repo, rev, delete=None, qupdate=None): desc = repo[rev].description().strip() lines = desc.splitlines() - i = 0 datastart = None series = [] applied = [] @@ -2777,7 +2785,7 @@ This command is deprecated. Without -c, it's implied by other relevant commands. With -c, use :hg:`init --mq` instead.""" - return qinit(ui, repo, create=opts.get(r'create_repo')) + return qinit(ui, repo, create=opts.get('create_repo')) @command( @@ -2933,7 +2941,7 @@ Returns 0 on success.""" repo.mq.qseries( - repo, missing=opts.get(r'missing'), summary=opts.get(r'summary') + repo, missing=opts.get('missing'), summary=opts.get('summary') ) return 0 @@ -2960,7 +2968,7 @@ start=t - 1, length=1, status=b'A', - summary=opts.get(r'summary'), + summary=opts.get('summary'), ) else: ui.write(_(b"no patches applied\n")) @@ -2982,7 +2990,7 @@ if end == len(q.series): ui.write(_(b"all patches applied\n")) return 1 - q.qseries(repo, start=end, length=1, summary=opts.get(r'summary')) + q.qseries(repo, start=end, length=1, summary=opts.get('summary')) @command( @@ -3005,7 +3013,7 @@ return 1 idx = q.series.index(q.applied[-2].name) q.qseries( - repo, start=idx, length=1, status=b'A', summary=opts.get(r'summary') + repo, start=idx, length=1, status=b'A', summary=opts.get('summary') ) @@ -3356,8 +3364,8 @@ applied = set(p.name for p in q.applied) patch = None args = list(args) - if opts.get(r'list'): - if args or opts.get(r'none'): + if opts.get('list'): + if args or opts.get('none'): raise error.Abort( _(b'cannot mix -l/--list with options or arguments') ) @@ -3372,7 +3380,7 @@ patch = args.pop(0) if patch is None: raise error.Abort(_(b'no patch to work with')) - if args or opts.get(r'none'): + 
if args or opts.get('none'): idx = q.findseries(patch) if idx is None: raise error.Abort(_(b'no patch named %s') % patch) @@ -3634,9 +3642,7 @@ This command is deprecated, use :hg:`rebase` instead.""" rev = repo.lookup(rev) q = repo.mq - q.restore( - repo, rev, delete=opts.get(r'delete'), qupdate=opts.get(r'update') - ) + q.restore(repo, rev, delete=opts.get('delete'), qupdate=opts.get('update')) q.savedirty() return 0 @@ -3841,9 +3847,9 @@ Returns 0 on success. """ - if not opts.get(r'applied') and not revrange: + if not opts.get('applied') and not revrange: raise error.Abort(_(b'no revisions specified')) - elif opts.get(r'applied'): + elif opts.get('applied'): revrange = (b'qbase::qtip',) + revrange q = repo.mq @@ -4072,9 +4078,9 @@ def invalidateall(self): super(mqrepo, self).invalidateall() - if localrepo.hasunfilteredcache(self, r'mq'): + if localrepo.hasunfilteredcache(self, 'mq'): # recreate mq in case queue path was changed - delattr(self.unfiltered(), r'mq') + delattr(self.unfiltered(), 'mq') def abortifwdirpatched(self, errmsg, force=False): if self.mq.applied and self.mq.checkapplied and not force: @@ -4172,16 +4178,16 @@ def mqimport(orig, ui, repo, *args, **kwargs): if util.safehasattr(repo, b'abortifwdirpatched') and not kwargs.get( - r'no_commit', False + 'no_commit', False ): repo.abortifwdirpatched( - _(b'cannot import over an applied patch'), kwargs.get(r'force') + _(b'cannot import over an applied patch'), kwargs.get('force') ) return orig(ui, repo, *args, **kwargs) def mqinit(orig, ui, *args, **kwargs): - mq = kwargs.pop(r'mq', None) + mq = kwargs.pop('mq', None) if not mq: return orig(ui, *args, **kwargs) @@ -4206,7 +4212,7 @@ """Add --mq option to operate on patch repository instead of main""" # some commands do not like getting unknown options - mq = kwargs.pop(r'mq', None) + mq = kwargs.pop('mq', None) if not mq: return orig(ui, repo, *args, **kwargs) @@ -4272,8 +4278,9 @@ dotable(commands.table) + thismodule = sys.modules["hgext.mq"] for 
extname, extmodule in extensions.extensions(): - if extmodule.__file__ != __file__: + if extmodule != thismodule: dotable(getattr(extmodule, 'cmdtable', {})) diff -r 61881b170140 -r 84a0102c05c7 hgext/narrow/__init__.py --- a/hgext/narrow/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/narrow/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -8,12 +8,6 @@ from __future__ import absolute_import -# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for -# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should -# be specifying the version(s) of Mercurial they are tested with, or -# leave the attribute unspecified. -testedwith = b'ships-with-hg-core' - from mercurial import ( localrepo, registrar, @@ -29,6 +23,12 @@ narrowwirepeer, ) +# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for +# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should +# be specifying the version(s) of Mercurial they are tested with, or +# leave the attribute unspecified. 
+testedwith = b'ships-with-hg-core' + configtable = {} configitem = registrar.configitem(configtable) # Narrowhg *has* support for serving ellipsis nodes (which are used at diff -r 61881b170140 -r 84a0102c05c7 hgext/narrow/narrowbundle2.py --- a/hgext/narrow/narrowbundle2.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/narrow/narrowbundle2.py Tue Jan 21 13:14:51 2020 -0500 @@ -62,8 +62,8 @@ raise ValueError(_(b'no common changegroup version')) version = max(cgversions) - include = sorted(filter(bool, kwargs.get(r'includepats', []))) - exclude = sorted(filter(bool, kwargs.get(r'excludepats', []))) + include = sorted(filter(bool, kwargs.get('includepats', []))) + exclude = sorted(filter(bool, kwargs.get('excludepats', []))) generateellipsesbundle2( bundler, repo, @@ -72,7 +72,7 @@ version, common, heads, - kwargs.get(r'depth', None), + kwargs.get('depth', None), ) @@ -316,7 +316,7 @@ if repo.ui.has_section(_NARROWACL_SECTION): kwargs = exchange.applynarrowacl(repo, kwargs) - if kwargs.get(r'narrow', False) and repo.ui.configbool( + if kwargs.get('narrow', False) and repo.ui.configbool( b'experimental', b'narrowservebrokenellipses' ): getbundlechangegrouppart_narrow(*args, **kwargs) diff -r 61881b170140 -r 84a0102c05c7 hgext/narrow/narrowcommands.py --- a/hgext/narrow/narrowcommands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/narrow/narrowcommands.py Tue Jan 21 13:14:51 2020 -0500 @@ -22,6 +22,7 @@ hg, narrowspec, node, + pathutil, pycompat, registrar, repair, @@ -136,8 +137,8 @@ def pullbundle2extraprepare_widen(orig, pullop, kwargs): orig(pullop, kwargs) - if opts.get(r'depth'): - kwargs[b'depth'] = opts[r'depth'] + if opts.get('depth'): + kwargs[b'depth'] = opts['depth'] wrappedextraprepare = extensions.wrappedfunction( exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen @@ -151,15 +152,15 @@ """Wraps archive command to narrow the default includes.""" if repository.NARROW_REQUIREMENT in repo.requirements: repo_includes, repo_excludes = 
repo.narrowpats - includes = set(opts.get(r'include', [])) - excludes = set(opts.get(r'exclude', [])) + includes = set(opts.get('include', [])) + excludes = set(opts.get('exclude', [])) includes, excludes, unused_invalid = narrowspec.restrictpatterns( includes, excludes, repo_includes, repo_excludes ) if includes: - opts[r'include'] = includes + opts['include'] = includes if excludes: - opts[r'exclude'] = excludes + opts['exclude'] = excludes return orig(ui, repo, *args, **opts) @@ -277,7 +278,7 @@ todelete.append(f) elif f.startswith(b'meta/'): dir = f[5:-13] - dirs = sorted(util.dirs({dir})) + [dir] + dirs = sorted(pathutil.dirs({dir})) + [dir] include = True for d in dirs: visit = newmatch.visitdir(d) diff -r 61881b170140 -r 84a0102c05c7 hgext/narrow/narrowwirepeer.py --- a/hgext/narrow/narrowwirepeer.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/narrow/narrowwirepeer.py Tue Jan 21 13:14:51 2020 -0500 @@ -33,8 +33,8 @@ # TODO: don't blindly add include/exclude wireproto # arguments to unbundle. 
include, exclude = repo.narrowpats - kwargs[r"includepats"] = b','.join(include) - kwargs[r"excludepats"] = b','.join(exclude) + kwargs["includepats"] = b','.join(include) + kwargs["excludepats"] = b','.join(exclude) return orig(cmd, *args, **kwargs) extensions.wrapfunction(peer, b'_calltwowaystream', wrapped) @@ -139,12 +139,12 @@ def peernarrowwiden(remote, **kwargs): - for ch in (r'commonheads', r'known'): + for ch in ('commonheads', 'known'): kwargs[ch] = wireprototypes.encodelist(kwargs[ch]) - for ch in (r'oldincludes', r'newincludes', r'oldexcludes', r'newexcludes'): + for ch in ('oldincludes', 'newincludes', 'oldexcludes', 'newexcludes'): kwargs[ch] = b','.join(kwargs[ch]) - kwargs[r'ellipses'] = b'%i' % bool(kwargs[r'ellipses']) + kwargs['ellipses'] = b'%i' % bool(kwargs['ellipses']) f = remote._callcompressable(b'narrow_widen', **kwargs) return bundle2.getunbundler(remote.ui, f) diff -r 61881b170140 -r 84a0102c05c7 hgext/notify.py --- a/hgext/notify.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/notify.py Tue Jan 21 13:14:51 2020 -0500 @@ -388,13 +388,13 @@ raise error.Abort(inst) # store sender and subject - sender = msg[r'From'] - subject = msg[r'Subject'] + sender = msg['From'] + subject = msg['Subject'] if sender is not None: sender = mail.headdecode(sender) if subject is not None: subject = mail.headdecode(subject) - del msg[r'From'], msg[r'Subject'] + del msg['From'], msg['Subject'] if not msg.is_multipart(): # create fresh mime message from scratch @@ -407,7 +407,7 @@ for k, v in headers: msg[k] = v - msg[r'Date'] = encoding.strfromlocal( + msg['Date'] = encoding.strfromlocal( dateutil.datestr(format=b"%a, %d %b %Y %H:%M:%S %1%2") ) @@ -421,8 +421,8 @@ maxsubject = int(self.ui.config(b'notify', b'maxsubject')) if maxsubject: subject = stringutil.ellipsis(subject, maxsubject) - msg[r'Subject'] = encoding.strfromlocal( - mail.headencode(self.ui, subject, self.charsets, self.test) + msg['Subject'] = mail.headencode( + self.ui, subject, self.charsets, 
self.test ) # try to make message have proper sender @@ -430,14 +430,14 @@ sender = self.ui.config(b'email', b'from') or self.ui.username() if b'@' not in sender or b'@localhost' in sender: sender = self.fixmail(sender) - msg[r'From'] = encoding.strfromlocal( - mail.addressencode(self.ui, sender, self.charsets, self.test) + msg['From'] = mail.addressencode( + self.ui, sender, self.charsets, self.test ) - msg[r'X-Hg-Notification'] = r'changeset %s' % ctx - if not msg[r'Message-Id']: - msg[r'Message-Id'] = messageid(ctx, self.domain, self.messageidseed) - msg[r'To'] = encoding.strfromlocal(b', '.join(sorted(subs))) + msg['X-Hg-Notification'] = 'changeset %s' % ctx + if not msg['Message-Id']: + msg['Message-Id'] = messageid(ctx, self.domain, self.messageidseed) + msg['To'] = ', '.join(sorted(subs)) msgtext = msg.as_bytes() if pycompat.ispy3 else msg.as_string() if self.test: @@ -451,7 +451,7 @@ ) mail.sendmail( self.ui, - emailutils.parseaddr(msg[r'From'])[1], + emailutils.parseaddr(msg['From'])[1], subs, msgtext, mbox=self.mbox, diff -r 61881b170140 -r 84a0102c05c7 hgext/patchbomb.py --- a/hgext/patchbomb.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/patchbomb.py Tue Jan 21 13:14:51 2020 -0500 @@ -285,7 +285,7 @@ if body: msg.attach(mail.mimeencode(ui, body, _charsets, opts.get(b'test'))) p = mail.mimetextpatch( - b'\n'.join(patchlines), b'x-patch', opts.get(b'test') + b'\n'.join(patchlines), 'x-patch', opts.get(b'test') ) binnode = nodemod.bin(node) # if node is mq patch, it will have the patch file's name as a tag @@ -306,8 +306,8 @@ disposition = r'inline' if opts.get(b'attach'): disposition = r'attachment' - p[r'Content-Disposition'] = ( - disposition + r'; filename=' + encoding.strfromlocal(patchname) + p['Content-Disposition'] = ( + disposition + '; filename=' + encoding.strfromlocal(patchname) ) msg.attach(p) else: @@ -321,10 +321,10 @@ subj = b' '.join([prefix, opts.get(b'subject') or subj]) else: subj = b' '.join([prefix, subj]) - msg[b'Subject'] = 
mail.headencode(ui, subj, _charsets, opts.get(b'test')) - msg[b'X-Mercurial-Node'] = node - msg[b'X-Mercurial-Series-Index'] = b'%i' % idx - msg[b'X-Mercurial-Series-Total'] = b'%i' % total + msg['Subject'] = mail.headencode(ui, subj, _charsets, opts.get(b'test')) + msg['X-Mercurial-Node'] = pycompat.sysstr(node) + msg['X-Mercurial-Series-Index'] = '%i' % idx + msg['X-Mercurial-Series-Total'] = '%i' % total return msg, subj, ds @@ -358,7 +358,7 @@ tmpfn = os.path.join(tmpdir, b'bundle') btype = ui.config(b'patchbomb', b'bundletype') if btype: - opts[r'type'] = btype + opts['type'] = btype try: commands.bundle(ui, repo, tmpfn, dest, **opts) return util.readfile(tmpfn) @@ -379,8 +379,8 @@ the user through the editor. """ ui = repo.ui - if opts.get(r'desc'): - body = open(opts.get(r'desc')).read() + if opts.get('desc'): + body = open(opts.get('desc')).read() else: ui.write( _(b'\nWrite the introductory message for the patch series.\n\n') @@ -403,25 +403,25 @@ """ ui = repo.ui _charsets = mail._charsets(ui) - subj = opts.get(r'subject') or prompt( + subj = opts.get('subject') or prompt( ui, b'Subject:', b'A bundle for your repository' ) body = _getdescription(repo, b'', sender, **opts) msg = emimemultipart.MIMEMultipart() if body: - msg.attach(mail.mimeencode(ui, body, _charsets, opts.get(r'test'))) - datapart = emimebase.MIMEBase(r'application', r'x-mercurial-bundle') + msg.attach(mail.mimeencode(ui, body, _charsets, opts.get('test'))) + datapart = emimebase.MIMEBase('application', 'x-mercurial-bundle') datapart.set_payload(bundle) - bundlename = b'%s.hg' % opts.get(r'bundlename', b'bundle') + bundlename = b'%s.hg' % opts.get('bundlename', b'bundle') datapart.add_header( - r'Content-Disposition', - r'attachment', + 'Content-Disposition', + 'attachment', filename=encoding.strfromlocal(bundlename), ) emailencoders.encode_base64(datapart) msg.attach(datapart) - msg[b'Subject'] = mail.headencode(ui, subj, _charsets, opts.get(r'test')) + msg['Subject'] = 
mail.headencode(ui, subj, _charsets, opts.get('test')) return [(msg, subj, None)] @@ -434,9 +434,9 @@ # use the last revision which is likely to be a bookmarked head prefix = _formatprefix( - ui, repo, revs.last(), opts.get(r'flag'), 0, len(patches), numbered=True + ui, repo, revs.last(), opts.get('flag'), 0, len(patches), numbered=True ) - subj = opts.get(r'subject') or prompt( + subj = opts.get('subject') or prompt( ui, b'(optional) Subject: ', rest=prefix, default=b'' ) if not subj: @@ -445,7 +445,7 @@ subj = prefix + b' ' + subj body = b'' - if opts.get(r'diffstat'): + if opts.get('diffstat'): # generate a cumulative diffstat of the whole patch series diffstat = patch.diffstat(sum(patches, [])) body = b'\n' + diffstat @@ -453,8 +453,8 @@ diffstat = None body = _getdescription(repo, body, sender, **opts) - msg = mail.mimeencode(ui, body, _charsets, opts.get(r'test')) - msg[b'Subject'] = mail.headencode(ui, subj, _charsets, opts.get(r'test')) + msg = mail.mimeencode(ui, body, _charsets, opts.get('test')) + msg['Subject'] = mail.headencode(ui, subj, _charsets, opts.get('test')) return (msg, subj, diffstat) @@ -522,9 +522,11 @@ def _msgid(node, timestamp): - hostname = encoding.strtolocal(socket.getfqdn()) - hostname = encoding.environ.get(b'HGHOSTNAME', hostname) - return b'<%s.%d@%s>' % (node, timestamp, hostname) + try: + hostname = encoding.strfromlocal(encoding.environ[b'HGHOSTNAME']) + except KeyError: + hostname = socket.getfqdn() + return '<%s.%d@%s>' % (node, timestamp, hostname) emailopts = [ @@ -765,8 +767,7 @@ b" do not re-specify --outgoing" ) ) - if rev and bookmark: - raise error.Abort(_(b"-r and -B are mutually exclusive")) + cmdutil.check_at_most_one_arg(opts, b'rev', b'bookmark') if outgoing or bundle: if len(revs) > 1: @@ -847,7 +848,7 @@ stropts = pycompat.strkwargs(opts) bundledata = _getbundle(repo, dest, **stropts) bundleopts = stropts.copy() - bundleopts.pop(r'bundle', None) # already processed + bundleopts.pop('bundle', None) # already 
processed msgs = _getbundlemsgs(repo, sender, bundledata, **bundleopts) else: msgs = _getpatchmsgs(repo, sender, revs, **pycompat.strkwargs(opts)) @@ -912,10 +913,11 @@ parent = opts.get(b'in_reply_to') or None # angle brackets may be omitted, they're not semantically part of the msg-id if parent is not None: - if not parent.startswith(b'<'): - parent = b'<' + parent - if not parent.endswith(b'>'): - parent += b'>' + parent = encoding.strfromlocal(parent) + if not parent.startswith('<'): + parent = '<' + parent + if not parent.endswith('>'): + parent += '>' sender_addr = eutil.parseaddr(encoding.strfromlocal(sender))[1] sender = mail.addressencode(ui, sender, _charsets, opts.get(b'test')) @@ -926,56 +928,36 @@ ) for i, (m, subj, ds) in enumerate(msgs): try: - m[b'Message-Id'] = genmsgid(m[b'X-Mercurial-Node']) + m['Message-Id'] = genmsgid(m['X-Mercurial-Node']) if not firstpatch: - firstpatch = m[b'Message-Id'] - m[b'X-Mercurial-Series-Id'] = firstpatch + firstpatch = m['Message-Id'] + m['X-Mercurial-Series-Id'] = firstpatch except TypeError: - m[b'Message-Id'] = genmsgid(b'patchbomb') + m['Message-Id'] = genmsgid('patchbomb') if parent: - m[b'In-Reply-To'] = parent - m[b'References'] = parent - if not parent or b'X-Mercurial-Node' not in m: - parent = m[b'Message-Id'] + m['In-Reply-To'] = parent + m['References'] = parent + if not parent or 'X-Mercurial-Node' not in m: + parent = m['Message-Id'] - m[b'User-Agent'] = b'Mercurial-patchbomb/%s' % util.version() - m[b'Date'] = eutil.formatdate(start_time[0], localtime=True) + m['User-Agent'] = 'Mercurial-patchbomb/%s' % util.version().decode() + m['Date'] = eutil.formatdate(start_time[0], localtime=True) start_time = (start_time[0] + 1, start_time[1]) - m[b'From'] = sender - m[b'To'] = b', '.join(to) + m['From'] = sender + m['To'] = ', '.join(to) if cc: - m[b'Cc'] = b', '.join(cc) + m['Cc'] = ', '.join(cc) if bcc: - m[b'Bcc'] = b', '.join(bcc) + m['Bcc'] = ', '.join(bcc) if replyto: - m[b'Reply-To'] = b', 
'.join(replyto) - # Fix up all headers to be native strings. - # TODO(durin42): this should probably be cleaned up above in the future. - if pycompat.ispy3: - for hdr, val in list(m.items()): - change = False - if isinstance(hdr, bytes): - del m[hdr] - hdr = pycompat.strurl(hdr) - change = True - if isinstance(val, bytes): - # header value should be ASCII since it's encoded by - # mail.headencode(), but -n/--test disables it and raw - # value of platform encoding is stored. - val = encoding.strfromlocal(val) - if not change: - # prevent duplicate headers - del m[hdr] - change = True - if change: - m[hdr] = val + m['Reply-To'] = ', '.join(replyto) if opts.get(b'test'): ui.status(_(b'displaying '), subj, b' ...\n') ui.pager(b'email') generator = mail.Generator(ui, mangle_from_=False) try: - generator.flatten(m, 0) + generator.flatten(m, False) ui.write(b'\n') except IOError as inst: if inst.errno != errno.EPIPE: @@ -987,12 +969,11 @@ progress.update(i, item=subj) if not mbox: # Exim does not remove the Bcc field - del m[b'Bcc'] + del m['Bcc'] fp = stringio() generator = mail.Generator(fp, mangle_from_=False) - generator.flatten(m, 0) + generator.flatten(m, False) alldests = to + bcc + cc - alldests = [encoding.strfromlocal(d) for d in alldests] sendmail(sender_addr, alldests, fp.getvalue()) progress.complete() diff -r 61881b170140 -r 84a0102c05c7 hgext/phabricator.py --- a/hgext/phabricator.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/phabricator.py Tue Jan 21 13:14:51 2020 -0500 @@ -11,6 +11,10 @@ revisions in a format suitable for :hg:`import`, and a ``phabupdate`` command to update statuses in batch. +A "phabstatus" view for :hg:`show` is also provided; it displays status +information of Phabricator differentials associated with unfinished +changesets. + By default, Phabricator requires ``Test Plan`` which might prevent some changeset from being sent. The requirement could be disabled by changing ``differential.require-test-plan-field`` config server side. 
@@ -60,7 +64,10 @@ encoding, error, exthelper, + graphmod, httpconnection as httpconnectionmod, + localrepo, + logcmdutil, match, mdiff, obsutil, @@ -80,6 +87,8 @@ procutil, stringutil, ) +from . import show + # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should @@ -93,6 +102,7 @@ command = eh.command configtable = eh.configtable templatekeyword = eh.templatekeyword +uisetup = eh.finaluisetup # developer config: phabricator.batchsize eh.configitem( @@ -122,6 +132,12 @@ b'phabricator.desc': b'', b'phabricator.drev': b'bold', b'phabricator.node': b'', + b'phabricator.status.abandoned': b'magenta dim', + b'phabricator.status.accepted': b'green bold', + b'phabricator.status.closed': b'green', + b'phabricator.status.needsreview': b'yellow', + b'phabricator.status.needsrevision': b'red', + b'phabricator.status.changesplanned': b'red', } _VCR_FLAGS = [ @@ -138,6 +154,44 @@ ] +@eh.wrapfunction(localrepo, "loadhgrc") +def _loadhgrc(orig, ui, wdirvfs, hgvfs, requirements): + """Load ``.arcconfig`` content into a ui instance on repository open. 
+ """ + result = False + arcconfig = {} + + try: + # json.loads only accepts bytes from 3.6+ + rawparams = encoding.unifromlocal(wdirvfs.read(b".arcconfig")) + # json.loads only returns unicode strings + arcconfig = pycompat.rapply( + lambda x: encoding.unitolocal(x) + if isinstance(x, pycompat.unicode) + else x, + pycompat.json_loads(rawparams), + ) + + result = True + except ValueError: + ui.warn(_(b"invalid JSON in %s\n") % wdirvfs.join(b".arcconfig")) + except IOError: + pass + + cfg = util.sortdict() + + if b"repository.callsign" in arcconfig: + cfg[(b"phabricator", b"callsign")] = arcconfig[b"repository.callsign"] + + if b"phabricator.uri" in arcconfig: + cfg[(b"phabricator", b"url")] = arcconfig[b"phabricator.uri"] + + if cfg: + ui.applyconfig(cfg, source=wdirvfs.join(b".arcconfig")) + + return orig(ui, wdirvfs, hgvfs, requirements) or result # Load .hg/hgrc + + def vcrcommand(name, flags, spec, helpcategory=None, optionalrepo=False): fullflags = flags + _VCR_FLAGS @@ -167,13 +221,13 @@ return request def sanitiseresponse(response): - if r'set-cookie' in response[r'headers']: - del response[r'headers'][r'set-cookie'] + if 'set-cookie' in response['headers']: + del response['headers']['set-cookie'] return response def decorate(fn): def inner(*args, **kwargs): - cassette = pycompat.fsdecode(kwargs.pop(r'test_vcr', None)) + cassette = pycompat.fsdecode(kwargs.pop('test_vcr', None)) if cassette: import hgdemandimport @@ -182,24 +236,24 @@ import vcr.stubs as stubs vcr = vcrmod.VCR( - serializer=r'json', + serializer='json', before_record_request=sanitiserequest, before_record_response=sanitiseresponse, custom_patches=[ ( urlmod, - r'httpconnection', + 'httpconnection', stubs.VCRHTTPConnection, ), ( urlmod, - r'httpsconnection', + 'httpsconnection', stubs.VCRHTTPSConnection, ), ], ) - vcr.register_matcher(r'hgmatcher', hgmatcher) - with vcr.use_cassette(cassette, match_on=[r'hgmatcher']): + vcr.register_matcher('hgmatcher', hgmatcher) + with 
vcr.use_cassette(cassette, match_on=['hgmatcher']): return fn(*args, **kwargs) return fn(*args, **kwargs) @@ -389,7 +443,7 @@ corresponding Differential Revision, and exist in the repo. """ unfi = repo.unfiltered() - nodemap = unfi.changelog.nodemap + has_node = unfi.changelog.index.has_node result = {} # {node: (oldnode?, lastdiff?, drev)} toconfirm = {} # {node: (force, {precnode}, drev)} @@ -398,17 +452,20 @@ # For tags like "D123", put them into "toconfirm" to verify later precnodes = list(obsutil.allpredecessors(unfi.obsstore, [node])) for n in precnodes: - if n in nodemap: + if has_node(n): for tag in unfi.nodetags(n): m = _differentialrevisiontagre.match(tag) if m: toconfirm[node] = (0, set(precnodes), int(m.group(1))) - continue - - # Check commit message - m = _differentialrevisiondescre.search(ctx.description()) - if m: - toconfirm[node] = (1, set(precnodes), int(m.group(r'id'))) + break + else: + continue # move to next predecessor + break # found a tag, stop + else: + # Check commit message + m = _differentialrevisiondescre.search(ctx.description()) + if m: + toconfirm[node] = (1, set(precnodes), int(m.group('id'))) # Double check if tags are genuine by collecting all old nodes from # Phabricator, and expect precursors overlap with it. @@ -454,7 +511,7 @@ if diffs: lastdiff = max(diffs, key=lambda d: int(d[b'id'])) oldnode = getnode(lastdiff) - if oldnode and oldnode not in nodemap: + if oldnode and not has_node(oldnode): oldnode = None result[newnode] = (oldnode, lastdiff, drev) @@ -462,6 +519,29 @@ return result +def getdrevmap(repo, revs): + """Return a dict mapping each rev in `revs` to their Differential Revision + ID or None. 
+ """ + result = {} + for rev in revs: + result[rev] = None + ctx = repo[rev] + # Check commit message + m = _differentialrevisiondescre.search(ctx.description()) + if m: + result[rev] = int(m.group('id')) + continue + # Check tags + for tag in repo.nodetags(ctx.node()): + m = _differentialrevisiontagre.match(tag) + if m: + result[rev] = int(m.group(1)) + break + + return result + + def getdiff(ctx, diffopts): """plain-text diff without header (user, commit message, etc)""" output = util.stringio() @@ -609,26 +689,25 @@ """ ui = fctx.repo().ui chunks = callconduit(ui, b'file.querychunks', {b'filePHID': fphid}) - progress = ui.makeprogress( + with ui.makeprogress( _(b'uploading file chunks'), unit=_(b'chunks'), total=len(chunks) - ) - for chunk in chunks: - progress.increment() - if chunk[b'complete']: - continue - bstart = int(chunk[b'byteStart']) - bend = int(chunk[b'byteEnd']) - callconduit( - ui, - b'file.uploadchunk', - { - b'filePHID': fphid, - b'byteStart': bstart, - b'data': base64.b64encode(fctx.data()[bstart:bend]), - b'dataEncoding': b'base64', - }, - ) - progress.complete() + ) as progress: + for chunk in chunks: + progress.increment() + if chunk[b'complete']: + continue + bstart = int(chunk[b'byteStart']) + bend = int(chunk[b'byteEnd']) + callconduit( + ui, + b'file.uploadchunk', + { + b'filePHID': fphid, + b'byteStart': bstart, + b'data': base64.b64encode(fctx.data()[bstart:bend]), + b'dataEncoding': b'base64', + }, + ) def uploadfile(fctx): @@ -1026,6 +1105,7 @@ opts = pycompat.byteskwargs(opts) revs = list(revs) + opts.get(b'rev', []) revs = scmutil.revrange(repo, revs) + revs.sort() # ascending order to preserve topological parent/child in phab if not revs: raise error.Abort(_(b'phabsend requires at least one changeset')) @@ -1089,7 +1169,7 @@ # Create a local tag to note the association, if commit message # does not have it already m = _differentialrevisiondescre.search(ctx.description()) - if not m or int(m.group(r'id')) != newrevid: + if not m or 
int(m.group('id')) != newrevid: tagname = b'D%d' % newrevid tags.tag( repo, @@ -1235,6 +1315,7 @@ b'needsrevision', b'closed', b'abandoned', + b'changesplanned', } @@ -1636,7 +1717,7 @@ m = _differentialrevisiondescre.search(ctx.description()) if m: return templateutil.hybriddict( - {b'url': m.group(r'url'), b'id': b"D%s" % m.group(r'id'),} + {b'url': m.group('url'), b'id': b"D%s" % m.group('id'),} ) else: tags = ctx.repo().nodetags(ctx.node()) @@ -1649,3 +1730,68 @@ return templateutil.hybriddict({b'url': url, b'id': t,}) return None + + +@eh.templatekeyword(b'phabstatus', requires={b'ctx', b'repo', b'ui'}) +def template_status(context, mapping): + """:phabstatus: String. Status of Phabricator differential. + """ + ctx = context.resource(mapping, b'ctx') + repo = context.resource(mapping, b'repo') + ui = context.resource(mapping, b'ui') + + rev = ctx.rev() + try: + drevid = getdrevmap(repo, [rev])[rev] + except KeyError: + return None + drevs = callconduit(ui, b'differential.query', {b'ids': [drevid]}) + for drev in drevs: + if int(drev[b'id']) == drevid: + return templateutil.hybriddict( + {b'url': drev[b'uri'], b'status': drev[b'statusName'],} + ) + return None + + +@show.showview(b'phabstatus', csettopic=b'work') +def phabstatusshowview(ui, repo, displayer): + """Phabricator differiential status""" + revs = repo.revs('sort(_underway(), topo)') + drevmap = getdrevmap(repo, revs) + unknownrevs, drevids, revsbydrevid = [], set([]), {} + for rev, drevid in pycompat.iteritems(drevmap): + if drevid is not None: + drevids.add(drevid) + revsbydrevid.setdefault(drevid, set([])).add(rev) + else: + unknownrevs.append(rev) + + drevs = callconduit(ui, b'differential.query', {b'ids': list(drevids)}) + drevsbyrev = {} + for drev in drevs: + for rev in revsbydrevid[int(drev[b'id'])]: + drevsbyrev[rev] = drev + + def phabstatus(ctx): + drev = drevsbyrev[ctx.rev()] + status = ui.label( + b'%(statusName)s' % drev, + b'phabricator.status.%s' % _getstatusname(drev), + ) + 
ui.write(b"\n%s %s\n" % (drev[b'uri'], status)) + + revs -= smartset.baseset(unknownrevs) + revdag = graphmod.dagwalker(repo, revs) + + ui.setconfig(b'experimental', b'graphshorten', True) + displayer._exthook = phabstatus + nodelen = show.longestshortest(repo, revs) + logcmdutil.displaygraph( + ui, + repo, + revdag, + displayer, + graphmod.asciiedges, + props={b'nodelen': nodelen}, + ) diff -r 61881b170140 -r 84a0102c05c7 hgext/rebase.py --- a/hgext/rebase.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/rebase.py Tue Jan 21 13:14:51 2020 -0500 @@ -46,6 +46,7 @@ repair, revset, revsetlang, + rewriteutil, scmutil, smartset, state as statemod, @@ -393,17 +394,13 @@ return _nothingtorebase() rebaseset = destmap.keys() - allowunstable = obsolete.isenabled(self.repo, obsolete.allowunstableopt) - if not (self.keepf or allowunstable) and self.repo.revs( - b'first(children(%ld) - %ld)', rebaseset, rebaseset - ): - raise error.Abort( - _( - b"can't remove original changesets with" - b" unrebased descendants" - ), - hint=_(b'use --keep to keep original changesets'), - ) + if not self.keepf: + try: + rewriteutil.precheck(self.repo, rebaseset, action=b'rebase') + except error.Abort as e: + if e.hint is None: + e.hint = _(b'use --keep to keep original changesets') + raise e result = buildstate(self.repo, destmap, self.collapsef) @@ -412,13 +409,6 @@ self.ui.status(_(b'nothing to rebase\n')) return _nothingtorebase() - for root in self.repo.set(b'roots(%ld)', rebaseset): - if not self.keepf and not root.mutable(): - raise error.Abort( - _(b"can't rebase public changeset %s") % root, - hint=_(b"see 'hg help phases' for details"), - ) - (self.originalwd, self.destmap, self.state) = result if self.collapsef: dests = set(self.destmap.values()) @@ -797,7 +787,6 @@ cleanup = False if cleanup: - shouldupdate = False if rebased: strippoints = [ c.node() for c in repo.set(b'roots(%ld)', rebased) @@ -809,7 +798,7 @@ shouldupdate = repo[b'.'].rev() in updateifonnodes # Update away from the 
rebase if necessary - if shouldupdate or needupdate(repo, self.state): + if shouldupdate: mergemod.update( repo, self.originalwd, branchmerge=False, force=True ) @@ -1019,20 +1008,16 @@ """ opts = pycompat.byteskwargs(opts) inmemory = ui.configbool(b'rebase', b'experimental.inmemory') - dryrun = opts.get(b'dry_run') - confirm = opts.get(b'confirm') - selactions = [k for k in [b'abort', b'stop', b'continue'] if opts.get(k)] - if len(selactions) > 1: - raise error.Abort( - _(b'cannot use --%s with --%s') % tuple(selactions[:2]) + action = cmdutil.check_at_most_one_arg(opts, b'abort', b'stop', b'continue') + if action: + cmdutil.check_incompatible_arguments( + opts, action, b'confirm', b'dry_run' ) - action = selactions[0] if selactions else None - if dryrun and action: - raise error.Abort(_(b'cannot specify both --dry-run and --%s') % action) - if confirm and action: - raise error.Abort(_(b'cannot specify both --confirm and --%s') % action) - if dryrun and confirm: - raise error.Abort(_(b'cannot specify both --confirm and --dry-run')) + cmdutil.check_incompatible_arguments( + opts, action, b'rev', b'source', b'base', b'dest' + ) + cmdutil.check_at_most_one_arg(opts, b'confirm', b'dry_run') + cmdutil.check_at_most_one_arg(opts, b'rev', b'source', b'base') if action or repo.currenttransaction() is not None: # in-memory rebase is not compatible with resuming rebases. 
@@ -1041,16 +1026,16 @@ inmemory = False if opts.get(b'auto_orphans'): - for key in opts: - if key != b'auto_orphans' and opts.get(key): - raise error.Abort( - _(b'--auto-orphans is incompatible with %s') % (b'--' + key) - ) + disallowed_opts = set(opts) - {b'auto_orphans'} + cmdutil.check_incompatible_arguments( + opts, b'auto_orphans', *disallowed_opts + ) + userrevs = list(repo.revs(opts.get(b'auto_orphans'))) opts[b'rev'] = [revsetlang.formatspec(b'%ld and orphan()', userrevs)] opts[b'dest'] = b'_destautoorphanrebase(SRC)' - if dryrun or confirm: + if opts.get(b'dry_run') or opts.get(b'confirm'): return _dryrunrebase(ui, repo, action, opts) elif action == b'stop': rbsrt = rebaseruntime(repo, ui) @@ -1071,10 +1056,9 @@ b'changesets' ), ) - if needupdate(repo, rbsrt.state): - # update to the current working revision - # to clear interrupted merge - hg.updaterepo(repo, rbsrt.originalwd, overwrite=True) + # update to the current working revision + # to clear interrupted merge + hg.updaterepo(repo, rbsrt.originalwd, overwrite=True) rbsrt._finishrebase() return 0 elif inmemory: @@ -1167,14 +1151,6 @@ ): assert action != b'stop' with repo.wlock(), repo.lock(): - # Validate input and define rebasing points - destf = opts.get(b'dest', None) - srcf = opts.get(b'source', None) - basef = opts.get(b'base', None) - revf = opts.get(b'rev', []) - # search default destination in this space - # used in the 'hg pull --rebase' case, see issue 5214. 
- destspace = opts.get(b'_destspace') if opts.get(b'interactive'): try: if extensions.find(b'histedit'): @@ -1199,10 +1175,6 @@ raise error.Abort( _(b'cannot use collapse with continue or abort') ) - if srcf or basef or destf: - raise error.Abort( - _(b'abort and continue do not allow specifying revisions') - ) if action == b'abort' and opts.get(b'tool', False): ui.warn(_(b'tool option will be ignored\n')) if action == b'continue': @@ -1215,14 +1187,17 @@ if retcode is not None: return retcode else: + # search default destination in this space + # used in the 'hg pull --rebase' case, see issue 5214. + destspace = opts.get(b'_destspace') destmap = _definedestmap( ui, repo, inmemory, - destf, - srcf, - basef, - revf, + opts.get(b'dest', None), + opts.get(b'source', None), + opts.get(b'base', None), + opts.get(b'rev', []), destspace=destspace, ) retcode = rbsrt._preparenewrebase(destmap) @@ -1267,15 +1242,9 @@ # destspace is here to work around issues with `hg pull --rebase` see # issue5214 for details - if srcf and basef: - raise error.Abort(_(b'cannot specify both a source and a base')) - if revf and basef: - raise error.Abort(_(b'cannot specify both a revision and a base')) - if revf and srcf: - raise error.Abort(_(b'cannot specify both a revision and a source')) + cmdutil.checkunfinished(repo) if not inmemory: - cmdutil.checkunfinished(repo) cmdutil.bailifchanged(repo) if ui.configbool(b'commands', b'rebase.requiredest') and not destf: @@ -1460,13 +1429,13 @@ # By convention, ``extra['branch']`` (set by extrafn) clobbers # ``branch`` (used when passing ``--keepbranches``). 
- branch = repo[p1].branch() + branch = None if b'branch' in extra: branch = extra[b'branch'] + wctx.setparents(repo[p1].node(), repo[p2].node()) memctx = wctx.tomemctx( commitmsg, - parents=(p1, p2), date=date, extra=extra, user=user, @@ -1497,14 +1466,15 @@ def rebasenode(repo, rev, p1, base, collapse, dest, wctx): - b'Rebase a single revision rev on top of p1 using base as merge ancestor' + """Rebase a single revision rev on top of p1 using base as merge ancestor""" # Merge phase # Update to destination and merge it with local + p1ctx = repo[p1] if wctx.isinmemory(): - wctx.setbase(repo[p1]) + wctx.setbase(p1ctx) else: if repo[b'.'].rev() != p1: - repo.ui.debug(b" update to %d:%s\n" % (p1, repo[p1])) + repo.ui.debug(b" update to %d:%s\n" % (p1, p1ctx)) mergemod.update(repo, p1, branchmerge=False, force=True) else: repo.ui.debug(b" already in destination\n") @@ -1512,31 +1482,30 @@ # as well as other data we litter on it in other places. wctx = repo[None] repo.dirstate.write(repo.currenttransaction()) - repo.ui.debug(b" merge against %d:%s\n" % (rev, repo[rev])) + ctx = repo[rev] + repo.ui.debug(b" merge against %d:%s\n" % (rev, ctx)) if base is not None: repo.ui.debug(b" detach base %d:%s\n" % (base, repo[base])) - # When collapsing in-place, the parent is the common ancestor, we - # have to allow merging with it. + + # See explanation in merge.graft() + mergeancestor = repo.changelog.isancestor(p1ctx.node(), ctx.node()) stats = mergemod.update( repo, rev, branchmerge=True, force=True, ancestor=base, - mergeancestor=collapse, + mergeancestor=mergeancestor, labels=[b'dest', b'source'], wc=wctx, ) if collapse: - copies.duplicatecopies(repo, wctx, rev, dest) + copies.graftcopies(wctx, ctx, repo[dest]) else: # If we're not using --collapse, we need to # duplicate copies between the revision we're - # rebasing and its first parent, but *not* - # duplicate any copies that have already been - # performed in the destination. 
- p1rev = repo[rev].p1().rev() - copies.duplicatecopies(repo, wctx, rev, p1rev, skiprev=dest) + # rebasing and its first parent. + copies.graftcopies(wctx, ctx, ctx.p1()) return stats @@ -1643,10 +1612,11 @@ def successorrevs(unfi, rev): """yield revision numbers for successors of rev""" assert unfi.filtername is None - nodemap = unfi.changelog.nodemap + get_rev = unfi.changelog.index.get_rev for s in obsutil.allsuccessors(unfi.obsstore, [unfi[rev].node()]): - if s in nodemap: - yield nodemap[s] + r = get_rev(s) + if r is not None: + yield r def defineparents(repo, rev, destmap, state, skipped, obsskipped): @@ -1790,7 +1760,7 @@ # But our merge base candidates (D and E in above case) could still be # better than the default (ancestor(F, Z) == null). Therefore still # pick one (so choose p1 above). - if sum(1 for b in bases if b != nullrev) > 1: + if sum(1 for b in set(bases) if b != nullrev) > 1: unwanted = [None, None] # unwanted[i]: unwanted revs if choose bases[i] for i, base in enumerate(bases): if base == nullrev: @@ -1852,7 +1822,7 @@ def isagitpatch(repo, patchname): - b'Return true if the given patch is in git format' + """Return true if the given patch is in git format""" mqpatch = os.path.join(repo.mq.path, patchname) for line in patch.linereader(open(mqpatch, b'rb')): if line.startswith(b'diff --git'): @@ -1861,7 +1831,7 @@ def updatemq(repo, state, skipped, **opts): - b'Update rebased mq patches - finalize and then import them' + """Update rebased mq patches - finalize and then import them""" mqrebase = {} mq = repo.mq original_series = mq.fullseries[:] @@ -1915,7 +1885,7 @@ def storecollapsemsg(repo, collapsemsg): - b'Store the collapse message to allow recovery' + """Store the collapse message to allow recovery""" collapsemsg = collapsemsg or b'' f = repo.vfs(b"last-message.txt", b"w") f.write(b"%s\n" % collapsemsg) @@ -1923,12 +1893,12 @@ def clearcollapsemsg(repo): - b'Remove collapse message file' + """Remove collapse message file""" 
repo.vfs.unlinkpath(b"last-message.txt", ignoremissing=True) def restorecollapsemsg(repo, isabort): - b'Restore previously stored collapse message' + """Restore previously stored collapse message""" try: f = repo.vfs(b"last-message.txt") collapsemsg = f.readline().strip() @@ -1945,7 +1915,7 @@ def clearstatus(repo): - b'Remove the status files' + """Remove the status files""" # Make sure the active transaction won't write the state file tr = repo.currenttransaction() if tr: @@ -1953,25 +1923,6 @@ repo.vfs.unlinkpath(b"rebasestate", ignoremissing=True) -def needupdate(repo, state): - '''check whether we should `update --clean` away from a merge, or if - somehow the working dir got forcibly updated, e.g. by older hg''' - parents = [p.rev() for p in repo[None].parents()] - - # Are we in a merge state at all? - if len(parents) < 2: - return False - - # We should be standing on the first as-of-yet unrebased commit. - firstunrebased = min( - [old for old, new in pycompat.iteritems(state) if new == nullrev] - ) - if firstunrebased in parents: - return True - - return False - - def sortsource(destmap): """yield source revisions in an order that we only rebase things once @@ -2126,16 +2077,16 @@ def pullrebase(orig, ui, repo, *args, **opts): - b'Call rebase after pull if the latter has been invoked with --rebase' - if opts.get(r'rebase'): + """Call rebase after pull if the latter has been invoked with --rebase""" + if opts.get('rebase'): if ui.configbool(b'commands', b'rebase.requiredest'): msg = _(b'rebase destination required by configuration') hint = _(b'use hg pull followed by hg rebase -d DEST') raise error.Abort(msg, hint=hint) with repo.wlock(), repo.lock(): - if opts.get(r'update'): - del opts[r'update'] + if opts.get('update'): + del opts['update'] ui.debug( b'--update and --rebase are not compatible, ignoring ' b'the update flag\n' @@ -2165,15 +2116,15 @@ if revspostpull > revsprepull: # --rev option from pull conflict with rebase own --rev # dropping it - if 
r'rev' in opts: - del opts[r'rev'] + if 'rev' in opts: + del opts['rev'] # positional argument from pull conflicts with rebase's own # --source. - if r'source' in opts: - del opts[r'source'] + if 'source' in opts: + del opts['source'] # revsprepull is the len of the repo, not revnum of tip. destspace = list(repo.changelog.revs(start=revsprepull)) - opts[r'_destspace'] = destspace + opts['_destspace'] = destspace try: rebase(ui, repo, **opts) except error.NoMergeDestAbort: @@ -2187,7 +2138,7 @@ # with warning and trumpets commands.update(ui, repo) else: - if opts.get(r'tool'): + if opts.get('tool'): raise error.Abort(_(b'--tool can only be used with --rebase')) ret = orig(ui, repo, *args, **opts) @@ -2217,7 +2168,7 @@ assert repo.filtername is None cl = repo.changelog - nodemap = cl.nodemap + get_rev = cl.index.get_rev extinctrevs = set(repo.revs(b'extinct()')) for srcrev in rebaseobsrevs: srcnode = cl.node(srcrev) @@ -2225,7 +2176,8 @@ successors = set(obsutil.allsuccessors(repo.obsstore, [srcnode])) # obsutil.allsuccessors includes node itself successors.remove(srcnode) - succrevs = {nodemap[s] for s in successors if s in nodemap} + succrevs = {get_rev(s) for s in successors} + succrevs.discard(None) if succrevs.issubset(extinctrevs): # all successors are extinct obsoleteextinctsuccessors.add(srcrev) diff -r 61881b170140 -r 84a0102c05c7 hgext/record.py --- a/hgext/record.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/record.py Tue Jan 21 13:14:51 2020 -0500 @@ -72,14 +72,14 @@ _(b'running non-interactively, use %s instead') % b'commit' ) - opts[r"interactive"] = True + opts["interactive"] = True overrides = {(b'experimental', b'crecord'): False} with ui.configoverride(overrides, b'record'): return commands.commit(ui, repo, *pats, **opts) def qrefresh(origfn, ui, repo, *pats, **opts): - if not opts[r'interactive']: + if not opts['interactive']: return origfn(ui, repo, *pats, **opts) mq = extensions.find(b'mq') @@ -123,7 +123,7 @@ repo.mq.checkpatchname(patch) def 
committomq(ui, repo, *pats, **opts): - opts[r'checkname'] = False + opts['checkname'] = False mq.new(ui, repo, patch, *pats, **opts) overrides = {(b'experimental', b'crecord'): False} @@ -142,7 +142,7 @@ def qnew(origfn, ui, repo, patch, *args, **opts): - if opts[r'interactive']: + if opts['interactive']: return _qrecord(None, ui, repo, patch, *args, **opts) return origfn(ui, repo, patch, *args, **opts) diff -r 61881b170140 -r 84a0102c05c7 hgext/releasenotes.py --- a/hgext/releasenotes.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/releasenotes.py Tue Jan 21 13:14:51 2020 -0500 @@ -20,6 +20,7 @@ from mercurial.i18n import _ from mercurial.pycompat import open from mercurial import ( + cmdutil, config, error, minirst, @@ -653,14 +654,9 @@ opts = pycompat.byteskwargs(opts) sections = releasenotessections(ui, repo) - listflag = opts.get(b'list') + cmdutil.check_incompatible_arguments(opts, b'list', b'rev', b'check') - if listflag and opts.get(b'rev'): - raise error.Abort(_(b'cannot use both \'--list\' and \'--rev\'')) - if listflag and opts.get(b'check'): - raise error.Abort(_(b'cannot use both \'--list\' and \'--check\'')) - - if listflag: + if opts.get(b'list'): return _getadmonitionlist(ui, sections) rev = opts.get(b'rev') diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/__init__.py --- a/hgext/remotefilelog/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -288,7 +288,7 @@ # Prevent 'hg manifest --all' def _manifest(orig, ui, repo, *args, **opts): - if isenabled(repo) and opts.get(r'all'): + if isenabled(repo) and opts.get('all'): raise error.Abort(_(b"--all is not supported in a shallow repo")) return orig(ui, repo, *args, **opts) @@ -344,7 +344,7 @@ def cloneshallow(orig, ui, repo, *args, **opts): - if opts.get(r'shallow'): + if opts.get('shallow'): repos = [] def pull_shallow(orig, self, *args, **kwargs): @@ -381,13 +381,9 @@ if constants.NETWORK_CAP_LEGACY_SSH_GETFILES in caps: opts = {} 
if repo.includepattern: - opts[r'includepattern'] = b'\0'.join( - repo.includepattern - ) + opts['includepattern'] = b'\0'.join(repo.includepattern) if repo.excludepattern: - opts[r'excludepattern'] = b'\0'.join( - repo.excludepattern - ) + opts['excludepattern'] = b'\0'.join(repo.excludepattern) return remote._callstream(b'stream_out_shallow', **opts) else: return orig() @@ -424,7 +420,7 @@ try: orig(ui, repo, *args, **opts) finally: - if opts.get(r'shallow'): + if opts.get('shallow'): for r in repos: if util.safehasattr(r, b'fileservice'): r.fileservice.close() @@ -723,9 +719,9 @@ remotefilelog.remotefilelog, b'addrawrevision', addrawrevision ) - def changelogadd(orig, self, *args): + def changelogadd(orig, self, *args, **kwargs): oldlen = len(self) - node = orig(self, *args) + node = orig(self, *args, **kwargs) newlen = len(self) if oldlen != newlen: for oldargs in pendingfilecommits: @@ -991,14 +987,14 @@ if not isenabled(repo): return orig(ui, repo, *pats, **opts) - follow = opts.get(r'follow') - revs = opts.get(r'rev') + follow = opts.get('follow') + revs = opts.get('rev') if pats: # Force slowpath for non-follow patterns and follows that start from # non-working-copy-parent revs. if not follow or revs: # This forces the slowpath - opts[r'removed'] = True + opts['removed'] = True # If this is a non-follow log without any revs specified, recommend that # the user add -f to speed it up. 
@@ -1067,7 +1063,7 @@ # update a revset with a date limit bgprefetchrevs = revdatelimit(ui, bgprefetchrevs) - def anon(): + def anon(unused_success): if util.safehasattr(repo, b'ranprefetch') and repo.ranprefetch: return repo.ranprefetch = True @@ -1268,18 +1264,18 @@ _(b'hg repack [OPTIONS]'), ) def repack_(ui, repo, *pats, **opts): - if opts.get(r'background'): + if opts.get('background'): repackmod.backgroundrepack( repo, - incremental=opts.get(r'incremental'), - packsonly=opts.get(r'packsonly', False), + incremental=opts.get('incremental'), + packsonly=opts.get('packsonly', False), ) return - options = {b'packsonly': opts.get(r'packsonly')} + options = {b'packsonly': opts.get('packsonly')} try: - if opts.get(r'incremental'): + if opts.get('incremental'): repackmod.incrementalrepack(repo, options=options) else: repackmod.fullrepack(repo, options=options) diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/basepack.py --- a/hgext/remotefilelog/basepack.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/basepack.py Tue Jan 21 13:14:51 2020 -0500 @@ -2,7 +2,6 @@ import collections import errno -import hashlib import mmap import os import struct @@ -20,9 +19,10 @@ util, vfs as vfsmod, ) +from mercurial.utils import hashutil from . import shallowutil -osutil = policy.importmod(r'osutil') +osutil = policy.importmod('osutil') # The pack version supported by this implementation. This will need to be # rev'd whenever the byte format changes. 
Ex: changing the fanout prefix, @@ -390,9 +390,9 @@ self.idxfp, self.idxpath = opener.mkstemp( suffix=self.INDEXSUFFIX + b'-tmp' ) - self.packfp = os.fdopen(self.packfp, r'wb+') - self.idxfp = os.fdopen(self.idxfp, r'wb+') - self.sha = hashlib.sha1() + self.packfp = os.fdopen(self.packfp, 'wb+') + self.idxfp = os.fdopen(self.idxfp, 'wb+') + self.sha = hashutil.sha1() self._closed = False # The opener provides no way of doing permission fixup on files created @@ -530,11 +530,11 @@ class indexparams(object): __slots__ = ( - r'fanoutprefix', - r'fanoutstruct', - r'fanoutcount', - r'fanoutsize', - r'indexstart', + 'fanoutprefix', + 'fanoutstruct', + 'fanoutcount', + 'fanoutsize', + 'indexstart', ) def __init__(self, prefixsize, version): diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/basestore.py --- a/hgext/remotefilelog/basestore.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/basestore.py Tue Jan 21 13:14:51 2020 -0500 @@ -1,7 +1,6 @@ from __future__ import absolute_import import errno -import hashlib import os import shutil import stat @@ -15,6 +14,7 @@ pycompat, util, ) +from mercurial.utils import hashutil from . 
import ( constants, shallowutil, @@ -166,7 +166,7 @@ # Start with a full manifest, since it'll cover the majority of files for filename in self.repo[b'tip'].manifest(): - sha = hashlib.sha1(filename).digest() + sha = hashutil.sha1(filename).digest() if sha in missingfilename: filenames[filename] = sha missingfilename.discard(sha) @@ -178,7 +178,7 @@ break files = cl.readfiles(cl.node(rev)) for filename in files: - sha = hashlib.sha1(filename).digest() + sha = hashutil.sha1(filename).digest() if sha in missingfilename: filenames[filename] = sha missingfilename.discard(sha) @@ -225,7 +225,7 @@ data = shallowutil.readfile(filepath) if self._validatecache and not self._validatedata(data, filepath): if self._validatecachelog: - with open(self._validatecachelog, b'a+') as f: + with open(self._validatecachelog, b'ab+') as f: f.write(b"corrupt %s during read\n" % filepath) os.rename(filepath, filepath + b".corrupt") raise KeyError(b"corrupt local cache file %s" % filepath) @@ -420,10 +420,10 @@ # throw a KeyError, try this many times with a full refresh between # attempts. A repack operation may have moved data from one store to # another while we were running. - self.numattempts = kwargs.get(r'numretries', 0) + 1 + self.numattempts = kwargs.get('numretries', 0) + 1 # If not-None, call this function on every retry and if the attempts are # exhausted. 
- self.retrylog = kwargs.get(r'retrylog', None) + self.retrylog = kwargs.get('retrylog', None) def markforrefresh(self): for store in self.stores: diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/contentstore.py --- a/hgext/remotefilelog/contentstore.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/contentstore.py Tue Jan 21 13:14:51 2020 -0500 @@ -40,12 +40,12 @@ super(unioncontentstore, self).__init__(*args, **kwargs) self.stores = args - self.writestore = kwargs.get(r'writestore') + self.writestore = kwargs.get('writestore') # If allowincomplete==True then the union store can return partial # delta chains, otherwise it will throw a KeyError if a full # deltachain can't be found. - self.allowincomplete = kwargs.get(r'allowincomplete', False) + self.allowincomplete = kwargs.get('allowincomplete', False) def get(self, name, node): """Fetches the full text revision contents of the given name+node pair. diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/debugcommands.py --- a/hgext/remotefilelog/debugcommands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/debugcommands.py Tue Jan 21 13:14:51 2020 -0500 @@ -6,7 +6,6 @@ # GNU General Public License version 2 or any later version. from __future__ import absolute_import -import hashlib import os import zlib @@ -21,6 +20,7 @@ pycompat, revlog, ) +from mercurial.utils import hashutil from . 
import ( constants, datapack, @@ -32,7 +32,7 @@ def debugremotefilelog(ui, path, **opts): - decompress = opts.get(r'decompress') + decompress = opts.get('decompress') size, firstnode, mapping = parsefileblob(path, decompress) @@ -61,7 +61,7 @@ def buildtemprevlog(repo, file): # get filename key - filekey = nodemod.hex(hashlib.sha1(file).digest()) + filekey = nodemod.hex(hashutil.sha1(file).digest()) filedir = os.path.join(repo.path, b'store/data', filekey) # sort all entries based on linkrev @@ -101,9 +101,9 @@ def debugindex(orig, ui, repo, file_=None, **opts): """dump the contents of an index file""" if ( - opts.get(r'changelog') - or opts.get(r'manifest') - or opts.get(r'dir') + opts.get('changelog') + or opts.get('manifest') + or opts.get('dir') or not shallowutil.isenabled(repo) or not repo.shallowmatch(file_) ): @@ -199,7 +199,7 @@ def verifyremotefilelog(ui, path, **opts): - decompress = opts.get(r'decompress') + decompress = opts.get('decompress') for root, dirs, files in os.walk(path): for file in files: @@ -262,13 +262,13 @@ path = path[: path.index(b'.data')] ui.write(b"%s:\n" % path) dpack = datapack.datapack(path) - node = opts.get(r'node') + node = opts.get('node') if node: deltachain = dpack.getdeltachain(b'', bin(node)) dumpdeltachain(ui, deltachain, **opts) return - if opts.get(r'long'): + if opts.get('long'): hashformatter = hex hashlen = 42 else: @@ -421,7 +421,7 @@ % ( hashformatter(node), hashformatter(deltabasenode), - nodemod.hex(hashlib.sha1(delta).digest()), + nodemod.hex(hashutil.sha1(delta).digest()), len(delta), ) ) diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/fileserverclient.py --- a/hgext/remotefilelog/fileserverclient.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/fileserverclient.py Tue Jan 21 13:14:51 2020 -0500 @@ -7,7 +7,6 @@ from __future__ import absolute_import -import hashlib import io import os import threading @@ -25,7 +24,10 @@ util, wireprotov1peer, ) -from mercurial.utils import procutil +from 
mercurial.utils import ( + hashutil, + procutil, +) from . import ( constants, @@ -45,12 +47,12 @@ def getcachekey(reponame, file, id): - pathhash = node.hex(hashlib.sha1(file).digest()) + pathhash = node.hex(hashutil.sha1(file).digest()) return os.path.join(reponame, pathhash[:2], pathhash[2:], id) def getlocalkey(file, id): - pathhash = node.hex(hashlib.sha1(file).digest()) + pathhash = node.hex(hashutil.sha1(file).digest()) return os.path.join(pathhash, id) @@ -663,5 +665,5 @@ self.ui.log( b'remotefilelog', b'excess remotefilelog fetching:\n%s\n', - b''.join(traceback.format_stack()), + b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()), ) diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/historypack.py --- a/hgext/remotefilelog/historypack.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/historypack.py Tue Jan 21 13:14:51 2020 -0500 @@ -1,6 +1,5 @@ from __future__ import absolute_import -import hashlib import struct from mercurial.node import hex, nullid @@ -8,6 +7,7 @@ pycompat, util, ) +from mercurial.utils import hashutil from . 
import ( basepack, constants, @@ -197,7 +197,7 @@ def _findsection(self, name): params = self.params - namehash = hashlib.sha1(name).digest() + namehash = hashutil.sha1(name).digest() fanoutkey = struct.unpack( params.fanoutstruct, namehash[: params.fanoutprefix] )[0] @@ -499,7 +499,7 @@ # Record metadata for the index self.files[filename] = (sectionstart, sectionlen) - node = hashlib.sha1(filename).digest() + node = hashutil.sha1(filename).digest() self.entries[node] = node def close(self, ledger=None): @@ -517,7 +517,7 @@ nodeindexlength = self.NODEINDEXENTRYLENGTH files = ( - (hashlib.sha1(filename).digest(), filename, offset, size) + (hashutil.sha1(filename).digest(), filename, offset, size) for filename, (offset, size) in pycompat.iteritems(self.files) ) files = sorted(files) diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/metadatastore.py --- a/hgext/remotefilelog/metadatastore.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/metadatastore.py Tue Jan 21 13:14:51 2020 -0500 @@ -12,12 +12,12 @@ super(unionmetadatastore, self).__init__(*args, **kwargs) self.stores = args - self.writestore = kwargs.get(r'writestore') + self.writestore = kwargs.get('writestore') # If allowincomplete==True then the union store can return partial # ancestor lists, otherwise it will throw a KeyError if a full # history can't be found. - self.allowincomplete = kwargs.get(r'allowincomplete', False) + self.allowincomplete = kwargs.get('allowincomplete', False) def getancestors(self, name, node, known=None): """Returns as many ancestors as we're aware of. 
diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/remotefilectx.py --- a/hgext/remotefilelog/remotefilectx.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/remotefilectx.py Tue Jan 21 13:14:51 2020 -0500 @@ -48,11 +48,11 @@ @propertycache def _changeid(self): - if r'_changeid' in self.__dict__: + if '_changeid' in self.__dict__: return self._changeid - elif r'_changectx' in self.__dict__: + elif '_changectx' in self.__dict__: return self._changectx.rev() - elif r'_descendantrev' in self.__dict__: + elif '_descendantrev' in self.__dict__: # this file context was created from a revision with a known # descendant, we can (lazily) correct for linkrev aliases linknode = self._adjustlinknode( @@ -83,7 +83,7 @@ ancestormap = self.ancestormap() p1, p2, linknode, copyfrom = ancestormap[self._filenode] - rev = self._repo.changelog.nodemap.get(linknode) + rev = self._repo.changelog.index.get_rev(linknode) if rev is not None: return rev @@ -119,7 +119,7 @@ """ lkr = self.linkrev() attrs = vars(self) - noctx = not (r'_changeid' in attrs or r'_changectx' in attrs) + noctx = not ('_changeid' in attrs or r'_changectx' in attrs) if noctx or self.rev() == lkr: return lkr linknode = self._adjustlinknode( @@ -246,11 +246,11 @@ return linknode commonlogkwargs = { - r'revs': b' '.join([hex(cl.node(rev)) for rev in revs]), - r'fnode': hex(fnode), - r'filepath': path, - r'user': shallowutil.getusername(repo.ui), - r'reponame': shallowutil.getreponame(repo.ui), + 'revs': b' '.join([hex(cl.node(rev)) for rev in revs]), + 'fnode': hex(fnode), + 'filepath': path, + 'user': shallowutil.getusername(repo.ui), + 'reponame': shallowutil.getreponame(repo.ui), } repo.ui.log(b'linkrevfixup', b'adjusting linknode\n', **commonlogkwargs) @@ -439,7 +439,7 @@ def annotate(self, *args, **kwargs): introctx = self - prefetchskip = kwargs.pop(r'prefetchskip', None) + prefetchskip = kwargs.pop('prefetchskip', None) if prefetchskip: # use introrev so prefetchskip can be accurately tested 
introrev = self.introrev() diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/repack.py --- a/hgext/remotefilelog/repack.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/repack.py Tue Jan 21 13:14:51 2020 -0500 @@ -29,7 +29,7 @@ shallowutil, ) -osutil = policy.importmod(r'osutil') +osutil = policy.importmod('osutil') class RepackAlreadyRunning(error.Abort): @@ -878,13 +878,13 @@ """ __slots__ = ( - r'filename', - r'node', - r'datasource', - r'historysource', - r'datarepacked', - r'historyrepacked', - r'gced', + 'filename', + 'node', + 'datasource', + 'historysource', + 'datarepacked', + 'historyrepacked', + 'gced', ) def __init__(self, filename, node): diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/shallowbundle.py --- a/hgext/remotefilelog/shallowbundle.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/shallowbundle.py Tue Jan 21 13:14:51 2020 -0500 @@ -153,7 +153,7 @@ try: # if serving, only send files the clients has patterns for if source == b'serve': - bundlecaps = kwargs.get(r'bundlecaps') + bundlecaps = kwargs.get('bundlecaps') includepattern = None excludepattern = None for cap in bundlecaps or []: diff -r 61881b170140 -r 84a0102c05c7 hgext/remotefilelog/shallowutil.py --- a/hgext/remotefilelog/shallowutil.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/remotefilelog/shallowutil.py Tue Jan 21 13:14:51 2020 -0500 @@ -8,7 +8,6 @@ import collections import errno -import hashlib import os import stat import struct @@ -24,6 +23,7 @@ util, ) from mercurial.utils import ( + hashutil, storageutil, stringutil, ) @@ -39,12 +39,12 @@ def getcachekey(reponame, file, id): - pathhash = node.hex(hashlib.sha1(file).digest()) + pathhash = node.hex(hashutil.sha1(file).digest()) return os.path.join(reponame, pathhash[:2], pathhash[2:], id) def getlocalkey(file, id): - pathhash = node.hex(hashlib.sha1(file).digest()) + pathhash = node.hex(hashutil.sha1(file).digest()) return os.path.join(pathhash, id) @@ -260,9 +260,9 @@ # v0, str(int(size)) 
is the header size = int(header) except ValueError: - raise RuntimeError(r"unexpected remotefilelog header: illegal format") + raise RuntimeError("unexpected remotefilelog header: illegal format") if size is None: - raise RuntimeError(r"unexpected remotefilelog header: no size found") + raise RuntimeError("unexpected remotefilelog header: no size found") return index + 1, size, flags diff -r 61881b170140 -r 84a0102c05c7 hgext/schemes.py --- a/hgext/schemes.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/schemes.py Tue Jan 21 13:14:51 2020 -0500 @@ -63,7 +63,7 @@ # leave the attribute unspecified. testedwith = b'ships-with-hg-core' -_partre = re.compile(br'\{(\d+)\}') +_partre = re.compile(br'{(\d+)\}') class ShortRepository(object): diff -r 61881b170140 -r 84a0102c05c7 hgext/share.py --- a/hgext/share.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/share.py Tue Jan 21 13:14:51 2020 -0500 @@ -122,7 +122,7 @@ if pool: pool = util.expandpath(pool) - opts[r'shareopts'] = { + opts['shareopts'] = { b'pool': pool, b'mode': ui.config(b'share', b'poolnaming'), } diff -r 61881b170140 -r 84a0102c05c7 hgext/sparse.py --- a/hgext/sparse.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/sparse.py Tue Jan 21 13:14:51 2020 -0500 @@ -153,10 +153,10 @@ def _clonesparsecmd(orig, ui, repo, *args, **opts): - include_pat = opts.get(r'include') - exclude_pat = opts.get(r'exclude') - enableprofile_pat = opts.get(r'enable_profile') - narrow_pat = opts.get(r'narrow') + include_pat = opts.get('include') + exclude_pat = opts.get('exclude') + enableprofile_pat = opts.get('enable_profile') + narrow_pat = opts.get('narrow') include = exclude = enableprofile = False if include_pat: pat = include_pat @@ -209,7 +209,7 @@ ) def _add(orig, ui, repo, *pats, **opts): - if opts.get(r'sparse'): + if opts.get('sparse'): dirs = set() for pat in pats: dirname, basename = util.split(pat) diff -r 61881b170140 -r 84a0102c05c7 hgext/split.py --- a/hgext/split.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/split.py Tue 
Jan 21 13:14:51 2020 -0500 @@ -22,11 +22,10 @@ commands, error, hg, - obsolete, - phases, pycompat, registrar, revsetlang, + rewriteutil, scmutil, ) @@ -77,45 +76,26 @@ rev = revs.first() ctx = repo[rev] + # Handle nullid specially here (instead of leaving for precheck() + # below) so we get a nicer message and error code. if rev is None or ctx.node() == nullid: ui.status(_(b'nothing to split\n')) return 1 if ctx.node() is None: raise error.Abort(_(b'cannot split working directory')) - # rewriteutil.precheck is not very useful here because: - # 1. null check is done above and it's more friendly to return 1 - # instead of abort - # 2. mergestate check is done below by cmdutil.bailifchanged - # 3. unstable check is more complex here because of --rebase - # - # So only "public" check is useful and it's checked directly here. - if ctx.phase() == phases.public: - raise error.Abort( - _(b'cannot split public changeset'), - hint=_(b"see 'hg help phases' for details"), - ) - - descendants = list(repo.revs(b'(%d::) - (%d)', rev, rev)) - alloworphaned = obsolete.isenabled(repo, obsolete.allowunstableopt) if opts.get(b'rebase'): # Skip obsoleted descendants and their descendants so the rebase # won't cause conflicts for sure. 
+ descendants = list(repo.revs(b'(%d::) - (%d)', rev, rev)) torebase = list( repo.revs( b'%ld - (%ld & obsolete())::', descendants, descendants ) ) - if not alloworphaned and len(torebase) != len(descendants): - raise error.Abort( - _(b'split would leave orphaned changesets behind') - ) else: - if not alloworphaned and descendants: - raise error.Abort( - _(b'cannot split changeset with children without rebase') - ) - torebase = () + torebase = [] + rewriteutil.precheck(repo, [rev] + torebase, b'split') if len(ctx.parents()) > 1: raise error.Abort(_(b'cannot split a merge changeset')) @@ -152,7 +132,9 @@ scmutil.movedirstate(repo, ctx.p1()) # Any modified, added, removed, deleted result means split is incomplete - incomplete = lambda repo: any(repo.status()[:4]) + def incomplete(repo): + st = repo.status() + return any((st.modified, st.added, st.removed, st.deleted)) # Main split loop while incomplete(repo): diff -r 61881b170140 -r 84a0102c05c7 hgext/sqlitestore.py --- a/hgext/sqlitestore.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/sqlitestore.py Tue Jan 21 13:14:51 2020 -0500 @@ -45,7 +45,6 @@ from __future__ import absolute_import -import hashlib import sqlite3 import struct import threading @@ -75,7 +74,10 @@ repository, util as interfaceutil, ) -from mercurial.utils import storageutil +from mercurial.utils import ( + hashutil, + storageutil, +) try: from mercurial import zstd @@ -121,19 +123,19 @@ # Deltas are stored as content-indexed blobs. # compression column holds COMPRESSION_* constant for how the # delta is encoded. - r'CREATE TABLE delta (' - r' id INTEGER PRIMARY KEY, ' - r' compression INTEGER NOT NULL, ' - r' hash BLOB UNIQUE ON CONFLICT ABORT, ' - r' delta BLOB NOT NULL ' - r')', + 'CREATE TABLE delta (' + ' id INTEGER PRIMARY KEY, ' + ' compression INTEGER NOT NULL, ' + ' hash BLOB UNIQUE ON CONFLICT ABORT, ' + ' delta BLOB NOT NULL ' + ')', # Tracked paths are denormalized to integers to avoid redundant # storage of the path name. 
- r'CREATE TABLE filepath (' - r' id INTEGER PRIMARY KEY, ' - r' path BLOB NOT NULL ' - r')', - r'CREATE UNIQUE INDEX filepath_path ' r' ON filepath (path)', + 'CREATE TABLE filepath (' + ' id INTEGER PRIMARY KEY, ' + ' path BLOB NOT NULL ' + ')', + 'CREATE UNIQUE INDEX filepath_path ON filepath (path)', # We have a single table for all file revision data. # Each file revision is uniquely described by a (path, rev) and # (path, node). @@ -145,39 +147,38 @@ # # flags column holds bitwise integer flags controlling storage options. # These flags are defined by the FLAG_* constants. - r'CREATE TABLE fileindex (' - r' id INTEGER PRIMARY KEY, ' - r' pathid INTEGER REFERENCES filepath(id), ' - r' revnum INTEGER NOT NULL, ' - r' p1rev INTEGER NOT NULL, ' - r' p2rev INTEGER NOT NULL, ' - r' linkrev INTEGER NOT NULL, ' - r' flags INTEGER NOT NULL, ' - r' deltaid INTEGER REFERENCES delta(id), ' - r' deltabaseid INTEGER REFERENCES fileindex(id), ' - r' node BLOB NOT NULL ' - r')', - r'CREATE UNIQUE INDEX fileindex_pathrevnum ' - r' ON fileindex (pathid, revnum)', - r'CREATE UNIQUE INDEX fileindex_pathnode ' - r' ON fileindex (pathid, node)', + 'CREATE TABLE fileindex (' + ' id INTEGER PRIMARY KEY, ' + ' pathid INTEGER REFERENCES filepath(id), ' + ' revnum INTEGER NOT NULL, ' + ' p1rev INTEGER NOT NULL, ' + ' p2rev INTEGER NOT NULL, ' + ' linkrev INTEGER NOT NULL, ' + ' flags INTEGER NOT NULL, ' + ' deltaid INTEGER REFERENCES delta(id), ' + ' deltabaseid INTEGER REFERENCES fileindex(id), ' + ' node BLOB NOT NULL ' + ')', + 'CREATE UNIQUE INDEX fileindex_pathrevnum ' + ' ON fileindex (pathid, revnum)', + 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)', # Provide a view over all file data for convenience. 
- r'CREATE VIEW filedata AS ' - r'SELECT ' - r' fileindex.id AS id, ' - r' filepath.id AS pathid, ' - r' filepath.path AS path, ' - r' fileindex.revnum AS revnum, ' - r' fileindex.node AS node, ' - r' fileindex.p1rev AS p1rev, ' - r' fileindex.p2rev AS p2rev, ' - r' fileindex.linkrev AS linkrev, ' - r' fileindex.flags AS flags, ' - r' fileindex.deltaid AS deltaid, ' - r' fileindex.deltabaseid AS deltabaseid ' - r'FROM filepath, fileindex ' - r'WHERE fileindex.pathid=filepath.id', - r'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION, + 'CREATE VIEW filedata AS ' + 'SELECT ' + ' fileindex.id AS id, ' + ' filepath.id AS pathid, ' + ' filepath.path AS path, ' + ' fileindex.revnum AS revnum, ' + ' fileindex.node AS node, ' + ' fileindex.p1rev AS p1rev, ' + ' fileindex.p2rev AS p2rev, ' + ' fileindex.linkrev AS linkrev, ' + ' fileindex.flags AS flags, ' + ' fileindex.deltaid AS deltaid, ' + ' fileindex.deltabaseid AS deltabaseid ' + 'FROM filepath, fileindex ' + 'WHERE fileindex.pathid=filepath.id', + 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION, ] @@ -190,22 +191,22 @@ # baseid "poisoned" to null and limited the recursive filter to # "is not null". res = db.execute( - r'WITH RECURSIVE ' - r' deltachain(deltaid, baseid) AS (' - r' SELECT deltaid, deltabaseid FROM fileindex ' - r' WHERE pathid=? AND node=? ' - r' UNION ALL ' - r' SELECT fileindex.deltaid, deltabaseid ' - r' FROM fileindex, deltachain ' - r' WHERE ' - r' fileindex.id=deltachain.baseid ' - r' AND deltachain.baseid IS NOT NULL ' - r' AND fileindex.id NOT IN ({stops}) ' - r' ) ' - r'SELECT deltachain.baseid, compression, delta ' - r'FROM deltachain, delta ' - r'WHERE delta.id=deltachain.deltaid'.format( - stops=r','.join([r'?'] * len(stoprids)) + 'WITH RECURSIVE ' + ' deltachain(deltaid, baseid) AS (' + ' SELECT deltaid, deltabaseid FROM fileindex ' + ' WHERE pathid=? AND node=? 
' + ' UNION ALL ' + ' SELECT fileindex.deltaid, deltabaseid ' + ' FROM fileindex, deltachain ' + ' WHERE ' + ' fileindex.id=deltachain.baseid ' + ' AND deltachain.baseid IS NOT NULL ' + ' AND fileindex.id NOT IN ({stops}) ' + ' ) ' + 'SELECT deltachain.baseid, compression, delta ' + 'FROM deltachain, delta ' + 'WHERE delta.id=deltachain.deltaid'.format( + stops=','.join(['?'] * len(stoprids)) ), tuple([pathid, node] + list(stoprids.keys())), ) @@ -249,13 +250,12 @@ def insertdelta(db, compression, hash, delta): try: return db.execute( - r'INSERT INTO delta (compression, hash, delta) ' - r'VALUES (?, ?, ?)', + 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)', (compression, hash, delta), ).lastrowid except sqlite3.IntegrityError: return db.execute( - r'SELECT id FROM delta WHERE hash=?', (hash,) + 'SELECT id FROM delta WHERE hash=?', (hash,) ).fetchone()[0] @@ -335,7 +335,7 @@ res = list( self._db.execute( - r'SELECT id FROM filepath WHERE path=?', (self._path,) + 'SELECT id FROM filepath WHERE path=?', (self._path,) ) ) @@ -346,10 +346,10 @@ self._pathid = res[0][0] res = self._db.execute( - r'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags ' - r'FROM fileindex ' - r'WHERE pathid=? ' - r'ORDER BY revnum ASC', + 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags ' + 'FROM fileindex ' + 'WHERE pathid=? ' + 'ORDER BY revnum ASC', (self._pathid,), ) @@ -496,11 +496,11 @@ rev = self.rev(node) res = self._db.execute( - r'SELECT' - r' node ' - r' FROM filedata ' - r' WHERE path=? AND (p1rev=? OR p2rev=?) ' - r' ORDER BY revnum ASC', + 'SELECT' + ' node ' + ' FROM filedata ' + ' WHERE path=? AND (p1rev=? OR p2rev=?) ' + ' ORDER BY revnum ASC', (self._path, rev, rev), ) @@ -598,9 +598,9 @@ # TODO perform in a single query. res = self._db.execute( - r'SELECT revnum, deltaid FROM fileindex ' - r'WHERE pathid=? ' - r' AND node in (%s)' % (r','.join([r'?'] * len(nodes))), + 'SELECT revnum, deltaid FROM fileindex ' + 'WHERE pathid=? 
' + ' AND node in (%s)' % (','.join(['?'] * len(nodes))), tuple([self._pathid] + nodes), ) @@ -608,7 +608,7 @@ for rev, deltaid in res: res = self._db.execute( - r'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?', + 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?', (self._pathid, deltaid), ) deltabases[rev] = res.fetchone()[0] @@ -726,7 +726,7 @@ entry.flags &= ~FLAG_MISSING_P1 self._db.execute( - r'UPDATE fileindex SET p1rev=?, flags=? ' r'WHERE id=?', + 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?', (self._nodetorev[p1], entry.flags, entry.rid), ) @@ -736,7 +736,7 @@ entry.flags &= ~FLAG_MISSING_P2 self._db.execute( - r'UPDATE fileindex SET p2rev=?, flags=? ' r'WHERE id=?', + 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?', (self._nodetorev[p1], entry.flags, entry.rid), ) @@ -787,7 +787,7 @@ # Find the delta to be censored. censoreddeltaid = self._db.execute( - r'SELECT deltaid FROM fileindex WHERE id=?', + 'SELECT deltaid FROM fileindex WHERE id=?', (self._revisions[censornode].rid,), ).fetchone()[0] @@ -796,8 +796,8 @@ # for those delta chains too. rows = list( self._db.execute( - r'SELECT id, pathid, node FROM fileindex ' - r'WHERE deltabaseid=? OR deltaid=?', + 'SELECT id, pathid, node FROM fileindex ' + 'WHERE deltabaseid=? OR deltaid=?', (censoreddeltaid, censoreddeltaid), ) ) @@ -809,7 +809,7 @@ self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx ) - deltahash = hashlib.sha1(fulltext).digest() + deltahash = hashutil.sha1(fulltext).digest() if self._compengine == b'zstd': deltablob = self._cctx.compress(fulltext) @@ -832,14 +832,14 @@ deltaid = insertdelta(self._db, compression, deltahash, deltablob) self._db.execute( - r'UPDATE fileindex SET deltaid=?, deltabaseid=NULL ' - r'WHERE id=?', + 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL ' + 'WHERE id=?', (deltaid, rid), ) # Now create the tombstone delta and replace the delta on the censored # node. 
- deltahash = hashlib.sha1(tombstone).digest() + deltahash = hashutil.sha1(tombstone).digest() tombstonedeltaid = insertdelta( self._db, COMPRESSION_NONE, deltahash, tombstone ) @@ -848,12 +848,12 @@ flags |= FLAG_CENSORED self._db.execute( - r'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL ' - r'WHERE pathid=? AND node=?', + 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL ' + 'WHERE pathid=? AND node=?', (flags, tombstonedeltaid, self._pathid, censornode), ) - self._db.execute(r'DELETE FROM delta WHERE id=?', (censoreddeltaid,)) + self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,)) self._refreshindex() self._revisioncache.clear() @@ -878,7 +878,7 @@ for rev in self.revs(rev): self._db.execute( - r'DELETE FROM fileindex WHERE pathid=? AND node=?', + 'DELETE FROM fileindex WHERE pathid=? AND node=?', (self._pathid, self.node(rev)), ) @@ -971,7 +971,7 @@ ): if self._pathid is None: res = self._db.execute( - r'INSERT INTO filepath (path) VALUES (?)', (self._path,) + 'INSERT INTO filepath (path) VALUES (?)', (self._path,) ) self._pathid = res.lastrowid @@ -1006,7 +1006,7 @@ # us to de-duplicate. The table is configured to ignore conflicts # and it is faster to just insert and silently noop than to look # first. 
- deltahash = hashlib.sha1(delta).digest() + deltahash = hashutil.sha1(delta).digest() if self._compengine == b'zstd': deltablob = self._cctx.compress(delta) @@ -1042,10 +1042,10 @@ p2rev = self._nodetorev[p2] rid = self._db.execute( - r'INSERT INTO fileindex (' - r' pathid, revnum, node, p1rev, p2rev, linkrev, flags, ' - r' deltaid, deltabaseid) ' - r' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', + 'INSERT INTO fileindex (' + ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, ' + ' deltaid, deltabaseid) ' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', ( self._pathid, rev, @@ -1090,7 +1090,7 @@ if current: return tr - self._dbconn.execute(r'BEGIN TRANSACTION') + self._dbconn.execute('BEGIN TRANSACTION') def committransaction(_): self._dbconn.commit() @@ -1122,7 +1122,7 @@ db = sqlite3.connect(encoding.strfromlocal(path)) db.text_factory = bytes - res = db.execute(r'PRAGMA user_version').fetchone()[0] + res = db.execute('PRAGMA user_version').fetchone()[0] # New database. if res == 0: @@ -1137,7 +1137,7 @@ else: raise error.Abort(_(b'sqlite database has unrecognized version')) - db.execute(r'PRAGMA journal_mode=WAL') + db.execute('PRAGMA journal_mode=WAL') return db diff -r 61881b170140 -r 84a0102c05c7 hgext/transplant.py --- a/hgext/transplant.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/transplant.py Tue Jan 21 13:14:51 2020 -0500 @@ -235,7 +235,7 @@ patchfile = None else: fd, patchfile = pycompat.mkstemp(prefix=b'hg-transplant-') - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') gen = patch.diff(source, parent, node, opts=diffopts) for chunk in gen: fp.write(chunk) @@ -290,7 +290,7 @@ self.ui.status(_(b'filtering %s\n') % patchfile) user, date, msg = (changelog[1], changelog[2], changelog[4]) fd, headerfile = pycompat.mkstemp(prefix=b'hg-transplant-') - fp = os.fdopen(fd, r'wb') + fp = os.fdopen(fd, 'wb') fp.write(b"# HG changeset patch\n") fp.write(b"# User %s\n" % user) fp.write(b"# Date %d %d\n" % date) @@ -443,7 +443,13 @@ ) if merge: repo.setparents(p1, 
parents[1]) - modified, added, removed, deleted = repo.status()[:4] + st = repo.status() + modified, added, removed, deleted = ( + st.modified, + st.added, + st.removed, + st.deleted, + ) if merge or modified or added or removed or deleted: n = repo.commit( message, @@ -754,22 +760,14 @@ def checkopts(opts, revs): if opts.get(b'continue'): - if opts.get(b'branch') or opts.get(b'all') or opts.get(b'merge'): - raise error.Abort( - _( - b'--continue is incompatible with ' - b'--branch, --all and --merge' - ) - ) + cmdutil.check_incompatible_arguments( + opts, b'continue', b'branch', b'all', b'merge' + ) return if opts.get(b'stop'): - if opts.get(b'branch') or opts.get(b'all') or opts.get(b'merge'): - raise error.Abort( - _( - b'--stop is incompatible with ' - b'--branch, --all and --merge' - ) - ) + cmdutil.check_incompatible_arguments( + opts, b'stop', b'branch', b'all', b'merge' + ) return if not ( opts.get(b'source') diff -r 61881b170140 -r 84a0102c05c7 hgext/uncommit.py --- a/hgext/uncommit.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/uncommit.py Tue Jan 21 13:14:51 2020 -0500 @@ -29,11 +29,11 @@ error, node, obsutil, + pathutil, pycompat, registrar, rewriteutil, scmutil, - util, ) cmdtable = {} @@ -157,7 +157,8 @@ with repo.wlock(), repo.lock(): - m, a, r, d = repo.status()[:4] + st = repo.status() + m, a, r, d = st.modified, st.added, st.removed, st.deleted isdirtypath = any(set(m + a + r + d) & set(pats)) allowdirtywcopy = opts[ b'allow_dirty_working_copy' @@ -185,7 +186,7 @@ # if not everything tracked in that directory can be # uncommitted. 
if badfiles: - badfiles -= {f for f in util.dirs(eligible)} + badfiles -= {f for f in pathutil.dirs(eligible)} for f in sorted(badfiles): if f in s.clean: diff -r 61881b170140 -r 84a0102c05c7 hgext/zeroconf/Zeroconf.py --- a/hgext/zeroconf/Zeroconf.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/zeroconf/Zeroconf.py Tue Jan 21 13:14:51 2020 -0500 @@ -684,7 +684,7 @@ break t = len & 0xC0 if t == 0x00: - result = r''.join((result, self.readUTF(off, len) + r'.')) + result = ''.join((result, self.readUTF(off, len) + '.')) off += len elif t == 0xC0: if next < 0: @@ -1429,7 +1429,7 @@ self.socket.setsockopt( socket.SOL_IP, socket.IP_ADD_MEMBERSHIP, - socket.inet_aton(_MDNS_ADDR) + socket.inet_aton(r'0.0.0.0'), + socket.inet_aton(_MDNS_ADDR) + socket.inet_aton('0.0.0.0'), ) self.listeners = [] @@ -1845,7 +1845,7 @@ self.socket.setsockopt( socket.SOL_IP, socket.IP_DROP_MEMBERSHIP, - socket.inet_aton(_MDNS_ADDR) + socket.inet_aton(r'0.0.0.0'), + socket.inet_aton(_MDNS_ADDR) + socket.inet_aton('0.0.0.0'), ) self.socket.close() diff -r 61881b170140 -r 84a0102c05c7 hgext/zeroconf/__init__.py --- a/hgext/zeroconf/__init__.py Thu Jan 09 14:19:20 2020 -0500 +++ b/hgext/zeroconf/__init__.py Tue Jan 21 13:14:51 2020 -0500 @@ -6,9 +6,9 @@ # GNU General Public License version 2 or any later version. '''discover and advertise repositories on the local network -Zeroconf-enabled repositories will be announced in a network without -the need to configure a server or a service. They can be discovered -without knowing their actual IP address. +The zeroconf extension will advertise :hg:`serve` instances over +DNS-SD so that they can be discovered using the :hg:`paths` command +without knowing the server's IP address. 
To allow other people to discover your repository using run :hg:`serve` in your repository:: @@ -55,7 +55,7 @@ # finds external-facing interface without sending any packets (Linux) try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect((r'1.0.0.1', 0)) + s.connect(('1.0.0.1', 0)) ip = s.getsockname()[0] return ip except socket.error: @@ -64,17 +64,17 @@ # Generic method, sometimes gives useless results try: dumbip = socket.gethostbyaddr(socket.gethostname())[2][0] - if r':' in dumbip: - dumbip = r'127.0.0.1' - if not dumbip.startswith(r'127.'): + if ':' in dumbip: + dumbip = '127.0.0.1' + if not dumbip.startswith('127.'): return dumbip except (socket.gaierror, socket.herror): - dumbip = r'127.0.0.1' + dumbip = '127.0.0.1' # works elsewhere, but actually sends a packet try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect((r'1.0.0.1', 1)) + s.connect(('1.0.0.1', 1)) ip = s.getsockname()[0] return ip except socket.error: @@ -87,20 +87,20 @@ global server, localip if not server: ip = getip() - if ip.startswith(r'127.'): + if ip.startswith('127.'): # if we have no internet connection, this can happen. 
return localip = socket.inet_aton(ip) server = Zeroconf.Zeroconf(ip) - hostname = socket.gethostname().split(r'.')[0] - host = hostname + r".local" - name = r"%s-%s" % (hostname, name) + hostname = socket.gethostname().split('.')[0] + host = hostname + ".local" + name = "%s-%s" % (hostname, name) # advertise to browsers svc = Zeroconf.ServiceInfo( b'_http._tcp.local.', - pycompat.bytestr(name + r'._http._tcp.local.'), + pycompat.bytestr(name + '._http._tcp.local.'), server=host, port=port, properties={b'description': desc, b'path': b"/" + path}, @@ -113,7 +113,7 @@ # advertise to Mercurial clients svc = Zeroconf.ServiceInfo( b'_hg._tcp.local.', - pycompat.bytestr(name + r'._hg._tcp.local.'), + pycompat.bytestr(name + '._hg._tcp.local.'), server=host, port=port, properties={b'description': desc, b'path': b"/" + path}, @@ -171,7 +171,7 @@ def getzcpaths(): ip = getip() - if ip.startswith(r'127.'): + if ip.startswith('127.'): return server = Zeroconf.Zeroconf(ip) l = listener() @@ -180,10 +180,10 @@ server.close() for value in l.found.values(): name = value.name[: value.name.index(b'.')] - url = r"http://%s:%s%s" % ( + url = "http://%s:%s%s" % ( socket.inet_ntoa(value.address), value.port, - value.properties.get(r"path", r"/"), + value.properties.get("path", "/"), ) yield b"zc-" + name, pycompat.bytestr(url) diff -r 61881b170140 -r 84a0102c05c7 hgweb.cgi --- a/hgweb.cgi Thu Jan 09 14:19:20 2020 -0500 +++ b/hgweb.cgi Tue Jan 21 13:14:51 2020 -0500 @@ -8,12 +8,15 @@ # Uncomment and adjust if Mercurial is not installed system-wide # (consult "installed modules" path from 'hg debuginstall'): -#import sys; sys.path.insert(0, "/path/to/python/lib") +# import sys; sys.path.insert(0, "/path/to/python/lib") # Uncomment to send python tracebacks to the browser if an error occurs: -#import cgitb; cgitb.enable() +# import cgitb; cgitb.enable() + +from mercurial import demandimport -from mercurial import demandimport; demandimport.enable() +demandimport.enable() from 
mercurial.hgweb import hgweb, wsgicgi + application = hgweb(config) wsgicgi.launch(application) diff -r 61881b170140 -r 84a0102c05c7 i18n/hggettext --- a/i18n/hggettext Thu Jan 09 14:19:20 2020 -0500 +++ b/i18n/hggettext Tue Jan 21 13:14:51 2020 -0500 @@ -57,18 +57,22 @@ def poentry(path, lineno, s): - return ('#: %s:%d\n' % (path, lineno) + - 'msgid %s\n' % normalize(s) + - 'msgstr ""\n') + return ( + '#: %s:%d\n' % (path, lineno) + + 'msgid %s\n' % normalize(s) + + 'msgstr ""\n' + ) + doctestre = re.compile(r'^ +>>> ', re.MULTILINE) + def offset(src, doc, name, lineno, default): """Compute offset or issue a warning on stdout.""" # remove doctest part, in order to avoid backslash mismatching m = doctestre.search(doc) if m: - doc = doc[:m.start()] + doc = doc[: m.start()] # Backslashes in doc appear doubled in src. end = src.find(doc.replace('\\', '\\\\')) @@ -76,9 +80,11 @@ # This can happen if the docstring contains unnecessary escape # sequences such as \" in a triple-quoted string. The problem # is that \" is turned into " and so doc wont appear in src. - sys.stderr.write("%s:%d:warning:" - " unknown docstr offset, assuming %d lines\n" - % (name, lineno, default)) + sys.stderr.write( + "%s:%d:warning:" + " unknown docstr offset, assuming %d lines\n" + % (name, lineno, default) + ) return default else: return src.count('\n', 0, end) @@ -121,7 +127,7 @@ for func, rstrip in functions: if func.__doc__: - docobj = func # this might be a proxy to provide formatted doc + docobj = func # this might be a proxy to provide formatted doc func = getattr(func, '_origfunc', func) funcmod = inspect.getmodule(func) extra = '' @@ -155,7 +161,9 @@ # accidentally import and extract strings from a Mercurial # installation mentioned in PYTHONPATH. 
sys.path.insert(0, os.getcwd()) - from mercurial import demandimport; demandimport.enable() + from mercurial import demandimport + + demandimport.enable() for path in sys.argv[1:]: if path.endswith('.txt'): rawtext(path) diff -r 61881b170140 -r 84a0102c05c7 i18n/polib.py --- a/i18n/polib.py Thu Jan 09 14:19:20 2020 -0500 +++ b/i18n/polib.py Tue Jan 21 13:14:51 2020 -0500 @@ -551,18 +551,6 @@ offsets = [] entries = self.translated_entries() - # the keys are sorted in the .mo file - def cmp(_self, other): - # msgfmt compares entries with msgctxt if it exists - self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid - other_msgid = other.msgctxt and other.msgctxt or other.msgid - if self_msgid > other_msgid: - return 1 - elif self_msgid < other_msgid: - return -1 - else: - return 0 - # add metadata entry entries.sort(key=lambda o: o.msgctxt or o.msgid) mentry = self.metadata_as_entry() diff -r 61881b170140 -r 84a0102c05c7 i18n/posplit --- a/i18n/posplit Thu Jan 09 14:19:20 2020 -0500 +++ b/i18n/posplit Tue Jan 21 13:14:51 2020 -0500 @@ -11,6 +11,7 @@ import re import sys + def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: @@ -27,6 +28,7 @@ po.append(entry) cache[entry.msgid] = entry + def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) @@ -35,13 +37,14 @@ entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry + if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] - findd = re.compile(r' *\.\. (\w+)::') # for finding directives + findd = re.compile(r' *\.\. 
(\w+)::') # for finding directives for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: @@ -65,7 +68,7 @@ newentry = mkentry(entry, delta, msgid, msgstr) mdirective = findd.match(msgid) if mdirective: - if not msgid[mdirective.end():].rstrip(): + if not msgid[mdirective.end() :].rstrip(): # only directive, nothing to translate here delta += 2 continue @@ -77,8 +80,10 @@ continue else: # lines following directly, unexpected - print('Warning: text follows line with directive' - ' %s' % directive) + print( + 'Warning: text follows line with directive' + ' %s' % directive + ) comment = 'do not translate: .. %s::' % directive if not newentry.comment: newentry.comment = comment diff -r 61881b170140 -r 84a0102c05c7 mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/ancestor.py Tue Jan 21 13:14:51 2020 -0500 @@ -16,7 +16,7 @@ pycompat, ) -parsers = policy.importmod(r'parsers') +parsers = policy.importmod('parsers') def commonancestorsheads(pfunc, *nodes): @@ -108,12 +108,12 @@ if p == nullrev: continue dp = depth[p] - nsp = sp = seen[p] + sp = seen[p] if dp <= dv: depth[p] = dv + 1 if sp != sv: interesting[sv] += 1 - nsp = seen[p] = sv + seen[p] = sv if sp: interesting[sp] -= 1 if interesting[sp] == 0: @@ -331,7 +331,7 @@ Result does not include the null revision.""" self._parentrevs = pfunc - self._initrevs = revs = [r for r in revs if r >= stoprev] + self._initrevs = [r for r in revs if r >= stoprev] self._stoprev = stoprev self._inclusive = inclusive diff -r 61881b170140 -r 84a0102c05c7 mercurial/archival.py --- a/mercurial/archival.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/archival.py Tue Jan 21 13:14:51 2020 -0500 @@ -138,8 +138,8 @@ class GzipFileWithTime(gzip.GzipFile): def __init__(self, *args, **kw): timestamp = None - if r'timestamp' in kw: - timestamp = kw.pop(r'timestamp') + if 'timestamp' in kw: + timestamp = kw.pop('timestamp') if timestamp is None: self.timestamp = time.time() else: 
@@ -154,9 +154,11 @@ fname = fname[:-3] flags = 0 if fname: - flags = gzip.FNAME + flags = gzip.FNAME # pytype: disable=module-attr self.fileobj.write(pycompat.bytechr(flags)) - gzip.write32u(self.fileobj, int(self.timestamp)) + gzip.write32u( # pytype: disable=module-attr + self.fileobj, int(self.timestamp) + ) self.fileobj.write(b'\002') self.fileobj.write(b'\377') if fname: @@ -179,7 +181,7 @@ timestamp=mtime, ) self.fileobj = gzfileobj - return tarfile.TarFile.taropen( + return tarfile.TarFile.taropen( # pytype: disable=attribute-error name, pycompat.sysstr(mode), gzfileobj ) else: @@ -220,7 +222,7 @@ if isinstance(dest, bytes): dest = pycompat.fsdecode(dest) self.z = zipfile.ZipFile( - dest, r'w', compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED + dest, 'w', compress and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED ) # Python's zipfile module emits deprecation warnings if we try @@ -234,7 +236,7 @@ def addfile(self, name, mode, islink, data): i = zipfile.ZipInfo(pycompat.fsdecode(name), self.date_time) - i.compress_type = self.z.compression + i.compress_type = self.z.compression # pytype: disable=attribute-error # unzip will not honor unix file modes unless file creator is # set to unix (id 3). 
i.create_system = 3 diff -r 61881b170140 -r 84a0102c05c7 mercurial/bookmarks.py --- a/mercurial/bookmarks.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/bookmarks.py Tue Jan 21 13:14:51 2020 -0500 @@ -78,7 +78,7 @@ self._nodemap = nodemap = {} # node: sorted([refspec, ...]) self._clean = True self._aclean = True - nm = repo.changelog.nodemap + has_node = repo.changelog.index.has_node tonode = bin # force local lookup try: with _getbkfile(repo) as bkfile: @@ -89,7 +89,7 @@ try: sha, refspec = line.split(b' ', 1) node = tonode(sha) - if node in nm: + if has_node(node): refspec = encoding.tolocal(refspec) refmap[refspec] = node nrefs = nodemap.get(node) @@ -953,38 +953,43 @@ cur = repo[b'.'].node() newact = None changes = [] - hiddenrev = None # unhide revs if any if rev: repo = scmutil.unhidehashlikerevs(repo, [rev], b'nowarn') + ctx = scmutil.revsingle(repo, rev, None) + # bookmarking wdir means creating a bookmark on p1 and activating it + activatenew = not inactive and ctx.rev() is None + if ctx.node() is None: + ctx = ctx.p1() + tgt = ctx.node() + assert tgt + for mark in names: mark = checkformat(repo, mark) if newact is None: newact = mark if inactive and mark == repo._activebookmark: deactivate(repo) - return - tgt = cur - if rev: - ctx = scmutil.revsingle(repo, rev) - if ctx.hidden(): - hiddenrev = ctx.hex()[:12] - tgt = ctx.node() + continue for bm in marks.checkconflict(mark, force, tgt): changes.append((bm, None)) changes.append((mark, tgt)) - if hiddenrev: - repo.ui.warn(_(b"bookmarking hidden changeset %s\n") % hiddenrev) + # nothing changed but for the one deactivated above + if not changes: + return + + if ctx.hidden(): + repo.ui.warn(_(b"bookmarking hidden changeset %s\n") % ctx.hex()[:12]) if ctx.obsolete(): - msg = obsutil._getfilteredreason(repo, b"%s" % hiddenrev, ctx) + msg = obsutil._getfilteredreason(repo, ctx.hex()[:12], ctx) repo.ui.warn(b"(%s)\n" % msg) marks.applychanges(repo, tr, changes) - if not inactive and cur == marks[newact] and 
not rev: + if activatenew and cur == marks[newact]: activate(repo, newact) elif cur != tgt and newact == repo._activebookmark: deactivate(repo) diff -r 61881b170140 -r 84a0102c05c7 mercurial/branchmap.py --- a/mercurial/branchmap.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/branchmap.py Tue Jan 21 13:14:51 2020 -0500 @@ -27,6 +27,23 @@ stringutil, ) +if pycompat.TYPE_CHECKING: + from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + Union, + ) + + assert any( + (Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union,) + ) + subsettable = repoviewutil.subsettable calcsize = struct.calcsize @@ -90,14 +107,14 @@ clrev = cl.rev clbranchinfo = cl.branchinfo rbheads = [] - closed = [] + closed = set() for bheads in pycompat.itervalues(remotebranchmap): rbheads += bheads for h in bheads: r = clrev(h) b, c = clbranchinfo(r) if c: - closed.append(h) + closed.add(h) if rbheads: rtiprev = max((int(clrev(node)) for node in rbheads)) @@ -124,7 +141,7 @@ def _unknownnode(node): """ raises ValueError when branchcache found a node which does not exists """ - raise ValueError(r'node %s does not exist' % pycompat.sysstr(hex(node))) + raise ValueError('node %s does not exist' % pycompat.sysstr(hex(node))) def _branchcachedesc(repo): @@ -165,6 +182,7 @@ closednodes=None, hasnode=None, ): + # type: (Union[Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]], bytes, int, Optional[bytes], Optional[Set[bytes]], Optional[Callable[[bytes], bool]]) -> None """ hasnode is a function which can be used to verify whether changelog has a given node or not. 
If it's not provided, we assume that every node we have exists in changelog """ @@ -260,7 +278,7 @@ ) if not bcache.validfor(repo): # invalidate the cache - raise ValueError(r'tip differs') + raise ValueError('tip differs') bcache.load(repo, lineiter) except (IOError, OSError): return None @@ -269,7 +287,13 @@ if repo.ui.debugflag: msg = b'invalid %s: %s\n' repo.ui.debug( - msg % (_branchcachedesc(repo), pycompat.bytestr(inst)) + msg + % ( + _branchcachedesc(repo), + pycompat.bytestr( + inst # pytype: disable=wrong-arg-types + ), + ) ) bcache = None @@ -288,7 +312,7 @@ continue node, state, label = line.split(b" ", 2) if state not in b'oc': - raise ValueError(r'invalid branch state') + raise ValueError('invalid branch state') label = encoding.tolocal(label.strip()) node = bin(node) self._entries.setdefault(label, []).append(node) @@ -640,7 +664,7 @@ # self.branchinfo = self._branchinfo # # Since we now have data in the cache, we need to drop this bypassing. - if r'branchinfo' in vars(self): + if 'branchinfo' in vars(self): del self.branchinfo def _setcachedata(self, rev, node, branchidx): diff -r 61881b170140 -r 84a0102c05c7 mercurial/bundle2.py --- a/mercurial/bundle2.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/bundle2.py Tue Jan 21 13:14:51 2020 -0500 @@ -653,7 +653,9 @@ """add a stream level parameter""" if not name: raise error.ProgrammingError(b'empty parameter name') - if name[0:1] not in pycompat.bytestr(string.ascii_letters): + if name[0:1] not in pycompat.bytestr( + string.ascii_letters # pytype: disable=wrong-arg-types + ): raise error.ProgrammingError( b'non letter first character: %s' % name ) @@ -835,9 +837,11 @@ ignored or failing. 
""" if not name: - raise ValueError(r'empty parameter name') - if name[0:1] not in pycompat.bytestr(string.ascii_letters): - raise ValueError(r'non letter first character: %s' % name) + raise ValueError('empty parameter name') + if name[0:1] not in pycompat.bytestr( + string.ascii_letters # pytype: disable=wrong-arg-types + ): + raise ValueError('non letter first character: %s' % name) try: handler = b2streamparamsmap[name.lower()] except KeyError: @@ -1141,8 +1145,8 @@ headerchunk = b''.join(header) except TypeError: raise TypeError( - r'Found a non-bytes trying to ' - r'build bundle part header: %r' % header + 'Found a non-bytes trying to ' + 'build bundle part header: %r' % header ) outdebug(ui, b'header chunk size: %i' % len(headerchunk)) yield _pack(_fpartheadersize, len(headerchunk)) @@ -1793,7 +1797,7 @@ def addpartbundlestream2(bundler, repo, **kwargs): - if not kwargs.get(r'stream', False): + if not kwargs.get('stream', False): return if not streamclone.allowservergeneration(repo): @@ -1815,8 +1819,8 @@ bundler.prefercompressed = False # get the includes and excludes - includepats = kwargs.get(r'includepats') - excludepats = kwargs.get(r'excludepats') + includepats = kwargs.get('includepats') + excludepats = kwargs.get('excludepats') narrowstream = repo.ui.configbool( b'experimental', b'server.stream-narrow-clones' @@ -1985,7 +1989,7 @@ extrakwargs = {} targetphase = inpart.params.get(b'targetphase') if targetphase is not None: - extrakwargs[r'targetphase'] = int(targetphase) + extrakwargs['targetphase'] = int(targetphase) ret = _processchangegroup( op, cg, @@ -2368,7 +2372,7 @@ if pushkeycompat: - def runhook(): + def runhook(unused_success): for hookargs in allhooks: op.repo.hook(b'pushkey', **pycompat.strkwargs(hookargs)) diff -r 61881b170140 -r 84a0102c05c7 mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/bundlerepo.py Tue Jan 21 13:14:51 2020 -0500 @@ -64,18 +64,18 @@ start = cgunpacker.tell() - size 
link = linkmapper(cs) - if node in self.nodemap: + if self.index.has_node(node): # this can happen if two branches make the same change - self.bundlerevs.add(self.nodemap[node]) + self.bundlerevs.add(self.index.rev(node)) continue for p in (p1, p2): - if p not in self.nodemap: + if not self.index.has_node(p): raise error.LookupError( p, self.indexfile, _(b"unknown parent") ) - if deltabase not in self.nodemap: + if not self.index.has_node(deltabase): raise LookupError( deltabase, self.indexfile, _(b'unknown delta base') ) @@ -93,7 +93,6 @@ node, ) self.index.append(e) - self.nodemap[node] = n self.bundlerevs.add(n) n += 1 @@ -331,7 +330,7 @@ fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix) self.tempfile = temp - with os.fdopen(fdtemp, r'wb') as fptemp: + with os.fdopen(fdtemp, 'wb') as fptemp: fptemp.write(header) while True: chunk = readfn(2 ** 18) @@ -393,7 +392,7 @@ # manifestlog implementation did not consume the manifests from the # changegroup (ex: it might be consuming trees from a separate bundle2 # part instead). So we need to manually consume it. - if r'filestart' not in self.__dict__: + if 'filestart' not in self.__dict__: self._consumemanifest() return self.filestart diff -r 61881b170140 -r 84a0102c05c7 mercurial/cext/dirs.c --- a/mercurial/cext/dirs.c Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/cext/dirs.c Tue Jan 21 13:14:51 2020 -0500 @@ -9,11 +9,12 @@ #define PY_SSIZE_T_CLEAN #include +#include #include "util.h" #ifdef IS_PY3K -#define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1] +#define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[0] #else #define PYLONG_VALUE(o) PyInt_AS_LONG(o) #endif @@ -48,12 +49,19 @@ return pos; } +/* Mercurial will fail to run on directory hierarchies deeper than + * this constant, so we should try and keep this constant as big as + * possible. 
+ */ +#define MAX_DIRS_DEPTH 2048 + static int _addpath(PyObject *dirs, PyObject *path) { const char *cpath = PyBytes_AS_STRING(path); Py_ssize_t pos = PyBytes_GET_SIZE(path); PyObject *key = NULL; int ret = -1; + size_t num_slashes = 0; /* This loop is super critical for performance. That's why we inline * access to Python structs instead of going through a supported API. @@ -65,6 +73,20 @@ * unnoticed. */ while ((pos = _finddir(cpath, pos - 1)) != -1) { PyObject *val; + ++num_slashes; + if (num_slashes > MAX_DIRS_DEPTH) { + PyErr_SetString(PyExc_ValueError, + "Directory hierarchy too deep."); + goto bail; + } + + /* Sniff for trailing slashes, a marker of an invalid input. */ + if (pos > 0 && cpath[pos - 1] == '/') { + PyErr_SetString( + PyExc_ValueError, + "found invalid consecutive slashes in path"); + goto bail; + } key = PyBytes_FromStringAndSize(cpath, pos); if (key == NULL) diff -r 61881b170140 -r 84a0102c05c7 mercurial/cext/manifest.c --- a/mercurial/cext/manifest.c Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/cext/manifest.c Tue Jan 21 13:14:51 2020 -0500 @@ -42,17 +42,17 @@ #define MANIFEST_TOO_SHORT_LINE -5 /* get the length of the path for a line */ -static size_t pathlen(line *l) +static Py_ssize_t pathlen(line *l) { const char *end = memchr(l->start, '\0', l->len); - return (end) ? (size_t)(end - l->start) : l->len; + return (end) ? 
(Py_ssize_t)(end - l->start) : l->len; } /* get the node value of a single line */ static PyObject *nodeof(line *l) { char *s = l->start; - ssize_t llen = pathlen(l); + Py_ssize_t llen = pathlen(l); PyObject *hash; if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */ PyErr_SetString(PyExc_ValueError, "manifest line too short"); @@ -76,7 +76,7 @@ static PyObject *hashflags(line *l) { char *s = l->start; - size_t plen = pathlen(l); + Py_ssize_t plen = pathlen(l); PyObject *hash = nodeof(l); /* 40 for hash, 1 for null byte, 1 for newline */ @@ -270,7 +270,7 @@ static PyObject *lmiter_iterentriesnext(PyObject *o) { - size_t pl; + Py_ssize_t pl; line *l; Py_ssize_t consumed; PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; @@ -337,7 +337,7 @@ static PyObject *lmiter_iterkeysnext(PyObject *o) { - size_t pl; + Py_ssize_t pl; line *l = lmiter_nextline((lmIter *)o); if (!l) { return NULL; diff -r 61881b170140 -r 84a0102c05c7 mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/cext/parsers.c Tue Jan 21 13:14:51 2020 -0500 @@ -667,7 +667,7 @@ void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 13; +static const int version = 16; static void module_init(PyObject *mod) { diff -r 61881b170140 -r 84a0102c05c7 mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/cext/revlog.c Tue Jan 21 13:14:51 2020 -0500 @@ -37,6 +37,11 @@ int children[16]; } nodetreenode; +typedef struct { + int abi_version; + int (*index_parents)(PyObject *, int, int *); +} Revlog_CAPI; + /* * A base-16 trie for fast node->rev mapping. * @@ -62,10 +67,9 @@ * This class has two behaviors. * * When used in a list-like way (with integer keys), we decode an - * entry in a RevlogNG index file on demand. Our last entry is a - * sentinel, always a nullid. We have limited support for + * entry in a RevlogNG index file on demand. 
We have limited support for * integer-keyed insert and delete, only at elements right before the - * sentinel. + * end. * * With string keys, we lazily perform a reverse mapping from node to * rev, using a base-16 trie. @@ -2065,6 +2069,29 @@ } } +static PyObject *index_m_has_node(indexObject *self, PyObject *args) +{ + int ret = index_contains(self, args); + if (ret < 0) + return NULL; + return PyBool_FromLong((long)ret); +} + +static PyObject *index_m_rev(indexObject *self, PyObject *val) +{ + char *node; + int rev; + + if (node_check(val, &node) == -1) + return NULL; + rev = index_find_node(self, node, 20); + if (rev >= -1) + return PyInt_FromLong(rev); + if (rev == -2) + raise_revlog_error(); + return NULL; +} + typedef uint64_t bitmask; /* @@ -2443,7 +2470,7 @@ /* * Delete a numeric range of revs, which must be at the end of the - * range, but exclude the sentinel nullid entry. + * range. */ static int index_slice_del(indexObject *self, PyObject *item) { @@ -2489,7 +2516,7 @@ if (self->ntinitialized) { Py_ssize_t i; - for (i = start + 1; i < self->length; i++) { + for (i = start; i < self->length; i++) { const char *node = index_node_existing(self, i); if (node == NULL) return -1; @@ -2500,7 +2527,10 @@ index_invalidate_added(self, 0); if (self->ntrev > start) self->ntrev = (int)start; + } else if (self->added) { + Py_CLEAR(self->added); } + self->length = start; if (start < self->raw_length) { if (self->cache) { @@ -2723,6 +2753,12 @@ {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, "clear the index caches"}, {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"}, + {"get_rev", (PyCFunction)index_m_get, METH_VARARGS, + "return `rev` associated with a node or None"}, + {"has_node", (PyCFunction)index_m_has_node, METH_O, + "return True if the node exist in the index"}, + {"rev", (PyCFunction)index_m_rev, METH_O, + "return `rev` associated with a node or raise RevlogError"}, {"computephasesmapsets", 
(PyCFunction)compute_phases_map_sets, METH_VARARGS, "compute phases"}, {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS, @@ -3001,6 +3037,13 @@ }; #endif /* WITH_RUST */ +static Revlog_CAPI CAPI = { + /* increment the abi_version field upon each change in the Revlog_CAPI + struct or in the ABI of the listed functions */ + 1, + HgRevlogIndex_GetParents, +}; + void revlog_module_init(PyObject *mod) { PyObject *caps = NULL; @@ -3024,11 +3067,9 @@ if (nullentry) PyObject_GC_UnTrack(nullentry); - caps = PyCapsule_New(HgRevlogIndex_GetParents, - "mercurial.cext.parsers.index_get_parents_CAPI", - NULL); + caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL); if (caps != NULL) - PyModule_AddObject(mod, "index_get_parents_CAPI", caps); + PyModule_AddObject(mod, "revlog_CAPI", caps); #ifdef WITH_RUST rustlazyancestorsType.tp_new = PyType_GenericNew; diff -r 61881b170140 -r 84a0102c05c7 mercurial/changegroup.py --- a/mercurial/changegroup.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/changegroup.py Tue Jan 21 13:14:51 2020 -0500 @@ -85,7 +85,7 @@ fh = open(filename, b"wb", 131072) else: fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg") - fh = os.fdopen(fd, r"wb") + fh = os.fdopen(fd, "wb") cleanup = filename for c in chunks: fh.write(c) @@ -287,8 +287,6 @@ def revmap(x): return cl.rev(x) - changesets = 0 - try: # The transaction may already carry source information. In this # case we use the top level data. 
We overwrite the argument @@ -315,15 +313,15 @@ ) self.callback = progress.increment - efiles = set() + efilesset = set() def onchangelog(cl, node): - efiles.update(cl.readfiles(node)) + efilesset.update(cl.readfiles(node)) self.changelogheader() deltas = self.deltaiter() cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog) - efiles = len(efiles) + efiles = len(efilesset) if not cgnodes: repo.ui.develwarn( @@ -436,7 +434,7 @@ if changesets > 0: - def runhooks(): + def runhooks(unused_success): # These hooks run when the lock releases, not when the # transaction closes. So it's possible for the changelog # to have changed since we last saw it. @@ -1150,7 +1148,9 @@ def makelookupmflinknode(tree, nodes): if fastpathlinkrev: assert not tree - return manifests.__getitem__ + return ( + manifests.__getitem__ # pytype: disable=unsupported-operands + ) def lookupmflinknode(x): """Callback for looking up the linknode for manifests. diff -r 61881b170140 -r 84a0102c05c7 mercurial/changelog.py --- a/mercurial/changelog.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/changelog.py Tue Jan 21 13:14:51 2020 -0500 @@ -21,7 +21,6 @@ error, pycompat, revlog, - util, ) from .utils import ( dateutil, @@ -168,10 +167,10 @@ def _divertopener(opener, target): """build an opener that writes in 'target.a' instead of 'target'""" - def _divert(name, mode=b'r', checkambig=False): + def _divert(name, mode=b'r', checkambig=False, **kwargs): if name != target: - return opener(name, mode) - return opener(name + b".a", mode) + return opener(name, mode, **kwargs) + return opener(name + b".a", mode, **kwargs) return _divert @@ -179,9 +178,10 @@ def _delayopener(opener, target, buf): """build an opener that stores chunks in 'buf' instead of 'target'""" - def _delay(name, mode=b'r', checkambig=False): + def _delay(name, mode=b'r', checkambig=False, **kwargs): if name != target: - return opener(name, mode) + return opener(name, mode, **kwargs) + assert not kwargs return 
appender(opener, name, mode, buf) return _delay @@ -212,10 +212,10 @@ """ __slots__ = ( - r'_offsets', - r'_text', - r'_sidedata', - r'_cpsd', + '_offsets', + '_text', + '_sidedata', + '_cpsd', ) def __new__(cls, text, sidedata, cpsd): @@ -405,112 +405,8 @@ self.filteredrevs = frozenset() self._copiesstorage = opener.options.get(b'copies-storage') - def tiprev(self): - for i in pycompat.xrange(len(self) - 1, -2, -1): - if i not in self.filteredrevs: - return i - - def tip(self): - """filtered version of revlog.tip""" - return self.node(self.tiprev()) - - def __contains__(self, rev): - """filtered version of revlog.__contains__""" - return 0 <= rev < len(self) and rev not in self.filteredrevs - - def __iter__(self): - """filtered version of revlog.__iter__""" - if len(self.filteredrevs) == 0: - return revlog.revlog.__iter__(self) - - def filterediter(): - for i in pycompat.xrange(len(self)): - if i not in self.filteredrevs: - yield i - - return filterediter() - - def revs(self, start=0, stop=None): - """filtered version of revlog.revs""" - for i in super(changelog, self).revs(start, stop): - if i not in self.filteredrevs: - yield i - - def _checknofilteredinrevs(self, revs): - """raise the appropriate error if 'revs' contains a filtered revision - - This returns a version of 'revs' to be used thereafter by the caller. - In particular, if revs is an iterator, it is converted into a set. 
- """ - safehasattr = util.safehasattr - if safehasattr(revs, '__next__'): - # Note that inspect.isgenerator() is not true for iterators, - revs = set(revs) - - filteredrevs = self.filteredrevs - if safehasattr(revs, 'first'): # smartset - offenders = revs & filteredrevs - else: - offenders = filteredrevs.intersection(revs) - - for rev in offenders: - raise error.FilteredIndexError(rev) - return revs - - def headrevs(self, revs=None): - if revs is None and self.filteredrevs: - try: - return self.index.headrevsfiltered(self.filteredrevs) - # AttributeError covers non-c-extension environments and - # old c extensions without filter handling. - except AttributeError: - return self._headrevs() - - if self.filteredrevs: - revs = self._checknofilteredinrevs(revs) - return super(changelog, self).headrevs(revs) - - def strip(self, *args, **kwargs): - # XXX make something better than assert - # We can't expect proper strip behavior if we are filtered. - assert not self.filteredrevs - super(changelog, self).strip(*args, **kwargs) - - def rev(self, node): - """filtered version of revlog.rev""" - r = super(changelog, self).rev(node) - if r in self.filteredrevs: - raise error.FilteredLookupError( - hex(node), self.indexfile, _(b'filtered node') - ) - return r - - def node(self, rev): - """filtered version of revlog.node""" - if rev in self.filteredrevs: - raise error.FilteredIndexError(rev) - return super(changelog, self).node(rev) - - def linkrev(self, rev): - """filtered version of revlog.linkrev""" - if rev in self.filteredrevs: - raise error.FilteredIndexError(rev) - return super(changelog, self).linkrev(rev) - - def parentrevs(self, rev): - """filtered version of revlog.parentrevs""" - if rev in self.filteredrevs: - raise error.FilteredIndexError(rev) - return super(changelog, self).parentrevs(rev) - - def flags(self, rev): - """filtered version of revlog.flags""" - if rev in self.filteredrevs: - raise error.FilteredIndexError(rev) - return super(changelog, 
self).flags(rev) - def delayupdate(self, tr): - b"delay visibility of index updates to other readers" + """delay visibility of index updates to other readers""" if not self._delayed: if len(self) == 0: @@ -528,7 +424,7 @@ tr.addfinalize(b'cl-%i' % id(self), self._finalize) def _finalize(self, tr): - b"finalize index updates" + """finalize index updates""" self._delayed = False self.opener = self._realopener # move redirected index data back into place @@ -548,7 +444,8 @@ self._enforceinlinesize(tr) def _writepending(self, tr): - b"create a file containing the unfinalized state for pretxnchangegroup" + """create a file containing the unfinalized state for + pretxnchangegroup""" if self._delaybuf: # make a temporary copy of the index fp1 = self._realopener(self.indexfile) diff -r 61881b170140 -r 84a0102c05c7 mercurial/chgserver.py --- a/mercurial/chgserver.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/chgserver.py Tue Jan 21 13:14:51 2020 -0500 @@ -41,7 +41,6 @@ from __future__ import absolute_import -import hashlib import inspect import os import re @@ -67,6 +66,7 @@ ) from .utils import ( + hashutil, procutil, stringutil, ) @@ -74,7 +74,7 @@ def _hashlist(items): """return sha1 hexdigest for a list""" - return node.hex(hashlib.sha1(stringutil.pprint(items)).digest()) + return node.hex(hashutil.sha1(stringutil.pprint(items)).digest()) # sensitive config sections affecting confighash @@ -345,9 +345,9 @@ _iochannels = [ # server.ch, ui.fp, mode - (b'cin', b'fin', r'rb'), - (b'cout', b'fout', r'wb'), - (b'cerr', b'ferr', r'wb'), + (b'cin', b'fin', 'rb'), + (b'cout', b'fout', 'wb'), + (b'cerr', b'ferr', 'wb'), ] @@ -505,7 +505,7 @@ path = self._readstr() if not path: return - self.ui.log(b'chgserver', b'chdir to %r\n', path) + self.ui.log(b'chgserver', b"chdir to '%s'\n", path) os.chdir(path) def setumask(self): @@ -549,6 +549,41 @@ except ValueError: raise ValueError(b'unexpected value in setenv request') self.ui.log(b'chgserver', b'setenv: %r\n', 
sorted(newenv.keys())) + + # Python3 has some logic to "coerce" the C locale to a UTF-8 capable + # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of + # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be + # disabled with PYTHONCOERCECLOCALE=0 in the environment. + # + # When fromui is called via _inithashstate, python has already set + # this, so that's in the environment right when we start up the hg + # process. Then chg will call us and tell us to set the environment to + # the one it has; this might NOT have LC_CTYPE, so we'll need to + # carry-forward the LC_CTYPE that was coerced in these situations. + # + # If this is not handled, we will fail config+env validation and fail + # to start chg. If this is just ignored instead of carried forward, we + # may have different behavior between chg and non-chg. + if pycompat.ispy3: + # Rename for wordwrapping purposes + oldenv = encoding.environ + if not any( + e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv] + ): + keys = [b'LC_CTYPE', b'LC_ALL', b'LANG'] + old_keys = [k for k, v in oldenv.items() if k in keys and v] + new_keys = [k for k, v in newenv.items() if k in keys and v] + # If the user's environment (from chg) doesn't have ANY of the + # keys that python looks for, and the environment (from + # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8, + # carry it forward. + if ( + not new_keys + and old_keys == [b'LC_CTYPE'] + and oldenv[b'LC_CTYPE'] == b'C.UTF-8' + ): + newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE'] + encoding.environ.clear() encoding.environ.update(newenv) diff -r 61881b170140 -r 84a0102c05c7 mercurial/cmdutil.py --- a/mercurial/cmdutil.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/cmdutil.py Tue Jan 21 13:14:51 2020 -0500 @@ -24,6 +24,7 @@ open, setattr, ) +from .thirdparty import attr from . 
import ( bookmarks, @@ -61,6 +62,15 @@ stringutil, ) +if pycompat.TYPE_CHECKING: + from typing import ( + Any, + Dict, + ) + + for t in (Any, Dict): + assert t + stringio = util.stringio # templates of common command options @@ -250,16 +260,45 @@ _linebelow = b"^HG: ------------------------ >8 ------------------------$" +def check_at_most_one_arg(opts, *args): + """abort if more than one of the arguments are in opts + + Returns the unique argument or None if none of them were specified. + """ + + def to_display(name): + return pycompat.sysbytes(name).replace(b'_', b'-') + + previous = None + for x in args: + if opts.get(x): + if previous: + raise error.Abort( + _(b'cannot specify both --%s and --%s') + % (to_display(previous), to_display(x)) + ) + previous = x + return previous + + +def check_incompatible_arguments(opts, first, *others): + """abort if the first argument is given along with any of the others + + Unlike check_at_most_one_arg(), `others` are not mutually exclusive + among themselves. + """ + for other in others: + check_at_most_one_arg(opts, first, other) + + def resolvecommitoptions(ui, opts): """modify commit options dict to handle related options The return value indicates that ``rewrite.update-timestamp`` is the reason the ``date`` option is set. """ - if opts.get(b'date') and opts.get(b'currentdate'): - raise error.Abort(_(b'--date and --currentdate are mutually exclusive')) - if opts.get(b'user') and opts.get(b'currentuser'): - raise error.Abort(_(b'--user and --currentuser are mutually exclusive')) + check_at_most_one_arg(opts, b'date', b'currentdate') + check_at_most_one_arg(opts, b'user', b'currentuser') datemaydiffer = False # date-only change should be ignored? 
@@ -320,7 +359,7 @@ def setupwrapcolorwrite(ui): # wrap ui.write so diff output can be labeled/colorized def wrapwrite(orig, *args, **kw): - label = kw.pop(r'label', b'') + label = kw.pop('label', b'') for chunk, l in patch.difflabel(lambda: args): orig(chunk, label=label + l) @@ -347,7 +386,7 @@ ui, originalhunks, recordfn, operation ) except crecordmod.fallbackerror as e: - ui.warn(b'%s\n' % e.message) + ui.warn(b'%s\n' % e) ui.warn(_(b'falling back to text mode\n')) return patch.filterpatch(ui, originalhunks, match, operation) @@ -418,9 +457,7 @@ force = opts.get(b'force') if not force: - vdirs = [] match = matchmod.badmatch(match, fail) - match.explicitdir = vdirs.append status = repo.status(match=match) @@ -429,13 +466,13 @@ with repo.ui.configoverride(overrides, b'record'): # subrepoutil.precommit() modifies the status tmpstatus = scmutil.status( - copymod.copy(status[0]), - copymod.copy(status[1]), - copymod.copy(status[2]), - copymod.copy(status[3]), - copymod.copy(status[4]), - copymod.copy(status[5]), - copymod.copy(status[6]), + copymod.copy(status.modified), + copymod.copy(status.added), + copymod.copy(status.removed), + copymod.copy(status.deleted), + copymod.copy(status.unknown), + copymod.copy(status.ignored), + copymod.copy(status.clean), # pytype: disable=wrong-arg-count ) # Force allows -X subrepo to skip the subrepo. 
@@ -448,7 +485,7 @@ raise error.Abort(dirtyreason) if not force: - repo.checkcommitpatterns(wctx, vdirs, match, status, fail) + repo.checkcommitpatterns(wctx, match, status, fail) diffopts = patch.difffeatureopts( ui, opts=opts, @@ -761,7 +798,7 @@ tersedict[st].sort() tersedlist.append(tersedict[st]) - return tersedlist + return scmutil.status(*tersedlist) def _commentlines(raw): @@ -771,48 +808,101 @@ return b'\n'.join(commentedlines) + b'\n' -def _conflictsmsg(repo): - mergestate = mergemod.mergestate.read(repo) - if not mergestate.active(): - return - - m = scmutil.match(repo[None]) - unresolvedlist = [f for f in mergestate.unresolved() if m(f)] - if unresolvedlist: - mergeliststr = b'\n'.join( - [ - b' %s' % util.pathto(repo.root, encoding.getcwd(), path) - for path in sorted(unresolvedlist) - ] - ) - msg = ( - _( - '''Unresolved merge conflicts: +@attr.s(frozen=True) +class morestatus(object): + reporoot = attr.ib() + unfinishedop = attr.ib() + unfinishedmsg = attr.ib() + activemerge = attr.ib() + unresolvedpaths = attr.ib() + _formattedpaths = attr.ib(init=False, default=set()) + _label = b'status.morestatus' + + def formatfile(self, path, fm): + self._formattedpaths.add(path) + if self.activemerge and path in self.unresolvedpaths: + fm.data(unresolved=True) + + def formatfooter(self, fm): + if self.unfinishedop or self.unfinishedmsg: + fm.startitem() + fm.data(itemtype=b'morestatus') + + if self.unfinishedop: + fm.data(unfinished=self.unfinishedop) + statemsg = ( + _(b'The repository is in an unfinished *%s* state.') + % self.unfinishedop + ) + fm.plain(b'%s\n' % _commentlines(statemsg), label=self._label) + if self.unfinishedmsg: + fm.data(unfinishedmsg=self.unfinishedmsg) + + # May also start new data items. 
+ self._formatconflicts(fm) + + if self.unfinishedmsg: + fm.plain( + b'%s\n' % _commentlines(self.unfinishedmsg), label=self._label + ) + + def _formatconflicts(self, fm): + if not self.activemerge: + return + + if self.unresolvedpaths: + mergeliststr = b'\n'.join( + [ + b' %s' + % util.pathto(self.reporoot, encoding.getcwd(), path) + for path in self.unresolvedpaths + ] + ) + msg = ( + _( + '''Unresolved merge conflicts: %s To mark files as resolved: hg resolve --mark FILE''' + ) + % mergeliststr ) - % mergeliststr - ) - else: - msg = _(b'No unresolved merge conflicts.') - - return _commentlines(msg) - - -def morestatus(repo, fm): + + # If any paths with unresolved conflicts were not previously + # formatted, output them now. + for f in self.unresolvedpaths: + if f in self._formattedpaths: + # Already output. + continue + fm.startitem() + # We can't claim to know the status of the file - it may just + # have been in one of the states that were not requested for + # display, so it could be anything. 
+ fm.data(itemtype=b'file', path=f, unresolved=True) + + else: + msg = _(b'No unresolved merge conflicts.') + + fm.plain(b'%s\n' % _commentlines(msg), label=self._label) + + +def readmorestatus(repo): + """Returns a morestatus object if the repo has unfinished state.""" statetuple = statemod.getrepostate(repo) - label = b'status.morestatus' + mergestate = mergemod.mergestate.read(repo) + activemerge = mergestate.active() + if not statetuple and not activemerge: + return None + + unfinishedop = unfinishedmsg = unresolved = None if statetuple: - state, helpfulmsg = statetuple - statemsg = _(b'The repository is in an unfinished *%s* state.') % state - fm.plain(b'%s\n' % _commentlines(statemsg), label=label) - conmsg = _conflictsmsg(repo) - if conmsg: - fm.plain(b'%s\n' % conmsg, label=label) - if helpfulmsg: - fm.plain(b'%s\n' % _commentlines(helpfulmsg), label=label) + unfinishedop, unfinishedmsg = statetuple + if activemerge: + unresolved = sorted(mergestate.unresolved()) + return morestatus( + repo.root, unfinishedop, unfinishedmsg, activemerge, unresolved + ) def findpossible(cmd, table, strict=False): @@ -991,8 +1081,8 @@ if merge and repo.dirstate.p2() != nullid: raise error.Abort(_(b'outstanding uncommitted merge'), hint=hint) - modified, added, removed, deleted = repo.status()[:4] - if modified or added or removed or deleted: + st = repo.status() + if st.modified or st.added or st.removed or st.deleted: raise error.Abort(_(b'uncommitted changes'), hint=hint) ctx = repo[None] for s in sorted(ctx.substate): @@ -1001,13 +1091,12 @@ def logmessage(ui, opts): """ get the log message according to -m and -l option """ + + check_at_most_one_arg(opts, b'message', b'logfile') + message = opts.get(b'message') logfile = opts.get(b'logfile') - if message and logfile: - raise error.Abort( - _(b'options --message and --logfile are mutually exclusive') - ) if not message and logfile: try: if isstdiofilename(logfile): @@ -1289,7 +1378,7 @@ if isinstance(r, revlog.revlog): pass 
elif util.safehasattr(r, b'_revlog'): - r = r._revlog + r = r._revlog # pytype: disable=attribute-error elif r is not None: raise error.Abort(_(b'%r does not appear to be a revlog') % r) @@ -1764,6 +1853,8 @@ overrides = {} if partial: overrides[(b'ui', b'allowemptycommit')] = True + if opts.get(b'secret'): + overrides[(b'phases', b'new-commit')] = b'secret' with repo.ui.configoverride(overrides, b'import'): n = repo.commit( message, user, date, match=m, editor=editor, extra=extra @@ -2022,7 +2113,7 @@ rev = ctx.rev() if rev in results: ui.status( - _(b"found revision %s from %s\n") + _(b"found revision %d from %s\n") % (rev, dateutil.datestr(results[rev])) ) return b'%d' % rev @@ -2338,12 +2429,16 @@ def fns_generator(): if allfiles: - fiter = iter(ctx) + + def bad(f, msg): + pass + + for f in ctx.matches(matchmod.badmatch(match, bad)): + yield f else: - fiter = ctx.files() - for f in fiter: - if match(f): - yield f + for f in ctx.files(): + if match(f): + yield f fns = fns_generator() prepare(ctx, fns) @@ -2397,7 +2492,7 @@ submatch = matchmod.subdirmatcher(subpath, match) subprefix = repo.wvfs.reljoin(prefix, subpath) subuipathfn = scmutil.subdiruipathfn(subpath, uipathfn) - if opts.get(r'subrepos'): + if opts.get('subrepos'): bad.extend( sub.add(ui, submatch, subprefix, subuipathfn, False, **opts) ) @@ -2410,7 +2505,7 @@ _(b"skipping missing subrepository: %s\n") % uipathfn(subpath) ) - if not opts.get(r'dry_run'): + if not opts.get('dry_run'): rejected = wctx.add(names, prefix) bad.extend(f for f in rejected if f in match.files()) return bad @@ -2565,7 +2660,7 @@ ): ret = 0 s = repo.status(match=m, clean=True) - modified, added, deleted, clean = s[0], s[1], s[3], s[6] + modified, added, deleted, clean = s.modified, s.added, s.deleted, s.clean wctx = repo[None] @@ -2606,7 +2701,7 @@ progress.complete() # warn about failure to delete explicit files/dirs - deleteddirs = util.dirs(deleted) + deleteddirs = pathutil.dirs(deleted) files = m.files() progress = 
ui.makeprogress( _(b'deleting'), total=len(files), unit=_(b'files') @@ -2876,7 +2971,8 @@ if len(old.parents()) > 1: # ctx.files() isn't reliable for merges, so fall back to the # slower repo.status() method - files = {fn for st in base.status(old)[:3] for fn in st} + st = base.status(old) + files = set(st.modified) | set(st.added) | set(st.removed) else: files = set(old.files()) @@ -3044,11 +3140,13 @@ # selectively update the dirstate only for the amended files. dirstate = repo.dirstate - # Update the state of the files which were added and - # and modified in the amend to "normal" in the dirstate. + # Update the state of the files which were added and modified in the + # amend to "normal" in the dirstate. We need to use "normallookup" since + # the files may have changed since the command started; using "normal" + # would mark them as clean but with uncommitted contents. normalfiles = set(wctx.modified() + wctx.added()) & filestoamend for f in normalfiles: - dirstate.normal(f) + dirstate.normallookup(f) # Update the state of files which were removed in the amend # to "removed" in the dirstate. @@ -3958,6 +4056,7 @@ def readgraftstate(repo, graftstate): + # type: (Any, statemod.cmdstate) -> Dict[bytes, Any] """read the graft state file and return a dict of the data stored in it""" try: return graftstate.read() diff -r 61881b170140 -r 84a0102c05c7 mercurial/color.py --- a/mercurial/color.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/color.py Tue Jan 21 13:14:51 2020 -0500 @@ -145,6 +145,9 @@ b'status.unknown': b'magenta bold underline', b'tags.normal': b'green', b'tags.local': b'black bold', + b'upgrade-repo.requirement.preserved': b'cyan', + b'upgrade-repo.requirement.added': b'green', + b'upgrade-repo.requirement.removed': b'red', } @@ -184,7 +187,7 @@ # noisy and use ui.debug(). 
ui.debug(b"no terminfo entry for %s\n" % e) del ui._terminfoparams[key] - if not curses.tigetstr(r'setaf') or not curses.tigetstr(r'setab'): + if not curses.tigetstr('setaf') or not curses.tigetstr('setab'): # Only warn about missing terminfo entries if we explicitly asked for # terminfo mode and we're in a formatted terminal. if mode == b"terminfo" and formatted: @@ -330,7 +333,7 @@ def valideffect(ui, effect): - b'Determine if the effect is valid or not.' + """Determine if the effect is valid or not.""" return (not ui._terminfoparams and effect in _activeeffects(ui)) or ( effect in ui._terminfoparams or effect[:-11] in ui._terminfoparams ) @@ -353,9 +356,9 @@ else: return curses.tigetstr(pycompat.sysstr(val)) elif bg: - return curses.tparm(curses.tigetstr(r'setab'), val) + return curses.tparm(curses.tigetstr('setab'), val) else: - return curses.tparm(curses.tigetstr(r'setaf'), val) + return curses.tparm(curses.tigetstr('setaf'), val) def _mergeeffects(text, start, stop): @@ -377,7 +380,7 @@ def _render_effects(ui, text, effects): - b'Wrap text in commands to turn on each effect.' 
+ """Wrap text in commands to turn on each effect.""" if not text: return text if ui._terminfoparams: @@ -435,30 +438,30 @@ if pycompat.iswindows: import ctypes - _kernel32 = ctypes.windll.kernel32 + _kernel32 = ctypes.windll.kernel32 # pytype: disable=module-attr _WORD = ctypes.c_ushort _INVALID_HANDLE_VALUE = -1 class _COORD(ctypes.Structure): - _fields_ = [(r'X', ctypes.c_short), (r'Y', ctypes.c_short)] + _fields_ = [('X', ctypes.c_short), ('Y', ctypes.c_short)] class _SMALL_RECT(ctypes.Structure): _fields_ = [ - (r'Left', ctypes.c_short), - (r'Top', ctypes.c_short), - (r'Right', ctypes.c_short), - (r'Bottom', ctypes.c_short), + ('Left', ctypes.c_short), + ('Top', ctypes.c_short), + ('Right', ctypes.c_short), + ('Bottom', ctypes.c_short), ] class _CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure): _fields_ = [ - (r'dwSize', _COORD), - (r'dwCursorPosition', _COORD), - (r'wAttributes', _WORD), - (r'srWindow', _SMALL_RECT), - (r'dwMaximumWindowSize', _COORD), + ('dwSize', _COORD), + ('dwCursorPosition', _COORD), + ('wAttributes', _WORD), + ('srWindow', _SMALL_RECT), + ('dwMaximumWindowSize', _COORD), ] _STD_OUTPUT_HANDLE = 0xFFFFFFF5 # (DWORD)-11 @@ -529,7 +532,7 @@ ) def win32print(ui, writefunc, text, **opts): - label = opts.get(r'label', b'') + label = opts.get('label', b'') attr = origattr def mapcolor(val, attr): diff -r 61881b170140 -r 84a0102c05c7 mercurial/commands.py --- a/mercurial/commands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/commands.py Tue Jan 21 13:14:51 2020 -0500 @@ -180,7 +180,7 @@ use --dry-run/-n to dry run the command. 
""" - dryrun = opts.get(r'dry_run') + dryrun = opts.get('dry_run') abortstate = cmdutil.getunfinishedstate(repo) if not abortstate: raise error.Abort(_(b'no operation in progress')) @@ -362,7 +362,7 @@ b'', b'skip', [], - _(b'revision to not display (EXPERIMENTAL)'), + _(b'revset to not display (EXPERIMENTAL)'), _(b'REV'), ), ] @@ -559,7 +559,7 @@ ml = max(sizes) formats.append([sep + b' ' * (ml - w) + b'%s' for w in sizes]) else: - formats.append([b'%s' for x in l]) + formats.append([b'%s'] * len(l)) pieces.append(l) for f, p, n in zip(zip(*formats), zip(*pieces), lines): @@ -1073,7 +1073,7 @@ raise error.Abort(_(b'current bisect revision is a merge')) if rev: node = repo[scmutil.revsingle(repo, rev, node)].node() - try: + with hbisect.restore_state(repo, state, node): while changesets: # update state state[b'current'] = [node] @@ -1105,9 +1105,6 @@ # update to next check node = nodes[0] mayupdate(repo, node, show_stats=False) - finally: - state[b'current'] = [node] - hbisect.save_state(repo, state) hbisect.printresult(ui, repo, state, displayer, nodes, bgood) return @@ -1229,13 +1226,9 @@ rev = opts.get(b'rev') inactive = opts.get(b'inactive') # meaning add/rename to inactive bookmark - selactions = [k for k in [b'delete', b'rename', b'list'] if opts.get(k)] - if len(selactions) > 1: - raise error.Abort( - _(b'--%s and --%s are incompatible') % tuple(selactions[:2]) - ) - if selactions: - action = selactions[0] + action = cmdutil.check_at_most_one_arg(opts, b'delete', b'rename', b'list') + if action: + cmdutil.check_incompatible_arguments(opts, action, b'rev') elif names or rev: action = b'add' elif inactive: @@ -1243,10 +1236,7 @@ else: action = b'list' - if rev and action in {b'delete', b'rename', b'list'}: - raise error.Abort(_(b"--rev is incompatible with --%s") % action) - if inactive and action in {b'delete', b'list'}: - raise error.Abort(_(b"--inactive is incompatible with --%s") % action) + cmdutil.check_incompatible_arguments(opts, b'inactive', 
b'delete', b'list') if not names and action in {b'add', b'delete'}: raise error.Abort(_(b"bookmark name required")) @@ -1892,8 +1882,7 @@ Returns 0 on success. """ opts = pycompat.byteskwargs(opts) - if opts.get(b'noupdate') and opts.get(b'updaterev'): - raise error.Abort(_(b"cannot specify both --noupdate and --updaterev")) + cmdutil.check_at_most_one_arg(opts, b'noupdate', b'updaterev') # --include/--exclude can come from narrow or sparse. includepats, excludepats = None, None @@ -2019,8 +2008,8 @@ def _docommit(ui, repo, *pats, **opts): - if opts.get(r'interactive'): - opts.pop(r'interactive') + if opts.get('interactive'): + opts.pop('interactive') ret = cmdutil.dorecord( ui, repo, commit, None, False, cmdutil.recordfilter, *pats, **opts ) @@ -2116,13 +2105,13 @@ if not node: stat = cmdutil.postcommitstatus(repo, pats, opts) - if stat[3]: + if stat.deleted: ui.status( _( b"nothing changed (%d missing files, see " b"'hg status')\n" ) - % len(stat[3]) + % len(stat.deleted) ) else: ui.status(_(b"nothing changed\n")) @@ -2234,9 +2223,11 @@ for t, f in rcutil.rccomponents(): if t == b'path': ui.debug(b'read config from: %s\n' % f) + elif t == b'resource': + ui.debug(b'read config from: resource:%s.%s\n' % (f[0], f[1])) elif t == b'items': - for section, name, value, source in f: - ui.debug(b'set config by: %s\n' % source) + # Don't print anything for 'items'. + pass else: raise error.ProgrammingError(b'unknown rctype: %s' % t) untrusted = bool(opts.get(b'untrusted')) @@ -2295,7 +2286,7 @@ use --dry-run/-n to dry run the command. 
""" - dryrun = opts.get(r'dry_run') + dryrun = opts.get('dry_run') contstate = cmdutil.getunfinishedstate(repo) if not contstate: raise error.Abort(_(b'no operation in progress')) @@ -2375,7 +2366,7 @@ def debugcomplete(ui, cmd=b'', **opts): """returns the completion list associated with the given command""" - if opts.get(r'options'): + if opts.get('options'): options = [] otables = [globalopts] if cmd: @@ -2614,8 +2605,7 @@ bookmark = opts.get(b'bookmark') changesets += tuple(opts.get(b'rev', [])) - if bookmark and changesets: - raise error.Abort(_(b"-r and -B are mutually exclusive")) + cmdutil.check_at_most_one_arg(opts, b'rev', b'bookmark') if bookmark: if bookmark not in repo._bookmarks: @@ -2974,14 +2964,7 @@ # list of new nodes created by ongoing graft statedata[b'newnodes'] = [] - if opts.get(b'user') and opts.get(b'currentuser'): - raise error.Abort(_(b'--user and --currentuser are mutually exclusive')) - if opts.get(b'date') and opts.get(b'currentdate'): - raise error.Abort(_(b'--date and --currentdate are mutually exclusive')) - if not opts.get(b'user') and opts.get(b'currentuser'): - opts[b'user'] = ui.username() - if not opts.get(b'date') and opts.get(b'currentdate'): - opts[b'date'] = b"%d %d" % dateutil.makedate() + cmdutil.resolvecommitoptions(ui, opts) editor = cmdutil.getcommiteditor( editform=b'graft', **pycompat.strkwargs(opts) @@ -3096,17 +3079,12 @@ # already, they'd have been in the graftstate. 
if not (cont or opts.get(b'force')) and basectx is None: # check for ancestors of dest branch - crev = repo[b'.'].rev() - ancestors = repo.changelog.ancestors([crev], inclusive=True) - # XXX make this lazy in the future - # don't mutate while iterating, create a copy - for rev in list(revs): - if rev in ancestors: - ui.warn( - _(b'skipping ancestor revision %d:%s\n') % (rev, repo[rev]) - ) - # XXX remove on list is slow - revs.remove(rev) + ancestors = repo.revs(b'%ld & (::.)', revs) + for rev in ancestors: + ui.warn(_(b'skipping ancestor revision %d:%s\n') % (rev, repo[rev])) + + revs = [r for r in revs if r not in ancestors] + if not revs: return -1 @@ -3123,7 +3101,7 @@ # The only changesets we can be sure doesn't contain grafts of any # revs, are the ones that are common ancestors of *all* revs: - for rev in repo.revs(b'only(%d,ancestor(%ld))', crev, revs): + for rev in repo.revs(b'only(%d,ancestor(%ld))', repo[b'.'].rev(), revs): ctx = repo[rev] n = ctx.extra().get(b'source') if n in ids: @@ -3446,6 +3424,9 @@ def grepbody(fn, rev, body): matches[rev].setdefault(fn, []) m = matches[rev][fn] + if body is None: + return + for lnum, cstart, cend, line in matchlines(body): s = linestate(line, lnum, cstart, cend) m.append(s) @@ -3453,13 +3434,13 @@ def difflinestates(a, b): sm = difflib.SequenceMatcher(None, a, b) for tag, alo, ahi, blo, bhi in sm.get_opcodes(): - if tag == r'insert': + if tag == 'insert': for i in pycompat.xrange(blo, bhi): yield (b'+', b[i]) - elif tag == r'delete': + elif tag == 'delete': for i in pycompat.xrange(alo, ahi): yield (b'-', a[i]) - elif tag == r'replace': + elif tag == 'replace': for i in pycompat.xrange(alo, ahi): yield (b'-', a[i]) for i in pycompat.xrange(blo, bhi): @@ -3581,6 +3562,19 @@ getrenamed = scmutil.getrenamedfn(repo) + def get_file_content(filename, filelog, filenode, context, revision): + try: + content = filelog.read(filenode) + except error.WdirUnsupported: + content = context[filename].data() + except 
error.CensoredNodeError: + content = None + ui.warn( + _(b'cannot search in censored file: %(filename)s:%(revnum)s\n') + % {b'filename': filename, b'revnum': pycompat.bytestr(revision)} + ) + return content + def prep(ctx, fns): rev = ctx.rev() pctx = ctx.p1() @@ -3607,17 +3601,15 @@ files.append(fn) if fn not in matches[rev]: - try: - content = flog.read(fnode) - except error.WdirUnsupported: - content = ctx[fn].data() + content = get_file_content(fn, flog, fnode, ctx, rev) grepbody(fn, rev, content) pfn = copy or fn if pfn not in matches[parent]: try: - fnode = pctx.filenode(pfn) - grepbody(pfn, parent, flog.read(fnode)) + pfnode = pctx.filenode(pfn) + pcontent = get_file_content(pfn, flog, pfnode, pctx, parent) + grepbody(pfn, parent, pcontent) except error.LookupError: pass @@ -3775,7 +3767,7 @@ Returns 0 if successful. """ - keep = opts.get(r'system') or [] + keep = opts.get('system') or [] if len(keep) == 0: if pycompat.sysplatform.startswith(b'win'): keep.append(b'windows') @@ -4022,6 +4014,7 @@ _(b'NUM'), ), (b'b', b'base', b'', _(b'base path (DEPRECATED)'), _(b'PATH')), + (b'', b'secret', None, _(b'use the secret phase for committing')), (b'e', b'edit', False, _(b'invoke editor on commit messages')), ( b'f', @@ -4170,6 +4163,8 @@ update = not opts.get(b'bypass') if not update and opts.get(b'no_commit'): raise error.Abort(_(b'cannot use --no-commit with --bypass')) + if opts.get(b'secret') and opts.get(b'no_commit'): + raise error.Abort(_(b'cannot use --no-commit with --secret')) try: sim = float(opts.get(b'similarity') or 0) except ValueError: @@ -4874,6 +4869,13 @@ node = scmutil.revsingle(repo, node).node() if not node and not abort: + if ui.configbool(b'commands', b'merge.require-rev'): + raise error.Abort( + _( + b'configuration requires specifying revision to merge ' + b'with' + ) + ) node = repo[destutil.destmerge(repo)].node() if opts.get(b'preview'): @@ -5685,7 +5687,7 @@ """ ret = repo.recover() if ret: - if opts[r'verify']: + if opts['verify']: 
return hg.verify(repo) else: msg = _( @@ -6330,7 +6332,7 @@ _(b'rollback is disabled because it is unsafe'), hint=b'see `hg help -v rollback` for information', ) - return repo.rollback(dryrun=opts.get(r'dry_run'), force=opts.get(r'force')) + return repo.rollback(dryrun=opts.get('dry_run'), force=opts.get('force')) @command( @@ -6803,7 +6805,6 @@ end = b'\0' else: end = b'\n' - copy = {} states = b'modified added removed deleted unknown ignored clean'.split() show = [k for k in states if opts.get(k)] if opts.get(b'all'): @@ -6840,8 +6841,13 @@ opts.get(b'subrepos'), ) - changestates = zip(states, pycompat.iterbytestr(b'MAR!?IC'), stat) - + changestates = zip( + states, + pycompat.iterbytestr(b'MAR!?IC'), + [getattr(stat, s.decode('utf8')) for s in states], + ) + + copy = {} if ( opts.get(b'all') or opts.get(b'copies') @@ -6849,6 +6855,12 @@ ) and not opts.get(b'no_status'): copy = copies.pathcopies(ctx1, ctx2, m) + morestatus = None + if ( + ui.verbose or ui.configbool(b'commands', b'status.verbose') + ) and not ui.plain(): + morestatus = cmdutil.readmorestatus(repo) + ui.pager(b'status') fm = ui.formatter(b'status', opts) fmt = b'%s' + end @@ -6860,7 +6872,7 @@ for f in files: fm.startitem() fm.context(ctx=ctx2) - fm.data(path=f) + fm.data(itemtype=b'file', path=f) fm.condwrite(showchar, b'status', b'%s ', char, label=label) fm.plain(fmt % uipathfn(f), label=label) if f in copy: @@ -6869,11 +6881,11 @@ (b' %s' + end) % uipathfn(copy[f]), label=b'status.copied', ) - - if ( - ui.verbose or ui.configbool(b'commands', b'status.verbose') - ) and not ui.plain(): - cmdutil.morestatus(repo, fm) + if morestatus: + morestatus.formatfile(f, fm) + + if morestatus: + morestatus.formatfooter(fm) fm.end() @@ -7480,7 +7492,7 @@ ) modheads = bundle2.combinechangegroupresults(op) - return postincoming(ui, repo, modheads, opts.get(r'update'), None, None) + return postincoming(ui, repo, modheads, opts.get('update'), None, None) @command( @@ -7511,7 +7523,7 @@ _(b'DATE'), ), ], - 
_(b'hg unshelve [OPTION]... [FILE]... [-n SHELVED]'), + _(b'hg unshelve [OPTION]... [[-n] SHELVED]'), helpcategory=command.CATEGORY_WORKING_DIRECTORY, ) def unshelve(ui, repo, *shelved, **opts): @@ -7535,9 +7547,9 @@ that causes a conflict. This reverts the unshelved changes, and leaves the bundle in place.) - If bare shelved change (when no files are specified, without interactive, - include and exclude option) was done on newly created branch it would - restore branch information to the working directory. + If bare shelved change (without interactive, include and exclude + option) was done on newly created branch it would restore branch + information to the working directory. After a successful unshelve, the shelved changes are stored in a backup directory. Only the N most recent backups are kept. N @@ -7641,11 +7653,11 @@ Returns 0 on success, 1 if there are unresolved files. """ - rev = opts.get(r'rev') - date = opts.get(r'date') - clean = opts.get(r'clean') - check = opts.get(r'check') - merge = opts.get(r'merge') + rev = opts.get('rev') + date = opts.get('date') + clean = opts.get('clean') + check = opts.get('check') + merge = opts.get('merge') if rev and node: raise error.Abort(_(b"please specify just one revision")) @@ -7688,7 +7700,7 @@ ctx = scmutil.revsingle(repo, rev, default=None) rev = ctx.rev() hidden = ctx.hidden() - overrides = {(b'ui', b'forcemerge'): opts.get(r'tool', b'')} + overrides = {(b'ui', b'forcemerge'): opts.get('tool', b'')} with ui.configoverride(overrides, b'update'): ret = hg.updatetotally( ui, repo, rev, brev, clean=clean, updatecheck=updatecheck diff -r 61881b170140 -r 84a0102c05c7 mercurial/commandserver.py --- a/mercurial/commandserver.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/commandserver.py Tue Jan 21 13:14:51 2020 -0500 @@ -64,7 +64,7 @@ self.out.flush() def __getattr__(self, attr): - if attr in (r'isatty', r'fileno', r'tell', r'seek'): + if attr in ('isatty', 'fileno', 'tell', 'seek'): raise AttributeError(attr) 
return getattr(self.out, attr) @@ -180,7 +180,7 @@ __next__ = next def __getattr__(self, attr): - if attr in (r'isatty', r'fileno', r'tell', r'seek'): + if attr in ('isatty', 'fileno', 'tell', 'seek'): raise AttributeError(attr) return getattr(self.in_, attr) @@ -450,8 +450,8 @@ def _serverequest(ui, repo, conn, createcmdserver, prereposetups): - fin = conn.makefile(r'rb') - fout = conn.makefile(r'wb') + fin = conn.makefile('rb') + fout = conn.makefile('wb') sv = None try: sv = createcmdserver(repo, conn, fin, fout, prereposetups) diff -r 61881b170140 -r 84a0102c05c7 mercurial/config.py --- a/mercurial/config.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/config.py Tue Jan 21 13:14:51 2020 -0500 @@ -212,7 +212,7 @@ def read(self, path, fp=None, sections=None, remap=None): if not fp: fp = util.posixfile(path, b'rb') - assert getattr(fp, 'mode', r'rb') == r'rb', ( + assert getattr(fp, 'mode', 'rb') == 'rb', ( b'config files must be opened in binary mode, got fp=%r mode=%r' % (fp, fp.mode,) ) diff -r 61881b170140 -r 84a0102c05c7 mercurial/configitems.py --- a/mercurial/configitems.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/configitems.py Tue Jan 21 13:14:51 2020 -0500 @@ -228,6 +228,9 @@ b'commands', b'grep.all-files', default=False, experimental=True, ) coreconfigitem( + b'commands', b'merge.require-rev', default=False, +) +coreconfigitem( b'commands', b'push.require-revs', default=False, ) coreconfigitem( @@ -433,6 +436,9 @@ b'devel', b'debug.extensions', default=False, ) coreconfigitem( + b'devel', b'debug.repo-filters', default=False, +) +coreconfigitem( b'devel', b'debug.peer-request', default=False, ) coreconfigitem( @@ -651,6 +657,9 @@ b'experimental', b'revisions.disambiguatewithin', default=None, ) coreconfigitem( + b'experimental', b'rust.index', default=False, +) +coreconfigitem( b'experimental', b'server.filesdata.recommended-batch-size', default=50000, ) coreconfigitem( @@ -703,6 +712,9 @@ b'experimental', b'worker.wdir-get-thread-safe', 
default=False, ) coreconfigitem( + b'experimental', b'worker.repository-upgrade', default=False, +) +coreconfigitem( b'experimental', b'xdiff', default=False, ) coreconfigitem( diff -r 61881b170140 -r 84a0102c05c7 mercurial/context.py --- a/mercurial/context.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/context.py Tue Jan 21 13:14:51 2020 -0500 @@ -71,7 +71,7 @@ __str__ = encoding.strmethod(__bytes__) def __repr__(self): - return r"<%s %s>" % (type(self).__name__, str(self)) + return "<%s %s>" % (type(self).__name__, str(self)) def __eq__(self, other): try: @@ -200,8 +200,8 @@ def mutable(self): return self.phase() > phases.public - def matchfileset(self, expr, badfn=None): - return fileset.match(self, expr, badfn=badfn) + def matchfileset(self, cwd, expr, badfn=None): + return fileset.match(self, cwd, expr, badfn=badfn) def obsolete(self): """True if the changeset is obsolete""" @@ -265,14 +265,14 @@ return self._repo[nullrev] def _fileinfo(self, path): - if r'_manifest' in self.__dict__: + if '_manifest' in self.__dict__: try: return self._manifest[path], self._manifest.flags(path) except KeyError: raise error.ManifestLookupError( self._node, path, _(b'not found in manifest') ) - if r'_manifestdelta' in self.__dict__ or path in self.files(): + if '_manifestdelta' in self.__dict__ or path in self.files(): if path in self._manifestdelta: return ( self._manifestdelta[path], @@ -328,11 +328,14 @@ default=b'glob', listsubrepos=False, badfn=None, + cwd=None, ): r = self._repo + if not cwd: + cwd = r.getcwd() return matchmod.match( r.root, - r.getcwd(), + cwd, pats, include, exclude, @@ -449,11 +452,25 @@ unknown=listunknown, listsubrepos=True, ) - for rfiles, sfiles in zip(r, s): + for k in ( + 'modified', + 'added', + 'removed', + 'deleted', + 'unknown', + 'ignored', + 'clean', + ): + rfiles, sfiles = getattr(r, k), getattr(s, k) rfiles.extend(b"%s/%s" % (subpath, f) for f in sfiles) - for l in r: - l.sort() + r.modified.sort() + r.added.sort() + r.removed.sort() 
+ r.deleted.sort() + r.unknown.sort() + r.ignored.sort() + r.clean.sort() return r @@ -463,10 +480,17 @@ changeset convenient. It represents a read-only context already present in the repo.""" - def __init__(self, repo, rev, node): + def __init__(self, repo, rev, node, maybe_filtered=True): super(changectx, self).__init__(repo) self._rev = rev self._node = node + # When maybe_filtered is True, the revision might be affected by + # changelog filtering and operation through the filtered changelog must be used. + # + # When maybe_filtered is False, the revision has already been checked + # against filtering and is not filtered. Operation through the + # unfiltered changelog might be used in some case. + self._maybe_filtered = maybe_filtered def __hash__(self): try: @@ -481,7 +505,11 @@ @propertycache def _changeset(self): - return self._repo.changelog.changelogrevision(self.rev()) + if self._maybe_filtered: + repo = self._repo + else: + repo = self._repo.unfiltered() + return repo.changelog.changelogrevision(self.rev()) @propertycache def _manifest(self): @@ -498,10 +526,18 @@ @propertycache def _parents(self): repo = self._repo - p1, p2 = repo.changelog.parentrevs(self._rev) + if self._maybe_filtered: + cl = repo.changelog + else: + cl = repo.unfiltered().changelog + + p1, p2 = cl.parentrevs(self._rev) if p2 == nullrev: - return [repo[p1]] - return [repo[p1], repo[p2]] + return [changectx(repo, p1, cl.node(p1), maybe_filtered=False)] + return [ + changectx(repo, p1, cl.node(p1), maybe_filtered=False), + changectx(repo, p2, cl.node(p2), maybe_filtered=False), + ] def changeset(self): c = self._changeset @@ -746,9 +782,9 @@ @propertycache def _changeid(self): - if r'_changectx' in self.__dict__: + if '_changectx' in self.__dict__: return self._changectx.rev() - elif r'_descendantrev' in self.__dict__: + elif '_descendantrev' in self.__dict__: # this file context was created from a revision with a known # descendant, we can (lazily) correct for linkrev aliases return 
self._adjustlinkrev(self._descendantrev) @@ -757,7 +793,7 @@ @propertycache def _filenode(self): - if r'_fileid' in self.__dict__: + if '_fileid' in self.__dict__: return self._filelog.lookup(self._fileid) else: return self._changectx.filenode(self._path) @@ -789,7 +825,7 @@ __str__ = encoding.strmethod(__bytes__) def __repr__(self): - return r"<%s %s>" % (type(self).__name__, str(self)) + return "<%s %s>" % (type(self).__name__, str(self)) def __hash__(self): try: @@ -1024,16 +1060,16 @@ """ toprev = None attrs = vars(self) - if r'_changeid' in attrs: + if '_changeid' in attrs: # We have a cached value already toprev = self._changeid - elif r'_changectx' in attrs: + elif '_changectx' in attrs: # We know which changelog entry we are coming from toprev = self._changectx.rev() if toprev is not None: return self._adjustlinkrev(toprev, inclusive=True, stoprev=stoprev) - elif r'_descendantrev' in attrs: + elif '_descendantrev' in attrs: introrev = self._adjustlinkrev(self._descendantrev, stoprev=stoprev) # be nice and cache the result of the computation if introrev is not None: @@ -1053,14 +1089,14 @@ def _parentfilectx(self, path, fileid, filelog): """create parent filectx keeping ancestry info for _adjustlinkrev()""" fctx = filectx(self._repo, path, fileid=fileid, filelog=filelog) - if r'_changeid' in vars(self) or r'_changectx' in vars(self): + if '_changeid' in vars(self) or '_changectx' in vars(self): # If self is associated with a changeset (probably explicitly # fed), ensure the created filectx is associated with a # changeset that is an ancestor of self.changectx. # This lets us later use _adjustlinkrev to get a correct link. fctx._descendantrev = self.rev() fctx._ancestrycontext = getattr(self, '_ancestrycontext', None) - elif r'_descendantrev' in vars(self): + elif '_descendantrev' in vars(self): # Otherwise propagate _descendantrev if we have one associated. 
fctx._descendantrev = self._descendantrev fctx._ancestrycontext = getattr(self, '_ancestrycontext', None) @@ -1120,7 +1156,7 @@ # renamed filectx won't have a filelog yet, so set it # from the cache to save time for p in pl: - if not r'_filelog' in p.__dict__: + if not '_filelog' in p.__dict__: p._filelog = getlog(p.path()) return pl @@ -1128,7 +1164,9 @@ # use linkrev to find the first changeset where self appeared base = self.introfilectx() if getattr(base, '_ancestrycontext', None) is None: - cl = self._repo.changelog + # it is safe to use an unfiltered repository here because we are + # walking ancestors only. + cl = self._repo.unfiltered().changelog if base.rev() is None: # wctx is not inclusive, but works because _ancestrycontext # is used to test filelog revisions @@ -1409,7 +1447,7 @@ return b def phase(self): - phase = phases.draft # default phase to draft + phase = phases.newcommitphase(self._repo.ui) for p in self.parents(): phase = max(phase, p.phase()) return phase @@ -1488,7 +1526,29 @@ p = p[:-1] # use unfiltered repo to delay/avoid loading obsmarkers unfi = self._repo.unfiltered() - return [changectx(self._repo, unfi.changelog.rev(n), n) for n in p] + return [ + changectx( + self._repo, unfi.changelog.rev(n), n, maybe_filtered=False + ) + for n in p + ] + + def setparents(self, p1node, p2node=nullid): + dirstate = self._repo.dirstate + with dirstate.parentchange(): + copies = dirstate.setparents(p1node, p2node) + pctx = self._repo[p1node] + if copies: + # Adjust copy records, the dirstate cannot do it, it + # requires access to parents manifests. Preserve them + # only for entries added to first parent. 
+ for f in copies: + if f not in pctx and copies[f] in pctx: + dirstate.copy(copies[f], f) + if p2node == nullid: + for f, s in sorted(dirstate.copies().items()): + if f not in pctx and s not in pctx: + dirstate.copy(None, f) def _fileinfo(self, path): # populate __dict__['_manifest'] as workingctx has no _manifestdelta @@ -1534,7 +1594,7 @@ return self._repo.dirstate.flagfunc(self._buildflagfunc) def flags(self, path): - if r'_manifest' in self.__dict__: + if '_manifest' in self.__dict__: try: return self._manifest.flags(path) except KeyError: @@ -1552,7 +1612,7 @@ ) def dirty(self, missing=False, merge=True, branch=True): - b"check whether a working directory is modified" + """check whether a working directory is modified""" # check subrepos first for s in sorted(self.substate): if self.sub(s).dirty(missing=missing): @@ -1659,15 +1719,18 @@ default=b'glob', listsubrepos=False, badfn=None, + cwd=None, ): r = self._repo + if not cwd: + cwd = r.getcwd() # Only a case insensitive filesystem needs magic to translate user input # to actual case in the filesystem. icasefs = not util.fscasesensitive(r.root) return matchmod.match( r.root, - r.getcwd(), + cwd, pats, include, exclude, @@ -1931,6 +1994,7 @@ for f in self.removed(): self._repo.dirstate.drop(f) self._repo.dirstate.setparents(node) + self._repo._quick_access_changeid_invalidate() # write changes out explicitly, because nesting wlock at # runtime may prevent 'wlock.release()' in 'repo.commit()' @@ -2080,7 +2144,7 @@ # warned and backed up if wvfs.isdir(f) and not wvfs.islink(f): wvfs.rmtree(f, forcibly=True) - for p in reversed(list(util.finddirs(f))): + for p in reversed(list(pathutil.finddirs(f))): if wvfs.isfileorlink(p): wvfs.unlink(p) break @@ -2120,6 +2184,10 @@ # ``overlayworkingctx`` (e.g. with --collapse). 
util.clearcachedproperty(self, b'_manifest') + def setparents(self, p1node, p2node=nullid): + assert p1node == self._wrappedctx.node() + self._parents = [self._wrappedctx, self._repo.unfiltered()[p2node]] + def data(self, path): if self.isdirty(path): if self._cache[path][b'exists']: @@ -2183,7 +2251,7 @@ ] def p1copies(self): - copies = self._repo._wrappedctx.p1copies().copy() + copies = {} narrowmatch = self._repo.narrowmatch() for f in self._cache.keys(): if not narrowmatch(f): @@ -2195,7 +2263,7 @@ return copies def p2copies(self): - copies = self._repo._wrappedctx.p2copies().copy() + copies = {} narrowmatch = self._repo.narrowmatch() for f in self._cache.keys(): if not narrowmatch(f): @@ -2374,9 +2442,9 @@ ``text`` is the commit message. ``parents`` (optional) are rev numbers. """ - # Default parents to the wrapped contexts' if not passed. + # Default parents to the wrapped context if not passed. if parents is None: - parents = self._wrappedctx.parents() + parents = self.parents() if len(parents) == 1: parents = (parents[0], None) @@ -2404,6 +2472,9 @@ # necessary for memctx to register a deletion. 
return None + if branch is None: + branch = self._wrappedctx.branch() + return memctx( self._repo, parents, @@ -2697,7 +2768,7 @@ date=None, extra=None, branch=None, - editor=False, + editor=None, ): super(memctx, self).__init__( repo, text, user, date, extra, branch=branch @@ -2858,7 +2929,7 @@ user=None, date=None, extra=None, - editor=False, + editor=None, ): if text is None: text = originalctx.description() diff -r 61881b170140 -r 84a0102c05c7 mercurial/copies.py --- a/mercurial/copies.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/copies.py Tue Jan 21 13:14:51 2020 -0500 @@ -8,6 +8,7 @@ from __future__ import absolute_import import collections +import multiprocessing import os from .i18n import _ @@ -63,12 +64,12 @@ del t[k] -def _chain(a, b): - """chain two sets of copies 'a' and 'b'""" - t = a.copy() - for k, v in pycompat.iteritems(b): - t[k] = t.get(v, v) - return t +def _chain(prefix, suffix): + """chain two sets of copies 'prefix' and 'suffix'""" + result = prefix.copy() + for key, value in pycompat.iteritems(suffix): + result[key] = prefix.get(value, value) + return result def _tracefile(fctx, am, basemf): @@ -231,7 +232,7 @@ else: p1copies = {} p2copies = {} - removed = () + removed = [] return p1, p2, p1copies, p2copies, removed else: @@ -281,10 +282,28 @@ iterrevs &= mrset iterrevs.update(roots) iterrevs.remove(b.rev()) - all_copies = {r: {} for r in roots} + revs = sorted(iterrevs) + return _combinechangesetcopies(revs, children, b.rev(), revinfo, match) + + +def _combinechangesetcopies(revs, children, targetrev, revinfo, match): + """combine the copies information for each item of iterrevs + + revs: sorted iterable of revision to visit + children: a {parent: [children]} mapping. + targetrev: the final copies destination revision (not in iterrevs) + revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed) + match: a matcher + + It returns the aggregated copies information for `targetrev`. 
+ """ + all_copies = {} alwaysmatch = match.always() - for r in sorted(iterrevs): - copies = all_copies.pop(r) + for r in revs: + copies = all_copies.pop(r, None) + if copies is None: + # this is a root + copies = {} for i, c in enumerate(children[r]): p1, p2, p1copies, p2copies, removed = revinfo(c) if r == p1: @@ -333,7 +352,7 @@ else: newcopies.update(othercopies) all_copies[c] = newcopies - return all_copies[b.rev()] + return all_copies[targetrev] def _forwardcopies(a, b, base=None, match=None): @@ -837,30 +856,26 @@ return False -def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None): - """reproduce copies from fromrev to rev in the dirstate +def graftcopies(wctx, ctx, base): + """reproduce copies between base and ctx in the wctx - If skiprev is specified, it's a revision that should be used to - filter copy records. Any copies that occur between fromrev and - skiprev will not be duplicated, even if they appear in the set of - copies between fromrev and rev. + Unlike mergecopies(), this function will only consider copies between base + and ctx; it will ignore copies between base and wctx. Also unlike + mergecopies(), this function will apply copies to the working copy (instead + of just returning information about the copies). That makes it cheaper + (especially in the common case of base==ctx.p1()) and useful also when + experimental.copytrace=off. + + merge.update() will have already marked most copies, but it will only + mark copies if it thinks the source files are related (see + merge._related()). It will also not mark copies if the file wasn't modified + on the local side. This function adds the copies that were "missed" + by merge.update(). 
""" - exclude = {} - ctraceconfig = repo.ui.config(b'experimental', b'copytrace') - bctrace = stringutil.parsebool(ctraceconfig) - if skiprev is not None and ( - ctraceconfig == b'heuristics' or bctrace or bctrace is None - ): - # copytrace='off' skips this line, but not the entire function because - # the line below is O(size of the repo) during a rebase, while the rest - # of the function is much faster (and is required for carrying copy - # metadata across the rebase anyway). - exclude = pathcopies(repo[fromrev], repo[skiprev]) - for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])): - if dst in exclude: - continue - if dst in wctx: - wctx[dst].markcopied(src) + new_copies = pathcopies(base, ctx) + _filter(wctx.p1(), wctx, new_copies) + for dst, src in pycompat.iteritems(new_copies): + wctx[dst].markcopied(src) def computechangesetfilesadded(ctx): @@ -989,6 +1004,102 @@ def getsidedataadder(srcrepo, destrepo): + use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade') + if pycompat.iswindows or not use_w: + return _get_simple_sidedata_adder(srcrepo, destrepo) + else: + return _get_worker_sidedata_adder(srcrepo, destrepo) + + +def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens): + """The function used by worker precomputing sidedata + + It read an input queue containing revision numbers + It write in an output queue containing (rev, ) + + The `None` input value is used as a stop signal. + + The `tokens` semaphore is user to avoid having too many unprocessed + entries. The workers needs to acquire one token before fetching a task. + They will be released by the consumer of the produced data. + """ + tokens.acquire() + rev = revs_queue.get() + while rev is not None: + data = _getsidedata(srcrepo, rev) + sidedata_queue.put((rev, data)) + tokens.acquire() + rev = revs_queue.get() + # processing of `None` is completed, release the token. 
+ tokens.release() + + +BUFF_PER_WORKER = 50 + + +def _get_worker_sidedata_adder(srcrepo, destrepo): + """The parallel version of the sidedata computation + + This code spawn a pool of worker that precompute a buffer of sidedata + before we actually need them""" + # avoid circular import copies -> scmutil -> worker -> copies + from . import worker + + nbworkers = worker._numworkers(srcrepo.ui) + + tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER) + revsq = multiprocessing.Queue() + sidedataq = multiprocessing.Queue() + + assert srcrepo.filtername is None + # queue all tasks beforehand, revision numbers are small and it make + # synchronisation simpler + # + # Since the computation for each node can be quite expensive, the overhead + # of using a single queue is not revelant. In practice, most computation + # are fast but some are very expensive and dominate all the other smaller + # cost. + for r in srcrepo.changelog.revs(): + revsq.put(r) + # queue the "no more tasks" markers + for i in range(nbworkers): + revsq.put(None) + + allworkers = [] + for i in range(nbworkers): + args = (srcrepo, revsq, sidedataq, tokens) + w = multiprocessing.Process(target=_sidedata_worker, args=args) + allworkers.append(w) + w.start() + + # dictionnary to store results for revision higher than we one we are + # looking for. For example, if we need the sidedatamap for 42, and 43 is + # received, when shelve 43 for later use. + staging = {} + + def sidedata_companion(revlog, rev): + sidedata = {} + if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog + # Is the data previously shelved ? 
+ sidedata = staging.pop(rev, None) + if sidedata is None: + # look at the queued result until we find the one we are lookig + # for (shelve the other ones) + r, sidedata = sidedataq.get() + while r != rev: + staging[r] = sidedata + r, sidedata = sidedataq.get() + tokens.release() + return False, (), sidedata + + return sidedata_companion + + +def _get_simple_sidedata_adder(srcrepo, destrepo): + """The simple version of the sidedata computation + + It just compute it in the same thread on request""" + def sidedatacompanion(revlog, rev): sidedata = {} if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog diff -r 61881b170140 -r 84a0102c05c7 mercurial/crecord.py --- a/mercurial/crecord.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/crecord.py Tue Jan 21 13:14:51 2020 -0500 @@ -102,7 +102,7 @@ raise NotImplementedError(b"method must be implemented by subclass") def allchildren(self): - b"Return a list of all of the direct children of this node" + """Return a list of all of the direct children of this node""" raise NotImplementedError(b"method must be implemented by subclass") def nextsibling(self): @@ -264,21 +264,23 @@ return None def firstchild(self): - b"return the first child of this item, if one exists. otherwise None." + """return the first child of this item, if one exists. otherwise + None.""" if len(self.hunks) > 0: return self.hunks[0] else: return None def lastchild(self): - b"return the last child of this item, if one exists. otherwise None." + """return the last child of this item, if one exists. 
otherwise + None.""" if len(self.hunks) > 0: return self.hunks[-1] else: return None def allchildren(self): - b"return a list of all of the direct children of this node" + """return a list of all of the direct children of this node""" return self.hunks def __getattr__(self, name): @@ -286,7 +288,7 @@ class uihunkline(patchnode): - b"represents a changed line in a hunk" + """represents a changed line in a hunk""" def __init__(self, linetext, hunk): self.linetext = linetext @@ -319,16 +321,18 @@ return None def parentitem(self): - b"return the parent to the current item" + """return the parent to the current item""" return self.hunk def firstchild(self): - b"return the first child of this item, if one exists. otherwise None." + """return the first child of this item, if one exists. otherwise + None.""" # hunk-lines don't have children return None def lastchild(self): - b"return the last child of this item, if one exists. otherwise None." + """return the last child of this item, if one exists. otherwise + None.""" # hunk-lines don't have children return None @@ -372,25 +376,27 @@ return None def parentitem(self): - b"return the parent to the current item" + """return the parent to the current item""" return self.header def firstchild(self): - b"return the first child of this item, if one exists. otherwise None." + """return the first child of this item, if one exists. otherwise + None.""" if len(self.changedlines) > 0: return self.changedlines[0] else: return None def lastchild(self): - b"return the last child of this item, if one exists. otherwise None." + """return the last child of this item, if one exists. 
otherwise + None.""" if len(self.changedlines) > 0: return self.changedlines[-1] else: return None def allchildren(self): - b"return a list of all of the direct children of this node" + """return a list of all of the direct children of this node""" return self.changedlines def countchanges(self): @@ -522,7 +528,7 @@ return getattr(self._hunk, name) def __repr__(self): - return r'' % (self.filename(), self.fromline) + return '' % (self.filename(), self.fromline) def filterpatch(ui, chunks, chunkselector, operation=None): @@ -569,7 +575,7 @@ chunkselector = curseschunkselector(headerlist, ui, operation) # This is required for ncurses to display non-ASCII characters in # default user locale encoding correctly. --immerrr - locale.setlocale(locale.LC_ALL, r'') + locale.setlocale(locale.LC_ALL, '') origsigtstp = sentinel = object() if util.safehasattr(signal, b'SIGTSTP'): origsigtstp = signal.getsignal(signal.SIGTSTP) @@ -853,7 +859,7 @@ self.currentselecteditem = currentitem def updatescroll(self): - b"scroll the screen to fully show the currently-selected" + """scroll the screen to fully show the currently-selected""" selstart = self.selecteditemstartline selend = self.selecteditemendline @@ -871,7 +877,7 @@ self.scrolllines(selstart - padstartbuffered) def scrolllines(self, numlines): - b"scroll the screen up (down) by numlines when numlines >0 (<0)." + """scroll the screen up (down) by numlines when numlines >0 (<0).""" self.firstlineofpadtoprint += numlines if self.firstlineofpadtoprint < 0: self.firstlineofpadtoprint = 0 @@ -973,7 +979,7 @@ ) def toggleall(self): - b"toggle the applied flag of all items." + """toggle the applied flag of all items.""" if self.waslasttoggleallapplied: # then unapply them this time for item in self.headerlist: if item.applied: @@ -984,8 +990,19 @@ self.toggleapply(item) self.waslasttoggleallapplied = not self.waslasttoggleallapplied + def flipselections(self): + """ + Flip all selections. 
Every selected line is unselected and vice + versa. + """ + for header in self.headerlist: + for hunk in header.allchildren(): + for line in hunk.allchildren(): + self.toggleapply(line) + def toggleallbetween(self): - b"toggle applied on or off for all items in range [lastapplied,current]." + """toggle applied on or off for all items in range [lastapplied, + current]. """ if ( not self.lastapplieditem or self.currentselecteditem == self.lastapplieditem @@ -1026,7 +1043,8 @@ nextitem = nextitem.nextitem() def togglefolded(self, item=None, foldparent=False): - b"toggle folded flag of specified item (defaults to currently selected)" + """toggle folded flag of specified item (defaults to currently + selected)""" if item is None: item = self.currentselecteditem if foldparent or (isinstance(item, uiheader) and item.neverunfolded): @@ -1320,7 +1338,7 @@ def printhunklinesbefore( self, hunk, selected=False, towin=True, ignorefolding=False ): - b"includes start/end line indicator" + """includes start/end line indicator""" outstr = b"" # where hunk is in list of siblings hunkindex = hunk.header.hunks.index(hunk) @@ -1529,7 +1547,7 @@ return numlines def sigwinchhandler(self, n, frame): - b"handle window resizing" + """handle window resizing""" try: curses.endwin() self.xscreensize, self.yscreensize = scmutil.termsize(self.ui) @@ -1599,20 +1617,21 @@ return colorpair def initcolorpair(self, *args, **kwargs): - b"same as getcolorpair." + """same as getcolorpair.""" self.getcolorpair(*args, **kwargs) def helpwindow(self): - b"print a help window to the screen. exit after any keypress." + """print a help window to the screen. 
exit after any keypress.""" helptext = _( """ [press any key to return to the patch-display] -crecord allows you to interactively choose among the changes you have made, -and confirm only those changes you select for further processing by the command -you are running (commit/shelve/revert), after confirming the selected -changes, the unselected changes are still present in your working copy, so you -can use crecord multiple times to split large changes into smaller changesets. -the following are valid keystrokes: +The curses hunk selector allows you to interactively choose among the +changes you have made, and confirm only those changes you select for +further processing by the command you are running (such as commit, +shelve, or revert). After confirming the selected changes, the +unselected changes are still present in your working copy, so you can +use the hunk selector multiple times to split large changes into +smaller changesets. the following are valid keystrokes: x [space] : (un-)select item ([~]/[x] = partly/fully applied) [enter] : (un-)select item and go to next item of same type @@ -1629,7 +1648,7 @@ ctrl-l : scroll the selected line to the top of the screen m : edit / resume editing the commit message e : edit the currently selected hunk - a : toggle amend mode, only with commit -i + a : toggle all selections c : confirm selected changes r : review/edit and confirm selected changes q : quit without confirming (no changes will be made) @@ -1654,7 +1673,7 @@ pass def commitMessageWindow(self): - b"Create a temporary commit message editing window on the screen." + """Create a temporary commit message editing window on the screen.""" curses.raw() curses.def_prog_mode() @@ -1704,7 +1723,8 @@ self.recenterdisplayedarea() def confirmationwindow(self, windowtext): - b"display an informational window, then wait for and return a keypress." 
+ """display an informational window, then wait for and return a + keypress.""" confirmwin = curses.newwin(self.yscreensize, 0, 0, 0) try: @@ -1747,32 +1767,6 @@ else: return False - def toggleamend(self, opts, test): - """Toggle the amend flag. - - When the amend flag is set, a commit will modify the most recently - committed changeset, instead of creating a new changeset. Otherwise, a - new changeset will be created (the normal commit behavior). - """ - - if opts.get(b'amend') is None: - opts[b'amend'] = True - msg = _( - b"Amend option is turned on -- committing the currently " - b"selected changes will not create a new changeset, but " - b"instead update the most recently committed changeset.\n\n" - b"Press any key to continue." - ) - elif opts.get(b'amend') is True: - opts[b'amend'] = None - msg = _( - b"Amend option is turned off -- committing the currently " - b"selected changes will create a new changeset.\n\n" - b"Press any key to continue." - ) - if not test: - self.confirmationwindow(msg) - def recenterdisplayedarea(self): """ once we scrolled with pg up pg down we can be pointing outside of the @@ -1904,7 +1898,7 @@ elif keypressed in ["q"]: raise error.Abort(_(b'user quit')) elif keypressed in ['a']: - self.toggleamend(self.opts, test) + self.flipselections() elif keypressed in ["c"]: return True elif keypressed in ["r"]: diff -r 61881b170140 -r 84a0102c05c7 mercurial/dagparser.py --- a/mercurial/dagparser.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/dagparser.py Tue Jan 21 13:14:51 2020 -0500 @@ -168,7 +168,9 @@ if not desc: return - wordchars = pycompat.bytestr(string.ascii_letters + string.digits) + wordchars = pycompat.bytestr( + string.ascii_letters + string.digits + ) # pytype: disable=wrong-arg-types labels = {} p1 = -1 @@ -177,7 +179,9 @@ def resolve(ref): if not ref: return p1 - elif ref[0] in pycompat.bytestr(string.digits): + elif ref[0] in pycompat.bytestr( + string.digits + ): # pytype: disable=wrong-arg-types return r - int(ref) 
else: return labels[ref] @@ -211,7 +215,9 @@ c = nextch() while c != b'\0': - while c in pycompat.bytestr(string.whitespace): + while c in pycompat.bytestr( + string.whitespace + ): # pytype: disable=wrong-arg-types c = nextch() if c == b'.': yield b'n', (r, [p1]) @@ -219,7 +225,9 @@ r += 1 c = nextch() elif c == b'+': - c, digs = nextrun(nextch(), pycompat.bytestr(string.digits)) + c, digs = nextrun( + nextch(), pycompat.bytestr(string.digits) + ) # pytype: disable=wrong-arg-types n = int(digs) for i in pycompat.xrange(0, n): yield b'n', (r, [p1]) diff -r 61881b170140 -r 84a0102c05c7 mercurial/debugcommands.py --- a/mercurial/debugcommands.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/debugcommands.py Tue Jan 21 13:14:51 2020 -0500 @@ -59,6 +59,7 @@ merge as mergemod, obsolete, obsutil, + pathutil, phases, policy, pvec, @@ -330,9 +331,9 @@ ) ) - chunkdata = gen.changelogheader() + gen.changelogheader() showchunks(b"changelog") - chunkdata = gen.manifestheader() + gen.manifestheader() showchunks(b"manifest") for chunkdata in iter(gen.filelogheader, {}): fname = chunkdata[b'filename'] @@ -340,7 +341,7 @@ else: if isinstance(gen, bundle2.unbundle20): raise error.Abort(_(b'use debugbundle2 for this file')) - chunkdata = gen.changelogheader() + gen.changelogheader() for deltadata in gen.deltaiter(): node, p1, p2, cs, deltabase, delta, flags = deltadata ui.write(b"%s%s\n" % (indent_string, hex(node))) @@ -393,7 +394,7 @@ if not isinstance(gen, bundle2.unbundle20): raise error.Abort(_(b'not a bundle2 file')) ui.write((b'Stream params: %s\n' % _quasirepr(gen.params))) - parttypes = opts.get(r'part_type', []) + parttypes = opts.get('part_type', []) for part in gen.iterparts(): if parttypes and part.type not in parttypes: continue @@ -480,8 +481,8 @@ ui.warn(_(b"%s in manifest1, but listed as state %s") % (f, state)) errors += 1 if errors: - error = _(b".hg/dirstate inconsistent with current parent's manifest") - raise error.Abort(error) + errstr = _(b".hg/dirstate 
inconsistent with current parent's manifest") + raise error.Abort(errstr) @command( @@ -492,7 +493,7 @@ def debugcolor(ui, repo, **opts): """show available color, effects or style""" ui.writenoi18n(b'color mode: %s\n' % stringutil.pprint(ui._colormode)) - if opts.get(r'style'): + if opts.get('style'): return _debugdisplaystyle(ui) else: return _debugdisplaycolor(ui) @@ -573,8 +574,8 @@ Otherwise, the changelog DAG of the current repo is emitted. """ - spaces = opts.get(r'spaces') - dots = opts.get(r'dots') + spaces = opts.get('spaces') + dots = opts.get('dots') if file_: rlog = revlog.revlog(vfsmod.vfs(encoding.getcwd(), audit=False), file_) revs = set((int(r) for r in revs)) @@ -587,8 +588,8 @@ elif repo: cl = repo.changelog - tags = opts.get(r'tags') - branches = opts.get(r'branches') + tags = opts.get('tags') + branches = opts.get('branches') if tags: labels = {} for l, n in repo.tags().items(): @@ -651,8 +652,8 @@ ) def debugdate(ui, date, range=None, **opts): """parse and display a date""" - if opts[r"extended"]: - d = dateutil.parsedate(date, util.extendeddateformats) + if opts["extended"]: + d = dateutil.parsedate(date, dateutil.extendeddateformats) else: d = dateutil.parsedate(date) ui.writenoi18n(b"internal: %d %d\n" % d) @@ -861,10 +862,10 @@ def debugstate(ui, repo, **opts): """show the contents of the current dirstate""" - nodates = not opts[r'dates'] - if opts.get(r'nodates') is not None: + nodates = not opts['dates'] + if opts.get('nodates') is not None: nodates = True - datesort = opts.get(r'datesort') + datesort = opts.get('datesort') if datesort: keyfunc = lambda x: (x[1][3], x[0]) # sort by mtime, then by filename @@ -877,7 +878,7 @@ timestr = b'set ' else: timestr = time.strftime( - r"%Y-%m-%d %H:%M:%S ", time.localtime(ent[3]) + "%Y-%m-%d %H:%M:%S ", time.localtime(ent[3]) ) timestr = encoding.strtolocal(timestr) if ent[1] & 0o20000: @@ -1028,7 +1029,12 @@ fm = ui.formatter(b'debugextensions', opts) for extname, extmod in sorted(exts, 
key=operator.itemgetter(0)): isinternal = extensions.ismoduleinternal(extmod) - extsource = pycompat.fsencode(extmod.__file__) + extsource = None + + if util.safehasattr(extmod, '__file__'): + extsource = pycompat.fsencode(extmod.__file__) + elif getattr(sys, 'oxidized', False): + extsource = pycompat.sysexecutable if isinternal: exttestedwith = [] # never expose magic string to users else: @@ -1165,7 +1171,7 @@ files.update(ctx.files()) files.update(ctx.substate) - m = ctx.matchfileset(expr) + m = ctx.matchfileset(repo.getcwd(), expr) if opts[b'show_matcher'] or (opts[b'show_matcher'] is None and ui.verbose): ui.writenoi18n(b'* matcher:\n', stringutil.prettyrepr(m), b'\n') for f in sorted(files): @@ -1298,11 +1304,11 @@ raise error.Abort(b"getbundle() not supported by target repository") args = {} if common: - args[r'common'] = [bin(s) for s in common] + args['common'] = [bin(s) for s in common] if head: - args[r'heads'] = [bin(s) for s in head] + args['heads'] = [bin(s) for s in head] # TODO: get desired bundlecaps from command line. 
- args[r'bundlecaps'] = None + args['bundlecaps'] = None bundle = repo.getbundle(b'debug', **args) bundletype = opts.get(b'type', b'bzip2').lower() @@ -1343,7 +1349,7 @@ ignored = nf ignoredata = repo.dirstate._ignorefileandline(nf) else: - for p in util.finddirs(nf): + for p in pathutil.finddirs(nf): if ignore(p): ignored = p ignoredata = repo.dirstate._ignorefileandline(p) @@ -1469,6 +1475,12 @@ ) # Python + pythonlib = None + if util.safehasattr(os, '__file__'): + pythonlib = os.path.dirname(pycompat.fsencode(os.__file__)) + elif getattr(sys, 'oxidized', False): + pythonlib = pycompat.sysexecutable + fm.write( b'pythonexe', _(b"checking Python executable (%s)\n"), @@ -1482,7 +1494,7 @@ fm.write( b'pythonlib', _(b"checking Python lib (%s)...\n"), - os.path.dirname(pycompat.fsencode(os.__file__)), + pythonlib or _(b"unknown"), ) security = set(sslutil.supportedprotocols) @@ -1526,13 +1538,19 @@ ) # compiled modules + hgmodules = None + if util.safehasattr(sys.modules[__name__], '__file__'): + hgmodules = os.path.dirname(pycompat.fsencode(__file__)) + elif getattr(sys, 'oxidized', False): + hgmodules = pycompat.sysexecutable + fm.write( b'hgmodulepolicy', _(b"checking module policy (%s)\n"), policy.policy ) fm.write( b'hgmodules', _(b"checking installed modules (%s)...\n"), - os.path.dirname(pycompat.fsencode(__file__)), + hgmodules or _(b"unknown"), ) rustandc = policy.policy in (b'rust+c', b'rust+c-allow') @@ -1543,7 +1561,7 @@ err = None try: if cext: - from .cext import ( + from .cext import ( # pytype: disable=import-error base85, bdiff, mpatch, @@ -1553,7 +1571,7 @@ # quiet pyflakes dir(bdiff), dir(mpatch), dir(base85), dir(osutil) if rustext: - from .rustext import ( + from .rustext import ( # pytype: disable=import-error ancestor, dirstate, ) @@ -1775,21 +1793,21 @@ """ - if opts.get(r'force_lock'): + if opts.get('force_lock'): repo.svfs.unlink(b'lock') - if opts.get(r'force_wlock'): + if opts.get('force_wlock'): repo.vfs.unlink(b'wlock') - if 
opts.get(r'force_lock') or opts.get(r'force_wlock'): + if opts.get('force_lock') or opts.get('force_wlock'): return 0 locks = [] try: - if opts.get(r'set_wlock'): + if opts.get('set_wlock'): try: locks.append(repo.wlock(False)) except error.LockHeld: raise error.Abort(_(b'wlock is already held')) - if opts.get(r'set_lock'): + if opts.get('set_lock'): try: locks.append(repo.lock(False)) except error.LockHeld: @@ -1871,7 +1889,7 @@ ) raise error.Abort(msg) - if opts.get(r'clear'): + if opts.get('clear'): with repo.wlock(): cache = getcache() cache.clear(clear_persisted_data=True) @@ -2265,7 +2283,7 @@ if fixpaths: spec = spec.replace(pycompat.ossep, b'/') speclen = len(spec) - fullpaths = opts[r'full'] + fullpaths = opts['full'] files, dirs = set(), set() adddir, addfile = dirs.add, files.add for f, st in pycompat.iteritems(dirstate): @@ -2283,11 +2301,11 @@ return files, dirs acceptable = b'' - if opts[r'normal']: + if opts['normal']: acceptable += b'nm' - if opts[r'added']: + if opts['added']: acceptable += b'a' - if opts[r'removed']: + if opts['removed']: acceptable += b'r' cwd = repo.getcwd() if not specs: @@ -2526,7 +2544,7 @@ dirstate = repo.dirstate changedfiles = None # See command doc for what minimal does. 
- if opts.get(r'minimal'): + if opts.get('minimal'): manifestfiles = set(ctx.manifest().keys()) dirstatefiles = set(dirstate) manifestonly = manifestfiles - dirstatefiles @@ -3147,13 +3165,13 @@ ui.writenoi18n(b'+++ optimized\n', label=b'diff.file_b') sm = difflib.SequenceMatcher(None, arevs, brevs) for tag, alo, ahi, blo, bhi in sm.get_opcodes(): - if tag in (r'delete', r'replace'): + if tag in ('delete', 'replace'): for c in arevs[alo:ahi]: ui.write(b'-%d\n' % c, label=b'diff.deleted') - if tag in (r'insert', r'replace'): + if tag in ('insert', 'replace'): for c in brevs[blo:bhi]: ui.write(b'+%d\n' % c, label=b'diff.inserted') - if tag == r'equal': + if tag == 'equal': for c in arevs[alo:ahi]: ui.write(b' %d\n' % c) return 1 @@ -3200,16 +3218,19 @@ raise error.Abort(_(b'cannot use both --logiofd and --logiofile')) if opts[b'logiofd']: - # Line buffered because output is line based. + # Ideally we would be line buffered. But line buffering in binary + # mode isn't supported and emits a warning in Python 3.8+. Disabling + # buffering could have performance impacts. But since this isn't + # performance critical code, it should be fine. try: - logfh = os.fdopen(int(opts[b'logiofd']), r'ab', 1) + logfh = os.fdopen(int(opts[b'logiofd']), 'ab', 0) except OSError as e: if e.errno != errno.ESPIPE: raise # can't seek a pipe, so `ab` mode fails on py3 - logfh = os.fdopen(int(opts[b'logiofd']), r'wb', 1) + logfh = os.fdopen(int(opts[b'logiofd']), 'wb', 0) elif opts[b'logiofile']: - logfh = open(opts[b'logiofile'], b'ab', 1) + logfh = open(opts[b'logiofile'], b'ab', 0) s = wireprotoserver.sshserver(ui, repo, logfh=logfh) s.serve_forever() @@ -3391,7 +3412,7 @@ ctx = repo[rev] ui.write(b'%s\n' % ctx2str(ctx)) for succsset in obsutil.successorssets( - repo, ctx.node(), closest=opts[r'closest'], cache=cache + repo, ctx.node(), closest=opts['closest'], cache=cache ): if succsset: ui.write(b' ') @@ -3421,15 +3442,15 @@ Use --verbose to print the parsed tree. 
""" revs = None - if opts[r'rev']: + if opts['rev']: if repo is None: raise error.RepoError( _(b'there is no Mercurial repository here (.hg not found)') ) - revs = scmutil.revrange(repo, opts[r'rev']) + revs = scmutil.revrange(repo, opts['rev']) props = {} - for d in opts[r'define']: + for d in opts['define']: try: k, v = (e.strip() for e in d.split(b'=', 1)) if not k or k == b'ui': @@ -3985,27 +4006,27 @@ url, authinfo = u.authinfo() openerargs = { - r'useragent': b'Mercurial debugwireproto', + 'useragent': b'Mercurial debugwireproto', } # Turn pipes/sockets into observers so we can log I/O. if ui.verbose: openerargs.update( { - r'loggingfh': ui, - r'loggingname': b's', - r'loggingopts': {r'logdata': True, r'logdataapis': False,}, + 'loggingfh': ui, + 'loggingname': b's', + 'loggingopts': {'logdata': True, 'logdataapis': False,}, } ) if ui.debugflag: - openerargs[r'loggingopts'][r'logdataapis'] = True + openerargs['loggingopts']['logdataapis'] = True # Don't send default headers when in raw mode. This allows us to # bypass most of the behavior of our URL handling code so we can # have near complete control over what's sent on the wire. 
if opts[b'peer'] == b'raw': - openerargs[r'sendaccept'] = False + openerargs['sendaccept'] = False opener = urlmod.opener(ui, authinfo, **openerargs) @@ -4105,7 +4126,7 @@ ui.status(_(b'sending %s command\n') % command) if b'PUSHFILE' in args: - with open(args[b'PUSHFILE'], r'rb') as fh: + with open(args[b'PUSHFILE'], 'rb') as fh: del args[b'PUSHFILE'] res, output = peer._callpush( command, fh, **pycompat.strkwargs(args) @@ -4143,6 +4164,7 @@ _(b'sending batch with %d sub-commands\n') % len(batchedcommands) ) + assert peer is not None for i, chunk in enumerate(peer._submitbatch(batchedcommands)): ui.status( _(b'response #%d: %s\n') % (i, stringutil.escapestr(chunk)) @@ -4213,8 +4235,8 @@ getattr(e, 'read', lambda: None)() continue - ct = res.headers.get(r'Content-Type') - if ct == r'application/mercurial-cbor': + ct = res.headers.get('Content-Type') + if ct == 'application/mercurial-cbor': ui.write( _(b'cbor> %s\n') % stringutil.pprint( @@ -4223,6 +4245,7 @@ ) elif action == b'close': + assert peer is not None peer.close() elif action == b'readavailable': if not stdout or not stderr: diff -r 61881b170140 -r 84a0102c05c7 mercurial/default.d/mergetools.rc --- a/mercurial/default.d/mergetools.rc Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ -# Some default global settings for common merge tools - -[merge-tools] -kdiff3.args=--auto --L1 $labelbase --L2 $labellocal --L3 $labelother $base $local $other -o $output -kdiff3.regkey=Software\KDiff3 -kdiff3.regkeyalt=Software\Wow6432Node\KDiff3 -kdiff3.regappend=\kdiff3.exe -kdiff3.fixeol=True -kdiff3.gui=True -kdiff3.diffargs=--L1 $plabel1 --L2 $clabel $parent $child - -gvimdiff.args=--nofork -d -g -O $local $other $base -gvimdiff.regkey=Software\Vim\GVim -gvimdiff.regkeyalt=Software\Wow6432Node\Vim\GVim -gvimdiff.regname=path -gvimdiff.priority=-9 -gvimdiff.diffargs=--nofork -d -g -O $parent $child - -vimdiff.args=$local $other $base -c 'redraw | echomsg "hg merge conflict, 
type \":cq\" to abort vimdiff"' -vimdiff.check=changed -vimdiff.priority=-10 - -merge.check=conflicts -merge.priority=-100 - -gpyfm.gui=True - -meld.gui=True -meld.args=--label=$labellocal $local --label='merged' $base --label=$labelother $other -o $output --auto-merge -meld.check=changed -meld.diffargs=-a --label=$plabel1 $parent --label=$clabel $child - -tkdiff.args=$local $other -a $base -o $output -tkdiff.gui=True -tkdiff.priority=-8 -tkdiff.diffargs=-L $plabel1 $parent -L $clabel $child - -xxdiff.args=--show-merged-pane --exit-with-merge-status --title1 $labellocal --title2 $labelbase --title3 $labelother --merged-filename $output --merge $local $base $other -xxdiff.gui=True -xxdiff.priority=-8 -xxdiff.diffargs=--title1 $plabel1 $parent --title2 $clabel $child - -diffmerge.regkey=Software\SourceGear\SourceGear DiffMerge\ -diffmerge.regkeyalt=Software\Wow6432Node\SourceGear\SourceGear DiffMerge\ -diffmerge.regname=Location -diffmerge.priority=-7 -diffmerge.args=-nosplash -merge -title1=$labellocal -title2=merged -title3=$labelother $local $base $other -result=$output -diffmerge.check=changed -diffmerge.gui=True -diffmerge.diffargs=--nosplash --title1=$plabel1 --title2=$clabel $parent $child - -p4merge.args=$base $local $other $output -p4merge.regkey=Software\Perforce\Environment -p4merge.regkeyalt=Software\Wow6432Node\Perforce\Environment -p4merge.regname=P4INSTROOT -p4merge.regappend=\p4merge.exe -p4merge.gui=True -p4merge.priority=-8 -p4merge.diffargs=$parent $child - -p4mergeosx.executable = /Applications/p4merge.app/Contents/MacOS/p4merge -p4mergeosx.args = $base $local $other $output -p4mergeosx.gui = True -p4mergeosx.priority=-8 -p4mergeosx.diffargs=$parent $child - -tortoisemerge.args=/base:$base /mine:$local /theirs:$other /merged:$output -tortoisemerge.regkey=Software\TortoiseSVN -tortoisemerge.regkeyalt=Software\Wow6432Node\TortoiseSVN -tortoisemerge.check=changed -tortoisemerge.gui=True -tortoisemerge.priority=-8 -tortoisemerge.diffargs=/base:$parent 
/mine:$child /basename:$plabel1 /minename:$clabel - -ecmerge.args=$base $local $other --mode=merge3 --title0=$labelbase --title1=$labellocal --title2=$labelother --to=$output -ecmerge.regkey=Software\Elli\xc3\xa9 Computing\Merge -ecmerge.regkeyalt=Software\Wow6432Node\Elli\xc3\xa9 Computing\Merge -ecmerge.gui=True -ecmerge.diffargs=$parent $child --mode=diff2 --title1=$plabel1 --title2=$clabel - -# editmerge is a small script shipped in contrib. -# It needs this config otherwise it behaves the same as internal:local -editmerge.args=$output -editmerge.check=changed -editmerge.premerge=keep - -filemerge.executable=/Developer/Applications/Utilities/FileMerge.app/Contents/MacOS/FileMerge -filemerge.args=-left $other -right $local -ancestor $base -merge $output -filemerge.gui=True - -filemergexcode.executable=/Applications/Xcode.app/Contents/Applications/FileMerge.app/Contents/MacOS/FileMerge -filemergexcode.args=-left $other -right $local -ancestor $base -merge $output -filemergexcode.gui=True - -; Windows version of Beyond Compare -beyondcompare3.args=$local $other $base $output /ro /lefttitle=$labellocal /centertitle=$labelbase /righttitle=$labelother /automerge /reviewconflicts /solo -beyondcompare3.regkey=Software\Scooter Software\Beyond Compare 3 -beyondcompare3.regname=ExePath -beyondcompare3.gui=True -beyondcompare3.priority=-2 -beyondcompare3.diffargs=/lro /lefttitle=$plabel1 /righttitle=$clabel /solo /expandall $parent $child - -; Linux version of Beyond Compare -bcompare.args=$local $other $base -mergeoutput=$output -ro -lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother -outputtitle=merged -automerge -reviewconflicts -solo -bcompare.gui=True -bcompare.priority=-1 -bcompare.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo -expandall $parent $child - -; OS X version of Beyond Compare -bcomposx.executable = /Applications/Beyond Compare.app/Contents/MacOS/bcomp -bcomposx.args=$local $other $base -mergeoutput=$output -ro 
-lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother -outputtitle=merged -automerge -reviewconflicts -solo -bcomposx.gui=True -bcomposx.priority=-1 -bcomposx.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo -expandall $parent $child - -winmerge.args=/e /x /wl /ub /dl $labelother /dr $labellocal $other $local $output -winmerge.regkey=Software\Thingamahoochie\WinMerge -winmerge.regkeyalt=Software\Wow6432Node\Thingamahoochie\WinMerge\ -winmerge.regname=Executable -winmerge.check=changed -winmerge.gui=True -winmerge.priority=-10 -winmerge.diffargs=/r /e /x /ub /wl /dl $plabel1 /dr $clabel $parent $child - -araxis.regkey=SOFTWARE\Classes\TypeLib\{46799e0a-7bd1-4330-911c-9660bb964ea2}\7.0\HELPDIR -araxis.regappend=\ConsoleCompare.exe -araxis.priority=-2 -araxis.args=/3 /a2 /wait /merge /title1:"Other" /title2:"Base" /title3:"Local :"$local $other $base $local $output -araxis.checkconflict=True -araxis.binary=True -araxis.gui=True -araxis.diffargs=/2 /wait /title1:$plabel1 /title2:$clabel $parent $child - -diffuse.priority=-3 -diffuse.args=$local $base $other -diffuse.gui=True -diffuse.diffargs=$parent $child - -UltraCompare.regkey=Software\Microsoft\Windows\CurrentVersion\App Paths\UC.exe -UltraCompare.regkeyalt=Software\Wow6432Node\Microsoft\Windows\CurrentVersion\App Paths\UC.exe -UltraCompare.args = $base $local $other -title1 base -title3 other -UltraCompare.priority = -2 -UltraCompare.gui = True -UltraCompare.binary = True -UltraCompare.check = conflicts,changed -UltraCompare.diffargs=$child $parent -title1 $clabel -title2 $plabel1 diff -r 61881b170140 -r 84a0102c05c7 mercurial/defaultrc/__init__.py diff -r 61881b170140 -r 84a0102c05c7 mercurial/defaultrc/mergetools.rc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/defaultrc/mergetools.rc Tue Jan 21 13:14:51 2020 -0500 @@ -0,0 +1,146 @@ +# Some default global settings for common merge tools + +[merge-tools] +kdiff3.args=--auto --L1 $labelbase --L2 $labellocal --L3 
$labelother $base $local $other -o $output +kdiff3.regkey=Software\KDiff3 +kdiff3.regkeyalt=Software\Wow6432Node\KDiff3 +kdiff3.regappend=\kdiff3.exe +kdiff3.fixeol=True +kdiff3.gui=True +kdiff3.diffargs=--L1 $plabel1 --L2 $clabel $parent $child + +gvimdiff.args=--nofork -d -g -O $local $other $base +gvimdiff.regkey=Software\Vim\GVim +gvimdiff.regkeyalt=Software\Wow6432Node\Vim\GVim +gvimdiff.regname=path +gvimdiff.priority=-9 +gvimdiff.diffargs=--nofork -d -g -O $parent $child + +vimdiff.args=$local $other $base -c 'redraw | echomsg "hg merge conflict, type \":cq\" to abort vimdiff"' +vimdiff.check=changed +vimdiff.priority=-10 + +merge.check=conflicts +merge.priority=-100 + +gpyfm.gui=True + +meld.gui=True +meld.args=--label=$labellocal $local --label='merged' $base --label=$labelother $other -o $output --auto-merge +meld.check=changed +meld.diffargs=-a --label=$plabel1 $parent --label=$clabel $child + +tkdiff.args=$local $other -a $base -o $output +tkdiff.gui=True +tkdiff.priority=-8 +tkdiff.diffargs=-L $plabel1 $parent -L $clabel $child + +xxdiff.args=--show-merged-pane --exit-with-merge-status --title1 $labellocal --title2 $labelbase --title3 $labelother --merged-filename $output --merge $local $base $other +xxdiff.gui=True +xxdiff.priority=-8 +xxdiff.diffargs=--title1 $plabel1 $parent --title2 $clabel $child + +diffmerge.regkey=Software\SourceGear\SourceGear DiffMerge\ +diffmerge.regkeyalt=Software\Wow6432Node\SourceGear\SourceGear DiffMerge\ +diffmerge.regname=Location +diffmerge.priority=-7 +diffmerge.args=-nosplash -merge -title1=$labellocal -title2=merged -title3=$labelother $local $base $other -result=$output +diffmerge.check=changed +diffmerge.gui=True +diffmerge.diffargs=--nosplash --title1=$plabel1 --title2=$clabel $parent $child + +p4merge.args=$base $local $other $output +p4merge.regkey=Software\Perforce\Environment +p4merge.regkeyalt=Software\Wow6432Node\Perforce\Environment +p4merge.regname=P4INSTROOT +p4merge.regappend=\p4merge.exe 
+p4merge.gui=True +p4merge.priority=-8 +p4merge.diffargs=$parent $child + +p4mergeosx.executable = /Applications/p4merge.app/Contents/MacOS/p4merge +p4mergeosx.args = $base $local $other $output +p4mergeosx.gui = True +p4mergeosx.priority=-8 +p4mergeosx.diffargs=$parent $child + +tortoisemerge.args=/base:$base /mine:$local /theirs:$other /merged:$output +tortoisemerge.regkey=Software\TortoiseSVN +tortoisemerge.regkeyalt=Software\Wow6432Node\TortoiseSVN +tortoisemerge.check=changed +tortoisemerge.gui=True +tortoisemerge.priority=-8 +tortoisemerge.diffargs=/base:$parent /mine:$child /basename:$plabel1 /minename:$clabel + +ecmerge.args=$base $local $other --mode=merge3 --title0=$labelbase --title1=$labellocal --title2=$labelother --to=$output +ecmerge.regkey=Software\Elli\xc3\xa9 Computing\Merge +ecmerge.regkeyalt=Software\Wow6432Node\Elli\xc3\xa9 Computing\Merge +ecmerge.gui=True +ecmerge.diffargs=$parent $child --mode=diff2 --title1=$plabel1 --title2=$clabel + +# editmerge is a small script shipped in contrib. 
+# It needs this config otherwise it behaves the same as internal:local +editmerge.args=$output +editmerge.check=changed +editmerge.premerge=keep + +filemerge.executable=/Developer/Applications/Utilities/FileMerge.app/Contents/MacOS/FileMerge +filemerge.args=-left $other -right $local -ancestor $base -merge $output +filemerge.gui=True + +filemergexcode.executable=/Applications/Xcode.app/Contents/Applications/FileMerge.app/Contents/MacOS/FileMerge +filemergexcode.args=-left $other -right $local -ancestor $base -merge $output +filemergexcode.gui=True + +; Windows version of Beyond Compare +beyondcompare3.args=$local $other $base $output /ro /lefttitle=$labellocal /centertitle=$labelbase /righttitle=$labelother /automerge /reviewconflicts /solo +beyondcompare3.regkey=Software\Scooter Software\Beyond Compare 3 +beyondcompare3.regname=ExePath +beyondcompare3.gui=True +beyondcompare3.priority=-2 +beyondcompare3.diffargs=/lro /lefttitle=$plabel1 /righttitle=$clabel /solo /expandall $parent $child + +; Linux version of Beyond Compare +bcompare.args=$local $other $base -mergeoutput=$output -ro -lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother -outputtitle=merged -automerge -reviewconflicts -solo +bcompare.gui=True +bcompare.priority=-1 +bcompare.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo -expandall $parent $child + +; OS X version of Beyond Compare +bcomposx.executable = /Applications/Beyond Compare.app/Contents/MacOS/bcomp +bcomposx.args=$local $other $base -mergeoutput=$output -ro -lefttitle=$labellocal -centertitle=$labelbase -righttitle=$labelother -outputtitle=merged -automerge -reviewconflicts -solo +bcomposx.gui=True +bcomposx.priority=-1 +bcomposx.diffargs=-lro -lefttitle=$plabel1 -righttitle=$clabel -solo -expandall $parent $child + +winmerge.args=/e /x /wl /ub /dl $labelother /dr $labellocal $other $local $output +winmerge.regkey=Software\Thingamahoochie\WinMerge 
+winmerge.regkeyalt=Software\Wow6432Node\Thingamahoochie\WinMerge\ +winmerge.regname=Executable +winmerge.check=changed +winmerge.gui=True +winmerge.priority=-10 +winmerge.diffargs=/r /e /x /ub /wl /dl $plabel1 /dr $clabel $parent $child + +araxis.regkey=SOFTWARE\Classes\TypeLib\{46799e0a-7bd1-4330-911c-9660bb964ea2}\7.0\HELPDIR +araxis.regappend=\ConsoleCompare.exe +araxis.priority=-2 +araxis.args=/3 /a2 /wait /merge /title1:"Other" /title2:"Base" /title3:"Local :"$local $other $base $local $output +araxis.checkconflict=True +araxis.binary=True +araxis.gui=True +araxis.diffargs=/2 /wait /title1:$plabel1 /title2:$clabel $parent $child + +diffuse.priority=-3 +diffuse.args=$local $base $other +diffuse.gui=True +diffuse.diffargs=$parent $child + +UltraCompare.regkey=Software\Microsoft\Windows\CurrentVersion\App Paths\UC.exe +UltraCompare.regkeyalt=Software\Wow6432Node\Microsoft\Windows\CurrentVersion\App Paths\UC.exe +UltraCompare.args = $base $local $other -title1 base -title3 other +UltraCompare.priority = -2 +UltraCompare.gui = True +UltraCompare.binary = True +UltraCompare.check = conflicts,changed +UltraCompare.diffargs=$child $parent -title1 $clabel -title2 $plabel1 diff -r 61881b170140 -r 84a0102c05c7 mercurial/dirstate.py --- a/mercurial/dirstate.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/dirstate.py Tue Jan 21 13:14:51 2020 -0500 @@ -36,8 +36,8 @@ util as interfaceutil, ) -parsers = policy.importmod(r'parsers') -rustmod = policy.importrust(r'dirstate') +parsers = policy.importmod('parsers') +rustmod = policy.importrust('dirstate') propertycache = util.propertycache filecache = scmutil.filecache @@ -368,7 +368,7 @@ rereads the dirstate. 
Use localrepo.invalidatedirstate() if you want to check whether the dirstate has changed before rereading it.''' - for a in (r"_map", r"_branch", r"_ignore"): + for a in ("_map", "_branch", "_ignore"): if a in self.__dict__: delattr(self, a) self._lastnormaltime = 0 @@ -404,7 +404,7 @@ _(b'directory %r already in dirstate') % pycompat.bytestr(f) ) # shadows - for d in util.finddirs(f): + for d in pathutil.finddirs(f): if self._map.hastrackeddir(d): break entry = self._map.get(d) @@ -603,19 +603,34 @@ def rebuild(self, parent, allfiles, changedfiles=None): if changedfiles is None: # Rebuild entire dirstate - changedfiles = allfiles + to_lookup = allfiles + to_drop = [] lastnormaltime = self._lastnormaltime self.clear() self._lastnormaltime = lastnormaltime + elif len(changedfiles) < 10: + # Avoid turning allfiles into a set, which can be expensive if it's + # large. + to_lookup = [] + to_drop = [] + for f in changedfiles: + if f in allfiles: + to_lookup.append(f) + else: + to_drop.append(f) + else: + changedfilesset = set(changedfiles) + to_lookup = changedfilesset & set(allfiles) + to_drop = changedfilesset - to_lookup if self._origpl is None: self._origpl = self._pl self._map.setparents(parent, nullid) - for f in changedfiles: - if f in allfiles: - self.normallookup(f) - else: - self.drop(f) + + for f in to_lookup: + self.normallookup(f) + for f in to_drop: + self.drop(f) self._dirty = True @@ -687,8 +702,7 @@ delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite') if delaywrite > 0: # do we have any files to delay for? - items = pycompat.iteritems(self._map) - for f, e in items: + for f, e in pycompat.iteritems(self._map): if e[0] == b'n' and e[3] == now: import time # to avoid useless import @@ -700,12 +714,6 @@ time.sleep(end - clock) now = end # trust our estimate that the end is near now break - # since the iterator is potentially not deleted, - # delete the iterator to release the reference for the Rust - # implementation. 
- # TODO make the Rust implementation behave like Python - # since this would not work with a non ref-counting GC. - del items self._map.write(st, now) self._lastnormaltime = 0 @@ -714,7 +722,7 @@ def _dirignore(self, f): if self._ignore(f): return True - for p in util.finddirs(f): + for p in pathutil.finddirs(f): if self._ignore(p): return True return False @@ -776,7 +784,6 @@ kind = _(b'directory') return _(b'unsupported file type (type is %s)') % kind - matchedir = match.explicitdir badfn = match.bad dmap = self._map lstat = os.lstat @@ -830,8 +837,6 @@ if nf in dmap: # file replaced by dir on disk but still in dirstate results[nf] = None - if matchedir: - matchedir(nf) foundadd((nf, ff)) elif kind == regkind or kind == lnkkind: results[nf] = st @@ -844,8 +849,6 @@ results[nf] = None else: # does it match a missing directory? if self._map.hasdir(nf): - if matchedir: - matchedir(nf) notfoundadd(nf) else: badfn(ff, encoding.strtolocal(inst.strerror)) @@ -946,6 +949,11 @@ # step 1: find all explicit files results, work, dirsnotfound = self._walkexplicit(match, subrepos) + if matchtdir: + for d in work: + matchtdir(d[0]) + for d in dirsnotfound: + matchtdir(d) skipstep3 = skipstep3 and not (work or dirsnotfound) work = [d for d in work if not dirignore(d[0])] @@ -1075,6 +1083,46 @@ results[next(iv)] = st return results + def _rust_status(self, matcher, list_clean): + # Force Rayon (Rust parallelism library) to respect the number of + # workers. This is a temporary workaround until Rust code knows + # how to read the config file. 
+ numcpus = self._ui.configint(b"worker", b"numcpus") + if numcpus is not None: + encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus) + + workers_enabled = self._ui.configbool(b"worker", b"enabled", True) + if not workers_enabled: + encoding.environ[b"RAYON_NUM_THREADS"] = b"1" + + ( + lookup, + modified, + added, + removed, + deleted, + unknown, + clean, + ) = rustmod.status( + self._map._rustmap, + matcher, + self._rootdir, + bool(list_clean), + self._lastnormaltime, + self._checkexec, + ) + + status = scmutil.status( + modified=modified, + added=added, + removed=removed, + deleted=deleted, + unknown=unknown, + ignored=[], + clean=clean, + ) + return (lookup, status) + def status(self, match, subrepos, ignored, clean, unknown): '''Determine the status of the working copy relative to the dirstate and return a pair of (unsure, status), where status is of type @@ -1099,11 +1147,14 @@ dmap.preload() use_rust = True + + allowed_matchers = (matchmod.alwaysmatcher, matchmod.exactmatcher) + if rustmod is None: use_rust = False elif subrepos: use_rust = False - if bool(listunknown): + elif bool(listunknown): # Pathauditor does not exist yet in Rust, unknown files # can't be trusted. use_rust = False @@ -1111,60 +1162,26 @@ # Rust has no ignore mechanism yet, so don't use Rust for # commands that need ignore. use_rust = False - elif not match.always(): + elif not isinstance(match, allowed_matchers): # Matchers have yet to be implemented use_rust = False if use_rust: - # Force Rayon (Rust parallelism library) to respect the number of - # workers. This is a temporary workaround until Rust code knows - # how to read the config file. 
- numcpus = self._ui.configint(b"worker", b"numcpus") - if numcpus is not None: - encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus) - - workers_enabled = self._ui.configbool(b"worker", b"enabled", True) - if not workers_enabled: - encoding.environ[b"RAYON_NUM_THREADS"] = b"1" + return self._rust_status(match, listclean) - ( - lookup, - modified, - added, - removed, - deleted, - unknown, - clean, - ) = rustmod.status( - dmap._rustmap, - self._rootdir, - match.files(), - bool(listclean), - self._lastnormaltime, - self._checkexec, - ) - - status = scmutil.status( - modified=modified, - added=added, - removed=removed, - deleted=deleted, - unknown=unknown, - ignored=ignored, - clean=clean, - ) - return (lookup, status) + def noop(f): + pass dcontains = dmap.__contains__ dget = dmap.__getitem__ ladd = lookup.append # aka "unsure" madd = modified.append aadd = added.append - uadd = unknown.append - iadd = ignored.append + uadd = unknown.append if listunknown else noop + iadd = ignored.append if listignored else noop radd = removed.append dadd = deleted.append - cadd = clean.append + cadd = clean.append if listclean else noop mexact = match.exact dirignore = self._dirignore checkexec = self._checkexec @@ -1418,9 +1435,9 @@ def addfile(self, f, oldstate, state, mode, size, mtime): """Add a tracked file to the dirstate.""" - if oldstate in b"?r" and r"_dirs" in self.__dict__: + if oldstate in b"?r" and "_dirs" in self.__dict__: self._dirs.addpath(f) - if oldstate == b"?" and r"_alldirs" in self.__dict__: + if oldstate == b"?" and "_alldirs" in self.__dict__: self._alldirs.addpath(f) self._map[f] = dirstatetuple(state, mode, size, mtime) if state != b'n' or mtime == -1: @@ -1436,11 +1453,11 @@ the file's previous state. In the future, we should refactor this to be more explicit about what that state is. 
""" - if oldstate not in b"?r" and r"_dirs" in self.__dict__: + if oldstate not in b"?r" and "_dirs" in self.__dict__: self._dirs.delpath(f) - if oldstate == b"?" and r"_alldirs" in self.__dict__: + if oldstate == b"?" and "_alldirs" in self.__dict__: self._alldirs.addpath(f) - if r"filefoldmap" in self.__dict__: + if "filefoldmap" in self.__dict__: normed = util.normcase(f) self.filefoldmap.pop(normed, None) self._map[f] = dirstatetuple(b'r', 0, size, 0) @@ -1453,11 +1470,11 @@ """ exists = self._map.pop(f, None) is not None if exists: - if oldstate != b"r" and r"_dirs" in self.__dict__: + if oldstate != b"r" and "_dirs" in self.__dict__: self._dirs.delpath(f) - if r"_alldirs" in self.__dict__: + if "_alldirs" in self.__dict__: self._alldirs.delpath(f) - if r"filefoldmap" in self.__dict__: + if "filefoldmap" in self.__dict__: normed = util.normcase(f) self.filefoldmap.pop(normed, None) self.nonnormalset.discard(f) @@ -1522,11 +1539,11 @@ @propertycache def _dirs(self): - return util.dirs(self._map, b'r') + return pathutil.dirs(self._map, b'r') @propertycache def _alldirs(self): - return util.dirs(self._map) + return pathutil.dirs(self._map) def _opendirstatefile(self): fp, mode = txnutil.trypending(self._root, self._opener, self._filename) diff -r 61881b170140 -r 84a0102c05c7 mercurial/discovery.py --- a/mercurial/discovery.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/discovery.py Tue Jan 21 13:14:51 2020 -0500 @@ -499,7 +499,7 @@ repo = pushop.repo unfi = repo.unfiltered() tonode = unfi.changelog.node - torev = unfi.changelog.nodemap.get + torev = unfi.changelog.index.get_rev public = phases.public getphase = unfi._phasecache.phase ispublic = lambda r: getphase(unfi, r) == public diff -r 61881b170140 -r 84a0102c05c7 mercurial/dispatch.py --- a/mercurial/dispatch.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/dispatch.py Tue Jan 21 13:14:51 2020 -0500 @@ -15,7 +15,6 @@ import re import signal import sys -import time import traceback @@ -102,7 +101,7 @@ def 
run(): - b"run the command in sys.argv" + """run the command in sys.argv""" initstdio() with tracing.log('parse args into request'): req = request(pycompat.sysargv[1:]) @@ -115,6 +114,8 @@ # In all cases we try to flush stdio streams. if util.safehasattr(req.ui, b'fout'): + assert req.ui is not None # help pytype + assert req.ui.fout is not None # help pytype try: req.ui.fout.flush() except IOError as e: @@ -122,6 +123,8 @@ status = -1 if util.safehasattr(req.ui, b'ferr'): + assert req.ui is not None # help pytype + assert req.ui.ferr is not None # help pytype try: if err is not None and err.errno != errno.EPIPE: req.ui.ferr.write( @@ -658,10 +661,10 @@ def __getattr__(self, name): adefaults = { - r'norepo': True, - r'intents': set(), - r'optionalrepo': False, - r'inferrepo': False, + 'norepo': True, + 'intents': set(), + 'optionalrepo': False, + 'inferrepo': False, } if name not in adefaults: raise AttributeError(name) @@ -1036,8 +1039,8 @@ def get_times(): t = os.times() if t[4] == 0.0: - # Windows leaves this as zero, so use time.clock() - t = (t[0], t[1], t[2], t[3], time.clock()) + # Windows leaves this as zero, so use time.perf_counter() + t = (t[0], t[1], t[2], t[3], util.timer()) return t s = get_times() @@ -1108,6 +1111,7 @@ repo = None cmdpats = args[:] + assert func is not None # help out pytype if not func.norepo: # use the repo from the request only if we don't have -R if not rpath and not cwd: diff -r 61881b170140 -r 84a0102c05c7 mercurial/encoding.py --- a/mercurial/encoding.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/encoding.py Tue Jan 21 13:14:51 2020 -0500 @@ -20,7 +20,24 @@ from .pure import charencode as charencodepure -charencode = policy.importmod(r'charencode') +if pycompat.TYPE_CHECKING: + from typing import ( + Any, + Callable, + List, + Text, + Type, + TypeVar, + Union, + ) + + # keep pyflakes happy + for t in (Any, Callable, List, Text, Type, Union): + assert t + + _Tlocalstr = TypeVar('_Tlocalstr', bound='localstr') + +charencode 
= policy.importmod('charencode') isasciistr = charencode.isasciistr asciilower = charencode.asciilower @@ -45,6 +62,7 @@ def hfsignoreclean(s): + # type: (bytes) -> bytes """Remove codepoints ignored by HFS+ from s. >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) @@ -69,7 +87,7 @@ # preferred encoding isn't known yet; use utf-8 to avoid unicode error # and recreate it once encoding is settled environ = dict( - (k.encode(r'utf-8'), v.encode(r'utf-8')) + (k.encode('utf-8'), v.encode('utf-8')) for k, v in os.environ.items() # re-exports ) @@ -103,6 +121,13 @@ s._utf8 = u return s + if pycompat.TYPE_CHECKING: + # pseudo implementation to help pytype see localstr() constructor + def __init__(self, u, l): + # type: (bytes, bytes) -> None + super(localstr, self).__init__(l) + self._utf8 = u + def __hash__(self): return hash(self._utf8) # avoid collisions in local string space @@ -119,6 +144,7 @@ def tolocal(s): + # type: (bytes) -> bytes """ Convert a string from internal UTF-8 to local encoding @@ -162,7 +188,7 @@ if encoding == b'UTF-8': # fast path return s - r = u.encode(_sysstr(encoding), r"replace") + r = u.encode(_sysstr(encoding), "replace") if u == r.decode(_sysstr(encoding)): # r is a safe, non-lossy encoding of s return safelocalstr(r) @@ -171,7 +197,7 @@ # we should only get here if we're looking at an ancient changeset try: u = s.decode(_sysstr(fallbackencoding)) - r = u.encode(_sysstr(encoding), r"replace") + r = u.encode(_sysstr(encoding), "replace") if u == r.decode(_sysstr(encoding)): # r is a safe, non-lossy encoding of s return safelocalstr(r) @@ -179,12 +205,13 @@ except UnicodeDecodeError: u = s.decode("utf-8", "replace") # last ditch # can't round-trip - return u.encode(_sysstr(encoding), r"replace") + return u.encode(_sysstr(encoding), "replace") except LookupError as k: raise error.Abort(k, hint=b"please check your locale settings") def fromlocal(s): + # type: (bytes) -> bytes """ Convert a string from the local character encoding to UTF-8 @@ 
-214,16 +241,19 @@ def unitolocal(u): + # type: (Text) -> bytes """Convert a unicode string to a byte string of local encoding""" return tolocal(u.encode('utf-8')) def unifromlocal(s): + # type: (bytes) -> Text """Convert a byte string of local encoding to a unicode string""" return fromlocal(s).decode('utf-8') def unimethod(bytesfunc): + # type: (Callable[[Any], bytes]) -> Callable[[Any], Text] """Create a proxy method that forwards __unicode__() and __str__() of Python 3 to __bytes__()""" @@ -241,15 +271,22 @@ strfromlocal = unifromlocal strmethod = unimethod else: - strtolocal = pycompat.identity - strfromlocal = pycompat.identity + + def strtolocal(s): + # type: (str) -> bytes + return s # pytype: disable=bad-return-type + + def strfromlocal(s): + # type: (bytes) -> str + return s # pytype: disable=bad-return-type + strmethod = pycompat.identity if not _nativeenviron: # now encoding and helper functions are available, recreate the environ # dict to be exported to other modules environ = dict( - (tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) + (tolocal(k.encode('utf-8')), tolocal(v.encode('utf-8'))) for k, v in os.environ.items() # re-exports ) @@ -274,12 +311,14 @@ def colwidth(s): - b"Find the column width of a string for display in the local encoding" - return ucolwidth(s.decode(_sysstr(encoding), r'replace')) + # type: (bytes) -> int + """Find the column width of a string for display in the local encoding""" + return ucolwidth(s.decode(_sysstr(encoding), 'replace')) def ucolwidth(d): - b"Find the column width of a Unicode string for display" + # type: (Text) -> int + """Find the column width of a Unicode string for display""" eaw = getattr(unicodedata, 'east_asian_width', None) if eaw is not None: return sum([eaw(c) in _wide and 2 or 1 for c in d]) @@ -287,15 +326,18 @@ def getcols(s, start, c): + # type: (bytes, int, int) -> bytes '''Use colwidth to find a c-column substring of s starting at byte index start''' for x in pycompat.xrange(start + 
c, len(s)): t = s[start:x] if colwidth(t) == c: return t + raise ValueError('substring not found') def trim(s, width, ellipsis=b'', leftside=False): + # type: (bytes, int, bytes, bool) -> bytes """Trim string 's' to at most 'width' columns (including 'ellipsis'). If 'leftside' is True, left side of string 's' is trimmed. @@ -393,7 +435,8 @@ def lower(s): - b"best-effort encoding-aware case-folding of local string s" + # type: (bytes) -> bytes + """best-effort encoding-aware case-folding of local string s""" try: return asciilower(s) except UnicodeDecodeError: @@ -415,7 +458,8 @@ def upper(s): - b"best-effort encoding-aware case-folding of local string s" + # type: (bytes) -> bytes + """best-effort encoding-aware case-folding of local string s""" try: return asciiupper(s) except UnicodeDecodeError: @@ -423,6 +467,7 @@ def upperfallback(s): + # type: (Any) -> Any try: if isinstance(s, localstr): u = s._utf8.decode("utf-8") @@ -457,6 +502,7 @@ def jsonescape(s, paranoid=False): + # type: (Any, Any) -> Any '''returns a string suitable for JSON JSON is problematic for us because it doesn't support non-Unicode @@ -520,6 +566,7 @@ def getutf8char(s, pos): + # type: (bytes, int) -> bytes '''get the next full utf-8 character in the given string, starting at pos Raises a UnicodeError if the given location does not start a valid @@ -538,6 +585,7 @@ def toutf8b(s): + # type: (bytes) -> bytes '''convert a local, possibly-binary string into UTF-8b This is intended as a generic method to preserve data when working @@ -606,6 +654,7 @@ def fromutf8b(s): + # type: (bytes) -> bytes '''Given a UTF-8b string, return a local, possibly-binary string. return the original binary string. 
This diff -r 61881b170140 -r 84a0102c05c7 mercurial/error.py --- a/mercurial/error.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/error.py Tue Jan 21 13:14:51 2020 -0500 @@ -34,7 +34,7 @@ """ def __init__(self, *args, **kw): - self.hint = kw.pop(r'hint', None) + self.hint = kw.pop('hint', None) super(Hint, self).__init__(*args, **kw) diff -r 61881b170140 -r 84a0102c05c7 mercurial/exchange.py --- a/mercurial/exchange.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/exchange.py Tue Jan 21 13:14:51 2020 -0500 @@ -8,7 +8,6 @@ from __future__ import absolute_import import collections -import hashlib from .i18n import _ from .node import ( @@ -40,7 +39,10 @@ wireprototypes, ) from .interfaces import repository -from .utils import stringutil +from .utils import ( + hashutil, + stringutil, +) urlerr = util.urlerr urlreq = util.urlreq @@ -524,8 +526,8 @@ # We can pick: # * missingheads part of common (::commonheads) common = self.outgoing.common - nm = self.repo.changelog.nodemap - cheads = [node for node in self.revs if nm[node] in common] + rev = self.repo.changelog.index.rev + cheads = [node for node in self.revs if rev(node) in common] # and # * commonheads parents on missing revset = unfi.set( @@ -646,6 +648,8 @@ pushop.repo.checkpush(pushop) _checkpublish(pushop) _pushdiscovery(pushop) + if not pushop.force: + _checksubrepostate(pushop) if not _forcebundle1(pushop): _pushbundle2(pushop) _pushchangeset(pushop) @@ -694,6 +698,17 @@ step(pushop) +def _checksubrepostate(pushop): + """Ensure all outgoing referenced subrepo revisions are present locally""" + for n in pushop.outgoing.missing: + ctx = pushop.repo[n] + + if b'.hgsub' in ctx.manifest() and b'.hgsubstate' in ctx.files(): + for subpath in sorted(ctx.substate): + sub = ctx.sub(subpath) + sub.verify(onpush=True) + + @pushdiscovery(b'changeset') def _pushdiscoverychangeset(pushop): """discover the changeset that need to be pushed""" @@ -1851,7 +1866,7 @@ pullop.repo, pullop.remote, heads=pullop.heads, 
force=pullop.force ) common, fetch, rheads = tmp - nm = pullop.repo.unfiltered().changelog.nodemap + has_node = pullop.repo.unfiltered().changelog.index.has_node if fetch and rheads: # If a remote heads is filtered locally, put in back in common. # @@ -1864,7 +1879,7 @@ # but are not including a remote heads, we'll not be able to detect it, scommon = set(common) for n in rheads: - if n in nm: + if has_node(n): if n not in scommon: common.append(n) if set(rheads).issubset(set(common)): @@ -2097,7 +2112,7 @@ dheads = [] unfi = pullop.repo.unfiltered() phase = unfi._phasecache.phase - rev = unfi.changelog.nodemap.get + rev = unfi.changelog.index.get_rev public = phases.public draft = phases.draft @@ -2181,9 +2196,8 @@ ) if not user_includes: raise error.Abort( - _(b"{} configuration for user {} is empty").format( - _NARROWACL_SECTION, username - ) + _(b"%s configuration for user %s is empty") + % (_NARROWACL_SECTION, username) ) user_includes = [ @@ -2193,8 +2207,8 @@ b'path:.' if p == b'*' else b'path:' + p for p in user_excludes ] - req_includes = set(kwargs.get(r'includepats', [])) - req_excludes = set(kwargs.get(r'excludepats', [])) + req_includes = set(kwargs.get('includepats', [])) + req_excludes = set(kwargs.get('excludepats', [])) req_includes, req_excludes, invalid_includes = narrowspec.restrictpatterns( req_includes, req_excludes, user_includes, user_excludes @@ -2202,18 +2216,17 @@ if invalid_includes: raise error.Abort( - _(b"The following includes are not accessible for {}: {}").format( - username, invalid_includes - ) + _(b"The following includes are not accessible for %s: %s") + % (username, stringutil.pprint(invalid_includes)) ) new_args = {} new_args.update(kwargs) - new_args[r'narrow'] = True - new_args[r'narrow_acl'] = True - new_args[r'includepats'] = req_includes + new_args['narrow'] = True + new_args['narrow_acl'] = True + new_args['includepats'] = req_includes if req_excludes: - new_args[r'excludepats'] = req_excludes + new_args['excludepats'] = 
req_excludes return new_args @@ -2476,7 +2489,7 @@ **kwargs ): """add a changegroup part to the requested bundle""" - if not kwargs.get(r'cg', True): + if not kwargs.get('cg', True) or not b2caps: return version = b'01' @@ -2495,9 +2508,9 @@ if not outgoing.missing: return - if kwargs.get(r'narrow', False): - include = sorted(filter(bool, kwargs.get(r'includepats', []))) - exclude = sorted(filter(bool, kwargs.get(r'excludepats', []))) + if kwargs.get('narrow', False): + include = sorted(filter(bool, kwargs.get('includepats', []))) + exclude = sorted(filter(bool, kwargs.get('excludepats', []))) matcher = narrowspec.match(repo.root, include=include, exclude=exclude) else: matcher = None @@ -2519,8 +2532,8 @@ part.addparam(b'exp-sidedata', b'1') if ( - kwargs.get(r'narrow', False) - and kwargs.get(r'narrow_acl', False) + kwargs.get('narrow', False) + and kwargs.get('narrow_acl', False) and (include or exclude) ): # this is mandatory because otherwise ACL clients won't work @@ -2536,9 +2549,9 @@ bundler, repo, source, bundlecaps=None, b2caps=None, **kwargs ): """add a bookmark part to the requested bundle""" - if not kwargs.get(r'bookmarks', False): + if not kwargs.get('bookmarks', False): return - if b'bookmarks' not in b2caps: + if not b2caps or b'bookmarks' not in b2caps: raise error.Abort(_(b'no common bookmarks exchange method')) books = bookmod.listbinbookmarks(repo) data = bookmod.binaryencode(books) @@ -2551,7 +2564,7 @@ bundler, repo, source, bundlecaps=None, b2caps=None, **kwargs ): """add parts containing listkeys namespaces to the requested bundle""" - listkeys = kwargs.get(r'listkeys', ()) + listkeys = kwargs.get('listkeys', ()) for namespace in listkeys: part = bundler.newpart(b'listkeys') part.addparam(b'namespace', namespace) @@ -2564,7 +2577,7 @@ bundler, repo, source, bundlecaps=None, b2caps=None, heads=None, **kwargs ): """add an obsolescence markers part to the requested bundle""" - if kwargs.get(r'obsmarkers', False): + if kwargs.get('obsmarkers', 
False): if heads is None: heads = repo.heads() subset = [c.node() for c in repo.set(b'::%ln', heads)] @@ -2578,8 +2591,8 @@ bundler, repo, source, bundlecaps=None, b2caps=None, heads=None, **kwargs ): """add phase heads part to the requested bundle""" - if kwargs.get(r'phases', False): - if not b'heads' in b2caps.get(b'phases'): + if kwargs.get('phases', False): + if not b2caps or b'heads' not in b2caps.get(b'phases'): raise error.Abort(_(b'no common phases exchange method')) if heads is None: heads = repo.heads() @@ -2643,7 +2656,7 @@ # Don't send unless: # - changeset are being exchanged, # - the client supports it. - if not (kwargs.get(r'cg', True) and b'hgtagsfnodes' in b2caps): + if not b2caps or not (kwargs.get('cg', True) and b'hgtagsfnodes' in b2caps): return outgoing = _computeoutgoing(repo, heads, common) @@ -2676,9 +2689,10 @@ # - the client supports it. # - narrow bundle isn't in play (not currently compatible). if ( - not kwargs.get(r'cg', True) + not kwargs.get('cg', True) + or not b2caps or b'rev-branch-cache' not in b2caps - or kwargs.get(r'narrow', False) + or kwargs.get('narrow', False) or repo.ui.has_section(_NARROWACL_SECTION) ): return @@ -2693,7 +2707,7 @@ Used by peer for unbundling. """ heads = repo.heads() - heads_hash = hashlib.sha1(b''.join(sorted(heads))).digest() + heads_hash = hashutil.sha1(b''.join(sorted(heads))).digest() if not ( their_heads == [b'force'] or their_heads == heads diff -r 61881b170140 -r 84a0102c05c7 mercurial/exchangev2.py --- a/mercurial/exchangev2.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/exchangev2.py Tue Jan 21 13:14:51 2020 -0500 @@ -291,9 +291,9 @@ # See the comment in exchange._pulldiscoverychangegroup() for more. 
if fetch and remoteheads: - nodemap = repo.unfiltered().changelog.nodemap + has_node = repo.unfiltered().changelog.index.has_node - common |= {head for head in remoteheads if head in nodemap} + common |= {head for head in remoteheads if has_node(head)} if set(remoteheads).issubset(common): fetch = [] diff -r 61881b170140 -r 84a0102c05c7 mercurial/extensions.py --- a/mercurial/extensions.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/extensions.py Tue Jan 21 13:14:51 2020 -0500 @@ -92,7 +92,11 @@ # module/__init__.py style d, f = os.path.split(path) fd, fpath, desc = imp.find_module(f, [d]) - return imp.load_module(module_name, fd, fpath, desc) + # When https://github.com/python/typeshed/issues/3466 is fixed + # and in a pytype release we can drop this disable. + return imp.load_module( + module_name, fd, fpath, desc # pytype: disable=wrong-arg-types + ) else: try: return imp.load_source(module_name, path) @@ -591,9 +595,7 @@ break if currcls is object: - raise AttributeError( - r"type '%s' has no property '%s'" % (cls, propname) - ) + raise AttributeError("type '%s' has no property '%s'" % (cls, propname)) class wrappedfunction(object): @@ -783,7 +785,7 @@ def disabled(): '''find disabled extensions from hgext. returns a dict of {name: desc}''' try: - from hgext import __index__ + from hgext import __index__ # pytype: disable=import-error return dict( (name, gettext(desc)) @@ -809,7 +811,7 @@ def disabledext(name): '''find a specific disabled extension from hgext. 
returns desc''' try: - from hgext import __index__ + from hgext import __index__ # pytype: disable=import-error if name in _order: # enabled return @@ -836,7 +838,7 @@ continue if not isinstance(d.func, ast.Name): continue - if d.func.id != r'command': + if d.func.id != 'command': continue yield d diff -r 61881b170140 -r 84a0102c05c7 mercurial/fancyopts.py --- a/mercurial/fancyopts.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/fancyopts.py Tue Jan 21 13:14:51 2020 -0500 @@ -205,7 +205,7 @@ return parsedopts, parsedargs -class customopt(object): +class customopt(object): # pytype: disable=ignored-metaclass """Manage defaults and mutations for any type of opt.""" __metaclass__ = abc.ABCMeta diff -r 61881b170140 -r 84a0102c05c7 mercurial/filemerge.py --- a/mercurial/filemerge.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/filemerge.py Tue Jan 21 13:14:51 2020 -0500 @@ -119,7 +119,7 @@ """ return not ( fctx.isabsent() - and fctx.ctx() == self.ctx() + and fctx.changectx() == self.changectx() and fctx.path() == self.path() ) @@ -279,7 +279,7 @@ def _eoltype(data): - b"Guess the EOL type of a file" + """Guess the EOL type of a file""" if b'\0' in data: # binary return None if b'\r\n' in data: # Windows @@ -292,7 +292,7 @@ def _matcheol(file, back): - b"Convert EOL markers in a file to match origfile" + """Convert EOL markers in a file to match origfile""" tostyle = _eoltype(back.data()) # No repo.wread filters? 
if tostyle: data = util.readfile(file) @@ -693,7 +693,7 @@ ui.status(t.renderdefault(props)) -def _xmerge(repo, mynode, orig, fcd, fco, fca, toolconf, files, labels=None): +def _xmerge(repo, mynode, orig, fcd, fco, fca, toolconf, files, labels): tool, toolpath, binary, symlink, scriptfn = toolconf uipathfn = scmutil.getuipathfn(repo) if fcd.isabsent() or fco.isabsent(): @@ -934,10 +934,10 @@ name = os.path.join(tmproot, pre) if ext: name += ext - f = open(name, r"wb") + f = open(name, "wb") else: fd, name = pycompat.mkstemp(prefix=pre + b'.', suffix=ext) - f = os.fdopen(fd, r"wb") + f = os.fdopen(fd, "wb") return f, name def tempfromcontext(prefix, ctx): diff -r 61881b170140 -r 84a0102c05c7 mercurial/fileset.py --- a/mercurial/fileset.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/fileset.py Tue Jan 21 13:14:51 2020 -0500 @@ -520,29 +520,30 @@ class matchctx(object): - def __init__(self, basectx, ctx, badfn=None): + def __init__(self, basectx, ctx, cwd, badfn=None): self._basectx = basectx self.ctx = ctx self._badfn = badfn self._match = None self._status = None + self.cwd = cwd def narrowed(self, match): """Create matchctx for a sub-tree narrowed by the given matcher""" - mctx = matchctx(self._basectx, self.ctx, self._badfn) + mctx = matchctx(self._basectx, self.ctx, self.cwd, self._badfn) mctx._match = match # leave wider status which we don't have to care mctx._status = self._status return mctx def switch(self, basectx, ctx): - mctx = matchctx(basectx, ctx, self._badfn) + mctx = matchctx(basectx, ctx, self.cwd, self._badfn) mctx._match = self._match return mctx def withstatus(self, keys): """Create matchctx which has precomputed status specified by the keys""" - mctx = matchctx(self._basectx, self.ctx, self._badfn) + mctx = matchctx(self._basectx, self.ctx, self.cwd, self._badfn) mctx._match = self._match mctx._buildstatus(keys) return mctx @@ -560,7 +561,7 @@ return self._status def matcher(self, patterns): - return self.ctx.match(patterns, 
badfn=self._badfn) + return self.ctx.match(patterns, badfn=self._badfn, cwd=self.cwd) def predicate(self, predfn, predrepr=None, cache=False): """Create a matcher to select files by predfn(filename)""" @@ -617,12 +618,12 @@ return matchmod.never(badfn=self._badfn) -def match(ctx, expr, badfn=None): +def match(ctx, cwd, expr, badfn=None): """Create a matcher for a single fileset expression""" tree = filesetlang.parse(expr) tree = filesetlang.analyze(tree) tree = filesetlang.optimize(tree) - mctx = matchctx(ctx.p1(), ctx, badfn=badfn) + mctx = matchctx(ctx.p1(), ctx, cwd, badfn=badfn) return getmatch(mctx, tree) diff -r 61881b170140 -r 84a0102c05c7 mercurial/graphmod.py --- a/mercurial/graphmod.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/graphmod.py Tue Jan 21 13:14:51 2020 -0500 @@ -20,6 +20,7 @@ from __future__ import absolute_import from .node import nullrev +from .thirdparty import attr from . import ( dagop, pycompat, @@ -192,7 +193,7 @@ def asciiedges(type, char, state, rev, parents): """adds edge info to changelog DAG walk suitable for ascii()""" - seen = state[b'seen'] + seen = state.seen if rev not in seen: seen.append(rev) nodeidx = seen.index(rev) @@ -207,7 +208,7 @@ knownparents.append(parent) else: newparents.append(parent) - state[b'edges'][parent] = state[b'styles'].get(ptype, b'|') + state.edges[parent] = state.styles.get(ptype, b'|') ncols = len(seen) width = 1 + ncols * 2 @@ -240,7 +241,7 @@ if nmorecols > 0: width += 2 # remove current node from edge characters, no longer needed - state[b'edges'].pop(rev, None) + state.edges.pop(rev, None) yield (type, char, width, (nodeidx, edges, ncols, nmorecols)) @@ -322,7 +323,7 @@ while edgechars and edgechars[-1] is None: edgechars.pop() shift_size = max((edgechars.count(None) * 2) - 1, 0) - minlines = 3 if not state[b'graphshorten'] else 2 + minlines = 3 if not state.graphshorten else 2 while len(lines) < minlines + shift_size: lines.append(extra[:]) @@ -344,7 +345,7 @@ positions[i] = max(pos, 
targets[i]) line[pos] = b'/' if pos > targets[i] else extra[toshift[i]] - map = {1: b'|', 2: b'~'} if not state[b'graphshorten'] else {1: b'~'} + map = {1: b'|', 2: b'~'} if not state.graphshorten else {1: b'~'} for i, line in enumerate(lines): if None not in line: continue @@ -357,16 +358,16 @@ seen.remove(parent) -def asciistate(): - """returns the initial value for the "state" argument to ascii()""" - return { - b'seen': [], - b'edges': {}, - b'lastcoldiff': 0, - b'lastindex': 0, - b'styles': EDGES.copy(), - b'graphshorten': False, - } +@attr.s +class asciistate(object): + """State of ascii() graph rendering""" + + seen = attr.ib(init=False, default=attr.Factory(list)) + edges = attr.ib(init=False, default=attr.Factory(dict)) + lastcoldiff = attr.ib(init=False, default=0) + lastindex = attr.ib(init=False, default=0) + styles = attr.ib(init=False, default=attr.Factory(EDGES.copy)) + graphshorten = attr.ib(init=False, default=False) def outputgraph(ui, graph): @@ -409,7 +410,7 @@ idx, edges, ncols, coldiff = coldata assert -2 < coldiff < 2 - edgemap, seen = state[b'edges'], state[b'seen'] + edgemap, seen = state.edges, state.seen # Be tolerant of history issues; make sure we have at least ncols + coldiff # elements to work with. See test-glog.t for broken history test cases. echars = [c for p in seen for c in (edgemap.get(p, b'|'), b' ')] @@ -452,10 +453,10 @@ _getnodelineedgestail( echars, idx, - state[b'lastindex'], + state.lastindex, ncols, coldiff, - state[b'lastcoldiff'], + state.lastcoldiff, fix_nodeline_tail, ) ) @@ -485,7 +486,7 @@ # If 'graphshorten' config, only draw shift_interline # when there is any non vertical flow in graph. - if state[b'graphshorten']: + if state.graphshorten: if any(c in br'\/' for c in shift_interline if c): lines.append(shift_interline) # Else, no 'graphshorten' config so draw shift_interline. @@ -512,5 +513,5 @@ outputgraph(ui, zip(lines, text)) # ... 
and start over - state[b'lastcoldiff'] = coldiff - state[b'lastindex'] = idx + state.lastcoldiff = coldiff + state.lastindex = idx diff -r 61881b170140 -r 84a0102c05c7 mercurial/hbisect.py --- a/mercurial/hbisect.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/hbisect.py Tue Jan 21 13:14:51 2020 -0500 @@ -11,6 +11,7 @@ from __future__ import absolute_import import collections +import contextlib from .i18n import _ from .node import ( @@ -180,6 +181,15 @@ raise error.Abort(_(b'cannot bisect (no known bad revisions)')) +@contextlib.contextmanager +def restore_state(repo, state, node): + try: + yield + finally: + state[b'current'] = [node] + save_state(repo, state) + + def get(repo, status): """ Return a list of revision(s) that match the given status: diff -r 61881b170140 -r 84a0102c05c7 mercurial/help.py --- a/mercurial/help.py Thu Jan 09 14:19:20 2020 -0500 +++ b/mercurial/help.py Tue Jan 21 13:14:51 2020 -0500 @@ -8,7 +8,6 @@ from __future__ import absolute_import import itertools -import os import re import textwrap @@ -36,7 +35,10 @@ util, ) from .hgweb import webcommands -from .utils import compression +from .utils import ( + compression, + resourceutil, +) _exclkeywords = { b"(ADVANCED)", @@ -311,11 +313,11 @@ """Return a delayed loader for help/topic.txt.""" def loader(ui): - docdir = os.path.join(util.datapath, b'help') + package = b'mercurial.helptext' if subdir: - docdir = os.path.join(docdir, subdir) - path = os.path.join(docdir, topic + b".txt") - doc = gettext(util.readfile(path)) + package += b'.' 
+ subdir + with resourceutil.open_resource(package, topic + b'.txt') as fp: + doc = gettext(fp.read()) for rewriter in helphooks.get(topic, []): doc = rewriter(ui, topic, doc) return doc @@ -805,7 +807,7 @@ appendcmds(catfns) ex = opts.get - anyopts = ex(r'keyword') or not (ex(r'command') or ex(r'extension')) + anyopts = ex('keyword') or not (ex('command') or ex('extension')) if not name and anyopts: exts = listexts( _(b'enabled extensions:'), diff -r 61881b170140 -r 84a0102c05c7 mercurial/help/bundlespec.txt --- a/mercurial/help/bundlespec.txt Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -Mercurial supports generating standalone "bundle" files that hold repository -data. These "bundles" are typically saved locally and used later or exchanged -between different repositories, possibly on different machines. Example -commands using bundles are :hg:`bundle` and :hg:`unbundle`. - -Generation of bundle files is controlled by a "bundle specification" -("bundlespec") string. This string tells the bundle generation process how -to create the bundle. - -A "bundlespec" string is composed of the following elements: - -type - A string denoting the bundle format to use. - -compression - Denotes the compression engine to use compressing the raw bundle data. - -parameters - Arbitrary key-value parameters to further control bundle generation. - -A "bundlespec" string has the following formats: - - - The literal bundle format string is used. - -- - The compression engine and format are delimited by a hyphen (``-``). - -Optional parameters follow the ````. Parameters are URI escaped -``key=value`` pairs. Each pair is delimited by a semicolon (``;``). The -first parameter begins after a ``;`` immediately following the ```` -value. - -Available Types -=============== - -The following bundle strings are available: - -v1 - Produces a legacy "changegroup" version 1 bundle. 
- - This format is compatible with nearly all Mercurial clients because it is - the oldest. However, it has some limitations, which is why it is no longer - the default for new repositories. - - ``v1`` bundles can be used with modern repositories using the "generaldelta" - storage format. However, it may take longer to produce the bundle and the - resulting bundle may be significantly larger than a ``v2`` bundle. - - ``v1`` bundles can only use the ``gzip``, ``bzip2``, and ``none`` compression - formats. - -v2 - Produces a version 2 bundle. - - Version 2 bundles are an extensible format that can store additional - repository data (such as bookmarks and phases information) and they can - store data more efficiently, resulting in smaller bundles. - - Version 2 bundles can also use modern compression engines, such as - ``zstd``, making them faster to compress and often smaller. - -Available Compression Engines -============================= - -The following bundle engines can be used: - -.. bundlecompressionmarker - -Examples -======== - -``v2`` - Produce a ``v2`` bundle using default options, including compression. - -``none-v1`` - Produce a ``v1`` bundle with no compression. - -``zstd-v2`` - Produce a ``v2`` bundle with zstandard compression using default - settings. - -``zstd-v1`` - This errors because ``zstd`` is not supported for ``v1`` types. diff -r 61881b170140 -r 84a0102c05c7 mercurial/help/color.txt --- a/mercurial/help/color.txt Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ -Mercurial colorizes output from several commands. - -For example, the diff command shows additions in green and deletions -in red, while the status command shows modified files in magenta. Many -other commands have analogous colors. It is possible to customize -these colors. 
- -To enable color (default) whenever possible use:: - - [ui] - color = yes - -To disable color use:: - - [ui] - color = no - -See :hg:`help config.ui.color` for details. - -.. container:: windows - - The default pager on Windows does not support color, so enabling the pager - will effectively disable color. See :hg:`help config.ui.paginate` to disable - the pager. Alternately, MSYS and Cygwin shells provide `less` as a pager, - which can be configured to support ANSI color mode. Windows 10 natively - supports ANSI color mode. - -Mode -==== - -Mercurial can use various systems to display color. The supported modes are -``ansi``, ``win32``, and ``terminfo``. See :hg:`help config.color` for details -about how to control the mode. - -Effects -======= - -Other effects in addition to color, like bold and underlined text, are -also available. By default, the terminfo database is used to find the -terminal codes used to change color and effect. If terminfo is not -available, then effects are rendered with the ECMA-48 SGR control -function (aka ANSI escape codes). - -The available effects in terminfo mode are 'blink', 'bold', 'dim', -'inverse', 'invisible', 'italic', 'standout', and 'underline'; in -ECMA-48 mode, the options are 'bold', 'inverse', 'italic', and -'underline'. How each is rendered depends on the terminal emulator. -Some may not be available for a given terminal type, and will be -silently ignored. - -If the terminfo entry for your terminal is missing codes for an effect -or has the wrong codes, you can add or override those codes in your -configuration:: - - [color] - terminfo.dim = \E[2m - -where '\E' is substituted with an escape character. - -Labels -====== - -Text receives color effects depending on the labels that it has. Many -default Mercurial commands emit labelled text. You can also define -your own labels in templates using the label function, see :hg:`help -templates`. A single portion of text may have more than one label. 
In -that case, effects given to the last label will override any other -effects. This includes the special "none" effect, which nullifies -other effects. - -Labels are normally invisible. In order to see these labels and their -position in the text, use the global --color=debug option. The same -anchor text may be associated to multiple labels, e.g. - - [log.changeset changeset.secret|changeset: 22611:6f0a53c8f587] - -The following are the default effects for some default labels. Default -effects may be overridden from your configuration file:: - - [color] - status.modified = blue bold underline red_background - status.added = green bold - status.removed = red bold blue_background - status.deleted = cyan bold underline - status.unknown = magenta bold underline - status.ignored = black bold - - # 'none' turns off all effects - status.clean = none - status.copied = none - - qseries.applied = blue bold underline - qseries.unapplied = black bold - qseries.missing = red bold - - diff.diffline = bold - diff.extended = cyan bold - diff.file_a = red bold - diff.file_b = green bold - diff.hunk = magenta - diff.deleted = red - diff.inserted = green - diff.changed = white - diff.tab = - diff.trailingwhitespace = bold red_background - - # Blank so it inherits the style of the surrounding label - changeset.public = - changeset.draft = - changeset.secret = - - resolve.unresolved = red bold - resolve.resolved = green bold - - bookmarks.active = green - - branches.active = none - branches.closed = black bold - branches.current = green - branches.inactive = none - - tags.normal = green - tags.local = black bold - - rebase.rebased = blue - rebase.remaining = red bold - - shelve.age = cyan - shelve.newest = green bold - shelve.name = blue bold - - histedit.remaining = red bold - -Custom colors -============= - -Because there are only eight standard colors, Mercurial allows you -to define color names for other color slots which might be available -for your terminal type, assuming 
terminfo mode. For instance:: - - color.brightblue = 12 - color.pink = 207 - color.orange = 202 - -to set 'brightblue' to color slot 12 (useful for 16 color terminals -that have brighter colors defined in the upper eight) and, 'pink' and -'orange' to colors in 256-color xterm's default color cube. These -defined colors may then be used as any of the pre-defined eight, -including appending '_background' to set the background to that color. diff -r 61881b170140 -r 84a0102c05c7 mercurial/help/common.txt --- a/mercurial/help/common.txt Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -.. Common link and substitution definitions. - -.. |hg(1)| replace:: **hg**\ (1) -.. _hg(1): hg.1.html -.. |hgrc(5)| replace:: **hgrc**\ (5) -.. _hgrc(5): hgrc.5.html -.. |hgignore(5)| replace:: **hgignore**\ (5) -.. _hgignore(5): hgignore.5.html diff -r 61881b170140 -r 84a0102c05c7 mercurial/help/config.txt --- a/mercurial/help/config.txt Thu Jan 09 14:19:20 2020 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2870 +0,0 @@ -The Mercurial system uses a set of configuration files to control -aspects of its behavior. - -Troubleshooting -=============== - -If you're having problems with your configuration, -:hg:`config --debug` can help you understand what is introducing -a setting into your environment. - -See :hg:`help config.syntax` and :hg:`help config.files` -for information about how and where to override things. - -Structure -========= - -The configuration files use a simple ini-file format. A configuration -file consists of sections, led by a ``[section]`` header and followed -by ``name = value`` entries:: - - [ui] - username = Firstname Lastname - verbose = True - -The above entries will be referred to as ``ui.username`` and -``ui.verbose``, respectively. See :hg:`help config.syntax`. - -Files -===== - -Mercurial reads configuration data from several files, if they exist. 
-These files do not exist by default and you will have to create the -appropriate configuration files yourself: - -Local configuration is put into the per-repository ``/.hg/hgrc`` file. - -Global configuration like the username setting is typically put into: - -.. container:: windows - - - ``%USERPROFILE%\mercurial.ini`` (on Windows) - -.. container:: unix.plan9 - - - ``$HOME/.hgrc`` (on Unix, Plan9) - -The names of these files depend on the system on which Mercurial is -installed. ``*.rc`` files from a single directory are read in -alphabetical order, later ones overriding earlier ones. Where multiple -paths are given below, settings from earlier paths override later -ones. - -.. container:: verbose.unix - - On Unix, the following files are consulted: - - - ``/.hg/hgrc`` (per-repository) - - ``$HOME/.hgrc`` (per-user) - - ``${XDG_CONFIG_HOME:-$HOME/.config}/hg/hgrc`` (per-user) - - ``/etc/mercurial/hgrc`` (per-installation) - - ``/etc/mercurial/hgrc.d/*.rc`` (per-installation) - - ``/etc/mercurial/hgrc`` (per-system) - - ``/etc/mercurial/hgrc.d/*.rc`` (per-system) - - ``/default.d/*.rc`` (defaults) - -.. container:: verbose.windows - - On Windows, the following files are consulted: - - - ``/.hg/hgrc`` (per-repository) - - ``%USERPROFILE%\.hgrc`` (per-user) - - ``%USERPROFILE%\Mercurial.ini`` (per-user) - - ``%HOME%\.hgrc`` (per-user) - - ``%HOME%\Mercurial.ini`` (per-user) - - ``HKEY_LOCAL_MACHINE\SOFTWARE\Mercurial`` (per-installation) - - ``\hgrc.d\*.rc`` (per-installation) - - ``\Mercurial.ini`` (per-installation) - - ``/default.d/*.rc`` (defaults) - - .. note:: - - The registry key ``HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Mercurial`` - is used when running 32-bit Python on 64-bit Windows. - -.. container:: windows - - On Windows 9x, ``%HOME%`` is replaced by ``%APPDATA%``. - -.. 
container:: verbose.plan9 - - On Plan9, the following files are consulted: - - - ``/.hg/hgrc`` (per-repository) - - ``$home/lib/hgrc`` (per-user) - - ``/lib/mercurial/hgrc`` (per-installation) - - ``/lib/mercurial/hgrc.d/*.rc`` (per-installation) - - ``/lib/mercurial/hgrc`` (per-system) - - ``/lib/mercurial/hgrc.d/*.rc`` (per-system) - - ``/default.d/*.rc`` (defaults) - -Per-repository configuration options only apply in a -particular repository. This file is not version-controlled, and -will not get transferred during a "clone" operation. Options in -this file override options in all other configuration files. - -.. container:: unix.plan9 - - On Plan 9 and Unix, most of this file will be ignored if it doesn't - belong to a trusted user or to a trusted group. See - :hg:`help config.trusted` for more details. - -Per-user configuration file(s) are for the user running Mercurial. Options -in these files apply to all Mercurial commands executed by this user in any -directory. Options in these files override per-system and per-installation -options. - -Per-installation configuration files are searched for in the -directory where Mercurial is installed. ```` is the -parent directory of the **hg** executable (or symlink) being run. - -.. container:: unix.plan9 - - For example, if installed in ``/shared/tools/bin/hg``, Mercurial - will look in ``/shared/tools/etc/mercurial/hgrc``. Options in these - files apply to all Mercurial commands executed by any user in any - directory. - -Per-installation configuration files are for the system on -which Mercurial is running. Options in these files apply to all -Mercurial commands executed by any user in any directory. Registry -keys contain PATH-like strings, every part of which must reference -a ``Mercurial.ini`` file or be a directory where ``*.rc`` files will -be read. Mercurial checks each of these locations in the specified -order until one or more configuration files are detected. 
- -Per-system configuration files are for the system on which Mercurial -is running. Options in these files apply to all Mercurial commands -executed by any user in any directory. Options in these files -override per-installation options. - -Mercurial comes with some default configuration. The default configuration -files are installed with Mercurial and will be overwritten on upgrades. Default -configuration files should never be edited by users or administrators but can -be overridden in other configuration files. So far the directory only contains -merge tool configuration but packagers can also put other default configuration -there. - -Syntax -====== - -A configuration file consists of sections, led by a ``[section]`` header -and followed by ``name = value`` entries (sometimes called -``configuration keys``):: - - [spam] - eggs=ham - green= - eggs - -Each line contains one entry. If the lines that follow are indented, -they are treated as continuations of that entry. Leading whitespace is -removed from values. Empty lines are skipped. Lines beginning with -``#`` or ``;`` are ignored and may be used to provide comments. - -Configuration keys can be set multiple times, in which case Mercurial -will use the value that was configured last. As an example:: - - [spam] - eggs=large - ham=serrano - eggs=small - -This would set the configuration key named ``eggs`` to ``small``. - -It is also possible to define a section multiple times. A section can -be redefined on the same and/or on different configuration files. For -example:: - - [foo] - eggs=large - ham=serrano - eggs=small - - [bar] - eggs=ham - green= - eggs - - [foo] - ham=prosciutto - eggs=medium - bread=toasted - -This would set the ``eggs``, ``ham``, and ``bread`` configuration keys -of the ``foo`` section to ``medium``, ``prosciutto``, and ``toasted``, -respectively. As you can see there only thing that matters is the last -value that was set for each of the configuration keys. 
- -If a configuration key is set multiple times in different -configuration files the final value will depend on the order in which -the different configuration files are read, with settings from earlier -paths overriding later ones as described on the ``Files`` section -above. - -A line of the form ``%include file`` will include ``file`` into the -current configuration file. The inclusion is recursive, which means -that included files can include other files. Filenames are relative to -the configuration file in which the ``%include`` directive is found. -Environment variables and ``~user`` constructs are expanded in -``file``. This lets you do something like:: - - %include ~/.hgrc.d/$HOST.rc - -to include a different configuration file on each computer you use. - -A line with ``%unset name`` will remove ``name`` from the current -section, if it has been set previously. - -The values are either free-form text strings, lists of text strings, -or Boolean values. Boolean values can be set to true using any of "1", -"yes", "true", or "on" and to false using "0", "no", "false", or "off" -(all case insensitive). - -List values are separated by whitespace or comma, except when values are -placed in double quotation marks:: - - allow_read = "John Doe, PhD", brian, betty - -Quotation marks can be escaped by prefixing them with a backslash. Only -quotation marks at the beginning of a word is counted as a quotation -(e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``). - -Sections -======== - -This section describes the different sections that may appear in a -Mercurial configuration file, the purpose of each section, its possible -keys, and their possible values. - -``alias`` ---------- - -Defines command aliases. - -Aliases allow you to define your own commands in terms of other -commands (or aliases), optionally including arguments. Positional -arguments in the form of ``$1``, ``$2``, etc. in the alias definition -are expanded by Mercurial before execution. 
Positional arguments not -already used by ``$N`` in the definition are put at the end of the -command to be executed. - -Alias definitions consist of lines of the form:: - - = []... - -For example, this definition:: - - latest = log --limit 5 - -creates a new command ``latest`` that shows only the five most recent -changesets. You can define subsequent aliases using earlier ones:: - - stable5 = latest -b stable - -.. note:: - - It is possible to create aliases with the same names as - existing commands, which will then override the original - definitions. This is almost always a bad idea! - -An alias can start with an exclamation point (``!``) to make it a -shell alias. A shell alias is executed with the shell and will let you -run arbitrary commands. As an example, :: - - echo = !echo $@ - -will let you do ``hg echo foo`` to have ``foo`` printed in your -terminal. A better example might be:: - - purge = !$HG status --no-status --unknown -0 re: | xargs -0 rm -f - -which will make ``hg purge`` delete all unknown files in the -repository in the same manner as the purge extension. - -Positional arguments like ``$1``, ``$2``, etc. in the alias definition -expand to the command arguments. Unmatched arguments are -removed. ``$0`` expands to the alias name and ``$@`` expands to all -arguments separated by a space. ``"$@"`` (with quotes) expands to all -arguments quoted individually and separated by a space. These expansions -happen before the command is passed to the shell. - -Shell aliases are executed in an environment where ``$HG`` expands to -the path of the Mercurial that was used to execute the alias. This is -useful when you want to call further Mercurial commands in a shell -alias, as was done above for the purge alias. In addition, -``$HG_ARGS`` expands to the arguments given to Mercurial. In the ``hg -echo foo`` call above, ``$HG_ARGS`` would expand to ``echo foo``. - -.. 
note:: - - Some global configuration options such as ``-R`` are - processed before shell aliases and will thus not be passed to - aliases. - - -``annotate`` ------------- - -Settings used when displaying file annotations. All values are -Booleans and default to False. See :hg:`help config.diff` for -related options for the diff command. - -``ignorews`` - Ignore white space when comparing lines. - -``ignorewseol`` - Ignore white space at the end of a line when comparing lines. - -``ignorewsamount`` - Ignore changes in the amount of white space. - -``ignoreblanklines`` - Ignore changes whose lines are all blank. - - -``auth`` --------- - -Authentication credentials and other authentication-like configuration -for HTTP connections. This section allows you to store usernames and -passwords for use when logging *into* HTTP servers. See -:hg:`help config.web` if you want to configure *who* can login to -your HTTP server. - -The following options apply to all hosts. - -``cookiefile`` - Path to a file containing HTTP cookie lines. Cookies matching a - host will be sent automatically. - - The file format uses the Mozilla cookies.txt format, which defines cookies - on their own lines. Each line contains 7 fields delimited by the tab - character (domain, is_domain_cookie, path, is_secure, expires, name, - value). For more info, do an Internet search for "Netscape cookies.txt - format." - - Note: the cookies parser does not handle port numbers on domains. You - will need to remove ports from the domain for the cookie to be recognized. - This could result in a cookie being disclosed to an unwanted server. - - The cookies file is read-only. - -Other options in this section are grouped by name and have the following -format:: - - . = - -where ```` is used to group arguments into authentication -entries. 
Example:: - - foo.prefix = hg.intevation.de/mercurial - foo.username = foo - foo.password = bar - foo.schemes = http https - - bar.prefix = secure.example.org - bar.key = path/to/file.key - bar.cert = path/to/file.cert - bar.schemes = https - -Supported arguments: - -``prefix`` - Either ``*`` or a URI prefix with or without the scheme part. - The authentication entry with the longest matching prefix is used - (where ``*`` matches everything and counts as a match of length - 1). If the prefix doesn't include a scheme, the match is performed - against the URI with its scheme stripped as well, and the schemes - argument, q.v., is then subsequently consulted. - -``username`` - Optional. Username to authenticate with. If not given, and the - remote site requires basic or digest authentication, the user will - be prompted for it. Environment variables are expanded in the - username letting you do ``foo.username = $USER``. If the URI - includes a username, only ``[auth]`` entries with a matching - username or without a username will be considered. - -``password`` - Optional. Password to authenticate with. If not given, and the - remote site requires basic or digest authentication, the user - will be prompted for it. - -``key`` - Optional. PEM encoded client certificate key file. Environment - variables are expanded in the filename. - -``cert`` - Optional. PEM encoded client certificate chain file. Environment - variables are expanded in the filename. - -``schemes`` - Optional. Space separated list of URI schemes to use this - authentication entry with. Only used if the prefix doesn't include - a scheme. Supported schemes are http and https. They will match - static-http and static-https respectively, as well. - (default: https) - -If no suitable authentication entry is found, the user is prompted -for credentials as usual if required by the remote. - -``color`` ---------- - -Configure the Mercurial color mode. 
For details about how to define your custom -effect and style see :hg:`help color`. - -``mode`` - String: control the method used to output color. One of ``auto``, ``ansi``, - ``win32``, ``terminfo`` or ``debug``. In auto mode, Mercurial will - use ANSI mode by default (or win32 mode prior to Windows 10) if it detects a - terminal. Any invalid value will disable color. - -``pagermode`` - String: optional override of ``color.mode`` used with pager. - - On some systems, terminfo mode may cause problems when using - color with ``less -R`` as a pager program. less with the -R option - will only display ECMA-48 color codes, and terminfo mode may sometimes - emit codes that less doesn't understand. You can work around this by - either using ansi mode (or auto mode), or by using less -r (which will - pass through all terminal control codes, not just color control - codes). - - On some systems (such as MSYS in Windows), the terminal may support - a different color mode than the pager program. - -``commands`` ------------- - -``commit.post-status`` - Show status of files in the working directory after successful commit. - (default: False) - -``push.require-revs`` - Require that revisions to push be specified using one or more mechanisms such as - specifying them positionally on the command line, using ``-r``, ``-b``, - and/or ``-B`` on the command line, or using ``paths.<path>:pushrev`` in the - configuration. If this is enabled and revisions are not specified, the - command aborts. - (default: False) - -``resolve.confirm`` - Confirm before performing action if no filename is passed. - (default: False) - -``resolve.explicit-re-merge`` - Require uses of ``hg resolve`` to specify which action it should perform, - instead of re-merging files by default. - (default: False) - -``resolve.mark-check`` - Determines what level of checking :hg:`resolve --mark` will perform before - marking files as resolved. Valid values are ``none``, ``warn``, and - ``abort``.
``warn`` will output a warning listing the file(s) that still - have conflict markers in them, but will still mark everything resolved. - ``abort`` will output the same warning but will not mark things as resolved. - If --all is passed and this is set to ``abort``, only a warning will be - shown (an error will not be raised). - (default: ``none``) - -``status.relative`` - Make paths in :hg:`status` output relative to the current directory. - (default: False) - -``status.terse`` - Default value for the --terse flag, which condenses status output. - (default: empty) - -``update.check`` - Determines what level of checking :hg:`update` will perform before moving - to a destination revision. Valid values are ``abort``, ``none``, - ``linear``, and ``noconflict``. ``abort`` always fails if the working - directory has uncommitted changes. ``none`` performs no checking, and may - result in a merge with uncommitted changes. ``linear`` allows any update - as long as it follows a straight line in the revision history, and may - trigger a merge with uncommitted changes. ``noconflict`` will allow any - update which would not trigger a merge with uncommitted changes, if any - are present. - (default: ``linear``) - -``update.requiredest`` - Require that the user pass a destination when running :hg:`update`. - For example, :hg:`update .::` will be allowed, but a plain :hg:`update` - will be disallowed. - (default: False) - -``committemplate`` ------------------- - -``changeset`` - String: configuration in this section is used as the template to - customize the text shown in the editor when committing. - -In addition to pre-defined template keywords, commit log specific one -below can be used for customization: - -``extramsg`` - String: Extra message (typically 'Leave message empty to abort - commit.'). This may be changed by some commands or extensions. 
- -For example, the template configuration below shows as same text as -one shown by default:: - - [committemplate] - changeset = {desc}\n\n - HG: Enter commit message. Lines beginning with 'HG:' are removed. - HG: {extramsg} - HG: -- - HG: user: {author}\n{ifeq(p2rev, "-1", "", - "HG: branch merge\n") - }HG: branch '{branch}'\n{if(activebookmark, - "HG: bookmark '{activebookmark}'\n") }{subrepos % - "HG: subrepo {subrepo}\n" }{file_adds % - "HG: added {file}\n" }{file_mods % - "HG: changed {file}\n" }{file_dels % - "HG: removed {file}\n" }{if(files, "", - "HG: no files changed\n")} - -``diff()`` - String: show the diff (see :hg:`help templates` for detail) - -Sometimes it is helpful to show the diff of the changeset in the editor without -having to prefix 'HG: ' to each line so that highlighting works correctly. For -this, Mercurial provides a special string which will ignore everything below -it:: - - HG: ------------------------ >8 ------------------------ - -For example, the template configuration below will show the diff below the -extra message:: - - [committemplate] - changeset = {desc}\n\n - HG: Enter commit message. Lines beginning with 'HG:' are removed. - HG: {extramsg} - HG: ------------------------ >8 ------------------------ - HG: Do not touch the line above. - HG: Everything below will be removed. - {diff()} - -.. note:: - - For some problematic encodings (see :hg:`help win32mbcs` for - detail), this customization should be configured carefully, to - avoid showing broken characters. - - For example, if a multibyte character ending with backslash (0x5c) is - followed by the ASCII character 'n' in the customized template, - the sequence of backslash and 'n' is treated as line-feed unexpectedly - (and the multibyte character is broken, too). 
- -Customized template is used for commands below (``--edit`` may be -required): - -- :hg:`backout` -- :hg:`commit` -- :hg:`fetch` (for merge commit only) -- :hg:`graft` -- :hg:`histedit` -- :hg:`import` -- :hg:`qfold`, :hg:`qnew` and :hg:`qrefresh` -- :hg:`rebase` -- :hg:`shelve` -- :hg:`sign` -- :hg:`tag` -- :hg:`transplant` - -Configuring items below instead of ``changeset`` allows showing -customized message only for specific actions, or showing different -messages for each action. - -- ``changeset.backout`` for :hg:`backout` -- ``changeset.commit.amend.merge`` for :hg:`commit --amend` on merges -- ``changeset.commit.amend.normal`` for :hg:`commit --amend` on other -- ``changeset.commit.normal.merge`` for :hg:`commit` on merges -- ``changeset.commit.normal.normal`` for :hg:`commit` on other -- ``changeset.fetch`` for :hg:`fetch` (implying merge commit) -- ``changeset.gpg.sign`` for :hg:`sign` -- ``changeset.graft`` for :hg:`graft` -- ``changeset.histedit.edit`` for ``edit`` of :hg:`histedit` -- ``changeset.histedit.fold`` for ``fold`` of :hg:`histedit` -- ``changeset.histedit.mess`` for ``mess`` of :hg:`histedit` -- ``changeset.histedit.pick`` for ``pick`` of :hg:`histedit` -- ``changeset.import.bypass`` for :hg:`import --bypass` -- ``changeset.import.normal.merge`` for :hg:`import` on merges -- ``changeset.import.normal.normal`` for :hg:`import` on other -- ``changeset.mq.qnew`` for :hg:`qnew` -- ``changeset.mq.qfold`` for :hg:`qfold` -- ``changeset.mq.qrefresh`` for :hg:`qrefresh` -- ``changeset.rebase.collapse`` for :hg:`rebase --collapse` -- ``changeset.rebase.merge`` for :hg:`rebase` on merges -- ``changeset.rebase.normal`` for :hg:`rebase` on other -- ``changeset.shelve.shelve`` for :hg:`shelve` -- ``changeset.tag.add`` for :hg:`tag` without ``--remove`` -- ``changeset.tag.remove`` for :hg:`tag --remove` -- ``changeset.transplant.merge`` for :hg:`transplant` on merges -- ``changeset.transplant.normal`` for :hg:`transplant` on other - -These dot-separated
lists of names are treated as hierarchical ones. -For example, ``changeset.tag.remove`` customizes the commit message -only for :hg:`tag --remove`, but ``changeset.tag`` customizes the -commit message for :hg:`tag` regardless of ``--remove`` option. - -When the external editor is invoked for a commit, the corresponding -dot-separated list of names without the ``changeset.`` prefix -(e.g. ``commit.normal.normal``) is in the ``HGEDITFORM`` environment -variable. - -In this section, items other than ``changeset`` can be referred from -others. For example, the configuration to list committed files up -below can be referred as ``{listupfiles}``:: - - [committemplate] - listupfiles = {file_adds % - "HG: added {file}\n" }{file_mods % - "HG: changed {file}\n" }{file_dels % - "HG: removed {file}\n" }{if(files, "", - "HG: no files changed\n")} - -``decode/encode`` ------------------ - -Filters for transforming files on checkout/checkin. This would -typically be used for newline processing or other -localization/canonicalization of files. - -Filters consist of a filter pattern followed by a filter command. -Filter patterns are globs by default, rooted at the repository root. -For example, to match any file ending in ``.txt`` in the root -directory only, use the pattern ``*.txt``. To match any file ending -in ``.c`` anywhere in the repository, use the pattern ``**.c``. -For each file only the first matching filter applies. - -The filter command can start with a specifier, either ``pipe:`` or -``tempfile:``. If no specifier is given, ``pipe:`` is used by default. - -A ``pipe:`` command must accept data on stdin and return the transformed -data on stdout. 
- -Pipe example:: - - [encode] - # uncompress gzip files on checkin to improve delta compression - # note: not necessarily a good idea, just an example - *.gz = pipe: gunzip - - [decode] - # recompress gzip files when writing them to the working dir (we - # can safely omit "pipe:", because it's the default) - *.gz = gzip - -A ``tempfile:`` command is a template. The string ``INFILE`` is replaced -with the name of a temporary file that contains the data to be -filtered by the command. The string ``OUTFILE`` is replaced with the name -of an empty temporary file, where the filtered data must be written by -the command. - -.. container:: windows - - .. note:: - - The tempfile mechanism is recommended for Windows systems, - where the standard shell I/O redirection operators often have - strange effects and may corrupt the contents of your files. - -This filter mechanism is used internally by the ``eol`` extension to -translate line ending characters between Windows (CRLF) and Unix (LF) -format. We suggest you use the ``eol`` extension for convenience. - - -``defaults`` ------------- - -(defaults are deprecated. Don't use them. Use aliases instead.) - -Use the ``[defaults]`` section to define command defaults, i.e. the -default options/arguments to pass to the specified commands. - -The following example makes :hg:`log` run in verbose mode, and -:hg:`status` show only the modified files, by default:: - - [defaults] - log = -v - status = -m - -The actual commands, instead of their aliases, must be used when -defining command defaults. The command defaults will also be applied -to the aliases of the commands defined. - - -``diff`` --------- - -Settings used when displaying diffs. Everything except for ``unified`` -is a Boolean and defaults to False. See :hg:`help config.annotate` -for related options for the annotate command. - -``git`` - Use git extended diff format. - -``nobinary`` - Omit git binary patches. - -``nodates`` - Don't include dates in diff headers. 
- -``noprefix`` - Omit 'a/' and 'b/' prefixes from filenames. Ignored in plain mode. - -``showfunc`` - Show which function each change is in. - -``ignorews`` - Ignore white space when comparing lines. - -``ignorewsamount`` - Ignore changes in the amount of white space. - -``ignoreblanklines`` - Ignore changes whose lines are all blank. - -``unified`` - Number of lines of context to show. - -``word-diff`` - Highlight changed words. - -``email`` ---------- - -Settings for extensions that send email messages. - -``from`` - Optional. Email address to use in "From" header and SMTP envelope - of outgoing messages. - -``to`` - Optional. Comma-separated list of recipients' email addresses. - -``cc`` - Optional. Comma-separated list of carbon copy recipients' - email addresses. - -``bcc`` - Optional. Comma-separated list of blind carbon copy recipients' - email addresses. - -``method`` - Optional. Method to use to send email messages. If value is ``smtp`` - (default), use SMTP (see the ``[smtp]`` section for configuration). - Otherwise, use as name of program to run that acts like sendmail - (takes ``-f`` option for sender, list of recipients on command line, - message on stdin). Normally, setting this to ``sendmail`` or - ``/usr/sbin/sendmail`` is enough to use sendmail to send messages. - -``charsets`` - Optional. Comma-separated list of character sets considered - convenient for recipients. Addresses, headers, and parts not - containing patches of outgoing messages will be encoded in the - first character set to which conversion from local encoding - (``$HGENCODING``, ``ui.fallbackencoding``) succeeds. If correct - conversion fails, the text in question is sent as is. - (default: '') - - Order of outgoing email character sets: - - 1. ``us-ascii``: always first, regardless of settings - 2. ``email.charsets``: in order given by user - 3. ``ui.fallbackencoding``: if not in email.charsets - 4. ``$HGENCODING``: if not in email.charsets - 5. 
``utf-8``: always last, regardless of settings - -Email example:: - - [email] - from = Joseph User - method = /usr/sbin/sendmail - # charsets for western Europeans - # us-ascii, utf-8 omitted, as they are tried first and last - charsets = iso-8859-1, iso-8859-15, windows-1252 - - -``extensions`` --------------- - -Mercurial has an extension mechanism for adding new features. To -enable an extension, create an entry for it in this section. - -If you know that the extension is already in Python's search path, -you can give the name of the module, followed by ``=``, with nothing -after the ``=``. - -Otherwise, give a name that you choose, followed by ``=``, followed by -the path to the ``.py`` file (including the file name extension) that -defines the extension. - -To explicitly disable an extension that is enabled in an hgrc of -broader scope, prepend its path with ``!``, as in ``foo = !/ext/path`` -or ``foo = !`` when path is not supplied. - -Example for ``~/.hgrc``:: - - [extensions] - # (the churn extension will get loaded from Mercurial's path) - churn = - # (this extension will get loaded from the file specified) - myfeature = ~/.hgext/myfeature.py - - -``format`` ----------- - -Configuration that controls the repository format. Newer format options are more -powerful but incompatible with some older versions of Mercurial. Format options -are considered at repository initialization only. You need to make a new clone -for config change to be taken into account. - -For more details about repository format and version compatibility, see -https://www.mercurial-scm.org/wiki/MissingRequirement - -``usegeneraldelta`` - Enable or disable the "generaldelta" repository format which improves - repository compression by allowing "revlog" to store delta against arbitrary - revision instead of the previous stored one. This provides significant - improvement for repositories with branches. - - Repositories with this on-disk format require Mercurial version 1.9. 
- - Enabled by default. - -``dotencode`` - Enable or disable the "dotencode" repository format which enhances - the "fncache" repository format (which has to be enabled to use - dotencode) to avoid issues with filenames starting with ._ on - Mac OS X and spaces on Windows. - - Repositories with this on-disk format require Mercurial version 1.7. - - Enabled by default. - -``usefncache`` - Enable or disable the "fncache" repository format which enhances - the "store" repository format (which has to be enabled to use - fncache) to allow longer filenames and avoid using Windows - reserved names, e.g. "nul". - - Repositories with this on-disk format require Mercurial version 1.1. - - Enabled by default. - -``usestore`` - Enable or disable the "store" repository format which improves - compatibility with systems that fold case or otherwise mangle - filenames. Disabling this option will allow you to store longer filenames - in some situations at the expense of compatibility. - - Repositories with this on-disk format require Mercurial version 0.9.4. - - Enabled by default. - -``sparse-revlog`` - Enable or disable the ``sparse-revlog`` delta strategy. This format improves - delta re-use inside revlog. For very branchy repositories, it results in a - smaller store. For repositories with many revisions, it also helps - performance (by using shortened delta chains.) - - Repositories with this on-disk format require Mercurial version 4.7. - - Enabled by default. - -``revlog-compression`` - Compression algorithm used by revlog. Supported values are `zlib` and `zstd`. - The `zlib` engine is the historical default of Mercurial. `zstd` is a newer - format that is usually a net win over `zlib`, operating faster at a better - compression rate. Use `zstd` to reduce CPU usage. - - On some systems, the Mercurial installation may lack `zstd` support. Default is `zlib`. - -``bookmarks-in-store`` - Store bookmarks in .hg/store/.
This means that bookmarks are shared when - using `hg share` regardless of the `-B` option. - - Repositories with this on-disk format require Mercurial version 5.1. - - Disabled by default. - - -``graph`` ---------- - -Web graph view configuration. This section let you change graph -elements display properties by branches, for instance to make the -``default`` branch stand out. - -Each line has the following format:: - - . = - -where ```` is the name of the branch being -customized. Example:: - - [graph] - # 2px width - default.width = 2 - # red color - default.color = FF0000 - -Supported arguments: - -``width`` - Set branch edges width in pixels. - -``color`` - Set branch edges color in hexadecimal RGB notation. - -``hooks`` ---------- - -Commands or Python functions that get automatically executed by -various actions such as starting or finishing a commit. Multiple -hooks can be run for the same action by appending a suffix to the -action. Overriding a site-wide hook can be done by changing its -value or setting it to an empty string. Hooks can be prioritized -by adding a prefix of ``priority.`` to the hook name on a new line -and setting the priority. The default priority is 0. - -Example ``.hg/hgrc``:: - - [hooks] - # update working directory after adding changesets - changegroup.update = hg update - # do not use the site-wide hook - incoming = - incoming.email = /my/email/hook - incoming.autobuild = /my/build/hook - # force autobuild hook to run before other incoming hooks - priority.incoming.autobuild = 1 - -Most hooks are run with environment variables set that give useful -additional information. For each hook below, the environment variables -it is passed are listed with names in the form ``$HG_foo``. The -``$HG_HOOKTYPE`` and ``$HG_HOOKNAME`` variables are set for all hooks. -They contain the type of hook which triggered the run and the full name -of the hook in the config, respectively. 
In the example above, this will -be ``$HG_HOOKTYPE=incoming`` and ``$HG_HOOKNAME=incoming.email``. - -.. container:: windows - - Some basic Unix syntax can be enabled for portability, including ``$VAR`` - and ``${VAR}`` style variables. A ``~`` followed by ``\`` or ``/`` will - be expanded to ``%USERPROFILE%`` to simulate a subset of tilde expansion - on Unix. To use a literal ``$`` or ``~``, it must be escaped with a back - slash or inside of a strong quote. Strong quotes will be replaced by - double quotes after processing. - - This feature is enabled by adding a prefix of ``tonative.`` to the hook - name on a new line, and setting it to ``True``. For example:: - - [hooks] - incoming.autobuild = /my/build/hook - # enable translation to cmd.exe syntax for autobuild hook - tonative.incoming.autobuild = True - -``changegroup`` - Run after a changegroup has been added via push, pull or unbundle. The ID of - the first new changeset is in ``$HG_NODE`` and last is in ``$HG_NODE_LAST``. - The URL from which changes came is in ``$HG_URL``. - -``commit`` - Run after a changeset has been created in the local repository. The ID - of the newly created changeset is in ``$HG_NODE``. Parent changeset - IDs are in ``$HG_PARENT1`` and ``$HG_PARENT2``. - -``incoming`` - Run after a changeset has been pulled, pushed, or unbundled into - the local repository. The ID of the newly arrived changeset is in - ``$HG_NODE``. The URL that was source of the changes is in ``$HG_URL``. - -``outgoing`` - Run after sending changes from the local repository to another. The ID of - first changeset sent is in ``$HG_NODE``. The source of operation is in - ``$HG_SOURCE``. Also see :hg:`help config.hooks.preoutgoing`. - -``post-`` - Run after successful invocations of the associated command. The - contents of the command line are passed as ``$HG_ARGS`` and the result - code in ``$HG_RESULT``. Parsed command line arguments are passed as - ``$HG_PATS`` and ``$HG_OPTS``. 
These contain string representations of - the python data internally passed to . ``$HG_OPTS`` is a - dictionary of options (with unspecified options set to their defaults). - ``$HG_PATS`` is a list of arguments. Hook failure is ignored. - -``fail-`` - Run after a failed invocation of an associated command. The contents - of the command line are passed as ``$HG_ARGS``. Parsed command line - arguments are passed as ``$HG_PATS`` and ``$HG_OPTS``. These contain - string representations of the python data internally passed to - . ``$HG_OPTS`` is a dictionary of options (with unspecified - options set to their defaults). ``$HG_PATS`` is a list of arguments. - Hook failure is ignored. - -``pre-`` - Run before executing the associated command. The contents of the - command line are passed as ``$HG_ARGS``. Parsed command line arguments - are passed as ``$HG_PATS`` and ``$HG_OPTS``. These contain string - representations of the data internally passed to . ``$HG_OPTS`` - is a dictionary of options (with unspecified options set to their - defaults). ``$HG_PATS`` is a list of arguments. If the hook returns - failure, the command doesn't execute and Mercurial returns the failure - code. - -``prechangegroup`` - Run before a changegroup is added via push, pull or unbundle. Exit - status 0 allows the changegroup to proceed. A non-zero status will - cause the push, pull or unbundle to fail. The URL from which changes - will come is in ``$HG_URL``. - -``precommit`` - Run before starting a local commit. Exit status 0 allows the - commit to proceed. A non-zero status will cause the commit to fail. - Parent changeset IDs are in ``$HG_PARENT1`` and ``$HG_PARENT2``. - -``prelistkeys`` - Run before listing pushkeys (like bookmarks) in the - repository. A non-zero status will cause failure. The key namespace is - in ``$HG_NAMESPACE``. - -``preoutgoing`` - Run before collecting changes to send from the local repository to - another. A non-zero status will cause failure. 
This lets you prevent - pull over HTTP or SSH. It can also prevent propagating commits (via - local pull, push (outbound) or bundle commands), but not completely, - since you can just copy files instead. The source of operation is in - ``$HG_SOURCE``. If "serve", the operation is happening on behalf of a remote - SSH or HTTP repository. If "push", "pull" or "bundle", the operation - is happening on behalf of a repository on same system. - -``prepushkey`` - Run before a pushkey (like a bookmark) is added to the - repository. A non-zero status will cause the key to be rejected. The - key namespace is in ``$HG_NAMESPACE``, the key is in ``$HG_KEY``, - the old value (if any) is in ``$HG_OLD``, and the new value is in - ``$HG_NEW``. - -``pretag`` - Run before creating a tag. Exit status 0 allows the tag to be - created. A non-zero status will cause the tag to fail. The ID of the - changeset to tag is in ``$HG_NODE``. The name of tag is in ``$HG_TAG``. The - tag is local if ``$HG_LOCAL=1``, or in the repository if ``$HG_LOCAL=0``. - -``pretxnopen`` - Run before any new repository transaction is open. The reason for the - transaction will be in ``$HG_TXNNAME``, and a unique identifier for the - transaction will be in ``HG_TXNID``. A non-zero status will prevent the - transaction from being opened. - -``pretxnclose`` - Run right before the transaction is actually finalized. Any repository change - will be visible to the hook program. This lets you validate the transaction - content or change it. Exit status 0 allows the commit to proceed. A non-zero - status will cause the transaction to be rolled back. The reason for the - transaction opening will be in ``$HG_TXNNAME``, and a unique identifier for - the transaction will be in ``HG_TXNID``. The rest of the available data will - vary according the transaction type. 
New changesets will add ``$HG_NODE`` - (the ID of the first added changeset), ``$HG_NODE_LAST`` (the ID of the last - added changeset), ``$HG_URL`` and ``$HG_SOURCE`` variables. Bookmark and - phase changes will set ``HG_BOOKMARK_MOVED`` and ``HG_PHASES_MOVED`` to ``1`` - respectively, etc. - -``pretxnclose-bookmark`` - Run right before a bookmark change is actually finalized. Any repository - change will be visible to the hook program. This lets you validate the - transaction content or change it. Exit status 0 allows the commit to - proceed. A non-zero status will cause the transaction to be rolled back. - The name of the bookmark will be available in ``$HG_BOOKMARK``, the new - bookmark location will be available in ``$HG_NODE`` while the previous - location will be available in ``$HG_OLDNODE``. In case of a bookmark - creation ``$HG_OLDNODE`` will be empty. In case of deletion ``$HG_NODE`` - will be empty. - In addition, the reason for the transaction opening will be in - ``$HG_TXNNAME``, and a unique identifier for the transaction will be in - ``HG_TXNID``. - -``pretxnclose-phase`` - Run right before a phase change is actually finalized. Any repository change - will be visible to the hook program. This lets you validate the transaction - content or change it. Exit status 0 allows the commit to proceed. A non-zero - status will cause the transaction to be rolled back. The hook is called - multiple times, once for each revision affected by a phase change. - The affected node is available in ``$HG_NODE``, the phase in ``$HG_PHASE`` - while the previous ``$HG_OLDPHASE``. In case of new node, ``$HG_OLDPHASE`` - will be empty. In addition, the reason for the transaction opening will be in - ``$HG_TXNNAME``, and a unique identifier for the transaction will be in - ``HG_TXNID``. The hook is also run for newly added revisions. In this case - the ``$HG_OLDPHASE`` entry will be empty. - -``txnclose`` - Run after any repository transaction has been committed. 
At this - point, the transaction can no longer be rolled back. The hook will run - after the lock is released. See :hg:`help config.hooks.pretxnclose` for - details about available variables. - -``txnclose-bookmark`` - Run after any bookmark change has been committed. At this point, the - transaction can no longer be rolled back. The hook will run after the lock - is released. See :hg:`help config.hooks.pretxnclose-bookmark` for details - about available variables. - -``txnclose-phase`` - Run after any phase change has been committed. At this point, the - transaction can no longer be rolled back. The hook will run after the lock - is released. See :hg:`help config.hooks.pretxnclose-phase` for details about - available variables. - -``txnabort`` - Run when a transaction is aborted. See :hg:`help config.hooks.pretxnclose` - for details about available variables. - -``pretxnchangegroup`` - Run after a changegroup has been added via push, pull or unbundle, but before - the transaction has been committed. The changegroup is visible to the hook - program. This allows validation of incoming changes before accepting them. - The ID of the first new changeset is in ``$HG_NODE`` and last is in - ``$HG_NODE_LAST``. Exit status 0 allows the transaction to commit. A non-zero - status will cause the transaction to be rolled back, and the push, pull or - unbundle will fail. The URL that was the source of changes is in ``$HG_URL``. - -``pretxncommit`` - Run after a changeset has been created, but before the transaction is - committed. The changeset is visible to the hook program. This allows - validation of the commit message and changes. Exit status 0 allows the - commit to proceed. A non-zero status will cause the transaction to - be rolled back. The ID of the new changeset is in ``$HG_NODE``. The parent - changeset IDs are in ``$HG_PARENT1`` and ``$HG_PARENT2``. - -``preupdate`` - Run before updating the working directory. Exit status 0 allows - the update to proceed. 
A non-zero status will prevent the update. - The changeset ID of first new parent is in ``$HG_PARENT1``. If updating to a - merge, the ID of second new parent is in ``$HG_PARENT2``. - -``listkeys`` - Run after listing pushkeys (like bookmarks) in the repository. The - key namespace is in ``$HG_NAMESPACE``. ``$HG_VALUES`` is a - dictionary containing the keys and values. - -``pushkey`` - Run after a pushkey (like a bookmark) is added to the - repository. The key namespace is in ``$HG_NAMESPACE``, the key is in - ``$HG_KEY``, the old value (if any) is in ``$HG_OLD``, and the new - value is in ``$HG_NEW``. - -``tag`` - Run after a tag is created. The ID of the tagged changeset is in ``$HG_NODE``. - The name of tag is in ``$HG_TAG``. The tag is local if ``$HG_LOCAL=1``, or in - the repository if ``$HG_LOCAL=0``. - -``update`` - Run after updating the working directory. The changeset ID of first - new parent is in ``$HG_PARENT1``. If updating to a merge, the ID of second new - parent is in ``$HG_PARENT2``. If the update succeeded, ``$HG_ERROR=0``. If the - update failed (e.g. because conflicts were not resolved), ``$HG_ERROR=1``. - -.. note:: - - It is generally better to use standard hooks rather than the - generic pre- and post- command hooks, as they are guaranteed to be - called in the appropriate contexts for influencing transactions. - Also, hooks like "commit" will be called in all contexts that - generate a commit (e.g. tag) and not just the commit command. - -.. note:: - - Environment variables with empty values may not be passed to - hooks on platforms such as Windows. As an example, ``$HG_PARENT2`` - will have an empty value under Unix-like platforms for non-merge - changesets, while it will not be available at all under Windows. - -The syntax for Python hooks is as follows:: - - hookname = python:modulename.submodule.callable - hookname = python:/path/to/python/module.py:callable - -Python hooks are run within the Mercurial process. 
Each hook is -called with at least three keyword arguments: a ui object (keyword -``ui``), a repository object (keyword ``repo``), and a ``hooktype`` -keyword that tells what kind of hook is used. Arguments listed as -environment variables above are passed as keyword arguments, with no -``HG_`` prefix, and names in lower case. - -If a Python hook returns a "true" value or raises an exception, this -is treated as a failure. - - -``hostfingerprints`` --------------------- - -(Deprecated. Use ``[hostsecurity]``'s ``fingerprints`` options instead.) - -Fingerprints of the certificates of known HTTPS servers. - -A HTTPS connection to a server with a fingerprint configured here will -only succeed if the servers certificate matches the fingerprint. -This is very similar to how ssh known hosts works. - -The fingerprint is the SHA-1 hash value of the DER encoded certificate. -Multiple values can be specified (separated by spaces or commas). This can -be used to define both old and new fingerprints while a host transitions -to a new certificate. - -The CA chain and web.cacerts is not used for servers with a fingerprint. - -For example:: - - [hostfingerprints] - hg.intevation.de = fc:e2:8d:d9:51:cd:cb:c1:4d:18:6b:b7:44:8d:49:72:57:e6:cd:33 - hg.intevation.org = fc:e2:8d:d9:51:cd:cb:c1:4d:18:6b:b7:44:8d:49:72:57:e6:cd:33 - -``hostsecurity`` ----------------- - -Used to specify global and per-host security settings for connecting to -other machines. - -The following options control default behavior for all hosts. - -``ciphers`` - Defines the cryptographic ciphers to use for connections. - - Value must be a valid OpenSSL Cipher List Format as documented at - https://www.openssl.org/docs/manmaster/apps/ciphers.html#CIPHER-LIST-FORMAT. - - This setting is for advanced users only. Setting to incorrect values - can significantly lower connection security or decrease performance. - You have been warned. - - This option requires Python 2.7. 
- -``minimumprotocol`` - Defines the minimum channel encryption protocol to use. - - By default, the highest version of TLS supported by both client and server - is used. - - Allowed values are: ``tls1.0``, ``tls1.1``, ``tls1.2``. - - When running on an old Python version, only ``tls1.0`` is allowed since - old versions of Python only support up to TLS 1.0. - - When running a Python that supports modern TLS versions, the default is - ``tls1.1``. ``tls1.0`` can still be used to allow TLS 1.0. However, this - weakens security and should only be used as a feature of last resort if - a server does not support TLS 1.1+. - -Options in the ``[hostsecurity]`` section can have the form -``hostname``:``setting``. This allows multiple settings to be defined on a -per-host basis. - -The following per-host settings can be defined. - -``ciphers`` - This behaves like ``ciphers`` as described above except it only applies - to the host on which it is defined. - -``fingerprints`` - A list of hashes of the DER encoded peer/remote certificate. Values have - the form ``algorithm``:``fingerprint``. e.g. - ``sha256:c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2``. - In addition, colons (``:``) can appear in the fingerprint part. - - The following algorithms/prefixes are supported: ``sha1``, ``sha256``, - ``sha512``. - - Use of ``sha256`` or ``sha512`` is preferred. - - If a fingerprint is specified, the CA chain is not validated for this - host and Mercurial will require the remote certificate to match one - of the fingerprints specified. This means if the server updates its - certificate, Mercurial will abort until a new fingerprint is defined. - This can provide stronger security than traditional CA-based validation - at the expense of convenience. - - This option takes precedence over ``verifycertsfile``. - -``minimumprotocol`` - This behaves like ``minimumprotocol`` as described above except it - only applies to the host on which it is defined. 
- -``verifycertsfile`` - Path to a file containing a list of PEM encoded certificates used to - verify the server certificate. Environment variables and ``~user`` - constructs are expanded in the filename. - - The server certificate or the certificate's certificate authority (CA) - must match a certificate from this file or certificate verification - will fail and connections to the server will be refused. - - If defined, only certificates provided by this file will be used: - ``web.cacerts`` and any system/default certificates will not be - used. - - This option has no effect if the per-host ``fingerprints`` option - is set. - - The format of the file is as follows:: - - -----BEGIN CERTIFICATE----- - ... (certificate in base64 PEM encoding) ... - -----END CERTIFICATE----- - -----BEGIN CERTIFICATE----- - ... (certificate in base64 PEM encoding) ... - -----END CERTIFICATE----- - -For example:: - - [hostsecurity] - hg.example.com:fingerprints = sha256:c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2 - hg2.example.com:fingerprints = sha1:914f1aff87249c09b6859b88b1906d30756491ca, sha1:fc:e2:8d:d9:51:cd:cb:c1:4d:18:6b:b7:44:8d:49:72:57:e6:cd:33 - hg3.example.com:fingerprints = sha256:9a:b0:dc:e2:75:ad:8a:b7:84:58:e5:1f:07:32:f1:87:e6:bd:24:22:af:b7:ce:8e:9c:b4:10:cf:b9:f4:0e:d2 - foo.example.com:verifycertsfile = /etc/ssl/trusted-ca-certs.pem - -To change the default minimum protocol version to TLS 1.2 but to allow TLS 1.1 -when connecting to ``hg.example.com``:: - - [hostsecurity] - minimumprotocol = tls1.2 - hg.example.com:minimumprotocol = tls1.1 - -``http_proxy`` --------------- - -Used to access web-based Mercurial repositories through a HTTP -proxy. - -``host`` - Host name and (optional) port of the proxy server, for example - "myproxy:8000". - -``no`` - Optional. Comma-separated list of host names that should bypass - the proxy. - -``passwd`` - Optional. Password to authenticate with at the proxy server. - -``user`` - Optional.
User name to authenticate with at the proxy server. - -``always`` - Optional. Always use the proxy, even for localhost and any entries - in ``http_proxy.no``. (default: False) - -``http`` ----------- - -Used to configure access to Mercurial repositories via HTTP. - -``timeout`` - If set, blocking operations will timeout after that many seconds. - (default: None) - -``merge`` ---------- - -This section specifies behavior during merges and updates. - -``checkignored`` - Controls behavior when an ignored file on disk has the same name as a tracked - file in the changeset being merged or updated to, and has different - contents. Options are ``abort``, ``warn`` and ``ignore``. With ``abort``, - abort on such files. With ``warn``, warn on such files and back them up as - ``.orig``. With ``ignore``, don't print a warning and back them up as - ``.orig``. (default: ``abort``) - -``checkunknown`` - Controls behavior when an unknown file that isn't ignored has the same name - as a tracked file in the changeset being merged or updated to, and has - different contents. Similar to ``merge.checkignored``, except for files that - are not ignored. (default: ``abort``) - -``on-failure`` - When set to ``continue`` (the default), the merge process attempts to - merge all unresolved files using the merge chosen tool, regardless of - whether previous file merge attempts during the process succeeded or not. - Setting this to ``prompt`` will prompt after any merge failure continue - or halt the merge process. Setting this to ``halt`` will automatically - halt the merge process on any merge tool failure. The merge process - can be restarted by using the ``resolve`` command. When a merge is - halted, the repository is left in a normal ``unresolved`` merge state. - (default: ``continue``) - -``strict-capability-check`` - Whether capabilities of internal merge tools are checked strictly - or not, while examining rules to decide merge tool to be used. 
- (default: False) - -``merge-patterns`` ------------------- - -This section specifies merge tools to associate with particular file -patterns. Tools matched here will take precedence over the default -merge tool. Patterns are globs by default, rooted at the repository -root. - -Example:: - - [merge-patterns] - **.c = kdiff3 - **.jpg = myimgmerge - -``merge-tools`` ---------------- - -This section configures external merge tools to use for file-level -merges. This section has likely been preconfigured at install time. -Use :hg:`config merge-tools` to check the existing configuration. -Also see :hg:`help merge-tools` for more details. - -Example ``~/.hgrc``:: - - [merge-tools] - # Override stock tool location - kdiff3.executable = ~/bin/kdiff3 - # Specify command line - kdiff3.args = $base $local $other -o $output - # Give higher priority - kdiff3.priority = 1 - - # Changing the priority of preconfigured tool - meld.priority = 0 - - # Disable a preconfigured tool - vimdiff.disabled = yes - - # Define new tool - myHtmlTool.args = -m $local $other $base $output - myHtmlTool.regkey = Software\FooSoftware\HtmlMerge - myHtmlTool.priority = 1 - -Supported arguments: - -``priority`` - The priority in which to evaluate this tool. - (default: 0) - -``executable`` - Either just the name of the executable or its pathname. - - .. container:: windows - - On Windows, the path can use environment variables with ${ProgramFiles} - syntax. - - (default: the tool name) - -``args`` - The arguments to pass to the tool executable. You can refer to the - files being merged as well as the output file through these - variables: ``$base``, ``$local``, ``$other``, ``$output``. - - The meaning of ``$local`` and ``$other`` can vary depending on which action is - being performed. During an update or merge, ``$local`` represents the original - state of the file, while ``$other`` represents the commit you are updating to or - the commit you are merging with. 
During a rebase, ``$local`` represents the - destination of the rebase, and ``$other`` represents the commit being rebased. - - Some operations define custom labels to assist with identifying the revisions, - accessible via ``$labellocal``, ``$labelother``, and ``$labelbase``. If custom - labels are not available, these will be ``local``, ``other``, and ``base``, - respectively. - (default: ``$local $base $other``) - -``premerge`` - Attempt to run internal non-interactive 3-way merge tool before - launching external tool. Options are ``true``, ``false``, ``keep`` or - ``keep-merge3``. The ``keep`` option will leave markers in the file if the - premerge fails. The ``keep-merge3`` will do the same but include information - about the base of the merge in the marker (see internal :merge3 in - :hg:`help merge-tools`). - (default: True) - -``binary`` - This tool can merge binary files. (default: False, unless tool - was selected by file pattern match) - -``symlink`` - This tool can merge symlinks. (default: False) - -``check`` - A list of merge success-checking options: - - ``changed`` - Ask whether merge was successful when the merged file shows no changes. - ``conflicts`` - Check whether there are conflicts even though the tool reported success. - ``prompt`` - Always prompt for merge success, regardless of success reported by tool. - -``fixeol`` - Attempt to fix up EOL changes caused by the merge tool. - (default: False) - -``gui`` - This tool requires a graphical interface to run. (default: False) - -``mergemarkers`` - Controls whether the labels passed via ``$labellocal``, ``$labelother``, and - ``$labelbase`` are ``detailed`` (respecting ``mergemarkertemplate``) or - ``basic``. If ``premerge`` is ``keep`` or ``keep-merge3``, the conflict - markers generated during premerge will be ``detailed`` if either this option or - the corresponding option in the ``[ui]`` section is ``detailed``. 
- (default: ``basic``) - -``mergemarkertemplate`` - This setting can be used to override ``mergemarkertemplate`` from the ``[ui]`` - section on a per-tool basis; this applies to the ``$label``-prefixed variables - and to the conflict markers that are generated if ``premerge`` is ``keep`` or - ``keep-merge3``. See the corresponding variable in ``[ui]`` for more - information. - -.. container:: windows - - ``regkey`` - Windows registry key which describes install location of this - tool. Mercurial will search for this key first under - ``HKEY_CURRENT_USER`` and then under ``HKEY_LOCAL_MACHINE``. - (default: None) - - ``regkeyalt`` - An alternate Windows registry key to try if the first key is not - found. The alternate key uses the same ``regname`` and ``regappend`` - semantics of the primary key. The most common use for this key - is to search for 32bit applications on 64bit operating systems. - (default: None) - - ``regname`` - Name of value to read from specified registry key. - (default: the unnamed (default) value) - - ``regappend`` - String to append to the value read from the registry, typically - the executable name of the tool. - (default: None) - -``pager`` ---------- - -Setting used to control when to paginate and with what external tool. See -:hg:`help pager` for details. - -``pager`` - Define the external tool used as pager. - - If no pager is set, Mercurial uses the environment variable $PAGER. - If neither pager.pager, nor $PAGER is set, a default pager will be - used, typically `less` on Unix and `more` on Windows. Example:: - - [pager] - pager = less -FRX - -``ignore`` - List of commands to disable the pager for. Example:: - - [pager] - ignore = version, help, update - -``patch`` ---------- - -Settings used when applying patches, for instance through the 'import' -command or with Mercurial Queues extension. - -``eol`` - When set to 'strict' patch content and patched files end of lines - are preserved.
When set to ``lf`` or ``crlf``, both files end of - lines are ignored when patching and the result line endings are - normalized to either LF (Unix) or CRLF (Windows). When set to - ``auto``, end of lines are again ignored while patching but line - endings in patched files are normalized to their original setting - on a per-file basis. If target file does not exist or has no end - of line, patch line endings are preserved. - (default: strict) - -``fuzz`` - The number of lines of 'fuzz' to allow when applying patches. This - controls how much context the patcher is allowed to ignore when - trying to apply a patch. - (default: 2) - -``paths`` ---------- - -Assigns symbolic names and behavior to repositories. - -Options are symbolic names defining the URL or directory that is the -location of the repository. Example:: - - [paths] - my_server = https://example.com/my_repo - local_path = /home/me/repo - -These symbolic names can be used from the command line. To pull -from ``my_server``: :hg:`pull my_server`. To push to ``local_path``: -:hg:`push local_path`. - -Options containing colons (``:``) denote sub-options that can influence -behavior for that specific path. Example:: - - [paths] - my_server = https://example.com/my_path - my_server:pushurl = ssh://example.com/my_path - -The following sub-options can be defined: - -``pushurl`` - The URL to use for push operations. If not defined, the location - defined by the path's main entry is used. - -``pushrev`` - A revset defining which revisions to push by default. - - When :hg:`push` is executed without a ``-r`` argument, the revset - defined by this sub-option is evaluated to determine what to push. - - For example, a value of ``.`` will push the working directory's - revision by default. - - Revsets specifying bookmarks will not result in the bookmark being - pushed. - -The following special named paths exist: - -``default`` - The URL or directory to use when no source or remote is specified. 
- - :hg:`clone` will automatically define this path to the location the - repository was cloned from. - -``default-push`` - (deprecated) The URL or directory for the default :hg:`push` location. - ``default:pushurl`` should be used instead. - -``phases`` ----------- - -Specifies default handling of phases. See :hg:`help phases` for more -information about working with phases. - -``publish`` - Controls draft phase behavior when working as a server. When true, - pushed changesets are set to public in both client and server and - pulled or cloned changesets are set to public in the client. - (default: True) - -``new-commit`` - Phase of newly-created commits. - (default: draft) - -``checksubrepos`` - Check the phase of the current revision of each subrepository. Allowed - values are "ignore", "follow" and "abort". For settings other than - "ignore", the phase of the current revision of each subrepository is - checked before committing the parent repository. If any of those phases is - greater than the phase of the parent repository (e.g. if a subrepo is in a - "secret" phase while the parent repo is in "draft" phase), the commit is - either aborted (if checksubrepos is set to "abort") or the higher phase is - used for the parent repository commit (if set to "follow"). - (default: follow) - - -``profiling`` -------------- - -Specifies profiling type, format, and file output. Two profilers are -supported: an instrumenting profiler (named ``ls``), and a sampling -profiler (named ``stat``). - -In this section description, 'profiling data' stands for the raw data -collected during profiling, while 'profiling report' stands for a -statistical text report generated from the profiling data. - -``enabled`` - Enable the profiler. - (default: false) - - This is equivalent to passing ``--profile`` on the command line. - -``type`` - The type of profiler to use. - (default: stat) - - ``ls`` - Use Python's built-in instrumenting profiler. 
This profiler - works on all platforms, but each line number it reports is the - first line of a function. This restriction makes it difficult to - identify the expensive parts of a non-trivial function. - ``stat`` - Use a statistical profiler, statprof. This profiler is most - useful for profiling commands that run for longer than about 0.1 - seconds. - -``format`` - Profiling format. Specific to the ``ls`` instrumenting profiler. - (default: text) - - ``text`` - Generate a profiling report. When saving to a file, it should be - noted that only the report is saved, and the profiling data is - not kept. - ``kcachegrind`` - Format profiling data for kcachegrind use: when saving to a - file, the generated file can directly be loaded into - kcachegrind. - -``statformat`` - Profiling format for the ``stat`` profiler. - (default: hotpath) - - ``hotpath`` - Show a tree-based display containing the hot path of execution (where - most time was spent). - ``bymethod`` - Show a table of methods ordered by how frequently they are active. - ``byline`` - Show a table of lines in files ordered by how frequently they are active. - ``json`` - Render profiling data as JSON. - -``frequency`` - Sampling frequency. Specific to the ``stat`` sampling profiler. - (default: 1000) - -``output`` - File path where profiling data or report should be saved. If the - file exists, it is replaced. (default: None, data is printed on - stderr) - -``sort`` - Sort field. Specific to the ``ls`` instrumenting profiler. - One of ``callcount``, ``reccallcount``, ``totaltime`` and - ``inlinetime``. - (default: inlinetime) - -``time-track`` - Control if the stat profiler track ``cpu`` or ``real`` time. - (default: ``cpu`` on Windows, otherwise ``real``) - -``limit`` - Number of lines to show. Specific to the ``ls`` instrumenting profiler. - (default: 30) - -``nested`` - Show at most this number of lines of drill-down info after each main entry. 
- This can help explain the difference between Total and Inline. - Specific to the ``ls`` instrumenting profiler. - (default: 0) - -``showmin`` - Minimum fraction of samples an entry must have for it to be displayed. - Can be specified as a float between ``0.0`` and ``1.0`` or can have a - ``%`` afterwards to allow values up to ``100``. e.g. ``5%``. - - Only used by the ``stat`` profiler. - - For the ``hotpath`` format, default is ``0.05``. - For the ``chrome`` format, default is ``0.005``. - - The option is unused on other formats. - -``showmax`` - Maximum fraction of samples an entry can have before it is ignored in - display. Values format is the same as ``showmin``. - - Only used by the ``stat`` profiler. - - For the ``chrome`` format, default is ``0.999``. - - The option is unused on other formats. - -``showtime`` - Show time taken as absolute durations, in addition to percentages. - Only used by the ``hotpath`` format. - (default: true) - -``progress`` ------------- - -Mercurial commands can draw progress bars that are as informative as -possible. Some progress bars only offer indeterminate information, while others -have a definite end point. - -``debug`` - Whether to print debug info when updating the progress bar. (default: False) - -``delay`` - Number of seconds (float) before showing the progress bar. (default: 3) - -``changedelay`` - Minimum delay before showing a new topic. When set to less than 3 * refresh, - that value will be used instead. (default: 1) - -``estimateinterval`` - Maximum sampling interval in seconds for speed and estimated time - calculation. (default: 60) - -``refresh`` - Time in seconds between refreshes of the progress bar. (default: 0.1) - -``format`` - Format of the progress bar. - - Valid entries for the format field are ``topic``, ``bar``, ``number``, - ``unit``, ``estimate``, ``speed``, and ``item``. 
``item`` defaults to the - last 20 characters of the item, but this can be changed by adding either - ``-<num>`` which would take the last num characters, or ``+<num>`` for the - first num characters. - - (default: topic bar number estimate) - -``width`` - If set, the maximum width of the progress information (that is, min(width, - term width) will be used). - -``clear-complete`` - Clear the progress bar after it's done. (default: True) - -``disable`` - If true, don't show a progress bar. - -``assume-tty`` - If true, ALWAYS show a progress bar, unless disable is given. - -``rebase`` ----------- - -``evolution.allowdivergence`` - Default to False, when True allow creating divergence when performing - rebase of obsolete changesets. - -``revsetalias`` ---------------- - -Alias definitions for revsets. See :hg:`help revsets` for details. - -``rewrite`` ------------ - -``backup-bundle`` - Whether to save stripped changesets to a bundle file. (default: True) - -``update-timestamp`` - If true, updates the date and time of the changeset to current. It is only - applicable for `hg amend`, `hg commit --amend` and `hg uncommit` in the - current version. - -``storage`` ------------ - -Control the strategy Mercurial uses internally to store history. Options in this -category impact performance and repository size. - -``revlog.optimize-delta-parent-choice`` - When storing a merge revision, both parents will be equally considered as - a possible delta base. This results in better delta selection and improved - revlog compression. This option is enabled by default. - - Turning this option off can result in large increase of repository size for - repository with many merges. - -``revlog.reuse-external-delta-parent`` - Control the order in which delta parents are considered when adding new - revisions from an external source. - (typically: apply bundle from `hg pull` or `hg push`). - - New revisions are usually provided as a delta against other revisions.
By - default, Mercurial will try to reuse this delta first, therefore using the - same "delta parent" as the source. Directly using delta's from the source - reduces CPU usage and usually speeds up operation. However, in some case, - the source might have sub-optimal delta bases and forcing their reevaluation - is useful. For example, pushes from an old client could have sub-optimal - delta's parent that the server want to optimize. (lack of general delta, bad - parents, choice, lack of sparse-revlog, etc). - - This option is enabled by default. Turning it off will ensure bad delta - parent choices from older client do not propagate to this repository, at - the cost of a small increase in CPU consumption. - - Note: this option only control the order in which delta parents are - considered. Even when disabled, the existing delta from the source will be - reused if the same delta parent is selected. - -``revlog.reuse-external-delta`` - Control the reuse of delta from external source. - (typically: apply bundle from `hg pull` or `hg push`). - - New revisions are usually provided as a delta against another revision. By - default, Mercurial will not recompute the same delta again, trusting - externally provided deltas. There have been rare cases of small adjustment - to the diffing algorithm in the past. So in some rare case, recomputing - delta provided by ancient clients can provides better results. Disabling - this option means going through a full delta recomputation for all incoming - revisions. It means a large increase in CPU usage and will slow operations - down. - - This option is enabled by default. When disabled, it also disables the - related ``storage.revlog.reuse-external-delta-parent`` option. - -``revlog.zlib.level`` - Zlib compression level used when storing data into the repository. Accepted - Value range from 1 (lowest compression) to 9 (highest compression). Zlib - default value is 6. 
- - -``revlog.zstd.level`` - zstd compression level used when storing data into the repository. Accepted - Value range from 1 (lowest compression) to 22 (highest compression). - (default 3) - -``server`` ----------- - -Controls generic server settings. - -``bookmarks-pushkey-compat`` - Trigger pushkey hook when being pushed bookmark updates. This config exist - for compatibility purpose (default to True) - - If you use ``pushkey`` and ``pre-pushkey`` hooks to control bookmark - movement we recommend you migrate them to ``txnclose-bookmark`` and - ``pretxnclose-bookmark``. - -``compressionengines`` - List of compression engines and their relative priority to advertise - to clients. - - The order of compression engines determines their priority, the first - having the highest priority. If a compression engine is not listed - here, it won't be advertised to clients. - - If not set (the default), built-in defaults are used. Run - :hg:`debuginstall` to list available compression engines and their - default wire protocol priority. - - Older Mercurial clients only support zlib compression and this setting - has no effect for legacy clients. - -``uncompressed`` - Whether to allow clients to clone a repository using the - uncompressed streaming protocol. This transfers about 40% more - data than a regular clone, but uses less memory and CPU on both - server and client. Over a LAN (100 Mbps or better) or a very fast - WAN, an uncompressed streaming clone is a lot faster (~10x) than a - regular clone. Over most WAN connections (anything slower than - about 6 Mbps), uncompressed streaming is slower, because of the - extra data transfer overhead. This mode will also temporarily hold - the write lock while determining what data to transfer. - (default: True) - -``uncompressedallowsecret`` - Whether to allow stream clones when the repository contains secret - changesets. 
(default: False) - -``preferuncompressed`` - When set, clients will try to use the uncompressed streaming - protocol. (default: False) - -``disablefullbundle`` - When set, servers will refuse attempts to do pull-based clones. - If this option is set, ``preferuncompressed`` and/or clone bundles - are highly recommended. Partial clones will still be allowed. - (default: False) - -``streamunbundle`` - When set, servers will apply data sent from the client directly, - otherwise it will be written to a temporary file first. This option - effectively prevents concurrent pushes. - -``pullbundle`` - When set, the server will check pullbundle.manifest for bundles - covering the requested heads and common nodes. The first matching - entry will be streamed to the client. - - For HTTP transport, the stream will still use zlib compression - for older clients. - -``concurrent-push-mode`` - Level of allowed race condition between two pushing clients. - - - 'strict': push is aborted if another client touched the repository - while the push was preparing. (default) - - 'check-related': push is only aborted if it affects a head that was also - affected while the push was preparing. - - This requires a compatible client (version 4.3 and later). Old clients will - use 'strict'. - -``validate`` - Whether to validate the completeness of pushed changesets by - checking that all new file revisions specified in manifests are - present. (default: False) - -``maxhttpheaderlen`` - Instruct HTTP clients not to send request headers longer than this - many bytes. (default: 1024) - -``bundle1`` - Whether to allow clients to push and pull using the legacy bundle1 - exchange format. (default: True) - -``bundle1gd`` - Like ``bundle1`` but only used if the repository is using the - *generaldelta* storage format. (default: True) - -``bundle1.push`` - Whether to allow clients to push using the legacy bundle1 exchange - format.
(default: True) - -``bundle1gd.push`` - Like ``bundle1.push`` but only used if the repository is using the - *generaldelta* storage format. (default: True) - -``bundle1.pull`` - Whether to allow clients to pull using the legacy bundle1 exchange - format. (default: True) - -``bundle1gd.pull`` - Like ``bundle1.pull`` but only used if the repository is using the - *generaldelta* storage format. (default: True) - - Large repositories using the *generaldelta* storage format should - consider setting this option because converting *generaldelta* - repositories to the exchange format required by the bundle1 data - format can consume a lot of CPU. - -``bundle2.stream`` - Whether to allow clients to pull using the bundle2 streaming protocol. - (default: True) - -``zliblevel`` - Integer between ``-1`` and ``9`` that controls the zlib compression level - for wire protocol commands that send zlib compressed output (notably the - commands that send repository history data). - - The default (``-1``) uses the default zlib compression level, which is - likely equivalent to ``6``. ``0`` means no compression. ``9`` means - maximum compression. - - Setting this option allows server operators to make trade-offs between - bandwidth and CPU used. Lowering the compression lowers CPU utilization - but sends more bytes to clients. - - This option only impacts the HTTP server. - -``zstdlevel`` - Integer between ``1`` and ``22`` that controls the zstd compression level - for wire protocol commands. ``1`` is the minimal amount of compression and - ``22`` is the highest amount of compression. - - The default (``3``) should be significantly faster than zlib while likely - delivering better compression ratios. - - This option only impacts the HTTP server. - - See also ``server.zliblevel``. - -``view`` - Repository filter used when exchanging revisions with the peer. - - The default view (``served``) excludes secret and hidden changesets. 
- Another useful value is ``immutable`` (no draft, secret or hidden - changesets). (EXPERIMENTAL) - -``smtp`` --------- - -Configuration for extensions that need to send email messages. - -``host`` - Host name of mail server, e.g. "mail.example.com". - -``port`` - Optional. Port to connect to on mail server. (default: 465 if - ``tls`` is smtps; 25 otherwise) - -``tls`` - Optional. Method to enable TLS when connecting to mail server: starttls, - smtps or none. (default: none) - -``username`` - Optional. User name for authenticating with the SMTP server. - (default: None) - -``password`` - Optional. Password for authenticating with the SMTP server. If not - specified, interactive sessions will prompt the user for a - password; non-interactive sessions will fail. (default: None) - -``local_hostname`` - Optional. The hostname that the sender can use to identify - itself to the MTA. - - -``subpaths`` ------------- - -Subrepository source URLs can go stale if a remote server changes name -or becomes temporarily unavailable. This section lets you define -rewrite rules of the form:: - - <pattern> = <replacement> - -where ``pattern`` is a regular expression matching a subrepository -source URL and ``replacement`` is the replacement string used to -rewrite it. Groups can be matched in ``pattern`` and referenced in -``replacement``. For instance:: - - http://server/(.*)-hg/ = http://hg.server/\1/ - -rewrites ``http://server/foo-hg/`` into ``http://hg.server/foo/``. - -Relative subrepository paths are first made absolute, and the -rewrite rules are then applied on the full (absolute) path. If ``pattern`` -doesn't match the full path, an attempt is made to apply it on the -relative path alone. The rules are applied in definition order. - -``subrepos`` ------------- - -This section contains options that control the behavior of the -subrepositories feature. See also :hg:`help subrepos`.
- -Security note: auditing in Mercurial is known to be insufficient to -prevent clone-time code execution with carefully constructed Git -subrepos. It is unknown if a similar defect is present in Subversion -subrepos. Both Git and Subversion subrepos are disabled by default -out of security concerns. These subrepo types can be enabled using -the respective options below. - -``allowed`` - Whether subrepositories are allowed in the working directory. - - When false, commands involving subrepositories (like :hg:`update`) - will fail for all subrepository types. - (default: true) - -``hg:allowed`` - Whether Mercurial subrepositories are allowed in the working - directory. This option only has an effect if ``subrepos.allowed`` - is true. - (default: true) - -``git:allowed`` - Whether Git subrepositories are allowed in the working directory. - This option only has an effect if ``subrepos.allowed`` is true. - - See the security note above before enabling Git subrepos. - (default: false) - -``svn:allowed`` - Whether Subversion subrepositories are allowed in the working - directory. This option only has an effect if ``subrepos.allowed`` - is true. - - See the security note above before enabling Subversion subrepos. - (default: false) - -``templatealias`` ------------------ - -Alias definitions for templates. See :hg:`help templates` for details. - -``templates`` -------------- - -Use the ``[templates]`` section to define template strings. -See :hg:`help templates` for details. - -``trusted`` ------------ - -Mercurial will not use the settings in the -``.hg/hgrc`` file from a repository if it doesn't belong to a trusted -user or to a trusted group, as various hgrc features allow arbitrary -commands to be run. This issue is often encountered when configuring -hooks or extensions for shared repositories or servers. However, -the web interface will use some safe settings from the ``[web]`` -section. - -This section specifies what users and groups are trusted.
The -current user is always trusted. To trust everybody, list a user or a -group with name ``*``. These settings must be placed in an -*already-trusted file* to take effect, such as ``$HOME/.hgrc`` of the -user or service running Mercurial. - -``users`` - Comma-separated list of trusted users. - -``groups`` - Comma-separated list of trusted groups. - - -``ui`` ------- - -User interface controls. - -``archivemeta`` - Whether to include the .hg_archival.txt file containing meta data - (hashes for the repository base and for tip) in archives created - by the :hg:`archive` command or downloaded via hgweb. - (default: True) - -``askusername`` - Whether to prompt for a username when committing. If True, and - neither ``$HGUSER`` nor ``$EMAIL`` has been specified, then the user will - be prompted to enter a username. If no username is entered, the - default ``USER@HOST`` is used instead. - (default: False) - -``clonebundles`` - Whether the "clone bundles" feature is enabled. - - When enabled, :hg:`clone` may download and apply a server-advertised - bundle file from a URL instead of using the normal exchange mechanism. - - This can likely result in faster and more reliable clones. - - (default: True) - -``clonebundlefallback`` - Whether failure to apply an advertised "clone bundle" from a server - should result in fallback to a regular clone. - - This is disabled by default because servers advertising "clone - bundles" often do so to reduce server load. If advertised bundles - start mass failing and clients automatically fall back to a regular - clone, this would add significant and unexpected load to the server - since the server is expecting clone operations to be offloaded to - pre-generated bundles. Failing fast (the default behavior) ensures - clients don't overwhelm the server when "clone bundle" application - fails. - - (default: False) - -``clonebundleprefers`` - Defines preferences for which "clone bundles" to use. 
- - Servers advertising "clone bundles" may advertise multiple available - bundles. Each bundle may have different attributes, such as the bundle - type and compression format. This option is used to prefer a particular - bundle over another. - - The following keys are defined by Mercurial: - - BUNDLESPEC - A bundle type specifier. These are strings passed to :hg:`bundle -t`. - e.g. ``gzip-v2`` or ``bzip2-v1``. - - COMPRESSION - The compression format of the bundle. e.g. ``gzip`` and ``bzip2``. - - Server operators may define custom keys. - - Example values: ``COMPRESSION=bzip2``, - ``BUNDLESPEC=gzip-v2, COMPRESSION=gzip``. - - By default, the first bundle advertised by the server is used. - -``color`` - When to colorize output. Possible values are Boolean ("yes" or "no"), or - "debug", or "always". (default: "yes"). "yes" will use color whenever it - seems possible. See :hg:`help color` for details. - -``commitsubrepos`` - Whether to commit modified subrepositories when committing the - parent repository. If False and one subrepository has uncommitted - changes, abort the commit. - (default: False) - -``debug`` - Print debugging information. (default: False) - -``editor`` - The editor to use during a commit. (default: ``$EDITOR`` or ``vi``) - -``fallbackencoding`` - Encoding to try if it's not possible to decode the changelog using - UTF-8. (default: ISO-8859-1) - -``graphnodetemplate`` - The template used to print changeset nodes in an ASCII revision graph. - (default: ``{graphnode}``) - -``ignore`` - A file to read per-user ignore patterns from. This file should be - in the same format as a repository-wide .hgignore file. Filenames - are relative to the repository root. This option supports hook syntax, - so if you want to specify multiple ignore files, you can do so by - setting something like ``ignore.other = ~/.hgignore2``. For details - of the ignore file format, see the ``hgignore(5)`` man page. - -``interactive`` - Allow to prompt the user.
(default: True) - -``interface`` - Select the default interface for interactive features (default: text). - Possible values are 'text' and 'curses'. - -``interface.chunkselector`` - Select the interface for change recording (e.g. :hg:`commit -i`). - Possible values are 'text' and 'curses'. - This config overrides the interface specified by ui.interface. - -``large-file-limit`` - Largest file size that gives no memory use warning. - Possible values are integers or 0 to disable the check. - (default: 10000000) - -``logtemplate`` - Template string for commands that print changesets. - -``merge`` - The conflict resolution program to use during a manual merge. - For more information on merge tools see :hg:`help merge-tools`. - For configuring merge tools see the ``[merge-tools]`` section. - -``mergemarkers`` - Sets the merge conflict marker label styling. The ``detailed`` - style uses the ``mergemarkertemplate`` setting to style the labels. - The ``basic`` style just uses 'local' and 'other' as the marker label. - One of ``basic`` or ``detailed``. - (default: ``basic``) - -``mergemarkertemplate`` - The template used to print the commit description next to each conflict - marker during merge conflicts. See :hg:`help templates` for the template - format. - - Defaults to showing the hash, tags, branches, bookmarks, author, and - the first line of the commit description. - - If you use non-ASCII characters in names for tags, branches, bookmarks, - authors, and/or commit descriptions, you must pay attention to encodings of - managed files. At template expansion, non-ASCII characters use the encoding - specified by the ``--encoding`` global option, ``HGENCODING`` or other - environment variables that govern your locale. If the encoding of the merge - markers is different from the encoding of the merged files, - serious problems may occur. - - Can be overridden per-merge-tool, see the ``[merge-tools]`` section. - -``message-output`` - Where to write status and error messages. 
(default: ``stdio``) - - ``stderr`` - Everything to stderr. - ``stdio`` - Status to stdout, and error to stderr. - -``origbackuppath`` - The path to a directory used to store generated .orig files. If the path is - not a directory, one will be created. If set, files stored in this - directory have the same name as the original file and do not have a .orig - suffix. - -``paginate`` - Control the pagination of command output (default: True). See :hg:`help pager` - for details. - -``patch`` - An optional external tool that ``hg import`` and some extensions - will use for applying patches. By default Mercurial uses an - internal patch utility. The external tool must work as the common - Unix ``patch`` program. In particular, it must accept a ``-p`` - argument to strip patch headers, a ``-d`` argument to specify the - current directory, a file name to patch, and a patch file to take - from stdin. - - It is possible to specify a patch tool together with extra - arguments. For example, setting this option to ``patch --merge`` - will use the ``patch`` program with its 2-way merge option. - -``portablefilenames`` - Check for portable filenames. Can be ``warn``, ``ignore`` or ``abort``. - (default: ``warn``) - - ``warn`` - Print a warning message on POSIX platforms, if a file with a non-portable - filename is added (e.g. a file with a name that can't be created on - Windows because it contains reserved parts like ``AUX``, reserved - characters like ``:``, or would cause a case collision with an existing - file). - - ``ignore`` - Don't print a warning. - - ``abort`` - The command is aborted. - - ``true`` - Alias for ``warn``. - - ``false`` - Alias for ``ignore``. - - .. container:: windows - - On Windows, this configuration option is ignored and the command aborted. - -``pre-merge-tool-output-template`` - A template that is printed before executing an external merge tool. 
This can - be used to print out additional context that might be useful to have during - the conflict resolution, such as the description of the various commits - involved or bookmarks/tags. - - Additional information is available in the ``local``, ``base``, and ``other`` - dicts. For example: ``{local.label}``, ``{base.name}``, or - ``{other.islink}``. - -``quiet`` - Reduce the amount of output printed. - (default: False) - -``relative-paths`` - Prefer relative paths in the UI. - -``remotecmd`` - Remote command to use for clone/push/pull operations. - (default: ``hg``) - -``report_untrusted`` - Warn if a ``.hg/hgrc`` file is ignored due to not being owned by a - trusted user or group. - (default: True) - -``slash`` - (Deprecated. Use ``slashpath`` template filter instead.) - - Display paths using a slash (``/``) as the path separator. This - only makes a difference on systems where the default path - separator is not the slash character (e.g. Windows uses the - backslash character (``\``)). - (default: False) - -``statuscopies`` - Display copies in the status command. - -``ssh`` - Command to use for SSH connections. (default: ``ssh``) - -``ssherrorhint`` - A hint shown to the user in the case of SSH error (e.g. - ``Please see http://company/internalwiki/ssh.html``) - -``strict`` - Require exact command names, instead of allowing unambiguous - abbreviations. (default: False) - -``style`` - Name of style to use for command output. - -``supportcontact`` - A URL where users should report a Mercurial traceback. Use this if you are a - large organisation with its own Mercurial deployment process and crash - reports should be addressed to your internal support. - -``textwidth`` - Maximum width of help text. A longer line generated by ``hg help`` or - ``hg subcommand --help`` will be broken after white space to get this - width or the terminal width, whichever comes first. - A non-positive value will disable this and the terminal width will be - used.
(default: 78) - -``timeout`` - The timeout used when a lock is held (in seconds), a negative value - means no timeout. (default: 600) - -``timeout.warn`` - Time (in seconds) before a warning is printed about held lock. A negative - value means no warning. (default: 0) - -``traceback`` - Mercurial always prints a traceback when an unknown exception - occurs. Setting this to True will make Mercurial print a traceback - on all exceptions, even those recognized by Mercurial (such as - IOError or MemoryError). (default: False) - -``tweakdefaults`` - - By default Mercurial's behavior changes very little from release - to release, but over time the recommended config settings - shift. Enable this config to opt in to get automatic tweaks to - Mercurial's behavior over time. This config setting will have no - effect if ``HGPLAIN`` is set or ``HGPLAINEXCEPT`` is set and does - not include ``tweakdefaults``. (default: False) - - It currently means:: - - .. tweakdefaultsmarker - -``username`` - The committer of a changeset created when running "commit". - Typically a person's name and email address, e.g. ``Fred Widget - <fred@example.com>``. Environment variables in the - username are expanded. - - (default: ``$EMAIL`` or ``username@hostname``. If the username in - hgrc is empty, e.g. if the system admin set ``username =`` in the - system hgrc, it has to be specified manually or in a different - hgrc file) - -``verbose`` - Increase the amount of output printed. (default: False) - - -``web`` -------- - -Web interface configuration. The settings in this section apply to -both the builtin webserver (started by :hg:`serve`) and the script you -run through a webserver (``hgweb.cgi`` and the derivatives for FastCGI -and WSGI). - -The Mercurial webserver does no authentication (it does not prompt for -usernames and passwords to validate *who* users are), but it does do -authorization (it grants or denies access for *authenticated users* -based on settings in this section).
You must either configure your -webserver to do authentication for you, or disable the authorization -checks. - -For a quick setup in a trusted environment, e.g., a private LAN, where -you want it to accept pushes from anybody, you can use the following -command line:: - - $ hg --config web.allow-push=* --config web.push_ssl=False serve - -Note that this will allow anybody to push anything to the server and -that this should not be used for public servers. - -The full set of options is: - -``accesslog`` - Where to output the access log. (default: stdout) - -``address`` - Interface address to bind to. (default: all) - -``allow-archive`` - List of archive format (bz2, gz, zip) allowed for downloading. - (default: empty) - -``allowbz2`` - (DEPRECATED) Whether to allow .tar.bz2 downloading of repository - revisions. - (default: False) - -``allowgz`` - (DEPRECATED) Whether to allow .tar.gz downloading of repository - revisions. - (default: False) - -``allow-pull`` - Whether to allow pulling from the repository. (default: True) - -``allow-push`` - Whether to allow pushing to the repository. If empty or not set, - pushing is not allowed. If the special value ``*``, any remote - user can push, including unauthenticated users. Otherwise, the - remote user must have been authenticated, and the authenticated - user name must be present in this list. The contents of the - allow-push list are examined after the deny_push list. - -``allow_read`` - If the user has not already been denied repository access due to - the contents of deny_read, this list determines whether to grant - repository access to the user. If this list is not empty, and the - user is unauthenticated or not present in the list, then access is - denied for the user. If the list is empty or not set, then access - is permitted to all users by default. Setting allow_read to the - special value ``*`` is equivalent to it not being set (i.e. access - is permitted to all users). 
The contents of the allow_read list are - examined after the deny_read list. - -``allowzip`` - (DEPRECATED) Whether to allow .zip downloading of repository - revisions. This feature creates temporary files. - (default: False) - -``archivesubrepos`` - Whether to recurse into subrepositories when archiving. - (default: False) - -``baseurl`` - Base URL to use when publishing URLs in other locations, so - third-party tools like email notification hooks can construct - URLs. Example: ``http://hgserver/repos/``. - -``cacerts`` - Path to file containing a list of PEM encoded certificate - authority certificates. Environment variables and ``~user`` - constructs are expanded in the filename. If specified on the - client, then it will verify the identity of remote HTTPS servers - with these certificates. - - To disable SSL verification temporarily, specify ``--insecure`` from - command line. - - You can use OpenSSL's CA certificate file if your platform has - one. On most Linux systems this will be - ``/etc/ssl/certs/ca-certificates.crt``. Otherwise you will have to - generate this file manually. The form must be as follows:: - - -----BEGIN CERTIFICATE----- - ... (certificate in base64 PEM encoding) ... - -----END CERTIFICATE----- - -----BEGIN CERTIFICATE----- - ... (certificate in base64 PEM encoding) ... - -----END CERTIFICATE----- - -``cache`` - Whether to support caching in hgweb. (default: True) - -``certificate`` - Certificate to use when running :hg:`serve`. - -``collapse`` - With ``descend`` enabled, repositories in subdirectories are shown at - a single level alongside repositories in the current path. With - ``collapse`` also enabled, repositories residing at a deeper level than - the current path are grouped behind navigable directory entries that - lead to the locations of these repositories. In effect, this setting - collapses each collection of repositories found within a subdirectory - into a single entry for that subdirectory. 
(default: False) - -``comparisoncontext`` - Number of lines of context to show in side-by-side file comparison. If - negative or the value ``full``, whole files are shown. (default: 5) - - This setting can be overridden by a ``context`` request parameter to the - ``comparison`` command, taking the same values. - -``contact`` - Name or email address of the person in charge of the repository. - (default: ui.username or ``$EMAIL`` or "unknown" if unset or empty) - -``csp`` - Send a ``Content-Security-Policy`` HTTP header with this value. - - The value may contain a special string ``%nonce%``, which will be replaced - by a randomly-generated one-time use value. If the value contains - ``%nonce%``, ``web.cache`` will be disabled, as caching undermines the - one-time property of the nonce. This nonce will also be inserted into - ``