From cd56ad8e0836ec4b8883ecdcdf25d195ad7b6ede Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 1 Oct 2021 17:54:03 -0300 Subject: [PATCH 1/7] Expose size calculations; stricter b32z/b64 validity checking - Add {to,from}_{base64,base32z,hex}_size functions to calculate the resulting output size from a given input size. - Use it internally - Make b32z and b64 validity checking slightly stricter: currently we "accept" some b32z and b64 strings that contain an extra character that leaves us with 5-7 trailing bits (base32z) or 6 trailing bits (base64). We simply ignore the extra one if decoding, but we shouldn't accept it in the "is valid" calls. --- oxenmq/base32z.h | 40 ++++++++++++++++++++++++++---- oxenmq/base64.h | 39 ++++++++++++++++++++++++++--- oxenmq/hex.h | 23 ++++++++++++----- tests/test_encoding.cpp | 55 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 15 deletions(-) diff --git a/oxenmq/base32z.h b/oxenmq/base32z.h index 8a5c3ad..f85070c 100644 --- a/oxenmq/base32z.h +++ b/oxenmq/base32z.h @@ -74,6 +74,11 @@ static_assert(b32z_lut.from_b32z('w') == 20 && b32z_lut.from_b32z('T') == 17 && } // namespace detail +/// Returns the number of characters required to encode a base32z string from the given number of bytes. +inline constexpr size_t to_base32z_size(size_t byte_size) { return (byte_size*8 + 4) / 5; } // ⌈bits/5⌉ because 5 bits per character +/// Returns the (maximum) number of bytes required to decode a base32z string of the given size. +inline constexpr size_t from_base32z_size(size_t b32z_size) { return b32z_size*5 / 8; } // ⌊bits/8⌋ + /// Converts bytes into a base32z encoded character sequence, writing them starting at `out`. /// Returns the final value of out (i.e. the iterator positioned just after the last written base32z /// character).
@@ -110,8 +115,10 @@ OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) { template std::string to_base32z(It begin, It end) { std::string base32z; - if constexpr (std::is_base_of_v::iterator_category>) - base32z.reserve((std::distance(begin, end)*8 + 4) / 5); // == bytes*8/5, rounded up. + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base32z.reserve(to_base32z_size(distance(begin, end))); + } to_base32z(begin, end, std::back_inserter(base32z)); return base32z; } @@ -121,15 +128,36 @@ template std::string to_base32z(std::basic_string_view s) { return to_base32z(s.begin(), s.end()); } inline std::string to_base32z(std::string_view s) { return to_base32z<>(s); } -/// Returns true if all elements in the range are base32z characters +/// Returns true if the given [begin, end) range is an acceptable base32z string: specifically every +/// character must be in the base32z alphabet, and the string must be a valid encoding length that +/// could have been produced by to_base32z (i.e. some lengths are impossible). template constexpr bool is_base32z(It begin, It end) { static_assert(sizeof(decltype(*begin)) == 1, "is_base32z requires chars/bytes"); + size_t count = 0; + constexpr bool random = std::is_base_of_v::iterator_category>; + if constexpr (random) { + using std::distance; + count = distance(begin, end) % 8; + if (count == 1 || count == 3 || count == 6) // see below + return false; + } for (; begin != end; ++begin) { auto c = static_cast(*begin); if (detail::b32z_lut.from_b32z(c) == 0 && !(c == 'y' || c == 'Y')) return false; + if constexpr (!random) + count++; } + // Check for a valid length. 
+ // - 5n + 0 bytes encodes to 8n chars (no padding bits) + // - 5n + 1 bytes encodes to 8n+2 chars (last 2 bits are padding) + // - 5n + 2 bytes encodes to 8n+4 chars (last 4 bits are padding) + // - 5n + 3 bytes encodes to 8n+5 chars (last 1 bit is padding) + // - 5n + 4 bytes encodes to 8n+7 chars (last 3 bits are padding) + if constexpr (!random) + if (count %= 8; count == 1 || count == 3 || count == 6) + return false; return true; } @@ -197,8 +225,10 @@ OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) { template std::string from_base32z(It begin, It end) { std::string bytes; - if constexpr (std::is_base_of_v::iterator_category>) - bytes.reserve((std::distance(begin, end)*5 + 7) / 8); // == chars*5/8, rounded up. + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_base32z_size(distance(begin, end))); + } from_base32z(begin, end, std::back_inserter(bytes)); return bytes; } diff --git a/oxenmq/base64.h b/oxenmq/base64.h index 621f62c..d54259c 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -76,6 +76,17 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_ } // namespace detail +/// Returns the number of characters required to encode a base64 string from the given number of bytes. +inline constexpr size_t to_base64_size(size_t byte_size) { + // bytes*4/3, rounded up to the next multiple of 4 + return (byte_size + 2) / 3 * 4; +} +/// Returns the (maximum) number of bytes required to decode a base64 string of the given size. +/// Note that this may overallocate by 1-2 bytes if the size includes 1-2 padding chars. +inline constexpr size_t from_base64_size(size_t b64_size) { + return b64_size * 3 / 4; // == ⌊bits/8⌋; floor because we ignore trailing "impossible" bits (see below) +} + /// Converts bytes into a base64 encoded character sequence, writing them starting at `out`. /// Returns the final value of out (i.e. 
the iterator positioned just after the last written base64 /// character). @@ -126,8 +137,10 @@ OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) { template std::string to_base64(It begin, It end) { std::string base64; - if constexpr (std::is_base_of_v::iterator_category>) - base64.reserve((std::distance(begin, end) + 2) / 3 * 4); // bytes*4/3, rounded up to the next multiple of 4 + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base64.reserve(to_base64_size(distance(begin, end))); + } to_base64(begin, end, std::back_inserter(base64)); return base64; } @@ -139,11 +152,20 @@ inline std::string to_base64(std::string_view s) { return to_base64<>(s); } /// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding, /// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4. +/// Otherwise the string must contain only valid base64 characters, and must not have a length of +/// 4n+1 (because that cannot be produced by base64 encoding). template constexpr bool is_base64(It begin, It end) { static_assert(sizeof(decltype(*begin)) == 1, "is_base64 requires chars/bytes"); using std::distance; using std::prev; + size_t count = 0; + constexpr bool random = std::is_base_of_v::iterator_category>; + if constexpr (random) { + count = distance(begin, end) % 4; + if (count == 1) + return false; + } // Allow 1 or 2 padding chars *if* they pad it to a multiple of 4. 
if (begin != end && distance(begin, end) % 4 == 0) { @@ -158,7 +180,14 @@ constexpr bool is_base64(It begin, It end) { auto c = static_cast(*begin); if (detail::b64_lut.from_b64(c) == 0 && c != 'A') return false; + if constexpr (!random) + count++; } + + if constexpr (!random) + if (count % 4 == 1) // base64 encoding will produce 4n, 4n+2, 4n+3, but never 4n+1 + return false; + return true; } @@ -213,8 +242,10 @@ OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) { template std::string from_base64(It begin, It end) { std::string bytes; - if constexpr (std::is_base_of_v::iterator_category>) - bytes.reserve(std::distance(begin, end)*6 / 8); // each digit carries 6 bits; this may overallocate by 1-2 bytes due to padding + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_base64_size(distance(begin, end))); + } from_base64(begin, end, std::back_inserter(bytes)); return bytes; } diff --git a/oxenmq/hex.h b/oxenmq/hex.h index ad44482..a9feb76 100644 --- a/oxenmq/hex.h +++ b/oxenmq/hex.h @@ -62,6 +62,11 @@ static_assert(hex_lut.from_hex('a') == 10 && hex_lut.from_hex('F') == 15 && hex_ } // namespace detail +/// Returns the number of characters required to encode a hex string from the given number of bytes. +inline constexpr size_t to_hex_size(size_t byte_size) { return byte_size * 2; } +/// Returns the number of bytes required to decode a hex string of the given size. +inline constexpr size_t from_hex_size(size_t hex_size) { return hex_size / 2; } + /// Creates hex digits from a character sequence given by iterators, writes them starting at `out`. /// Returns the final value of out (i.e. the iterator positioned just after the last written /// hex character). 
@@ -80,8 +85,10 @@ OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) { template std::string to_hex(It begin, It end) { std::string hex; - if constexpr (std::is_base_of_v::iterator_category>) - hex.reserve(2 * std::distance(begin, end)); + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + hex.reserve(to_hex_size(distance(begin, end))); + } to_hex(begin, end, std::back_inserter(hex)); return hex; } @@ -104,9 +111,11 @@ template constexpr bool is_hex(It begin, It end) { static_assert(sizeof(decltype(*begin)) == 1, "is_hex requires chars/bytes"); constexpr bool ra = std::is_base_of_v::iterator_category>; - if constexpr (ra) - if (std::distance(begin, end) % 2 != 0) + if constexpr (ra) { + using std::distance; + if (distance(begin, end) % 2 != 0) return false; + } size_t count = 0; for (; begin != end; ++begin) { @@ -155,8 +164,10 @@ OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) { template std::string from_hex(It begin, It end) { std::string bytes; - if constexpr (std::is_base_of_v::iterator_category>) - bytes.reserve(std::distance(begin, end) / 2); + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + bytes.reserve(from_hex_size(distance(begin, end))); + } from_hex(begin, end, std::back_inserter(bytes)); return bytes; } diff --git a/tests/test_encoding.cpp b/tests/test_encoding.cpp index bc4a675..5813b02 100644 --- a/tests/test_encoding.cpp +++ b/tests/test_encoding.cpp @@ -60,6 +60,16 @@ TEST_CASE("hex encoding/decoding", "[encoding][decoding][hex]") { std::basic_string_view hex_bytes{bytes.data(), bytes.size()}; REQUIRE( oxenmq::is_hex(hex_bytes) ); REQUIRE( oxenmq::from_hex(hex_bytes) == "\xff\x42\x12\x34" ); + + REQUIRE( oxenmq::to_hex_size(1) == 2 ); + REQUIRE( oxenmq::to_hex_size(2) == 4 ); + REQUIRE( oxenmq::to_hex_size(3) == 6 ); + REQUIRE( oxenmq::to_hex_size(4) == 8 ); + REQUIRE( oxenmq::to_hex_size(100) == 200 ); + REQUIRE( oxenmq::from_hex_size(2) == 1 ); + REQUIRE( 
oxenmq::from_hex_size(4) == 2 ); + REQUIRE( oxenmq::from_hex_size(6) == 3 ); + REQUIRE( oxenmq::from_hex_size(98) == 49 ); } TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") { @@ -128,6 +138,27 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") { std::basic_string_view b32_bytes{bytes.data(), bytes.size()}; REQUIRE( oxenmq::is_base32z(b32_bytes) ); REQUIRE( oxenmq::from_base32z(b32_bytes) == "\x00\xff"sv ); + + REQUIRE( oxenmq::to_base32z_size(1) == 2 ); + REQUIRE( oxenmq::to_base32z_size(2) == 4 ); + REQUIRE( oxenmq::to_base32z_size(3) == 5 ); + REQUIRE( oxenmq::to_base32z_size(4) == 7 ); + REQUIRE( oxenmq::to_base32z_size(5) == 8 ); + REQUIRE( oxenmq::to_base32z_size(30) == 48 ); + REQUIRE( oxenmq::to_base32z_size(31) == 50 ); + REQUIRE( oxenmq::to_base32z_size(32) == 52 ); + REQUIRE( oxenmq::to_base32z_size(33) == 53 ); + REQUIRE( oxenmq::to_base32z_size(100) == 160 ); + REQUIRE( oxenmq::from_base32z_size(160) == 100 ); + REQUIRE( oxenmq::from_base32z_size(53) == 33 ); + REQUIRE( oxenmq::from_base32z_size(52) == 32 ); + REQUIRE( oxenmq::from_base32z_size(50) == 31 ); + REQUIRE( oxenmq::from_base32z_size(48) == 30 ); + REQUIRE( oxenmq::from_base32z_size(8) == 5 ); + REQUIRE( oxenmq::from_base32z_size(7) == 4 ); + REQUIRE( oxenmq::from_base32z_size(5) == 3 ); + REQUIRE( oxenmq::from_base32z_size(4) == 2 ); + REQUIRE( oxenmq::from_base32z_size(2) == 1 ); } TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { @@ -228,6 +259,30 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { std::basic_string_view b64_bytes{bytes.data(), bytes.size()}; REQUIRE( oxenmq::is_base64(b64_bytes) ); REQUIRE( oxenmq::from_base64(b64_bytes) == "\xff\x00"sv ); + + REQUIRE( oxenmq::to_base64_size(1) == 4 ); + REQUIRE( oxenmq::to_base64_size(2) == 4 ); + REQUIRE( oxenmq::to_base64_size(3) == 4 ); + REQUIRE( oxenmq::to_base64_size(4) == 8 ); + REQUIRE( oxenmq::to_base64_size(5) == 8 ); + REQUIRE( 
oxenmq::to_base64_size(6) == 8 ); + REQUIRE( oxenmq::to_base64_size(30) == 40 ); + REQUIRE( oxenmq::to_base64_size(31) == 44 ); + REQUIRE( oxenmq::to_base64_size(32) == 44 ); + REQUIRE( oxenmq::to_base64_size(33) == 44 ); + REQUIRE( oxenmq::to_base64_size(100) == 136 ); + REQUIRE( oxenmq::from_base64_size(136) == 102 ); // Not symmetric because we don't know the last two are padding + REQUIRE( oxenmq::from_base64_size(134) == 100 ); // Unpadded + REQUIRE( oxenmq::from_base64_size(44) == 33 ); + REQUIRE( oxenmq::from_base64_size(43) == 32 ); + REQUIRE( oxenmq::from_base64_size(42) == 31 ); + REQUIRE( oxenmq::from_base64_size(40) == 30 ); + REQUIRE( oxenmq::from_base64_size(8) == 6 ); + REQUIRE( oxenmq::from_base64_size(7) == 5 ); + REQUIRE( oxenmq::from_base64_size(6) == 4 ); + REQUIRE( oxenmq::from_base64_size(4) == 3 ); + REQUIRE( oxenmq::from_base64_size(3) == 2 ); + REQUIRE( oxenmq::from_base64_size(2) == 1 ); } TEST_CASE("std::byte decoding", "[decoding][hex][base32z][base64]") { From 24dd7a3854e0ba700f5536ed1909b818ddc52fb8 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 1 Oct 2021 18:23:29 -0300 Subject: [PATCH 2/7] Make (and use) iterator approach for encoding/decoding This allows for on-the-fly encoding/decoding, and also allows for on-the-fly transcoding between types without needing intermediate string allocations (see added test cases for examples). --- oxenmq/base32z.h | 204 ++++++++++++++++++++++++------------- oxenmq/base64.h | 217 +++++++++++++++++++++++++++++----------- oxenmq/hex.h | 97 +++++++++++++++--- tests/test_encoding.cpp | 84 ++++++++++++++++ 4 files changed, 459 insertions(+), 143 deletions(-) diff --git a/oxenmq/base32z.h b/oxenmq/base32z.h index f85070c..7e7d96e 100644 --- a/oxenmq/base32z.h +++ b/oxenmq/base32z.h @@ -79,36 +79,71 @@ inline constexpr size_t to_base32z_size(size_t byte_size) { return (byte_size*8 /// Returns the (maximum) number of bytes required to decode a base32z string of the given size. 
inline constexpr size_t from_base32z_size(size_t b32z_size) { return b32z_size*5 / 8; } // ⌊bits/8⌋ +/// Iterable object for on-the-fly base32z encoding. Used internally, but also particularly useful +/// when converting from one encoding to another. +template +struct base32z_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base32z_encoder requires chars/bytes input iterator"); + int bits; // Number of bits held in r; will always be >= 5 until we are at the end. + uint_fast16_t r; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) { + bits = 8; + r = static_cast(*_it); + } else { + bits = 0; + } + } + + base32z_encoder end() { return {_end, _end}; } + + bool operator==(const base32z_encoder& i) { return _it == i._it && bits == i.bits; } + bool operator!=(const base32z_encoder& i) { return !(*this == i); } + + base32z_encoder& operator++() { + assert(bits >= 5); + // Discard the most significant 5 bits + bits -= 5; + r &= (1 << bits) - 1; + // If we end up with less than 5 significant bits then try to pull another 8 bits: + if (bits < 5 && _it != _end) { + if (++_it != _end) { + r = (r << 8) | static_cast(*_it); + bits += 8; + } else if (bits > 0) { + // No more input bytes, so shift `r` to put the bits we have into the most + // significant bit position for the final character. E.g. if we have "11" we want + // the last character to be encoded "11000". 
+ r <<= (5 - bits); + bits = 5; + } + } + return *this; + } + base32z_encoder operator++(int) { base32z_encoder copy{*this}; ++*this; return copy; } + + char operator*() { + // Right-shift off the excess bits we aren't accessing yet + return detail::b32z_lut.to_b32z(r >> (bits - 5)); + } +}; + /// Converts bytes into a base32z encoded character sequence, writing them starting at `out`. /// Returns the final value of out (i.e. the iterator positioned just after the last written base32z /// character). template OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes"); - int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in [0, 4] - std::uint_fast16_t r = 0; - while (begin != end) { - r = r << 8 | static_cast(*begin++); - - // we just added 8 bits, so we can *always* consume 5 to produce one character, so (net) we - // are adding 3 bits. - bits += 3; - *out++ = detail::b32z_lut.to_b32z(r >> bits); // Right-shift off the bits we aren't consuming right now - - // Drop the bits we don't want to keep (because we just consumed them) - r &= (1 << bits) - 1; - - if (bits >= 5) { // We have enough bits to produce a second character; essentially the same as above - bits -= 5; // Except now we are just consuming 5 without having added any more - *out++ = detail::b32z_lut.to_b32z(r >> bits); - r &= (1 << bits) - 1; - } - } - - if (bits > 0) // We hit the end, but still have some unconsumed bits so need one final character to append - *out++ = detail::b32z_lut.to_b32z(r << (5 - bits)); - - return out; + base32z_encoder it{begin, end}; + return std::copy(it, it.end(), out); } /// Creates a base32z string from an iterator pair of a byte sequence. 
@@ -166,57 +201,88 @@ template constexpr bool is_base32z(std::basic_string_view s) { return is_base32z(s.begin(), s.end()); } constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); } +/// Iterable object for on-the-fly base32z decoding. Used internally, but also particularly useful +/// when converting from one encoding to another. The input range must be a valid base32z +/// encoded string. +/// +/// Note that we ignore "padding" bits without requiring that they actually be 0. For instance, the +/// bytes "\ff\ff" are ideally encoded as "999o" (16 bits of 1s + 4 padding 0 bits), but we don't +/// require that the padding bits be 0. That is, "9999", "9993", etc. will all decode to the same +/// \ff\ff output string. +template +struct base32z_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base32z_decoder requires chars/bytes input iterator"); + uint_fast16_t in = 0; + int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base32z_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + base32z_decoder end() { return {_end, _end}; } + + bool operator==(const base32z_decoder& i) { return _it == i._it; } + bool operator!=(const base32z_decoder& i) { return _it != i._it; } + + base32z_decoder& operator++() { + // Discard 8 most significant bits + bits -= 8; + in &= (1 << bits) - 1; + if (++_it != _end) + load_byte(); + return *this; + } + base32z_decoder operator++(int) { base32z_decoder copy{*this}; ++*this; return copy; } + + char operator*() { + return in >> (bits - 8); + } + +private: + void load_in() { + in = in << 5 + | detail::b32z_lut.from_b32z(static_cast(*_it)); + bits += 5; + } + + void load_byte() { + 
load_in(); + if (bits < 8 && ++_it != _end) + load_in(); + + // If we hit the _end iterator above then we hit the end of the input with fewer than 8 bits + // accumulated to make a full byte. For a properly encoded base32z string this should only + // be possible with 0-4 bits of all 0s; these are essentially "padding" bits (e.g. encoding + // 2 bytes (16 bits) requires 4 b32z chars (20 bits), where only the first 16 bits are + // significant). Ideally any padding bits should be 0, but we don't check that and rather + // just ignore them. + // + // It also isn't possible to get here with 5-7 bits if the string passes `is_base32z` + // because the length checks we do there disallow such a length as valid. (If you were to + // pass such a string to us anyway then we are technically UB, but the current + // implementation just ignores the extra bits as if they are extra padding). + } +}; + /// Converts a sequence of base32z digits to bytes. Undefined behaviour if any characters are not /// valid base32z alphabet characters. It is permitted for the input and output ranges to overlap -/// as long as `out` is no later than `begin`. Note that if you pass in a sequence that could not -/// have been created by a base32z encoding of a byte sequence, we treat the excess bits as if they -/// were not provided. Returns the final value of out (that is, the iterator positioned just after -/// the last written character). +/// as long as `out` is no later than `begin`. /// -/// For example, "yyy" represents a 15-bit value, but a byte sequence is either 8-bit (requiring 2 -/// characters) or 16-bit (requiring 4). Similarly, "yb" is an impossible encoding because it has -/// its 10th bit set (b = 00001), but a base32z encoded value should have all 0's beyond the 8th (or -/// 16th or 24th or ... bit). We treat any such bits as if they were not specified (even if they -/// are): which means "yy", "yb", "yyy", "yy9", "yd", etc. all decode to the same 1-byte value "\0".
template OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes"); - uint_fast16_t curr = 0; - int bits = 0; // number of bits we've loaded into val; we always keep this < 8. - while (begin != end) { - curr = curr << 5 | detail::b32z_lut.from_b32z(static_cast(*begin++)); - if (bits >= 3) { - bits -= 3; // Added 5, removing 8 - *out++ = static_cast>( - static_cast(curr >> bits)); - curr &= (1 << bits) - 1; - } else { - bits += 5; - } - } - - // Ignore any trailing bits. base32z encoding always has at least as many bits as the source - // bytes, which means we should not be able to get here from a properly encoded b32z value with - // anything other than 0s: if we have no extra bits (e.g. 5 bytes == 8 b32z chars) then we have - // a 0-bit value; if we have some extra bits (e.g. 6 bytes requires 10 b32z chars, but that - // contains 50 bits > 48 bits) then those extra bits will be 0s (and this covers the bits -= 3 - // case above: it'll leave us with 0-4 extra bits, but those extra bits would be 0 if produced - // from an actual byte sequence). - // - // The "bits += 5" case, then, means that we could end with 5-7 bits. This, however, cannot be - // produced by a valid encoding: - // - 0 bytes gives us 0 chars with 0 leftover bits - // - 1 byte gives us 2 chars with 2 leftover bits - // - 2 bytes gives us 4 chars with 4 leftover bits - // - 3 bytes gives us 5 chars with 1 leftover bit - // - 4 bytes gives us 7 chars with 3 leftover bits - // - 5 bytes gives us 8 chars with 0 leftover bits (this is where the cycle repeats) - // - // So really the only way we can get 5-7 leftover bits is if you took a 0, 2 or 5 char output (or - // any 8n + {0,2,5} char output) and added a base32z character to the end. If you do that, - // well, too bad: you're giving invalid output and so we're just going to pretend that extra - // character you added isn't there by not doing anything here. 
- + base32z_decoder it{begin, end}; + auto bend = it.end(); + while (it != bend) + *out++ = static_cast>(*it++); return out; } diff --git a/oxenmq/base64.h b/oxenmq/base64.h index d54259c..8630cdf 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -87,50 +87,88 @@ inline constexpr size_t from_base64_size(size_t b64_size) { return b64_size * 3 / 4; // == ⌊bits/8⌋; floor because we ignore trailing "impossible" bits (see below) } +/// Iterable object for on-the-fly base64 encoding. Used internally, but also particularly useful +/// when converting from one encoding to another. +template +struct base64_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator"); + int bits; // Number of bits held in r; will always be >= 6 until we are at the end. + int padding; + uint_fast16_t r; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base64_encoder(InputIt begin, InputIt end, bool padded = true) : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} { + if (_it != _end) { + bits = 8; + r = static_cast(*_it); + } else { + bits = 0; + } + } + + base64_encoder end() { return {_end, _end, false}; } + + bool operator==(const base64_encoder& i) { return _it == i._it && bits == i.bits && padding == i.padding; } + bool operator!=(const base64_encoder& i) { return !(*this == i); } + + base64_encoder& operator++() { + if (bits == 0) { + padding--; + return *this; + } + assert(bits >= 6); + // Discard the most significant 6 bits + bits -= 6; + r &= (1 << bits) - 1; + // If we end up with less than 6 significant bits then try to pull another 8 bits: + if (bits < 6 && _it != _end) { + if (++_it != _end) { + r = (r << 8) | static_cast(*_it); + bits += 8; + } else if (bits > 0) { + // No more input bytes, so shift `r` to put the bits we have into the most + 
// significant bit position for the final character, and figure out how many padding + // bytes we want to append. E.g. if we have "11" we want + // the last character to be encoded "110000". + if (padding) { + // padding should be: + // 3n+0 input => 4n output, no padding, handled below + // 3n+1 input => 4n+2 output + 2 padding; we'll land here with 2 trailing bits + // 3n+2 input => 4n+3 output + 1 padding; we'll land here with 4 trailing bits + padding = 3 - bits / 2; + } + r <<= (6 - bits); + bits = 6; + } else { + padding = 0; // No excess bits, so input was a multiple of 3 and thus no padding + } + } + return *this; + } + base64_encoder operator++(int) { base64_encoder copy{*this}; ++*this; return copy; } + + char operator*() { + if (bits == 0 && padding) + return '='; + // Right-shift off the excess bits we aren't accessing yet + return detail::b64_lut.to_b64(r >> (bits - 6)); + } +}; + /// Converts bytes into a base64 encoded character sequence, writing them starting at `out`. /// Returns the final value of out (i.e. the iterator positioned just after the last written base64 /// character). template OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); - int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4} - std::uint_fast16_t r = 0; - while (begin != end) { - r = r << 8 | static_cast(*begin++); - - // we just added 8 bits, so we can *always* consume 6 to produce one character, so (net) we - // are adding 2 bits. 
- bits += 2; - *out++ = detail::b64_lut.to_b64(r >> bits); // Right-shift off the bits we aren't consuming right now - - // Drop the bits we don't want to keep (because we just consumed them) - r &= (1 << bits) - 1; - - if (bits == 6) { // We have enough bits to produce a second character (which means we had 4 before and added 8) - bits = 0; - *out++ = detail::b64_lut.to_b64(r); - r = 0; - } - } - - // If bits == 0 then we ended our 6-bit outputs coinciding with 8-bit values, i.e. at a multiple - // of 24 bits: this means we don't have anything else to output and don't need any padding. - if (bits == 2) { - // We finished with 2 unconsumed bits, which means we ended 1 byte past a 24-bit group (e.g. - // 1 byte, 4 bytes, 301 bytes, etc.); since we need to always be a multiple of 4 output - // characters that means we've produced 1: so we right-fill 0s to get the next char, then - // add two padding ='s. - *out++ = detail::b64_lut.to_b64(r << 4); - *out++ = '='; - *out++ = '='; - } else if (bits == 4) { - // 4 bits left means we produced 2 6-bit values from the first 2 bytes of a 3-byte group. - // Fill 0s to get the last one, plus one padding output. - *out++ = detail::b64_lut.to_b64(r << 2); - *out++ = '='; - } - - return out; + auto it = base64_encoder{begin, end}; + return std::copy(it, it.end(), out); } /// Creates and returns a base64 string from an iterator pair of a character sequence @@ -196,6 +234,82 @@ template constexpr bool is_base64(std::basic_string_view s) { return is_base64(s.begin(), s.end()); } constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end()); } +/// Iterable object for on-the-fly base64 decoding. Used internally, but also particularly useful +/// when converting from one encoding to another. The input range must be a valid base64 encoded +/// string (with or without padding). +/// +/// Note that we ignore "padding" bits without requiring that they actually be 0. 
For instance, the +/// bytes "\ff\ff" are ideally encoded as "//8=" (16 bits of 1s + 2 padding 0 bits, then a full +/// 6-bit padding char). We don't, however, require that the padding bits be 0. That is, "///=", +/// "//9=", "//+=", etc. will all decode to the same \ff\ff output string. +template +struct base64_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "base64_decoder requires chars/bytes input iterator"); + uint_fast16_t in = 0; + int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + base64_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + base64_decoder end() { return {_end, _end}; } + + bool operator==(const base64_decoder& i) { return _it == i._it; } + bool operator!=(const base64_decoder& i) { return _it != i._it; } + + base64_decoder& operator++() { + // Discard 8 most significant bits + bits -= 8; + in &= (1 << bits) - 1; + if (++_it != _end) + load_byte(); + return *this; + } + base64_decoder operator++(int) { base64_decoder copy{*this}; ++*this; return copy; } + + char operator*() { + return in >> (bits - 8); + } + +private: + void load_in() { + // We hit padding trying to read enough for a full byte, so we're done. (And since you were + // already supposed to have checked validity with is_base64, the padding can only be at the + // end). 
+ auto c = static_cast(*_it); + if (c == '=') { + _it = _end; + bits = 0; + return; + } + + in = in << 6 + | detail::b64_lut.from_b64(c); + bits += 6; + } + + void load_byte() { + load_in(); + if (bits && bits < 8 && ++_it != _end) + load_in(); + + // If we hit the _end iterator above then we hit the end of the input (or hit padding) with + // fewer than 8 bits accumulated to make a full byte. For a properly encoded base64 string + // this should only be possible with 0, 2, or 4 bits of all 0s; these are essentially + // "padding" bits (e.g. encoding 2 bytes (16 bits) requires 3 b64 chars (18 bits), where + // only the first 16 bits are significant). Ideally any padding bits should be 0, but we + // don't check that and rather just ignore them. + } +}; + /// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not /// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as /// long as `out` is no later than `begin`. Trailing padding characters are permitted but not @@ -211,29 +325,10 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end template OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes"); - uint_fast16_t curr = 0; - int bits = 0; // number of bits we've loaded into val; we always keep this < 8. - while (begin != end) { - auto c = static_cast(*begin++); - - // padding; don't bother checking if we're at the end because is_base64 is a precondition - // and we're allowed UB if it isn't satisfied. - if (c == '=') continue; - - curr = curr << 6 | detail::b64_lut.from_b64(c); - if (bits == 0) - bits = 6; - else { - bits -= 2; // Added 6, removing 8 - *out++ = static_cast>( - static_cast(curr >> bits)); - curr &= (1 << bits) - 1; - } - } - // Don't worry about leftover bits because either they have to be 0, or they can't happen at - // all.
See base32z.h for why: the reasoning is exactly the same (except using 6 bits per - // character here instead of 5). - + base64_decoder it{begin, end}; + auto bend = it.end(); + while (it != bend) + *out++ = static_cast>(*it++); return out; } diff --git a/oxenmq/hex.h b/oxenmq/hex.h index a9feb76..e101737 100644 --- a/oxenmq/hex.h +++ b/oxenmq/hex.h @@ -67,18 +67,50 @@ inline constexpr size_t to_hex_size(size_t byte_size) { return byte_size * 2; } /// Returns the number of bytes required to decode a hex string of the given size. inline constexpr size_t from_hex_size(size_t hex_size) { return hex_size / 2; } +/// Iterable object for on-the-fly hex encoding. Used internally, but also particularly useful when +/// converting from one encoding to another. +template +struct hex_encoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator"); + uint8_t c; + bool second_half = false; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + hex_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {} + + hex_encoder end() { return {_end, _end}; } + + bool operator==(const hex_encoder& i) { return _it == i._it && second_half == i.second_half; } + bool operator!=(const hex_encoder& i) { return !(*this == i); } + + hex_encoder& operator++() { + second_half = !second_half; + if (!second_half) + ++_it; + return *this; + } + hex_encoder operator++(int) { hex_encoder copy{*this}; ++*this; return copy; } + char operator*() { + return detail::hex_lut.to_hex(second_half + ? c & 0x0f + : (c = static_cast(*_it)) >> 4); + } +}; + /// Creates hex digits from a character sequence given by iterators, writes them starting at `out`. /// Returns the final value of out (i.e. the iterator positioned just after the last written /// hex character). 
template OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes"); - for (; begin != end; ++begin) { - uint8_t c = static_cast(*begin); - *out++ = detail::hex_lut.to_hex(c >> 4); - *out++ = detail::hex_lut.to_hex(c & 0x0f); - } - return out; + auto it = hex_encoder{begin, end}; + return std::copy(it, it.end(), out); } /// Creates a string of hex digits from a character sequence iterator pair @@ -141,6 +173,48 @@ constexpr char from_hex_digit(unsigned char x) noexcept { /// Constructs a byte value from a pair of hex digits constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return (from_hex_digit(a) << 4) | from_hex_digit(b); } +/// Iterable object for on-the-fly hex decoding. Used internally but also particularly useful when +/// converting from one encoding to another. Undefined behaviour if the given iterator range is not +/// a valid hex string with even length (i.e. is_hex() should return true). 
+template +struct hex_decoder final { +private: + InputIt _it, _end; + static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator"); + char byte; +public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = char; + using reference = value_type; + using pointer = void; + hex_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { + if (_it != _end) + load_byte(); + } + + hex_decoder end() { return {_end, _end}; } + + bool operator==(const hex_decoder& i) { return _it == i._it; } + bool operator!=(const hex_decoder& i) { return _it != i._it; } + + hex_decoder& operator++() { + if (++_it != _end) + load_byte(); + return *this; + } + hex_decoder operator++(int) { hex_decoder copy{*this}; ++*this; return copy; } + char operator*() const { return byte; } + +private: + void load_byte() { + auto a = *_it; + auto b = *++_it; + byte = from_hex_pair(static_cast(a), static_cast(b)); + } + +}; + /// Converts a sequence of hex digits to bytes. Undefined behaviour if any characters are not in /// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to /// check. It is permitted for the input and output ranges to overlap as long as out is no later @@ -148,14 +222,11 @@ constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return /// last written character). 
template OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) { - using std::distance; assert(is_hex(begin, end)); - while (begin != end) { - auto a = *begin++; - auto b = *begin++; - *out++ = static_cast>( - from_hex_pair(static_cast(a), static_cast(b))); - } + auto it = hex_decoder(begin, end); + const auto hend = it.end(); + while (it != hend) + *out++ = static_cast>(*it++); return out; } diff --git a/tests/test_encoding.cpp b/tests/test_encoding.cpp index 5813b02..cbe80c4 100644 --- a/tests/test_encoding.cpp +++ b/tests/test_encoding.cpp @@ -285,6 +285,90 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { REQUIRE( oxenmq::from_base64_size(2) == 1 ); } +TEST_CASE("transcoding", "[decoding][encoding][base32z][hex][base64]") { + // Decoders: + oxenmq::base64_decoder in64{pk_b64.begin(), pk_b64.end()}; + oxenmq::base32z_decoder in32z{pk_b32z.begin(), pk_b32z.end()}; + oxenmq::hex_decoder in16{pk_hex.begin(), pk_hex.end()}; + + // Transcoders: + oxenmq::base32z_encoder b64_to_b32z{in64, in64.end()}; + oxenmq::base32z_encoder hex_to_b32z{in16, in16.end()}; + oxenmq::hex_encoder b64_to_hex{in64, in64.end()}; + oxenmq::hex_encoder b32z_to_hex{in32z, in32z.end()}; + oxenmq::base64_encoder hex_to_b64{in16, in16.end()}; + oxenmq::base64_encoder b32z_to_b64{in32z, in32z.end()}; + // These ones are stupid, but should work anyway: + oxenmq::base64_encoder b64_to_b64{in64, in64.end()}; + oxenmq::base32z_encoder b32z_to_b32z{in32z, in32z.end()}; + oxenmq::hex_encoder hex_to_hex{in16, in16.end()}; + + // Decoding to bytes: + std::string x; + auto xx = std::back_inserter(x); + std::copy(in64, in64.end(), xx); + REQUIRE( x == pk ); + x.clear(); + std::copy(in32z, in32z.end(), xx); + REQUIRE( x == pk ); + x.clear(); + std::copy(in16, in16.end(), xx); + REQUIRE( x == pk ); + + // Transcoding + x.clear(); + std::copy(b64_to_hex, b64_to_hex.end(), xx); + CHECK( x == pk_hex ); + + x.clear(); + std::copy(b64_to_b32z, b64_to_b32z.end(), xx); + CHECK( x == 
pk_b32z ); + + x.clear(); + std::copy(b64_to_b64, b64_to_b64.end(), xx); + CHECK( x == pk_b64 ); + + x.clear(); + std::copy(b32z_to_hex, b32z_to_hex.end(), xx); + CHECK( x == pk_hex ); + + x.clear(); + std::copy(b32z_to_b32z, b32z_to_b32z.end(), xx); + CHECK( x == pk_b32z ); + + x.clear(); + std::copy(b32z_to_b64, b32z_to_b64.end(), xx); + CHECK( x == pk_b64 ); + + x.clear(); + std::copy(hex_to_hex, hex_to_hex.end(), xx); + CHECK( x == pk_hex ); + + x.clear(); + std::copy(hex_to_b32z, hex_to_b32z.end(), xx); + CHECK( x == pk_b32z ); + + x.clear(); + std::copy(hex_to_b64, hex_to_b64.end(), xx); + CHECK( x == pk_b64 ); + + // Make a big chain of conversions + oxenmq::base32z_encoder it1{in64, in64.end()}; + oxenmq::base32z_decoder it2{it1, it1.end()}; + oxenmq::base64_encoder it3{it2, it2.end()}; + oxenmq::base64_decoder it4{it3, it3.end()}; + oxenmq::hex_encoder it5{it4, it4.end()}; + x.clear(); + std::copy(it5, it5.end(), xx); + CHECK( x == pk_hex ); + + // No-padding b64 encoding: + oxenmq::base64_encoder b64_nopad{pk.begin(), pk.end(), false}; + x.clear(); + std::copy(b64_nopad, b64_nopad.end(), xx); + CHECK( x == pk_b64.substr(0, pk_b64.size()-1) ); +} + TEST_CASE("std::byte decoding", "[decoding][hex][base32z][base64]") { // Decoding to std::byte is a little trickier because you can't assign to a byte without an // explicit cast, which means we have to properly detect that output is going to a std::byte From ee1d69f333722469d0f4bf5c3fd3a463c35d2dcf Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 1 Oct 2021 18:52:30 -0300 Subject: [PATCH 3/7] Add b32z/b64 invalid garbage tests Tests the new restrictions added for b32z/b64 trailing crap.
--- tests/test_encoding.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_encoding.cpp b/tests/test_encoding.cpp index cbe80c4..99c2a03 100644 --- a/tests/test_encoding.cpp +++ b/tests/test_encoding.cpp @@ -139,6 +139,16 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") { REQUIRE( oxenmq::is_base32z(b32_bytes) ); REQUIRE( oxenmq::from_base32z(b32_bytes) == "\x00\xff"sv ); + REQUIRE( oxenmq::is_base32z("") ); + REQUIRE_FALSE( oxenmq::is_base32z("y") ); + REQUIRE( oxenmq::is_base32z("yy") ); + REQUIRE_FALSE( oxenmq::is_base32z("yyy") ); + REQUIRE( oxenmq::is_base32z("yyyy") ); + REQUIRE( oxenmq::is_base32z("yyyyy") ); + REQUIRE_FALSE( oxenmq::is_base32z("yyyyyy") ); + REQUIRE( oxenmq::is_base32z("yyyyyyy") ); + REQUIRE( oxenmq::is_base32z("yyyyyyyy") ); + REQUIRE( oxenmq::to_base32z_size(1) == 2 ); REQUIRE( oxenmq::to_base32z_size(2) == 4 ); REQUIRE( oxenmq::to_base32z_size(3) == 5 ); @@ -210,6 +220,7 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { REQUIRE( oxenmq::is_base64("YWJjZB") ); // not really valid, but we explicitly accept it REQUIRE_FALSE( oxenmq::is_base64("YWJjZ=") ); // invalid padding (padding can only be 4th or 3rd+4th of a 4-char block) + REQUIRE_FALSE( oxenmq::is_base64("YYYYA") ); // invalid: base64 can never be length 4n+1 REQUIRE_FALSE( oxenmq::is_base64("YWJj=") ); REQUIRE_FALSE( oxenmq::is_base64("YWJj=A") ); REQUIRE_FALSE( oxenmq::is_base64("YWJjA===") ); From 9a8adb5bfdd1edcae0e561de52385ae8ae787617 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 1 Oct 2021 18:53:05 -0300 Subject: [PATCH 4/7] Add methods for unpadded base64 construction The iterator has them; this adds wrapper methods to access them when not using the iterator directly. 
--- oxenmq/base64.h | 36 +++++++++++++++++++++++++++++------- tests/test_encoding.cpp | 7 +++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/oxenmq/base64.h b/oxenmq/base64.h index 8630cdf..778456e 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -77,9 +77,10 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_ } // namespace detail /// Returns the number of characters required to encode a base64 string from the given number of bytes. -inline constexpr size_t to_base64_size(size_t byte_size) { - // bytes*4/3, rounded up to the next multiple of 4 - return (byte_size + 2) / 3 * 4; +inline constexpr size_t to_base64_size(size_t byte_size, bool padded = true) { + return padded + ? (byte_size + 2) / 3 * 4 // bytes*4/3, rounded up to the next multiple of 4 + : (byte_size * 4 + 2) / 3; // ⌈bytes*4/3⌉ } /// Returns the (maximum) number of bytes required to decode a base64 string of the given size. /// Note that this may overallocate by 1-2 bytes if the size includes 1-2 padding chars. @@ -165,13 +166,14 @@ public: /// Returns the final value of out (i.e. the iterator positioned just after the last written base64 /// character). template -OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) { +OutputIt to_base64(InputIt begin, InputIt end, OutputIt out, bool padded = true) { static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); - auto it = base64_encoder{begin, end}; + auto it = base64_encoder{begin, end, padded}; return std::copy(it, it.end(), out); } -/// Creates and returns a base64 string from an iterator pair of a character sequence +/// Creates and returns a base64 string from an iterator pair of a character sequence. The +/// resulting string will have '=' padding, if appropriate. 
template std::string to_base64(It begin, It end) { std::string base64; @@ -183,11 +185,31 @@ std::string to_base64(It begin, It end) { return base64; } -/// Creates a base64 string from an iterable, std::string-like object +/// Creates and returns a base64 string from an iterator pair of a character sequence. The +/// resulting string will not be padded. +template +std::string to_base64_unpadded(It begin, It end) { + std::string base64; + if constexpr (std::is_base_of_v::iterator_category>) { + using std::distance; + base64.reserve(to_base64_size(distance(begin, end), false)); + } + to_base64(begin, end, std::back_inserter(base64), false); + return base64; +} + +/// Creates a base64 string from an iterable, std::string-like object. The string will have '=' +/// padding, if appropriate. template std::string to_base64(std::basic_string_view s) { return to_base64(s.begin(), s.end()); } inline std::string to_base64(std::string_view s) { return to_base64<>(s); } +/// Creates a base64 string from an iterable, std::string-like object. The string will not be +/// padded. +template +std::string to_base64_unpadded(std::basic_string_view s) { return to_base64_unpadded(s.begin(), s.end()); } +inline std::string to_base64_unpadded(std::string_view s) { return to_base64_unpadded<>(s); } + /// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding, /// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4. 
/// Otherwise the string must contain only valid base64 characters, and must not have a length of diff --git a/tests/test_encoding.cpp b/tests/test_encoding.cpp index 99c2a03..85427f7 100644 --- a/tests/test_encoding.cpp +++ b/tests/test_encoding.cpp @@ -186,6 +186,13 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { REQUIRE( oxenmq::to_base64("abcde") == "YWJjZGU=" ); REQUIRE( oxenmq::to_base64("abcdef") == "YWJjZGVm" ); + REQUIRE( oxenmq::to_base64_unpadded("a") == "YQ" ); + REQUIRE( oxenmq::to_base64_unpadded("ab") == "YWI" ); + REQUIRE( oxenmq::to_base64_unpadded("abc") == "YWJj" ); + REQUIRE( oxenmq::to_base64_unpadded("abcd") == "YWJjZA" ); + REQUIRE( oxenmq::to_base64_unpadded("abcde") == "YWJjZGU" ); + REQUIRE( oxenmq::to_base64_unpadded("abcdef") == "YWJjZGVm" ); + REQUIRE( oxenmq::to_base64("\0\0\0\xff"s) == "AAAA/w==" ); REQUIRE( oxenmq::to_base64("\0\0\0\xff\xff"s) == "AAAA//8=" ); REQUIRE( oxenmq::to_base64("\0\0\0\xff\xff\xff"s) == "AAAA////" ); From 02a542b9c6978b1d7b7a3cf7cdcee577f2a0364a Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Tue, 5 Oct 2021 12:12:16 -0300 Subject: [PATCH 5/7] Simplify iterator initialization & avoid warnings --- oxenmq/base32z.h | 15 +++++---------- oxenmq/base64.h | 17 +++++++---------- oxenmq/hex.h | 2 +- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/oxenmq/base32z.h b/oxenmq/base32z.h index 7e7d96e..229e4f1 100644 --- a/oxenmq/base32z.h +++ b/oxenmq/base32z.h @@ -86,22 +86,17 @@ struct base32z_encoder final { private: InputIt _it, _end; static_assert(sizeof(decltype(*_it)) == 1, "base32z_encoder requires chars/bytes input iterator"); - int bits; // Number of bits held in r; will always be >= 5 until we are at the end. - uint_fast16_t r; + // Number of bits held in r; will always be >= 5 until we are at the end. + int bits{_it != _end ? 8 : 0}; + // Holds bits of data we've already read, which might belong to current or next chars + uint_fast16_t r{bits ? 
static_cast(*_it) : 0u}; public: using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using value_type = char; using reference = value_type; using pointer = void; - base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { - if (_it != _end) { - bits = 8; - r = static_cast(*_it); - } else { - bits = 0; - } - } + base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {} base32z_encoder end() { return {_end, _end}; } diff --git a/oxenmq/base64.h b/oxenmq/base64.h index 778456e..b368ae4 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -95,23 +95,20 @@ struct base64_encoder final { private: InputIt _it, _end; static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator"); - int bits; // Number of bits held in r; will always be >= 6 until we are at the end. + // How much padding (at most) we can add at the end int padding; - uint_fast16_t r; + // Number of bits held in r; will always be >= 6 until we are at the end. + int bits{_it != _end ? 8 : 0}; + // Holds bits of data we've already read, which might belong to current or next chars + uint_fast16_t r{bits ? 
static_cast(*_it) : 0u}; public: using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using value_type = char; using reference = value_type; using pointer = void; - base64_encoder(InputIt begin, InputIt end, bool padded = true) : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} { - if (_it != _end) { - bits = 8; - r = static_cast(*_it); - } else { - bits = 0; - } - } + base64_encoder(InputIt begin, InputIt end, bool padded = true) + : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} {} base64_encoder end() { return {_end, _end, false}; } diff --git a/oxenmq/hex.h b/oxenmq/hex.h index e101737..553b351 100644 --- a/oxenmq/hex.h +++ b/oxenmq/hex.h @@ -74,7 +74,7 @@ struct hex_encoder final { private: InputIt _it, _end; static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator"); - uint8_t c; + uint8_t c = 0; bool second_half = false; public: using iterator_category = std::input_iterator_tag; From 0d0ed8efa924826ac952bff02d977207cf420790 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Tue, 5 Oct 2021 12:21:38 -0300 Subject: [PATCH 6/7] Fix r narrowing initialization warning when uint_fast16_t is small --- oxenmq/base32z.h | 2 +- oxenmq/base64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/oxenmq/base32z.h b/oxenmq/base32z.h index 229e4f1..78e6973 100644 --- a/oxenmq/base32z.h +++ b/oxenmq/base32z.h @@ -89,7 +89,7 @@ private: // Number of bits held in r; will always be >= 5 until we are at the end. int bits{_it != _end ? 8 : 0}; // Holds bits of data we've already read, which might belong to current or next chars - uint_fast16_t r{bits ? static_cast(*_it) : 0u}; + uint_fast16_t r{bits ? 
static_cast(*_it) : (unsigned char)0}; public: using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; diff --git a/oxenmq/base64.h b/oxenmq/base64.h index b368ae4..2bfac55 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -100,7 +100,7 @@ private: // Number of bits held in r; will always be >= 6 until we are at the end. int bits{_it != _end ? 8 : 0}; // Holds bits of data we've already read, which might belong to current or next chars - uint_fast16_t r{bits ? static_cast(*_it) : 0u}; + uint_fast16_t r{bits ? static_cast(*_it) : (unsigned char)0}; public: using iterator_category = std::input_iterator_tag; using difference_type = std::ptrdiff_t; From 7695e770a7b76e3a76f688dffb6c7c3adc75f0ca Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Tue, 5 Oct 2021 12:27:53 -0300 Subject: [PATCH 7/7] drone: quiet/optimize image setup --- .drone.jsonnet | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 86cf6a0..4b67711 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -1,3 +1,6 @@ + +local apt_get_quiet = 'apt-get -o=Dpkg::Use-Pty=0 -q '; + local debian_pipeline(name, image, arch='amd64', deps='g++ libsodium-dev libzmq3-dev', cmake_extra='', build_type='Release', extra_cmds=[], allow_fail=false) = { kind: 'pipeline', type: 'docker', @@ -10,10 +13,12 @@ local debian_pipeline(name, image, arch='amd64', deps='g++ libsodium-dev libzmq3 image: image, [if allow_fail then "failure"]: "ignore", commands: [ - 'apt-get update', - 'apt-get install -y eatmydata', - 'eatmydata apt-get dist-upgrade -y', - 'eatmydata apt-get install -y cmake git ninja-build pkg-config ccache ' + deps, + 'echo "Building on ${DRONE_STAGE_MACHINE}"', + 'echo "man-db man-db/auto-update boolean false" | debconf-set-selections', + apt_get_quiet + 'update', + apt_get_quiet + 'install -y eatmydata', + 'eatmydata ' + apt_get_quiet + 'dist-upgrade -y', + 'eatmydata ' + apt_get_quiet + 'install -y cmake git 
ninja-build pkg-config ccache ' + deps, 'git submodule update --init --recursive', 'mkdir build', 'cd build',