From 2ac4379fa66072b116b862fb9ad3d83e78ee79a9 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 20 Aug 2021 16:08:33 -0300 Subject: [PATCH] Make {to,from}_{hex/b64/b32} return output iterator Changes the 3-iterator versions of to_hex, from_b32z, etc. to return the final output iterator, which allows for much easier in-place "from" conversion without needing a new string by doing something like: std::string data = /* some hex */; auto end = oxenmq::from_hex(data.begin(), data.end(), data.begin()); data.erase(end, data.end()); Returning from the "to" converters is a bit less useful but doing it anyway for consistency (and because it could still have some use, e.g. if output is into some fixed buffer it lets you determine how much was written). --- oxenmq/base32z.h | 15 +++++++++++---- oxenmq/base64.h | 15 +++++++++++---- oxenmq/hex.h | 13 +++++++++---- tests/test_encoding.cpp | 30 ++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/oxenmq/base32z.h b/oxenmq/base32z.h index 70f019c..8a5c3ad 100644 --- a/oxenmq/base32z.h +++ b/oxenmq/base32z.h @@ -74,9 +74,11 @@ static_assert(b32z_lut.from_b32z('w') == 20 && b32z_lut.from_b32z('T') == 17 && } // namespace detail -/// Converts bytes into a base32z encoded character sequence. +/// Converts bytes into a base32z encoded character sequence, writing them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written base32z +/// character). 
template -void to_base32z(InputIt begin, InputIt end, OutputIt out) { +OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes"); int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in [0, 4] std::uint_fast16_t r = 0; @@ -100,6 +102,8 @@ void to_base32z(InputIt begin, InputIt end, OutputIt out) { if (bits > 0) // We hit the end, but still have some unconsumed bits so need one final character to append *out++ = detail::b32z_lut.to_b32z(r << (5 - bits)); + + return out; } /// Creates a base32z string from an iterator pair of a byte sequence. @@ -138,7 +142,8 @@ constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); } /// valid base32z alphabet characters. It is permitted for the input and output ranges to overlap /// as long as `out` is no later than `begin`. Note that if you pass in a sequence that could not /// have been created by a base32z encoding of a byte sequence, we treat the excess bits as if they -/// were not provided. +/// were not provided. Returns the final value of out (that is, the iterator positioned just after +/// the last written character). /// /// For example, "yyy" represents a 15-bit value, but a byte sequence is either 8-bit (requiring 2 /// characters) or 16-bit (requiring 4). Similarly, "yb" is an impossible encoding because it has @@ -146,7 +151,7 @@ constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); } /// 16th or 24th or ... bit). We treat any such bits as if they were not specified (even if they /// are): which means "yy", "yb", "yyy", "yy9", "yd", etc. all decode to the same 1-byte value "\0". 
template -void from_base32z(InputIt begin, InputIt end, OutputIt out) { +OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes"); uint_fast16_t curr = 0; int bits = 0; // number of bits we've loaded into val; we always keep this < 8. @@ -183,6 +188,8 @@ void from_base32z(InputIt begin, InputIt end, OutputIt out) { // any 8n + {0,2,5} char output) and added a base32z character to the end. If you do that, // well, too bad: you're giving invalid output and so we're just going to pretend that extra // character you added isn't there by not doing anything here. + + return out; } /// Convert a base32z sequence into a std::string of bytes. Undefined behaviour if any characters diff --git a/oxenmq/base64.h b/oxenmq/base64.h index c9538a6..621f62c 100644 --- a/oxenmq/base64.h +++ b/oxenmq/base64.h @@ -76,9 +76,11 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_ } // namespace detail -/// Converts bytes into a base64 encoded character sequence. +/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written base64 +/// character). 
template -void to_base64(InputIt begin, InputIt end, OutputIt out) { +OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4} std::uint_fast16_t r = 0; @@ -116,6 +118,8 @@ void to_base64(InputIt begin, InputIt end, OutputIt out) { *out++ = detail::b64_lut.to_b64(r << 2); *out++ = '='; } + + return out; } /// Creates and returns a base64 string from an iterator pair of a character sequence @@ -166,7 +170,8 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end /// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not /// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as /// long as `out` is no later than `begin`. Trailing padding characters are permitted but not -/// required. +/// required. Returns the final value of out (that is, the iterator positioned just after the +/// last written character). /// /// It is possible to provide "impossible" base64 encoded values; for example "YWJja" which has 30 /// bits of data even though a base64 encoded byte string should have 24 (4 chars) or 36 (6 chars) @@ -175,7 +180,7 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end /// encoding of "abcd") and "YWJjZB", "YWJjZC", ..., "YWJjZP" all decode to the same "abcd" value: /// the last 4 bits of the last character are essentially considered padding. template -void from_base64(InputIt begin, InputIt end, OutputIt out) { +OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes"); uint_fast16_t curr = 0; int bits = 0; // number of bits we've loaded into val; we always keep this < 8. 
@@ -199,6 +204,8 @@ void from_base64(InputIt begin, InputIt end, OutputIt out) { // Don't worry about leftover bits because either they have to be 0, or they can't happen at // all. See base32z.h for why: the reasoning is exactly the same (except using 6 bits per // character here instead of 5). + + return out; } /// Converts base64 digits from a iterator pair of characters into a std::string of bytes. diff --git a/oxenmq/hex.h b/oxenmq/hex.h index 68cf7e7..ad44482 100644 --- a/oxenmq/hex.h +++ b/oxenmq/hex.h @@ -62,15 +62,18 @@ static_assert(hex_lut.from_hex('a') == 10 && hex_lut.from_hex('F') == 15 && hex_ } // namespace detail -/// Creates hex digits from a character sequence. +/// Creates hex digits from a character sequence given by iterators, writes them starting at `out`. +/// Returns the final value of out (i.e. the iterator positioned just after the last written +/// hex character). template -void to_hex(InputIt begin, InputIt end, OutputIt out) { +OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) { static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes"); for (; begin != end; ++begin) { uint8_t c = static_cast(*begin); *out++ = detail::hex_lut.to_hex(c >> 4); *out++ = detail::hex_lut.to_hex(c & 0x0f); } + return out; } /// Creates a string of hex digits from a character sequence iterator pair @@ -132,9 +135,10 @@ constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return /// Converts a sequence of hex digits to bytes. Undefined behaviour if any characters are not in /// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to /// check. It is permitted for the input and output ranges to overlap as long as out is no later -/// than begin. +/// than begin. Returns the final value of out (that is, the iterator positioned just after the +/// last written character). 
template -void from_hex(InputIt begin, InputIt end, OutputIt out) { +OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) { using std::distance; assert(is_hex(begin, end)); while (begin != end) { @@ -143,6 +147,7 @@ void from_hex(InputIt begin, InputIt end, OutputIt out) { *out++ = static_cast>( from_hex_pair(static_cast(a), static_cast(b))); } + return out; } /// Converts a sequence of hex digits to a string of bytes and returns it. Undefined behaviour if diff --git a/tests/test_encoding.cpp b/tests/test_encoding.cpp index f506d8c..bc4a675 100644 --- a/tests/test_encoding.cpp +++ b/tests/test_encoding.cpp @@ -44,6 +44,16 @@ TEST_CASE("hex encoding/decoding", "[encoding][decoding][hex]") { std::basic_string_view b{bytes.data(), bytes.size()}; REQUIRE( oxenmq::to_hex(b) == "ff421234"s ); + // In-place decoding and truncation via from_hex's returned iterator: + std::string some_hex = "48656c6c6f"; + some_hex.erase(oxenmq::from_hex(some_hex.begin(), some_hex.end(), some_hex.begin()), some_hex.end()); + REQUIRE( some_hex == "Hello" ); + + // Test the returned iterator from encoding + std::string hellohex; + *oxenmq::to_hex(some_hex.begin(), some_hex.end(), std::back_inserter(hellohex))++ = '!'; + REQUIRE( hellohex == "48656c6c6f!" 
); + bytes.resize(8); bytes[0] = std::byte{'f'}; bytes[1] = std::byte{'f'}; bytes[2] = std::byte{'4'}; bytes[3] = std::byte{'2'}; bytes[4] = std::byte{'1'}; bytes[5] = std::byte{'2'}; bytes[6] = std::byte{'3'}; bytes[7] = std::byte{'4'}; @@ -99,6 +109,16 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") { REQUIRE( pk_b32z_again == pk_b32z ); REQUIRE( pk_again == pk ); + // In-place decoding and truncation via returned iterator: + std::string some_b32z = "jb1sa5dx"; + some_b32z.erase(oxenmq::from_base32z(some_b32z.begin(), some_b32z.end(), some_b32z.begin()), some_b32z.end()); + REQUIRE( some_b32z == "Hello" ); + + // Test the returned iterator from encoding + std::string hellob32z; + *oxenmq::to_base32z(some_b32z.begin(), some_b32z.end(), std::back_inserter(hellob32z))++ = '!'; + REQUIRE( hellob32z == "jb1sa5dx!" ); + std::vector bytes{{std::byte{0}, std::byte{255}}}; std::basic_string_view b{bytes.data(), bytes.size()}; REQUIRE( oxenmq::to_base32z(b) == "yd9o" ); @@ -189,6 +209,16 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") { REQUIRE( pk_b64_again == pk_b64 ); REQUIRE( pk_again == pk ); + // In-place decoding and truncation via returned iterator: + std::string some_b64 = "SGVsbG8="; + some_b64.erase(oxenmq::from_base64(some_b64.begin(), some_b64.end(), some_b64.begin()), some_b64.end()); + REQUIRE( some_b64 == "Hello" ); + + // Test the returned iterator from encoding + std::string hellob64; + *oxenmq::to_base64(some_b64.begin(), some_b64.end(), std::back_inserter(hellob64))++ = '!'; + REQUIRE( hellob64 == "SGVsbG8=!" ); + std::vector bytes{{std::byte{0}, std::byte{255}}}; std::basic_string_view b{bytes.data(), bytes.size()}; REQUIRE( oxenmq::to_base64(b) == "AP8=" );