Make (and use) iterator approach for encoding/decoding

This allows for on-the-fly encoding/decoding, and also allows for on-the-fly transcoding between types without needing intermediate string allocations (see added test cases for examples).
2021-10-01 18:23:29 -03:00 · 2021-10-01 18:23:29 -03:00 · 24dd7a3854
parent cd56ad8e08
commit 24dd7a3854
4 changed files with 459 additions and 143 deletions
--- a/oxenmq/base32z.h
+++ b/oxenmq/base32z.h
@ -79,36 +79,71 @@ inline constexpr size_t to_base32z_size(size_t byte_size) { return (byte_size*8
 /// Returns the (maximum) number of bytes required to decode a base32z string of the given size.
 inline constexpr size_t from_base32z_size(size_t b32z_size) { return b32z_size*5 / 8; } // ⌊bits/8⌋

+/// Iterable object for on-the-fly base32z encoding.  Used internally, but also particularly useful
+/// when converting from one encoding to another.
+///
+/// The object acts as its own input iterator: `*it` yields the current base32z character, `++it`
+/// consumes 5 more bits of input, and `it.end()` returns the matching past-the-end value.
+/// Dereferencing or incrementing a past-the-end encoder is not permitted.
+template <typename InputIt>
+struct base32z_encoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "base32z_encoder requires chars/bytes input iterator");
+    int bits = 0; // Number of bits held in r; will always be >= 5 until we are at the end.
+    // Bit accumulator.  Always initialized (even for an empty range, and thus for `end()`) so that
+    // copying the iterator never reads an indeterminate value.
+    uint_fast16_t r = 0;
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs an encoder over the byte range [begin, end); the first byte (if any) is loaded
+    /// immediately.
+    base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
+        if (_it != _end) {
+            bits = 8;
+            r = static_cast<unsigned char>(*_it);
+        }
+    }
+
+    /// Returns the past-the-end encoder for this input range.
+    base32z_encoder end() const { return {_end, _end}; }
+
+    // Equal when positioned on the same input element with the same number of pending bits.
+    bool operator==(const base32z_encoder& i) { return _it == i._it && bits == i.bits; }
+    bool operator!=(const base32z_encoder& i) { return !(*this == i); }
+
+    /// Advances to the next output character.
+    base32z_encoder& operator++() {
+        assert(bits >= 5);
+        // Discard the most significant 5 bits
+        bits -= 5;
+        r &= (1 << bits) - 1;
+        // If we end up with less than 5 significant bits then try to pull another 8 bits:
+        if (bits < 5 && _it != _end) {
+            if (++_it != _end) {
+                r = (r << 8) | static_cast<unsigned char>(*_it);
+                bits += 8;
+            } else if (bits > 0) {
+                // No more input bytes, so shift `r` to put the bits we have into the most
+                // significant bit position for the final character.  E.g. if we have "11" we want
+                // the last character to be encoded "11000".
+                r <<= (5 - bits);
+                bits = 5;
+            }
+        }
+        return *this;
+    }
+    base32z_encoder operator++(int) { base32z_encoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the current base32z character (built from the 5 most significant pending bits).
+    char operator*() const {
+        // Right-shift off the excess bits we aren't accessing yet
+        return detail::b32z_lut.to_b32z(r >> (bits - 5));
+    }
+};
+
 /// Converts bytes into a base32z encoded character sequence, writing them starting at `out`.
 /// Returns the final value of out (i.e. the iterator positioned just after the last written base32z
 /// character).
 template <typename InputIt, typename OutputIt>
 OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes");
-    int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in [0, 4]
-    std::uint_fast16_t r = 0;
-    while (begin != end) {
-        r = r << 8 | static_cast<unsigned char>(*begin++);
-
-        // we just added 8 bits, so we can *always* consume 5 to produce one character, so (net) we
-        // are adding 3 bits.
-        bits += 3;
-        *out++ = detail::b32z_lut.to_b32z(r >> bits); // Right-shift off the bits we aren't consuming right now
-
-        // Drop the bits we don't want to keep (because we just consumed them)
-        r &= (1 << bits) - 1;
-
-        if (bits >= 5) { // We have enough bits to produce a second character; essentially the same as above
-            bits -= 5; // Except now we are just consuming 5 without having added any more
-            *out++ = detail::b32z_lut.to_b32z(r >> bits);
-            r &= (1 << bits) - 1;
-        }
-    }
-
-    if (bits > 0) // We hit the end, but still have some unconsumed bits so need one final character to append
-        *out++ = detail::b32z_lut.to_b32z(r << (5 - bits));
-
-    return out;
+    base32z_encoder it{begin, end}; // encoding iterator over the input bytes (template args deduced)
+    return std::copy(it, it.end(), out); // write every encoded character to `out`
 }

 /// Creates a base32z string from an iterator pair of a byte sequence.
@ -166,57 +201,88 @@ template <typename CharT>
 constexpr bool is_base32z(std::basic_string_view<CharT> s) { return is_base32z(s.begin(), s.end()); }
 constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); }

+/// Iterable object for on-the-fly base32z decoding.  Used internally, but also particularly useful
+/// when converting from one encoding to another.  The input range must be a valid base32z
+/// encoded string.
+///
+/// The object acts as its own input iterator: `*it` yields the current decoded byte, `++it`
+/// advances to the next one, and `it.end()` returns the matching past-the-end value.
+///
+/// Note that we ignore "padding" bits without requiring that they actually be 0.  For instance, the
+/// bytes "\ff\ff" are ideally encoded as "999o" (16 bits of 1s + 4 padding 0 bits), but we don't
+/// require that the padding bits be 0.  That is, "9999", "9993", etc. will all decode to the same
+/// \ff\ff output string.
+template <typename InputIt>
+struct base32z_decoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "base32z_decoder requires chars/bytes input iterator");
+    uint_fast16_t in = 0;
+    int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs a decoder over the base32z characters in [begin, end); enough characters to
+    /// form the first byte (if any) are loaded immediately.
+    base32z_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
+        if (_it != _end)
+            load_byte();
+    }
+
+    /// Returns the past-the-end decoder for this input range.
+    base32z_decoder end() const { return {_end, _end}; }
+
+    // Only the input position is compared; leftover bits do not take part.
+    bool operator==(const base32z_decoder& i) { return _it == i._it; }
+    bool operator!=(const base32z_decoder& i) { return _it != i._it; }
+
+    /// Advances to the next decoded byte.
+    base32z_decoder& operator++() {
+        // Discard 8 most significant bits
+        bits -= 8;
+        in &= (1 << bits) - 1;
+        if (++_it != _end)
+            load_byte();
+        return *this;
+    }
+    base32z_decoder operator++(int) { base32z_decoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the current decoded byte (the 8 most significant loaded bits).
+    char operator*() const {
+        return static_cast<char>(in >> (bits - 8));
+    }
+
+private:
+    // Appends the 5 bits of the character at `_it` to the accumulator.
+    void load_in() {
+        in = in << 5
+            | detail::b32z_lut.from_b32z(static_cast<unsigned char>(*_it));
+        bits += 5;
+    }
+
+    // Loads one character, plus a second one if the first did not bring us up to a full byte.
+    void load_byte() {
+        load_in();
+        if (bits < 8 && ++_it != _end)
+            load_in();
+
+        // If we hit the _end iterator above then we hit the end of the input with fewer than 8 bits
+        // accumulated to make a full byte.  For a properly encoded base32z string this should only
+        // be possible with 0-4 bits of all 0s; these are essentially "padding" bits (e.g. encoding
+        // 2 bytes (16 bits) requires 4 b32z chars (20 bits), where only the first 16 bits are
+        // significant).  Ideally any padding bits should be 0, but we don't check that and rather
+        // just ignore them.
+        //
+        // It also isn't possible to get here with 5-7 bits if the string passes `is_base32z`
+        // because the length checks we do there disallow such a length as valid.  (If you were to
+        // pass such a string to us anyway then we are technically UB, but the current
+        // implementation just ignores the extra bits as if they are extra padding).
+    }
+};
+
 /// Converts a sequence of base32z digits to bytes.  Undefined behaviour if any characters are not
 /// valid base32z alphabet characters.  It is permitted for the input and output ranges to overlap
-/// as long as `out` is no later than `begin`.  Note that if you pass in a sequence that could not
-/// have been created by a base32z encoding of a byte sequence, we treat the excess bits as if they
-/// were not provided.  Returns the final value of out (that is, the iterator positioned just after
-/// the last written character).
+/// as long as `out` is no later than `begin`.
 ///
-/// For example, "yyy" represents a 15-bit value, but a byte sequence is either 8-bit (requiring 2
-/// characters) or 16-bit (requiring 4).  Similarly, "yb" is an impossible encoding because it has
-/// its 10th bit set (b = 00001), but a base32z encoded value should have all 0's beyond the 8th (or
-/// 16th or 24th or ... bit).  We treat any such bits as if they were not specified (even if they
-/// are): which means "yy", "yb", "yyy", "yy9", "yd", etc. all decode to the same 1-byte value "\0".
 template <typename InputIt, typename OutputIt>
 OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes");
-    uint_fast16_t curr = 0;
-    int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
-    while (begin != end) {
-        curr = curr << 5 | detail::b32z_lut.from_b32z(static_cast<unsigned char>(*begin++));
-        if (bits >= 3) {
-            bits -= 3; // Added 5, removing 8
-            *out++ = static_cast<detail::byte_type_t<OutputIt>>(
-                    static_cast<uint8_t>(curr >> bits));
-            curr &= (1 << bits) - 1;
-        } else {
-            bits += 5;
-        }
-    }
-
-    // Ignore any trailing bits.  base32z encoding always has at least as many bits as the source
-    // bytes, which means we should not be able to get here from a properly encoded b32z value with
-    // anything other than 0s: if we have no extra bits (e.g. 5 bytes == 8 b32z chars) then we have
-    // a 0-bit value; if we have some extra bits (e.g. 6 bytes requires 10 b32z chars, but that
-    // contains 50 bits > 48 bits) then those extra bits will be 0s (and this covers the bits -= 3
-    // case above: it'll leave us with 0-4 extra bits, but those extra bits would be 0 if produced
-    // from an actual byte sequence).
-    //
-    // The "bits += 5" case, then, means that we could end with 5-7 bits.  This, however, cannot be
-    // produced by a valid encoding:
-    // - 0 bytes gives us 0 chars with 0 leftover bits
-    // - 1 byte gives us 2 chars with 2 leftover bits
-    // - 2 bytes gives us 4 chars with 4 leftover bits
-    // - 3 bytes gives us 5 chars with 1 leftover bit
-    // - 4 bytes gives us 7 chars with 3 leftover bits
-    // - 5 bytes gives us 8 chars with 0 leftover bits (this is where the cycle repeats)
-    //
-    // So really the only way we can get 5-7 leftover bits is if you took a 0, 2 or 5 char output (or
-    // any 8n + {0,2,5} char output) and added a base32z character to the end.  If you do that,
-    // well, too bad: you're giving invalid output and so we're just going to pretend that extra
-    // character you added isn't there by not doing anything here.
-
+    base32z_decoder it{begin, end}; // decoding iterator over the input characters
+    auto bend = it.end(); // past-the-end decoder, computed once
+    while (it != bend)
+        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++); // cast each decoded char to detail::byte_type_t<OutputIt>
    return out;
 }

--- a/oxenmq/base64.h
+++ b/oxenmq/base64.h
@ -87,50 +87,88 @@ inline constexpr size_t from_base64_size(size_t b64_size) {
    return b64_size * 3 / 4; // == ⌊bits/8⌋; floor because we ignore trailing "impossible" bits (see below)
 }

+/// Iterable object for on-the-fly base64 encoding.  Used internally, but also particularly useful
+/// when converting from one encoding to another.
+///
+/// The object acts as its own input iterator: `*it` yields the current output character (a base64
+/// digit, or '=' once the data is exhausted and padding is still owed), `++it` advances to the
+/// next output character, and `it.end()` returns the matching past-the-end value.
+template <typename InputIt>
+struct base64_encoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator");
+    int bits = 0; // Number of bits held in r; will always be >= 6 until we are at the end.
+    // Starts out as the "padding requested" flag (0 or 1); once the last input byte has been
+    // consumed it holds the number of '=' characters that still have to be emitted.
+    int padding;
+    // Bit accumulator; zero-initialized so that copying an encoder over an empty range (or the
+    // value returned by `end()`) never reads an indeterminate value.
+    uint_fast16_t r = 0;
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs an encoder over the byte range [begin, end).  If `padded` is true (the default)
+    /// then trailing '=' characters are produced as needed after the last data character.
+    base64_encoder(InputIt begin, InputIt end, bool padded = true) : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} {
+        if (_it != _end) {
+            bits = 8;
+            r = static_cast<unsigned char>(*_it);
+        } else {
+            // Empty input encodes to an empty string: there is nothing to pad, so clear the flag.
+            // Otherwise this iterator would compare unequal to `end()` and emit a stray '='.
+            padding = 0;
+        }
+    }
+
+    /// Returns the past-the-end encoder for this input range.
+    base64_encoder end() const { return {_end, _end, false}; }
+
+    // Equal when at the same input position with the same pending bits and pending padding count.
+    bool operator==(const base64_encoder& i) { return _it == i._it && bits == i.bits && padding == i.padding; }
+    bool operator!=(const base64_encoder& i) { return !(*this == i); }
+
+    /// Advances to the next output character (data or padding).
+    base64_encoder& operator++() {
+        if (bits == 0) {
+            // Data is exhausted: we are stepping through the trailing '=' characters.
+            padding--;
+            return *this;
+        }
+        assert(bits >= 6);
+        // Discard the most significant 6 bits
+        bits -= 6;
+        r &= (1 << bits) - 1;
+        // If we end up with less than 6 significant bits then try to pull another 8 bits:
+        if (bits < 6 && _it != _end) {
+            if (++_it != _end) {
+                r = (r << 8) | static_cast<unsigned char>(*_it);
+                bits += 8;
+            } else if (bits > 0) {
+                // No more input bytes, so shift `r` to put the bits we have into the most
+                // significant bit position for the final character, and figure out how many padding
+                // bytes we want to append.  E.g. if we have "11" we want
+                // the last character to be encoded "110000".
+                if (padding) {
+                    // padding should be:
+                    // 3n+0 input => 4n output, no padding, handled below
+                    // 3n+1 input => 4n+2 output + 2 padding; we'll land here with 2 trailing bits
+                    // 3n+2 input => 4n+3 output + 1 padding; we'll land here with 4 trailing bits
+                    padding = 3 - bits / 2;
+                }
+                r <<= (6 - bits);
+                bits = 6;
+            } else {
+                padding = 0; // No excess bits, so input was a multiple of 3 and thus no padding
+            }
+        }
+        return *this;
+    }
+    base64_encoder operator++(int) { base64_encoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the current output character: '=' while emitting padding, otherwise the base64
+    /// digit for the 6 most significant pending bits.
+    char operator*() const {
+        if (bits == 0 && padding)
+            return '=';
+        // Right-shift off the excess bits we aren't accessing yet
+        return detail::b64_lut.to_b64(r >> (bits - 6));
+    }
+};
+
 /// Converts bytes into a base64 encoded character sequence, writing them starting at `out`.
 /// Returns the final value of out (i.e. the iterator positioned just after the last written base64
 /// character).
 template <typename InputIt, typename OutputIt>
 OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes");
-    int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4}
-    std::uint_fast16_t r = 0;
-    while (begin != end) {
-        r = r << 8 | static_cast<unsigned char>(*begin++);
-
-        // we just added 8 bits, so we can *always* consume 6 to produce one character, so (net) we
-        // are adding 2 bits.
-        bits += 2;
-        *out++ = detail::b64_lut.to_b64(r >> bits); // Right-shift off the bits we aren't consuming right now
-
-        // Drop the bits we don't want to keep (because we just consumed them)
-        r &= (1 << bits) - 1;
-
-        if (bits == 6) { // We have enough bits to produce a second character (which means we had 4 before and added 8)
-            bits = 0;
-            *out++ = detail::b64_lut.to_b64(r);
-            r = 0;
-        }
-    }
-
-    // If bits == 0 then we ended our 6-bit outputs coinciding with 8-bit values, i.e. at a multiple
-    // of 24 bits: this means we don't have anything else to output and don't need any padding.
-    if (bits == 2) {
-        // We finished with 2 unconsumed bits, which means we ended 1 byte past a 24-bit group (e.g.
-        // 1 byte, 4 bytes, 301 bytes, etc.); since we need to always be a multiple of 4 output
-        // characters that means we've produced 1: so we right-fill 0s to get the next char, then
-        // add two padding ='s.
-        *out++ = detail::b64_lut.to_b64(r << 4);
-        *out++ = '=';
-        *out++ = '=';
-    } else if (bits == 4) {
-        // 4 bits left means we produced 2 6-bit values from the first 2 bytes of a 3-byte group.
-        // Fill 0s to get the last one, plus one padding output.
-        *out++ = detail::b64_lut.to_b64(r << 2);
-        *out++ = '=';
-    }
-
-    return out;
+    auto it = base64_encoder{begin, end}; // third ctor argument left at its default (padded = true)
+    return std::copy(it, it.end(), out); // write every encoded character, including any '=' padding
 }

 /// Creates and returns a base64 string from an iterator pair of a character sequence
@ -196,6 +234,82 @@ template <typename CharT>
 constexpr bool is_base64(std::basic_string_view<CharT> s) { return is_base64(s.begin(), s.end()); }
 constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end()); }

+/// Iterable object for on-the-fly base64 decoding.  Used internally, but also particularly useful
+/// when converting from one encoding to another.  The input range must be a valid base64 encoded
+/// string (with or without padding).
+///
+/// The object acts as its own input iterator: `*it` yields the current decoded byte, `++it`
+/// advances to the next one, and `it.end()` returns the matching past-the-end value.
+///
+/// Note that we ignore "padding" bits without requiring that they actually be 0.  For instance, the
+/// bytes "\ff\ff" are ideally encoded as "//8=" (16 bits of 1s + 2 padding 0 bits, then a full
+/// 6-bit padding char).  We don't, however, require that the padding bits be 0.  That is, "///=",
+/// "//9=", "//+=", etc. will all decode to the same \ff\ff output string.
+template <typename InputIt>
+struct base64_decoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "base64_decoder requires chars/bytes input iterator");
+    uint_fast16_t in = 0;
+    int bits = 0; // number of bits loaded into `in`; will be 8, 10 or 12 until we hit the end (or padding)
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs a decoder over the base64 characters in [begin, end); enough characters to form
+    /// the first byte (if any) are loaded immediately.
+    base64_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
+        if (_it != _end)
+            load_byte();
+    }
+
+    /// Returns the past-the-end decoder for this input range.
+    base64_decoder end() const { return {_end, _end}; }
+
+    // Only the input position is compared; leftover bits do not take part.
+    bool operator==(const base64_decoder& i) { return _it == i._it; }
+    bool operator!=(const base64_decoder& i) { return _it != i._it; }
+
+    /// Advances to the next decoded byte.
+    base64_decoder& operator++() {
+        // Discard 8 most significant bits
+        bits -= 8;
+        in &= (1 << bits) - 1;
+        if (++_it != _end)
+            load_byte();
+        return *this;
+    }
+    base64_decoder operator++(int) { base64_decoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the current decoded byte (the 8 most significant loaded bits).
+    char operator*() const {
+        return static_cast<char>(in >> (bits - 8));
+    }
+
+private:
+    // Appends the 6 bits of the character at `_it` to the accumulator, or jumps straight to the
+    // end of the input if that character is padding.
+    void load_in() {
+        // We hit padding trying to read enough for a full byte, so we're done.  (And since you were
+        // already supposed to have checked validity with is_base64, the padding can only be at the
+        // end).
+        auto c = static_cast<unsigned char>(*_it);
+        if (c == '=') {
+            _it = _end;
+            bits = 0;
+            return;
+        }
+
+        in = in << 6
+            | detail::b64_lut.from_b64(c);
+        bits += 6;
+    }
+
+    // Loads one character, plus a second one if the first did not bring us up to a full byte.  The
+    // `bits &&` test skips the second load when the first one ran into padding.
+    void load_byte() {
+        load_in();
+        if (bits && bits < 8 && ++_it != _end)
+            load_in();
+
+        // If we hit the _end iterator above then we hit the end of the input (or hit padding) with
+        // fewer than 8 bits accumulated to make a full byte.  For a properly encoded base64 string
+        // this should only be possible with 0, 2, or 4 bits of all 0s; these are essentially
+        // "padding" bits (e.g.  encoding 2 bytes (16 bits) requires 3 b64 chars (18 bits), where
+        // only the first 16 bits are significant).  Ideally any padding bits should be 0, but we
+        // don't check that and rather just ignore them.
+    }
+};
+
 /// Converts a sequence of base64 digits to bytes.  Undefined behaviour if any characters are not
 /// valid base64 alphabet characters.  It is permitted for the input and output ranges to overlap as
 /// long as `out` is no later than `begin`.  Trailing padding characters are permitted but not
@ -211,29 +325,10 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end
 template <typename InputIt, typename OutputIt>
 OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes");
-    uint_fast16_t curr = 0;
-    int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
-    while (begin != end) {
-        auto c = static_cast<unsigned char>(*begin++);
-
-        // padding; don't bother checking if we're at the end because is_base64 is a precondition
-        // and we're allowed UB if it isn't satisfied.
-        if (c == '=') continue;
-
-        curr = curr << 6 | detail::b64_lut.from_b64(c);
-        if (bits == 0)
-            bits = 6;
-        else {
-            bits -= 2; // Added 6, removing 8
-            *out++ = static_cast<detail::byte_type_t<OutputIt>>(
-                    static_cast<uint8_t>(curr >> bits));
-            curr &= (1 << bits) - 1;
-        }
-    }
-    // Don't worry about leftover bits because either they have to be 0, or they can't happen at
-    // all.  See base32z.h for why: the reasoning is exactly the same (except using 6 bits per
-    // character here instead of 5).
-
+    base64_decoder it{begin, end}; // decoding iterator over the input characters
+    auto bend = it.end(); // past-the-end decoder, computed once
+    while (it != bend)
+        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++); // cast each decoded char to detail::byte_type_t<OutputIt>
    return out;
 }

--- a/oxenmq/hex.h
+++ b/oxenmq/hex.h
@ -67,18 +67,50 @@ inline constexpr size_t to_hex_size(size_t byte_size) { return byte_size * 2; }
 /// Returns the number of bytes required to decode a hex string of the given size.
 inline constexpr size_t from_hex_size(size_t hex_size) { return hex_size / 2; }

+/// Iterable object for on-the-fly hex encoding.  Used internally, but also particularly useful when
+/// converting from one encoding to another.
+///
+/// The object acts as its own input iterator: `*it` yields the current hex digit (high nibble
+/// first, then low nibble of each input byte), `++it` advances to the next digit, and `it.end()`
+/// returns the matching past-the-end value.
+template <typename InputIt>
+struct hex_encoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator");
+    // The input byte currently being encoded.  It is loaded whenever we move onto a new byte
+    // (rather than as a side effect of dereferencing) so that `*it++`, or incrementing without
+    // dereferencing first, still yields the correct low nibble.
+    uint8_t c = 0;
+    bool second_half = false;
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs an encoder over the byte range [begin, end); the first byte (if any) is loaded
+    /// immediately.
+    hex_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
+        if (_it != _end)
+            load_byte();
+    }
+
+    /// Returns the past-the-end encoder for this input range.
+    hex_encoder end() const { return {_end, _end}; }
+
+    // Equal when on the same input byte and the same nibble of that byte.
+    bool operator==(const hex_encoder& i) { return _it == i._it && second_half == i.second_half; }
+    bool operator!=(const hex_encoder& i) { return !(*this == i); }
+
+    /// Advances to the next hex digit, moving on to the next input byte after the low nibble.
+    hex_encoder& operator++() {
+        second_half = !second_half;
+        if (!second_half && ++_it != _end)
+            load_byte();
+        return *this;
+    }
+    hex_encoder operator++(int) { hex_encoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the hex digit for the current nibble of the current input byte.
+    char operator*() const {
+        return detail::hex_lut.to_hex(second_half
+                ? c & 0x0f
+                : c >> 4);
+    }
+
+private:
+    // Caches the input byte at `_it`; must only be called when `_it != _end`.
+    void load_byte() { c = static_cast<uint8_t>(*_it); }
+};
+
 /// Creates hex digits from a character sequence given by iterators, writes them starting at `out`.
 /// Returns the final value of out (i.e. the iterator positioned just after the last written
 /// hex character).
 template <typename InputIt, typename OutputIt>
 OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes");
-    for (; begin != end; ++begin) {
-        uint8_t c = static_cast<uint8_t>(*begin);
-        *out++ = detail::hex_lut.to_hex(c >> 4);
-        *out++ = detail::hex_lut.to_hex(c & 0x0f);
-    }
-    return out;
+    auto it = hex_encoder{begin, end}; // encoding iterator: two hex digits per input byte
+    return std::copy(it, it.end(), out); // write every hex digit to `out`
 }

 /// Creates a string of hex digits from a character sequence iterator pair
@ -141,6 +173,48 @@ constexpr char from_hex_digit(unsigned char x) noexcept {
 /// Constructs a byte value from a pair of hex digits
 constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return (from_hex_digit(a) << 4) | from_hex_digit(b); }

+/// Iterable object for on-the-fly hex decoding.  Used internally but also particularly useful when
+/// converting from one encoding to another.  Undefined behaviour if the given iterator range is not
+/// a valid hex string with even length (i.e. is_hex() should return true).
+///
+/// The object acts as its own input iterator: `*it` yields the current decoded byte, `++it`
+/// advances to the next pair of hex digits, and `it.end()` returns the matching past-the-end value.
+template <typename InputIt>
+struct hex_decoder final {
+private:
+    InputIt _it, _end;
+    static_assert(sizeof(decltype(*_it)) == 1, "hex_decoder requires chars/bytes input iterator");
+    // Most recently decoded byte; zero-initialized so that copying a decoder over an empty range
+    // (or the value returned by `end()`) never reads an indeterminate value.
+    char byte = 0;
+public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = char;
+    using reference = value_type;
+    using pointer = void;
+
+    /// Constructs a decoder over the hex digits in [begin, end); the first digit pair (if any) is
+    /// decoded immediately.
+    hex_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
+        if (_it != _end)
+            load_byte();
+    }
+
+    /// Returns the past-the-end decoder for this input range.
+    hex_decoder end() const { return {_end, _end}; }
+
+    bool operator==(const hex_decoder& i) { return _it == i._it; }
+    bool operator!=(const hex_decoder& i) { return _it != i._it; }
+
+    /// Advances to the next decoded byte.
+    hex_decoder& operator++() {
+        // `_it` sits on the second digit of the current pair, so one step reaches the next pair.
+        if (++_it != _end)
+            load_byte();
+        return *this;
+    }
+    hex_decoder operator++(int) { hex_decoder copy{*this}; ++*this; return copy; }
+
+    /// Returns the current decoded byte.
+    char operator*() const { return byte; }
+
+private:
+    // Decodes the digit pair starting at `_it`, leaving `_it` on the second digit of the pair.
+    void load_byte() {
+        auto a = *_it;
+        auto b = *++_it;
+        byte = from_hex_pair(static_cast<unsigned char>(a), static_cast<unsigned char>(b));
+    }
+};
+
 /// Converts a sequence of hex digits to bytes.  Undefined behaviour if any characters are not in
 /// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to
 /// check.  It is permitted for the input and output ranges to overlap as long as out is no later
@ -148,14 +222,11 @@ constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return
 /// last written character).
 template <typename InputIt, typename OutputIt>
 OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) {
-    using std::distance;
    assert(is_hex(begin, end));
-    while (begin != end) {
-        auto a = *begin++;
-        auto b = *begin++;
-        *out++ = static_cast<detail::byte_type_t<OutputIt>>(
-                from_hex_pair(static_cast<unsigned char>(a), static_cast<unsigned char>(b)));
-    }
+    auto it = hex_decoder(begin, end); // decoding iterator: one byte per pair of hex digits
+    const auto hend = it.end(); // past-the-end decoder, computed once
+    while (it != hend)
+        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++); // cast each decoded char to detail::byte_type_t<OutputIt>
    return out;
 }

--- a/tests/test_encoding.cpp
+++ b/tests/test_encoding.cpp
@ -285,6 +285,90 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
    REQUIRE( oxenmq::from_base64_size(2) == 1 );
 }

+TEST_CASE("transcoding", "[decoding][encoding][base32z][hex][base64]") {
+    // Decoders: one per text encoding; each is checked below to decode to `pk`.
+    oxenmq::base64_decoder in64{pk_b64.begin(), pk_b64.end()};
+    oxenmq::base32z_decoder in32z{pk_b32z.begin(), pk_b32z.end()};
+    oxenmq::hex_decoder in16{pk_hex.begin(), pk_hex.end()};
+
+    // Transcoders: an encoder layered directly on top of a decoder, with no intermediate string.
+    oxenmq::base32z_encoder b64_to_b32z{in64, in64.end()};
+    oxenmq::base32z_encoder hex_to_b32z{in16, in16.end()};
+    oxenmq::hex_encoder b64_to_hex{in64, in64.end()};
+    oxenmq::hex_encoder b32z_to_hex{in32z, in32z.end()};
+    oxenmq::base64_encoder hex_to_b64{in16, in16.end()};
+    oxenmq::base64_encoder b32z_to_b64{in32z, in32z.end()};
+    // Same-format round trips are pointless in practice, but should work anyway:
+    oxenmq::base64_encoder b64_to_b64{in64, in64.end()};
+    oxenmq::base32z_encoder b32z_to_b32z{in32z, in32z.end()};
+    oxenmq::hex_encoder hex_to_hex{in16, in16.end()};
+
+    // Decoding to bytes:
+    std::string x;
+    auto xx = std::back_inserter(x); // reused for every copy below; `x` is cleared between checks
+    std::copy(in64, in64.end(), xx);
+    REQUIRE( x == pk );
+    x.clear();
+    std::copy(in32z, in32z.end(), xx);
+    REQUIRE( x == pk );
+    x.clear();
+    std::copy(in16, in16.end(), xx);
+    REQUIRE( x == pk );
+
+    // Transcoding: every encoder-over-decoder pair must reproduce the target encoding of pk.
+    x.clear();
+    std::copy(b64_to_hex, b64_to_hex.end(), xx);
+    CHECK( x == pk_hex );
+
+    x.clear();
+    std::copy(b64_to_b32z, b64_to_b32z.end(), xx);
+    CHECK( x == pk_b32z );
+
+    x.clear();
+    std::copy(b64_to_b64, b64_to_b64.end(), xx);
+    CHECK( x == pk_b64 );
+
+    x.clear();
+    std::copy(b32z_to_hex, b32z_to_hex.end(), xx);
+    CHECK( x == pk_hex );
+
+    x.clear();
+    std::copy(b32z_to_b32z, b32z_to_b32z.end(), xx);
+    CHECK( x == pk_b32z );
+
+    x.clear();
+    std::copy(b32z_to_b64, b32z_to_b64.end(), xx);
+    CHECK( x == pk_b64 );
+
+    x.clear();
+    std::copy(hex_to_hex, hex_to_hex.end(), xx);
+    CHECK( x == pk_hex );
+
+    x.clear();
+    std::copy(hex_to_b32z, hex_to_b32z.end(), xx);
+    CHECK( x == pk_b32z );
+
+    x.clear();
+    std::copy(hex_to_b64, hex_to_b64.end(), xx);
+    CHECK( x == pk_b64 );
+
+    // Make a big chain of conversions: b64 -> bytes -> b32z -> bytes -> b64 -> bytes -> hex
+    oxenmq::base32z_encoder it1{in64, in64.end()};
+    oxenmq::base32z_decoder it2{it1, it1.end()};
+    oxenmq::base64_encoder it3{it2, it2.end()};
+    oxenmq::base64_decoder it4{it3, it3.end()};
+    oxenmq::hex_encoder it5{it4, it4.end()};
+    x.clear();
+    std::copy(it5, it5.end(), xx);
+    CHECK( x == pk_hex );
+
+    // No-padding b64 encoding: expect pk_b64 without its final character.
+    oxenmq::base64_encoder b64_nopad{pk.begin(), pk.end(), false};
+    x.clear();
+    std::copy(b64_nopad, b64_nopad.end(), xx);
+    CHECK( x == pk_b64.substr(0, pk_b64.size()-1) );
+}
+
 TEST_CASE("std::byte decoding", "[decoding][hex][base32z][base64]") {
    // Decoding to std::byte is a little trickier because you can't assign to a byte without an
    // explicit cast, which means we have to properly detect that output is going to a std::byte