Merge pull request #52 from jagerman/convert-iterators

Make (and use) iterator approach for encoding/decoding
This commit is contained in:
Jason Rhinelander 2021-10-13 18:17:28 -03:00 committed by GitHub
commit 504d0d10ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 645 additions and 165 deletions

View File

@ -1,3 +1,6 @@
local apt_get_quiet = 'apt-get -o=Dpkg::Use-Pty=0 -q ';
local debian_pipeline(name, image, arch='amd64', deps='g++ libsodium-dev libzmq3-dev', cmake_extra='', build_type='Release', extra_cmds=[], allow_fail=false) = {
kind: 'pipeline',
type: 'docker',
@ -10,10 +13,12 @@ local debian_pipeline(name, image, arch='amd64', deps='g++ libsodium-dev libzmq3
image: image,
[if allow_fail then "failure"]: "ignore",
commands: [
'apt-get update',
'apt-get install -y eatmydata',
'eatmydata apt-get dist-upgrade -y',
'eatmydata apt-get install -y cmake git ninja-build pkg-config ccache ' + deps,
'echo "Building on ${DRONE_STAGE_MACHINE}"',
'echo "man-db man-db/auto-update boolean false" | debconf-set-selections',
apt_get_quiet + 'update',
apt_get_quiet + 'install -y eatmydata',
'eatmydata ' + apt_get_quiet + 'dist-upgrade -y',
'eatmydata ' + apt_get_quiet + 'install -y cmake git ninja-build pkg-config ccache ' + deps,
'git submodule update --init --recursive',
'mkdir build',
'cd build',

View File

@ -74,44 +74,81 @@ static_assert(b32z_lut.from_b32z('w') == 20 && b32z_lut.from_b32z('T') == 17 &&
} // namespace detail
/// Computes how many base32z characters are produced when encoding `byte_size` bytes.  Each output
/// character carries 5 bits, so the result is ⌈(8 * byte_size) / 5⌉.
inline constexpr size_t to_base32z_size(size_t byte_size) {
    const size_t total_bits = byte_size * 8;
    return (total_bits + 4) / 5;  // +4 rounds the division by 5 upwards
}
/// Computes the (maximum) number of bytes obtained by decoding a base32z string of `b32z_size`
/// characters: each character contributes 5 bits and only whole bytes count, i.e. ⌊bits / 8⌋.
inline constexpr size_t from_base32z_size(size_t b32z_size) {
    const size_t total_bits = b32z_size * 5;
    return total_bits / 8;
}
/// Input-iterator adaptor that produces base32z characters lazily from a range of bytes.  Used
/// internally by the encoding functions, and handy for converting from one encoding to another
/// without building an intermediate string.
template <typename InputIt>
struct base32z_encoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "base32z_encoder requires chars/bytes input iterator");
    // How many not-yet-emitted bits `r` currently holds; stays >= 5 until the input runs out.
    int bits{_it != _end ? 8 : 0};
    // Bit buffer of already-read input; its top `bits` bits may span the current and next chars.
    uint_fast16_t r{bits ? static_cast<unsigned char>(*_it) : static_cast<unsigned char>(0)};

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    base32z_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {}

    /// Returns the past-the-end encoder for this range.
    base32z_encoder end() { return {_end, _end}; }

    bool operator==(const base32z_encoder& i) { return _it == i._it && bits == i.bits; }
    bool operator!=(const base32z_encoder& i) { return !(*this == i); }

    /// Steps to the next output character, pulling another input byte when fewer than 5 bits
    /// remain buffered.
    base32z_encoder& operator++() {
        assert(bits >= 5);
        // The 5 most significant buffered bits belonged to the character we are leaving.
        bits -= 5;
        r &= (1 << bits) - 1;

        // Nothing to refill if a whole character is still buffered or the input is exhausted.
        if (bits >= 5 || _it == _end)
            return *this;

        ++_it;
        if (_it != _end) {
            r = (r << 8) | static_cast<unsigned char>(*_it);
            bits += 8;
        } else if (bits > 0) {
            // Input is finished but some bits are left over: left-align them into a final 5-bit
            // group (zero-filled on the right), e.g. leftover "11" is emitted as "11000".
            r <<= (5 - bits);
            bits = 5;
        }
        return *this;
    }

    base32z_encoder operator++(int) {
        base32z_encoder previous{*this};
        ++*this;
        return previous;
    }

    /// Returns the current base32z character.
    char operator*() {
        // Shift away the buffered bits that belong to later characters.
        return detail::b32z_lut.to_b32z(r >> (bits - 5));
    }
};
/// Converts bytes into a base32z encoded character sequence, writing them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written base32z
/// character).
template <typename InputIt, typename OutputIt>
OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes");
    // All of the bit shuffling lives in base32z_encoder; just drain it into `out`.
    base32z_encoder it{begin, end};
    return std::copy(it, it.end(), out);
}
/// Creates a base32z string from an iterator pair of a byte sequence.
template <typename It>
std::string to_base32z(It begin, It end) {
    std::string base32z;
    // Only pre-size the string when the input length is available from random-access iterators.
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        using std::distance;
        base32z.reserve(to_base32z_size(distance(begin, end)));
    }
    to_base32z(begin, end, std::back_inserter(base32z));
    return base32z;
}
@ -121,15 +158,36 @@ template <typename CharT>
std::string to_base32z(std::basic_string_view<CharT> s) { return to_base32z(s.begin(), s.end()); }
inline std::string to_base32z(std::string_view s) { return to_base32z<>(s); }
/// Checks whether [begin, end) is an acceptable base32z string: every character must belong to
/// the base32z alphabet, and the length must be one that to_base32z could actually have produced
/// (some lengths are impossible).
///
/// Valid lengths:
/// - 5n + 0 bytes encodes to 8n chars (no padding bits)
/// - 5n + 1 bytes encodes to 8n+2 chars (last 2 bits are padding)
/// - 5n + 2 bytes encodes to 8n+4 chars (last 4 bits are padding)
/// - 5n + 3 bytes encodes to 8n+5 chars (last 1 bit is padding)
/// - 5n + 4 bytes encodes to 8n+7 chars (last 3 bits are padding)
/// so a length of 8n+1, 8n+3 or 8n+6 is rejected.
template <typename It>
constexpr bool is_base32z(It begin, It end) {
    static_assert(sizeof(decltype(*begin)) == 1, "is_base32z requires chars/bytes");
    constexpr bool random = std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>;

    size_t count = 0;
    if constexpr (random) {
        // With random access the length is known up front, so reject bad lengths immediately.
        using std::distance;
        count = distance(begin, end) % 8;
        switch (count) {
            case 1:
            case 3:
            case 6:
                return false;
            default:
                break;
        }
    }

    while (begin != end) {
        const auto c = static_cast<unsigned char>(*begin);
        // The lookup yields 0 both for invalid characters and for 'y'/'Y' (which encode the
        // value 0), so those two are accepted explicitly.
        const bool in_alphabet = detail::b32z_lut.from_b32z(c) != 0 || c == 'y' || c == 'Y';
        if (!in_alphabet)
            return false;
        if constexpr (!random)
            count++;
        ++begin;
    }

    if constexpr (!random) {
        // Length was not known in advance; validate it now that everything has been counted.
        count %= 8;
        if (count == 1 || count == 3 || count == 6)
            return false;
    }
    return true;
}
@ -138,57 +196,88 @@ template <typename CharT>
constexpr bool is_base32z(std::basic_string_view<CharT> s) { return is_base32z(s.begin(), s.end()); }
constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); }
/// Input-iterator adaptor that decodes base32z characters into bytes lazily.  Used internally, but
/// also particularly useful when converting from one encoding to another.  The input range must
/// be a valid base32z encoded string.
///
/// Note that "padding" bits are ignored without requiring that they actually be 0.  For instance,
/// the bytes "\ff\ff" are ideally encoded as "999o" (16 bits of 1s + 4 padding 0 bits), but the
/// padding bits are not required to be 0.  That is, "9999", "9993", etc. will all decode to the
/// same \ff\ff output string.
template <typename InputIt>
struct base32z_decoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "base32z_decoder requires chars/bytes input iterator");
    // Bit buffer of decoded-but-not-yet-returned input.
    uint_fast16_t in = 0;
    // Number of bits loaded into `in`; will be in [8, 12] until we hit the end.
    int bits = 0;

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    base32z_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
        if (_it != _end)
            load_byte();
    }

    /// Returns the past-the-end decoder for this range.
    base32z_decoder end() { return {_end, _end}; }

    bool operator==(const base32z_decoder& i) { return _it == i._it; }
    bool operator!=(const base32z_decoder& i) { return _it != i._it; }

    /// Steps to the next decoded byte.
    base32z_decoder& operator++() {
        // The 8 most significant buffered bits were the byte we are leaving behind.
        bits -= 8;
        in &= (1 << bits) - 1;
        ++_it;
        if (_it != _end)
            load_byte();
        return *this;
    }

    base32z_decoder operator++(int) {
        base32z_decoder previous{*this};
        ++*this;
        return previous;
    }

    /// Returns the current decoded byte (the top 8 buffered bits).
    char operator*() {
        return in >> (bits - 8);
    }

  private:
    // Appends the 5 bits of the character at `_it` to the buffer.
    void load_in() {
        const auto c = static_cast<unsigned char>(*_it);
        in = (in << 5) | detail::b32z_lut.from_b32z(c);
        bits += 5;
    }

    // Reads one or two characters so that (if input remains) at least 8 bits are buffered.
    void load_byte() {
        load_in();
        if (bits >= 8)
            return;
        if (++_it != _end)
            load_in();
        // If we hit the _end iterator above then we hit the end of the input with fewer than 8 bits
        // accumulated to make a full byte.  For a properly encoded base32z string this should only
        // be possible with 0-4 bits of all 0s; these are essentially "padding" bits (e.g. encoding
        // 2 bytes (16 bits) requires 4 b32z chars (20 bits), where only the first 16 bits are
        // significant).  Ideally any padding bits should be 0, but we don't check that and rather
        // just ignore them.
        //
        // It also isn't possible to get here with 5-7 bits if the string passes `is_base32z`
        // because the length checks done there disallow such a length as valid.  (If you were to
        // pass such a string to us anyway then we are technically UB, but the current
        // implementation just ignores the extra bits as if they are extra padding).
    }
};
/// Converts a sequence of base32z digits to bytes.  Undefined behaviour if any characters are not
/// valid base32z alphabet characters.  It is permitted for the input and output ranges to overlap
/// as long as `out` is no later than `begin`.  Returns the final value of out (that is, the
/// iterator positioned just after the last written character).
///
/// Note that if you pass in a sequence that could not have been created by a base32z encoding of a
/// byte sequence, we treat the excess bits as if they were not provided.
///
/// For example, "yyy" represents a 15-bit value, but a byte sequence is either 8-bit (requiring 2
/// characters) or 16-bit (requiring 4).  Similarly, "yb" is an impossible encoding because it has
/// its 10th bit set (b = 00001), but a base32z encoded value should have all 0's beyond the 8th (or
/// 16th or 24th or ... bit).  We treat any such bits as if they were not specified (even if they
/// are): which means "yy", "yb", "yyy", "yy9", "yd", etc. all decode to the same 1-byte value "\0".
template <typename InputIt, typename OutputIt>
OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes");
    // base32z_decoder does the bit accumulation (and the ignoring of trailing padding bits); each
    // byte it yields is converted to the output iterator's byte type.
    base32z_decoder it{begin, end};
    auto bend = it.end();
    while (it != bend)
        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++);
    return out;
}
@ -197,8 +286,10 @@ OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) {
/// Decodes the base32z characters in [begin, end) and returns the resulting bytes as a string.
template <typename It>
std::string from_base32z(It begin, It end) {
    std::string bytes;
    // Only pre-size the string when the input length is available from random-access iterators.
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        using std::distance;
        bytes.reserve(from_base32z_size(distance(begin, end)));
    }
    from_base32z(begin, end, std::back_inserter(bytes));
    return bytes;
}

View File

@ -76,74 +76,153 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_
} // namespace detail
/// Computes how many characters are needed to base64-encode `byte_size` bytes.  With `padded`
/// (the default) the count includes trailing '=' padding and is therefore a multiple of 4;
/// without it the count is exactly ⌈bytes*4/3⌉.
inline constexpr size_t to_base64_size(size_t byte_size, bool padded = true) {
    if (padded) {
        // bytes*4/3, rounded up to the next multiple of 4
        return (byte_size + 2) / 3 * 4;
    }
    return (byte_size * 4 + 2) / 3;
}
/// Computes the (maximum) number of bytes needed to decode a base64 string of `b64_size`
/// characters.  This may overallocate by 1-2 bytes if the size includes 1-2 padding chars.
inline constexpr size_t from_base64_size(size_t b64_size) {
    // 6 bits per character, 8 bits per byte: ⌊chars * 6 / 8⌋ == ⌊chars * 3 / 4⌋.  Floor because
    // trailing bits that cannot form a whole byte are ignored.
    const size_t scaled = b64_size * 3;
    return scaled / 4;
}
/// Iterable object for on-the-fly base64 encoding.  Used internally, but also particularly useful
/// when converting from one encoding to another.
///
/// When constructed with `padded = true` the encoder appends the '=' characters needed to bring
/// the output length up to a multiple of 4; an empty input range produces no output at all,
/// padded or not.
template <typename InputIt>
struct base64_encoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator");
    // While input remains: nonzero means padding was requested.  Once the input is exhausted:
    // the number of '=' characters still to be emitted.
    int padding;
    // Number of bits held in r; will always be >= 6 until we are at the end.
    int bits{_it != _end ? 8 : 0};
    // Holds bits of data we've already read, which might belong to current or next chars
    uint_fast16_t r{bits ? static_cast<unsigned char>(*_it) : static_cast<unsigned char>(0)};

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    // `padding` must start at 0 for an empty range: otherwise the freshly constructed encoder
    // (bits == 0, padding == 1) compares unequal to end() and a spurious '=' would be emitted
    // for empty input.  (_it and _end are declared, and thus initialized, before padding.)
    base64_encoder(InputIt begin, InputIt end, bool padded = true)
        : _it{std::move(begin)}, _end{std::move(end)}, padding{(padded && _it != _end) ? 1 : 0} {}

    /// Returns the past-the-end encoder for this range.
    base64_encoder end() { return {_end, _end, false}; }

    bool operator==(const base64_encoder& i) { return _it == i._it && bits == i.bits && padding == i.padding; }
    bool operator!=(const base64_encoder& i) { return !(*this == i); }

    /// Steps to the next output character (data or padding).
    base64_encoder& operator++() {
        if (bits == 0) {
            // All data has been emitted; we are stepping through '=' padding characters.
            padding--;
            return *this;
        }
        assert(bits >= 6);
        // Discard the most significant 6 bits
        bits -= 6;
        r &= (1 << bits) - 1;
        // If we end up with less than 6 significant bits then try to pull another 8 bits:
        if (bits < 6 && _it != _end) {
            if (++_it != _end) {
                r = (r << 8) | static_cast<unsigned char>(*_it);
                bits += 8;
            } else if (bits > 0) {
                // No more input bytes, so shift `r` to put the bits we have into the most
                // significant bit position for the final character, and figure out how many
                // padding bytes we want to append.  E.g. if we have "11" we want the last
                // character to be encoded "110000".
                if (padding) {
                    // padding should be:
                    // 3n+0 input => 4n output, no padding, handled below
                    // 3n+1 input => 4n+2 output + 2 padding; we'll land here with 2 trailing bits
                    // 3n+2 input => 4n+3 output + 1 padding; we'll land here with 4 trailing bits
                    padding = 3 - bits / 2;
                }
                r <<= (6 - bits);
                bits = 6;
            } else {
                padding = 0;  // No excess bits, so input was a multiple of 3 and thus no padding
            }
        }
        return *this;
    }

    base64_encoder operator++(int) {
        base64_encoder copy{*this};
        ++*this;
        return copy;
    }

    /// Returns the current output character: '=' while emitting padding, otherwise the base64
    /// character for the top 6 buffered bits.
    char operator*() {
        if (bits == 0 && padding)
            return '=';
        // Right-shift off the excess bits we aren't accessing yet
        return detail::b64_lut.to_b64(r >> (bits - 6));
    }
};
/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written base64
/// character).
template <typename InputIt, typename OutputIt>
OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) {
OutputIt to_base64(InputIt begin, InputIt end, OutputIt out, bool padded = true) {
static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes");
int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4}
std::uint_fast16_t r = 0;
while (begin != end) {
r = r << 8 | static_cast<unsigned char>(*begin++);
// we just added 8 bits, so we can *always* consume 6 to produce one character, so (net) we
// are adding 2 bits.
bits += 2;
*out++ = detail::b64_lut.to_b64(r >> bits); // Right-shift off the bits we aren't consuming right now
// Drop the bits we don't want to keep (because we just consumed them)
r &= (1 << bits) - 1;
if (bits == 6) { // We have enough bits to produce a second character (which means we had 4 before and added 8)
bits = 0;
*out++ = detail::b64_lut.to_b64(r);
r = 0;
}
}
// If bits == 0 then we ended our 6-bit outputs coinciding with 8-bit values, i.e. at a multiple
// of 24 bits: this means we don't have anything else to output and don't need any padding.
if (bits == 2) {
// We finished with 2 unconsumed bits, which means we ended 1 byte past a 24-bit group (e.g.
// 1 byte, 4 bytes, 301 bytes, etc.); since we need to always be a multiple of 4 output
// characters that means we've produced 1: so we right-fill 0s to get the next char, then
// add two padding ='s.
*out++ = detail::b64_lut.to_b64(r << 4);
*out++ = '=';
*out++ = '=';
} else if (bits == 4) {
// 4 bits left means we produced 2 6-bit values from the first 2 bytes of a 3-byte group.
// Fill 0s to get the last one, plus one padding output.
*out++ = detail::b64_lut.to_b64(r << 2);
*out++ = '=';
}
return out;
auto it = base64_encoder{begin, end, padded};
return std::copy(it, it.end(), out);
}
/// Creates and returns a base64 string from an iterator pair of a character sequence.  The
/// resulting string will have '=' padding, if appropriate.
template <typename It>
std::string to_base64(It begin, It end) {
    std::string base64;
    // Only pre-size the string when the input length is available from random-access iterators.
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        using std::distance;
        base64.reserve(to_base64_size(distance(begin, end)));
    }
    to_base64(begin, end, std::back_inserter(base64));
    return base64;
}
/// Builds and returns the base64 encoding of the character sequence [begin, end), without any
/// trailing '=' padding.
template <typename It>
std::string to_base64_unpadded(It begin, It end) {
    constexpr bool random_access = std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>;

    std::string encoded;
    if constexpr (random_access) {
        // Length is available from random-access iterators: size the string for the unpadded output.
        using std::distance;
        const auto input_len = distance(begin, end);
        encoded.reserve(to_base64_size(input_len, false));
    }
    auto sink = std::back_inserter(encoded);
    to_base64(begin, end, sink, false);
    return encoded;
}
/// Creates a base64 string from an iterable, std::string-like object. The string will have '='
/// padding, if appropriate.
template <typename CharT>
std::string to_base64(std::basic_string_view<CharT> s) { return to_base64(s.begin(), s.end()); }
inline std::string to_base64(std::string_view s) { return to_base64<>(s); }
/// Creates a base64 string from an iterable, std::string-like object. The string will not be
/// padded.
template <typename CharT>
std::string to_base64_unpadded(std::basic_string_view<CharT> s) { return to_base64_unpadded(s.begin(), s.end()); }
inline std::string to_base64_unpadded(std::string_view s) { return to_base64_unpadded<>(s); }
/// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding,
/// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4.
/// Otherwise the string must contain only valid base64 characters, and must not have a length of
/// 4n+1 (because that cannot be produced by base64 encoding).
template <typename It>
constexpr bool is_base64(It begin, It end) {
static_assert(sizeof(decltype(*begin)) == 1, "is_base64 requires chars/bytes");
using std::distance;
using std::prev;
size_t count = 0;
constexpr bool random = std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>;
if constexpr (random) {
count = distance(begin, end) % 4;
if (count == 1)
return false;
}
// Allow 1 or 2 padding chars *if* they pad it to a multiple of 4.
if (begin != end && distance(begin, end) % 4 == 0) {
@ -158,7 +237,14 @@ constexpr bool is_base64(It begin, It end) {
auto c = static_cast<unsigned char>(*begin);
if (detail::b64_lut.from_b64(c) == 0 && c != 'A')
return false;
if constexpr (!random)
count++;
}
if constexpr (!random)
if (count % 4 == 1) // base64 encoding will produce 4n, 4n+2, 4n+3, but never 4n+1
return false;
return true;
}
@ -167,6 +253,82 @@ template <typename CharT>
constexpr bool is_base64(std::basic_string_view<CharT> s) { return is_base64(s.begin(), s.end()); }
constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end()); }
/// Input-iterator adaptor that decodes base64 characters into bytes lazily.  Used internally, but
/// also particularly useful when converting from one encoding to another.  The input range must
/// be a valid base64 encoded string (with or without padding).
///
/// Note that "padding" bits are ignored without requiring that they actually be 0.  For instance,
/// the bytes "\ff\ff" are ideally encoded as "//8=" (16 bits of 1s + 2 padding 0 bits, then a full
/// 6-bit padding char).  The padding bits are not, however, required to be 0.  That is, "///=",
/// "//9=", "//+=", etc. will all decode to the same \ff\ff output string.
template <typename InputIt>
struct base64_decoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "base64_decoder requires chars/bytes input iterator");
    // Bit buffer of decoded-but-not-yet-returned input.
    uint_fast16_t in = 0;
    // Number of bits loaded into `in`; will be in [8, 12] until we hit the end.
    int bits = 0;

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    base64_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
        if (_it != _end)
            load_byte();
    }

    /// Returns the past-the-end decoder for this range.
    base64_decoder end() { return {_end, _end}; }

    bool operator==(const base64_decoder& i) { return _it == i._it; }
    bool operator!=(const base64_decoder& i) { return _it != i._it; }

    /// Steps to the next decoded byte.
    base64_decoder& operator++() {
        // The 8 most significant buffered bits were the byte we are leaving behind.
        bits -= 8;
        in &= (1 << bits) - 1;
        ++_it;
        if (_it != _end)
            load_byte();
        return *this;
    }

    base64_decoder operator++(int) {
        base64_decoder previous{*this};
        ++*this;
        return previous;
    }

    /// Returns the current decoded byte (the top 8 buffered bits).
    char operator*() {
        return in >> (bits - 8);
    }

  private:
    // Appends the 6 bits of the character at `_it` to the buffer, or jumps to the end when that
    // character is '=' padding.
    void load_in() {
        const auto c = static_cast<unsigned char>(*_it);
        if (c != '=') {
            in = (in << 6) | detail::b64_lut.from_b64(c);
            bits += 6;
            return;
        }
        // We hit padding trying to read enough for a full byte, so we're done.  (And since you
        // were already supposed to have checked validity with is_base64, the padding can only be
        // at the end).
        _it = _end;
        bits = 0;
    }

    // Reads one or two characters so that (if input remains) at least 8 bits are buffered.
    void load_byte() {
        load_in();
        // bits == 0 means load_in() ran into padding; bits >= 8 means a full byte is ready.
        if (bits == 0 || bits >= 8)
            return;
        if (++_it != _end)
            load_in();
        // If we hit the _end iterator above then we hit the end of the input (or hit padding) with
        // fewer than 8 bits accumulated to make a full byte.  For a properly encoded base64 string
        // this should only be possible with 0, 2, or 4 bits of all 0s; these are essentially
        // "padding" bits (e.g. encoding 2 bytes (16 bits) requires 3 b64 chars (18 bits), where
        // only the first 16 bits are significant).  Ideally any padding bits should be 0, but we
        // don't check that and rather just ignore them.
    }
};
/// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not
/// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as
/// long as `out` is no later than `begin`. Trailing padding characters are permitted but not
@ -182,29 +344,10 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end
template <typename InputIt, typename OutputIt>
OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes");
    // base64_decoder does the bit accumulation and stops at '=' padding; each byte it yields is
    // converted to the output iterator's byte type.  Leftover bits need no handling here: the
    // decoder ignores them.
    base64_decoder it{begin, end};
    auto bend = it.end();
    while (it != bend)
        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++);
    return out;
}
@ -213,8 +356,10 @@ OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) {
/// Decodes the base64 characters in [begin, end) and returns the resulting bytes as a string.
template <typename It>
std::string from_base64(It begin, It end) {
    std::string bytes;
    // Only pre-size the string when the input length is available from random-access iterators;
    // this may overallocate by 1-2 bytes when the input ends in padding.
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        using std::distance;
        bytes.reserve(from_base64_size(distance(begin, end)));
    }
    from_base64(begin, end, std::back_inserter(bytes));
    return bytes;
}

View File

@ -62,26 +62,65 @@ static_assert(hex_lut.from_hex('a') == 10 && hex_lut.from_hex('F') == 15 && hex_
} // namespace detail
/// Computes how many hex characters are needed to encode `byte_size` bytes: two digits per byte.
inline constexpr size_t to_hex_size(size_t byte_size) {
    constexpr size_t digits_per_byte = 2;
    return byte_size * digits_per_byte;
}
/// Computes how many bytes are needed to decode a hex string of `hex_size` characters: one byte
/// per pair of digits (an odd trailing digit is not counted).
inline constexpr size_t from_hex_size(size_t hex_size) {
    constexpr size_t digits_per_byte = 2;
    return hex_size / digits_per_byte;
}
/// Input-iterator adaptor that produces hex digits lazily from a range of bytes (two digits per
/// input byte, high nibble first).  Used internally, but also particularly useful when converting
/// from one encoding to another.
template <typename InputIt>
struct hex_encoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "hex_encoder requires chars/bytes input iterator");
    // The input byte read when the first digit was dereferenced; reused for the second digit.
    uint8_t c = 0;
    // false while positioned on a byte's high-nibble digit, true on its low-nibble digit.
    bool second_half = false;

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    hex_encoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {}

    /// Returns the past-the-end encoder for this range.
    hex_encoder end() { return {_end, _end}; }

    bool operator==(const hex_encoder& i) { return _it == i._it && second_half == i.second_half; }
    bool operator!=(const hex_encoder& i) { return !(*this == i); }

    /// Steps to the next hex digit; the underlying iterator only advances after the second digit.
    hex_encoder& operator++() {
        if (second_half)
            ++_it;
        second_half = !second_half;
        return *this;
    }

    hex_encoder operator++(int) {
        hex_encoder previous{*this};
        ++*this;
        return previous;
    }

    /// Returns the current hex digit.  Dereferencing the first digit reads (and caches) the
    /// input byte; the second digit is served from that cached byte.
    char operator*() {
        if (!second_half)
            c = static_cast<uint8_t>(*_it);
        return detail::hex_lut.to_hex(second_half ? c & 0x0f : c >> 4);
    }
};
/// Creates hex digits from a character sequence given by iterators, writes them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written
/// hex character).
template <typename InputIt, typename OutputIt>
OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) {
    static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes");
    // hex_encoder yields the two digits of each byte in turn; just drain it into `out`.
    auto it = hex_encoder{begin, end};
    return std::copy(it, it.end(), out);
}
/// Creates a string of hex digits from a character sequence iterator pair
template <typename It>
std::string to_hex(It begin, It end) {
    std::string hex;
    // Only pre-size the string when the input length is available from random-access iterators.
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        using std::distance;
        hex.reserve(to_hex_size(distance(begin, end)));
    }
    to_hex(begin, end, std::back_inserter(hex));
    return hex;
}
@ -104,9 +143,11 @@ template <typename It>
constexpr bool is_hex(It begin, It end) {
static_assert(sizeof(decltype(*begin)) == 1, "is_hex requires chars/bytes");
constexpr bool ra = std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>;
if constexpr (ra)
if (std::distance(begin, end) % 2 != 0)
if constexpr (ra) {
using std::distance;
if (distance(begin, end) % 2 != 0)
return false;
}
size_t count = 0;
for (; begin != end; ++begin) {
@ -132,6 +173,48 @@ constexpr char from_hex_digit(unsigned char x) noexcept {
/// Combines two hex digits into one byte value: `a` supplies the high nibble and `b` the low
/// nibble.
constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept {
    const auto high_nibble = from_hex_digit(a) << 4;
    const auto low_nibble = from_hex_digit(b);
    return high_nibble | low_nibble;
}
/// Iterable object for on-the-fly hex decoding.  Used internally but also particularly useful when
/// converting from one encoding to another.  Undefined behaviour if the given iterator range is not
/// a valid hex string with even length (i.e. is_hex() should return true).
template <typename InputIt>
struct hex_decoder final {
  private:
    InputIt _it, _end;
    static_assert(sizeof(decltype(*_it)) == 1, "hex_decoder requires chars/bytes input iterator");
    // The decoded byte for the current position.  Zero-initialized so that end/empty iterators
    // (which never call load_byte) do not carry, and copy around, an indeterminate value.
    char byte = 0;

  public:
    using iterator_category = std::input_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = char;
    using reference = value_type;
    using pointer = void;

    hex_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} {
        if (_it != _end)
            load_byte();
    }

    /// Returns the past-the-end decoder for this range.
    hex_decoder end() { return {_end, _end}; }

    bool operator==(const hex_decoder& i) { return _it == i._it; }
    bool operator!=(const hex_decoder& i) { return _it != i._it; }

    /// Steps to the next decoded byte.  `_it` rests on the second digit of the current pair, so
    /// one increment moves to the first digit of the next pair (or to the end).
    hex_decoder& operator++() {
        if (++_it != _end)
            load_byte();
        return *this;
    }

    hex_decoder operator++(int) {
        hex_decoder copy{*this};
        ++*this;
        return copy;
    }

    /// Returns the current decoded byte.
    char operator*() const { return byte; }

  private:
    // Reads the two hex digits starting at `_it` (leaving `_it` on the second one) and stores
    // the byte they encode.
    void load_byte() {
        auto a = *_it;
        auto b = *++_it;
        byte = from_hex_pair(static_cast<unsigned char>(a), static_cast<unsigned char>(b));
    }
};
/// Converts a sequence of hex digits to bytes. Undefined behaviour if any characters are not in
/// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to
/// check. It is permitted for the input and output ranges to overlap as long as out is no later
@ -139,14 +222,11 @@ constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return
/// last written character).
template <typename InputIt, typename OutputIt>
OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) {
    assert(is_hex(begin, end));
    // Decode through hex_decoder so that the digit-pair -> byte step has a single implementation
    // shared with the transcoding iterators.  Each pair is fully read before its byte is written.
    auto it = hex_decoder(begin, end);
    const auto hend = it.end();
    for (; it != hend; ++it)
        *out++ = static_cast<detail::byte_type_t<OutputIt>>(*it);
    return out;
}
@ -155,8 +235,10 @@ OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) {
/// Decodes the hex digits in [begin, end) and returns the resulting bytes as a std::string, by
/// delegating to the output-iterator overload of from_hex.
///
/// When `It` is a random access iterator the result's storage is reserved once up front, sized by
/// `from_hex_size` of the input length.
template <typename It>
std::string from_hex(It begin, It end) {
    std::string bytes;
    if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) {
        // Unqualified call with std::distance as the fallback so that an iterator type providing
        // its own `distance` overload is picked up via ADL.
        using std::distance;
        bytes.reserve(from_hex_size(distance(begin, end)));
    }
    from_hex(begin, end, std::back_inserter(bytes));
    return bytes;
}

View File

@ -60,6 +60,16 @@ TEST_CASE("hex encoding/decoding", "[encoding][decoding][hex]") {
std::basic_string_view<std::byte> hex_bytes{bytes.data(), bytes.size()};
REQUIRE( oxenmq::is_hex(hex_bytes) );
REQUIRE( oxenmq::from_hex(hex_bytes) == "\xff\x42\x12\x34" );
REQUIRE( oxenmq::to_hex_size(1) == 2 );
REQUIRE( oxenmq::to_hex_size(2) == 4 );
REQUIRE( oxenmq::to_hex_size(3) == 6 );
REQUIRE( oxenmq::to_hex_size(4) == 8 );
REQUIRE( oxenmq::to_hex_size(100) == 200 );
REQUIRE( oxenmq::from_hex_size(2) == 1 );
REQUIRE( oxenmq::from_hex_size(4) == 2 );
REQUIRE( oxenmq::from_hex_size(6) == 3 );
REQUIRE( oxenmq::from_hex_size(98) == 49 );
}
TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") {
@ -128,6 +138,37 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") {
std::basic_string_view<std::byte> b32_bytes{bytes.data(), bytes.size()};
REQUIRE( oxenmq::is_base32z(b32_bytes) );
REQUIRE( oxenmq::from_base32z(b32_bytes) == "\x00\xff"sv );
REQUIRE( oxenmq::is_base32z("") );
REQUIRE_FALSE( oxenmq::is_base32z("y") );
REQUIRE( oxenmq::is_base32z("yy") );
REQUIRE_FALSE( oxenmq::is_base32z("yyy") );
REQUIRE( oxenmq::is_base32z("yyyy") );
REQUIRE( oxenmq::is_base32z("yyyyy") );
REQUIRE_FALSE( oxenmq::is_base32z("yyyyyy") );
REQUIRE( oxenmq::is_base32z("yyyyyyy") );
REQUIRE( oxenmq::is_base32z("yyyyyyyy") );
REQUIRE( oxenmq::to_base32z_size(1) == 2 );
REQUIRE( oxenmq::to_base32z_size(2) == 4 );
REQUIRE( oxenmq::to_base32z_size(3) == 5 );
REQUIRE( oxenmq::to_base32z_size(4) == 7 );
REQUIRE( oxenmq::to_base32z_size(5) == 8 );
REQUIRE( oxenmq::to_base32z_size(30) == 48 );
REQUIRE( oxenmq::to_base32z_size(31) == 50 );
REQUIRE( oxenmq::to_base32z_size(32) == 52 );
REQUIRE( oxenmq::to_base32z_size(33) == 53 );
REQUIRE( oxenmq::to_base32z_size(100) == 160 );
REQUIRE( oxenmq::from_base32z_size(160) == 100 );
REQUIRE( oxenmq::from_base32z_size(53) == 33 );
REQUIRE( oxenmq::from_base32z_size(52) == 32 );
REQUIRE( oxenmq::from_base32z_size(50) == 31 );
REQUIRE( oxenmq::from_base32z_size(48) == 30 );
REQUIRE( oxenmq::from_base32z_size(8) == 5 );
REQUIRE( oxenmq::from_base32z_size(7) == 4 );
REQUIRE( oxenmq::from_base32z_size(5) == 3 );
REQUIRE( oxenmq::from_base32z_size(4) == 2 );
REQUIRE( oxenmq::from_base32z_size(2) == 1 );
}
TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
@ -145,6 +186,13 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
REQUIRE( oxenmq::to_base64("abcde") == "YWJjZGU=" );
REQUIRE( oxenmq::to_base64("abcdef") == "YWJjZGVm" );
REQUIRE( oxenmq::to_base64_unpadded("a") == "YQ" );
REQUIRE( oxenmq::to_base64_unpadded("ab") == "YWI" );
REQUIRE( oxenmq::to_base64_unpadded("abc") == "YWJj" );
REQUIRE( oxenmq::to_base64_unpadded("abcd") == "YWJjZA" );
REQUIRE( oxenmq::to_base64_unpadded("abcde") == "YWJjZGU" );
REQUIRE( oxenmq::to_base64_unpadded("abcdef") == "YWJjZGVm" );
REQUIRE( oxenmq::to_base64("\0\0\0\xff"s) == "AAAA/w==" );
REQUIRE( oxenmq::to_base64("\0\0\0\xff\xff"s) == "AAAA//8=" );
REQUIRE( oxenmq::to_base64("\0\0\0\xff\xff\xff"s) == "AAAA////" );
@ -179,6 +227,7 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
REQUIRE( oxenmq::is_base64("YWJjZB") ); // not really valid, but we explicitly accept it
REQUIRE_FALSE( oxenmq::is_base64("YWJjZ=") ); // invalid padding (padding can only be 4th or 3rd+4th of a 4-char block)
REQUIRE_FALSE( oxenmq::is_base64("YYYYA") ); // invalid: base64 can never be length 4n+1
REQUIRE_FALSE( oxenmq::is_base64("YWJj=") );
REQUIRE_FALSE( oxenmq::is_base64("YWJj=A") );
REQUIRE_FALSE( oxenmq::is_base64("YWJjA===") );
@ -228,6 +277,114 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
std::basic_string_view<std::byte> b64_bytes{bytes.data(), bytes.size()};
REQUIRE( oxenmq::is_base64(b64_bytes) );
REQUIRE( oxenmq::from_base64(b64_bytes) == "\xff\x00"sv );
REQUIRE( oxenmq::to_base64_size(1) == 4 );
REQUIRE( oxenmq::to_base64_size(2) == 4 );
REQUIRE( oxenmq::to_base64_size(3) == 4 );
REQUIRE( oxenmq::to_base64_size(4) == 8 );
REQUIRE( oxenmq::to_base64_size(5) == 8 );
REQUIRE( oxenmq::to_base64_size(6) == 8 );
REQUIRE( oxenmq::to_base64_size(30) == 40 );
REQUIRE( oxenmq::to_base64_size(31) == 44 );
REQUIRE( oxenmq::to_base64_size(32) == 44 );
REQUIRE( oxenmq::to_base64_size(33) == 44 );
REQUIRE( oxenmq::to_base64_size(100) == 136 );
REQUIRE( oxenmq::from_base64_size(136) == 102 ); // Not symmetric because we don't know the last two are padding
REQUIRE( oxenmq::from_base64_size(134) == 100 ); // Unpadded
REQUIRE( oxenmq::from_base64_size(44) == 33 );
REQUIRE( oxenmq::from_base64_size(43) == 32 );
REQUIRE( oxenmq::from_base64_size(42) == 31 );
REQUIRE( oxenmq::from_base64_size(40) == 30 );
REQUIRE( oxenmq::from_base64_size(8) == 6 );
REQUIRE( oxenmq::from_base64_size(7) == 5 );
REQUIRE( oxenmq::from_base64_size(6) == 4 );
REQUIRE( oxenmq::from_base64_size(4) == 3 );
REQUIRE( oxenmq::from_base64_size(3) == 2 );
REQUIRE( oxenmq::from_base64_size(2) == 1 );
}
TEST_CASE("transcoding", "[decoding][encoding][base32z][hex][base64]") {
    // Collects everything an encoder/decoder iterable yields into a freshly built string.
    auto drain = [](auto source) {
        std::string collected;
        std::copy(source, source.end(), std::back_inserter(collected));
        return collected;
    };

    // One decoder per encoded representation of `pk`:
    oxenmq::base64_decoder dec_b64{pk_b64.begin(), pk_b64.end()};
    oxenmq::base32z_decoder dec_b32z{pk_b32z.begin(), pk_b32z.end()};
    oxenmq::hex_decoder dec_hex{pk_hex.begin(), pk_hex.end()};

    // Transcoders: an encoder stacked directly on top of a decoder.
    oxenmq::base32z_encoder b64_as_b32z{dec_b64, dec_b64.end()};
    oxenmq::base32z_encoder hex_as_b32z{dec_hex, dec_hex.end()};
    oxenmq::hex_encoder b64_as_hex{dec_b64, dec_b64.end()};
    oxenmq::hex_encoder b32z_as_hex{dec_b32z, dec_b32z.end()};
    oxenmq::base64_encoder hex_as_b64{dec_hex, dec_hex.end()};
    oxenmq::base64_encoder b32z_as_b64{dec_b32z, dec_b32z.end()};
    // Round trips back into the same encoding; pointless in practice, but they should still work:
    oxenmq::base64_encoder b64_as_b64{dec_b64, dec_b64.end()};
    oxenmq::base32z_encoder b32z_as_b32z{dec_b32z, dec_b32z.end()};
    oxenmq::hex_encoder hex_as_hex{dec_hex, dec_hex.end()};

    // Plain decoding must give back the raw bytes:
    REQUIRE( drain(dec_b64) == pk );
    REQUIRE( drain(dec_b32z) == pk );
    REQUIRE( drain(dec_hex) == pk );

    // Transcoding, grouped by source encoding:
    CHECK( drain(b64_as_hex) == pk_hex );
    CHECK( drain(b64_as_b32z) == pk_b32z );
    CHECK( drain(b64_as_b64) == pk_b64 );

    CHECK( drain(b32z_as_hex) == pk_hex );
    CHECK( drain(b32z_as_b32z) == pk_b32z );
    CHECK( drain(b32z_as_b64) == pk_b64 );

    CHECK( drain(hex_as_hex) == pk_hex );
    CHECK( drain(hex_as_b32z) == pk_b32z );
    CHECK( drain(hex_as_b64) == pk_b64 );

    // A long chain of conversions: b64 -> bytes -> b32z -> bytes -> b64 -> bytes -> hex
    oxenmq::base32z_encoder step1{dec_b64, dec_b64.end()};
    oxenmq::base32z_decoder step2{step1, step1.end()};
    oxenmq::base64_encoder step3{step2, step2.end()};
    oxenmq::base64_decoder step4{step3, step3.end()};
    oxenmq::hex_encoder step5{step4, step4.end()};
    CHECK( drain(step5) == pk_hex );

    // base64 encoding with padding disabled: expected to match the padded form minus its final
    // character.
    oxenmq::base64_encoder unpadded{pk.begin(), pk.end(), false};
    CHECK( drain(unpadded) == pk_b64.substr(0, pk_b64.size()-1) );
}
TEST_CASE("std::byte decoding", "[decoding][hex][base32z][base64]") {