|
|
|
@ -76,74 +76,153 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_
|
|
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
|
|
|
|
|
|
/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`.
|
|
|
|
|
/// Returns the final value of out (i.e. the iterator positioned just after the last written base64
|
|
|
|
|
/// character).
|
|
|
|
|
template <typename InputIt, typename OutputIt> |
|
|
|
|
OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) { |
|
|
|
|
static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); |
|
|
|
|
int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4}
|
|
|
|
|
std::uint_fast16_t r = 0; |
|
|
|
|
while (begin != end) { |
|
|
|
|
r = r << 8 | static_cast<unsigned char>(*begin++); |
|
|
|
|
/// Returns the number of characters required to encode a base64 string from the given number of
/// bytes.
///
/// \param byte_size the number of raw input bytes.
/// \param padded if true (the default) the count is rounded up to the next multiple of 4, i.e. it
/// includes room for trailing padding; if false it is the bare ⌈bytes*4/3⌉ character count.
///
/// The value is computed from the quotient and remainder of `byte_size / 3` rather than by
/// multiplying `byte_size` first, so no intermediate value can wrap around for large inputs.
inline constexpr size_t to_base64_size(size_t byte_size, bool padded = true) {
    const size_t full_groups = byte_size / 3;  // complete 3-byte groups; each yields 4 characters
    const size_t leftover = byte_size % 3;     // 0, 1 or 2 trailing bytes
    if (padded)
        return full_groups * 4 + (leftover ? 4 : 0);  // a partial group is padded out to 4 chars
    return full_groups * 4 + (leftover * 4 + 2) / 3;  // 1 byte => 2 chars, 2 bytes => 3 chars
}
|
|
|
|
/// Returns the (maximum) number of bytes required to decode a base64 string of the given size.
/// Note that this may overallocate by 1-2 bytes if the size includes 1-2 padding chars.
///
/// \param b64_size the number of base64 characters.
///
/// The result is ⌊b64_size*6/8⌋: the floor is taken because trailing bits that cannot form a whole
/// byte are ignored.  It is computed per group of 4 characters (3 bytes each) plus the remainder,
/// so the multiplication cannot wrap around for large sizes.
inline constexpr size_t from_base64_size(size_t b64_size) {
    return (b64_size / 4) * 3 + ((b64_size % 4) * 3) / 4;
}
|
|
|
|
|
|
|
|
|
// we just added 8 bits, so we can *always* consume 6 to produce one character, so (net) we
|
|
|
|
|
// are adding 2 bits.
|
|
|
|
|
bits += 2; |
|
|
|
|
*out++ = detail::b64_lut.to_b64(r >> bits); // Right-shift off the bits we aren't consuming right now
|
|
|
|
|
/// Iterable object for on-the-fly base64 encoding. Used internally, but also particularly useful
|
|
|
|
|
/// when converting from one encoding to another.
|
|
|
|
|
template <typename InputIt> |
|
|
|
|
struct base64_encoder final { |
|
|
|
|
private: |
|
|
|
|
InputIt _it, _end; |
|
|
|
|
static_assert(sizeof(decltype(*_it)) == 1, "base64_encoder requires chars/bytes input iterator"); |
|
|
|
|
// How much padding (at most) we can add at the end
|
|
|
|
|
int padding; |
|
|
|
|
// Number of bits held in r; will always be >= 6 until we are at the end.
|
|
|
|
|
int bits{_it != _end ? 8 : 0}; |
|
|
|
|
// Holds bits of data we've already read, which might belong to current or next chars
|
|
|
|
|
uint_fast16_t r{bits ? static_cast<unsigned char>(*_it) : (unsigned char)0}; |
|
|
|
|
public: |
|
|
|
|
using iterator_category = std::input_iterator_tag; |
|
|
|
|
using difference_type = std::ptrdiff_t; |
|
|
|
|
using value_type = char; |
|
|
|
|
using reference = value_type; |
|
|
|
|
using pointer = void; |
|
|
|
|
/// Constructs an encoder positioned at the first base64 character of the byte range
/// [begin, end).  If `padded` is true, '=' characters are appended after the data when the input
/// length is not a multiple of 3.
base64_encoder(InputIt begin, InputIt end, bool padded = true)
    : _it{std::move(begin)}, _end{std::move(end)}, padding{padded} {
    // An empty input encodes to nothing at all.  Without this the encoder would start with no
    // bits but with padding still allowed, which compares unequal to end() and so emits a stray
    // '=' before reaching the end.
    if (_it == _end)
        padding = 0;
}
|
|
|
|
|
|
|
|
|
// Drop the bits we don't want to keep (because we just consumed them)
|
|
|
|
|
r &= (1 << bits) - 1; |
|
|
|
|
/// Returns the past-the-end encoder for this range: an encoder over the empty range [_end, _end)
/// with padding disabled.  Declared const so that it can also be called on a const encoder.
base64_encoder end() const { return {_end, _end, false}; }
|
|
|
|
|
|
|
|
|
if (bits == 6) { // We have enough bits to produce a second character (which means we had 4 before and added 8)
|
|
|
|
|
bits = 0; |
|
|
|
|
*out++ = detail::b64_lut.to_b64(r); |
|
|
|
|
r = 0; |
|
|
|
|
bool operator==(const base64_encoder& i) { return _it == i._it && bits == i.bits && padding == i.padding; } |
|
|
|
|
bool operator!=(const base64_encoder& i) { return !(*this == i); } |
|
|
|
|
|
|
|
|
|
/// Pre-increment: moves on to the next output character, pulling another input byte into `r`
/// whenever fewer than 6 bits remain buffered.
base64_encoder& operator++() {
    if (bits == 0) {
        // No data bits remain, so the only thing left to step through is padding.
        padding--;
        return *this;
    }

    assert(bits >= 6);
    // Throw away the 6 most significant bits and keep only the bits that are still pending.
    bits -= 6;
    r &= (1 << bits) - 1;

    // Nothing to load if a whole 6-bit group is still buffered or the input is already exhausted.
    if (bits >= 6 || _it == _end)
        return *this;

    ++_it;
    if (_it != _end) {
        // Another input byte is available: append its 8 bits below the ones we already hold.
        r = (r << 8) | static_cast<unsigned char>(*_it);
        bits += 8;
    } else if (bits <= 0) {
        // Ran out of input with no excess bits: the input length was a multiple of 3, so there
        // is nothing to pad.
        padding = 0;
    } else {
        // Ran out of input with 2 or 4 bits left over.  When padding is enabled the amount is:
        //   3n+1 input bytes => 4n+2 chars + 2 padding (2 trailing bits here)
        //   3n+2 input bytes => 4n+3 chars + 1 padding (4 trailing bits here)
        if (padding)
            padding = 3 - bits / 2;
        // Left-align the leftover bits within the final 6-bit group, e.g. "11" becomes "110000".
        r <<= (6 - bits);
        bits = 6;
    }
    return *this;
}

/// Post-increment: advances this encoder and returns a copy of its state from before the advance.
base64_encoder operator++(int) {
    base64_encoder previous{*this};
    ++*this;
    return previous;
}
|
|
|
|
|
|
|
|
|
// If bits == 0 then we ended our 6-bit outputs coinciding with 8-bit values, i.e. at a multiple
|
|
|
|
|
// of 24 bits: this means we don't have anything else to output and don't need any padding.
|
|
|
|
|
if (bits == 2) { |
|
|
|
|
// We finished with 2 unconsumed bits, which means we ended 1 byte past a 24-bit group (e.g.
|
|
|
|
|
// 1 byte, 4 bytes, 301 bytes, etc.); since we need to always be a multiple of 4 output
|
|
|
|
|
// characters that means we've produced 1: so we right-fill 0s to get the next char, then
|
|
|
|
|
// add two padding ='s.
|
|
|
|
|
*out++ = detail::b64_lut.to_b64(r << 4); |
|
|
|
|
*out++ = '='; |
|
|
|
|
*out++ = '='; |
|
|
|
|
} else if (bits == 4) { |
|
|
|
|
// 4 bits left means we produced 2 6-bit values from the first 2 bytes of a 3-byte group.
|
|
|
|
|
// Fill 0s to get the last one, plus one padding output.
|
|
|
|
|
*out++ = detail::b64_lut.to_b64(r << 2); |
|
|
|
|
*out++ = '='; |
|
|
|
|
char operator*() { |
|
|
|
|
if (bits == 0 && padding) |
|
|
|
|
return '='; |
|
|
|
|
// Right-shift off the excess bits we aren't accessing yet
|
|
|
|
|
return detail::b64_lut.to_b64(r >> (bits - 6)); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
return out; |
|
|
|
|
/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`.
|
|
|
|
|
/// Returns the final value of out (i.e. the iterator positioned just after the last written base64
|
|
|
|
|
/// character).
|
|
|
|
|
template <typename InputIt, typename OutputIt> |
|
|
|
|
OutputIt to_base64(InputIt begin, InputIt end, OutputIt out, bool padded = true) { |
|
|
|
|
static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes"); |
|
|
|
|
auto it = base64_encoder{begin, end, padded}; |
|
|
|
|
return std::copy(it, it.end(), out); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Creates and returns a base64 string from an iterator pair of a character sequence
|
|
|
|
|
/// Creates and returns a base64 string from an iterator pair of a character sequence. The
|
|
|
|
|
/// resulting string will have '=' padding, if appropriate.
|
|
|
|
|
template <typename It> |
|
|
|
|
std::string to_base64(It begin, It end) { |
|
|
|
|
std::string base64; |
|
|
|
|
if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) |
|
|
|
|
base64.reserve((std::distance(begin, end) + 2) / 3 * 4); // bytes*4/3, rounded up to the next multiple of 4
|
|
|
|
|
if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) { |
|
|
|
|
using std::distance; |
|
|
|
|
base64.reserve(to_base64_size(distance(begin, end))); |
|
|
|
|
} |
|
|
|
|
to_base64(begin, end, std::back_inserter(base64)); |
|
|
|
|
return base64; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Creates and returns a base64 string from an iterator pair of a character sequence.  No '='
/// padding is appended to the result.
template <typename It>
std::string to_base64_unpadded(It begin, It end) {
    constexpr bool random_access = std::is_base_of_v<
            std::random_access_iterator_tag,
            typename std::iterator_traits<It>::iterator_category>;

    std::string encoded;
    if constexpr (random_access) {
        // The length is cheap to compute here, so size the string for the unpadded output.
        using std::distance;
        encoded.reserve(to_base64_size(distance(begin, end), false));
    }
    to_base64(begin, end, std::back_inserter(encoded), false);
    return encoded;
}
|
|
|
|
|
|
|
|
|
/// Creates a base64 string from an iterable, std::string-like object. The string will have '='
|
|
|
|
|
/// padding, if appropriate.
|
|
|
|
|
template <typename CharT> |
|
|
|
|
std::string to_base64(std::basic_string_view<CharT> s) { return to_base64(s.begin(), s.end()); } |
|
|
|
|
inline std::string to_base64(std::string_view s) { return to_base64<>(s); } |
|
|
|
|
|
|
|
|
|
/// Creates a base64 string from an iterable, std::string-like object. The string will not be
|
|
|
|
|
/// padded.
|
|
|
|
|
template <typename CharT> |
|
|
|
|
std::string to_base64_unpadded(std::basic_string_view<CharT> s) { return to_base64_unpadded(s.begin(), s.end()); } |
|
|
|
|
inline std::string to_base64_unpadded(std::string_view s) { return to_base64_unpadded<>(s); } |
|
|
|
|
|
|
|
|
|
/// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding,
|
|
|
|
|
/// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4.
|
|
|
|
|
/// Otherwise the string must contain only valid base64 characters, and must not have a length of
|
|
|
|
|
/// 4n+1 (because that cannot be produced by base64 encoding).
|
|
|
|
|
template <typename It> |
|
|
|
|
constexpr bool is_base64(It begin, It end) { |
|
|
|
|
static_assert(sizeof(decltype(*begin)) == 1, "is_base64 requires chars/bytes"); |
|
|
|
|
using std::distance; |
|
|
|
|
using std::prev; |
|
|
|
|
size_t count = 0; |
|
|
|
|
constexpr bool random = std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>; |
|
|
|
|
if constexpr (random) { |
|
|
|
|
count = distance(begin, end) % 4; |
|
|
|
|
if (count == 1) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Allow 1 or 2 padding chars *if* they pad it to a multiple of 4.
|
|
|
|
|
if (begin != end && distance(begin, end) % 4 == 0) { |
|
|
|
@ -158,7 +237,14 @@ constexpr bool is_base64(It begin, It end) {
|
|
|
|
|
auto c = static_cast<unsigned char>(*begin); |
|
|
|
|
if (detail::b64_lut.from_b64(c) == 0 && c != 'A') |
|
|
|
|
return false; |
|
|
|
|
if constexpr (!random) |
|
|
|
|
count++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if constexpr (!random) |
|
|
|
|
if (count % 4 == 1) // base64 encoding will produce 4n, 4n+2, 4n+3, but never 4n+1
|
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -167,6 +253,82 @@ template <typename CharT>
|
|
|
|
|
constexpr bool is_base64(std::basic_string_view<CharT> s) { return is_base64(s.begin(), s.end()); } |
|
|
|
|
/// Overload of is_base64 taking a std::string_view; checks the view's full character range.
constexpr bool is_base64(std::string_view s) {
    return is_base64(s.begin(), s.end());
}
|
|
|
|
|
|
|
|
|
/// Iterable object for on-the-fly base64 decoding. Used internally, but also particularly useful
|
|
|
|
|
/// when converting from one encoding to another. The input range must be a valid base64 encoded
|
|
|
|
|
/// string (with or without padding).
|
|
|
|
|
///
|
|
|
|
|
/// Note that we ignore "padding" bits without requiring that they actually be 0. For instance, the
|
|
|
|
|
/// bytes "\ff\ff" are ideally encoded as "//8=" (16 bits of 1s + 2 padding 0 bits, then a full
|
|
|
|
|
/// 6-bit padding char). We don't, however, require that the padding bits be 0. That is, "///=",
|
|
|
|
|
/// "//9=", "//+=", etc. will all decode to the same \ff\ff output string.
|
|
|
|
|
template <typename InputIt> |
|
|
|
|
struct base64_decoder final { |
|
|
|
|
private: |
|
|
|
|
InputIt _it, _end; |
|
|
|
|
static_assert(sizeof(decltype(*_it)) == 1, "base64_decoder requires chars/bytes input iterator"); |
|
|
|
|
uint_fast16_t in = 0; |
|
|
|
|
int bits = 0; // number of bits loaded into `in`; will be in [8, 12] until we hit the end
|
|
|
|
|
public: |
|
|
|
|
using iterator_category = std::input_iterator_tag; |
|
|
|
|
using difference_type = std::ptrdiff_t; |
|
|
|
|
using value_type = char; |
|
|
|
|
using reference = value_type; |
|
|
|
|
using pointer = void; |
|
|
|
|
base64_decoder(InputIt begin, InputIt end) : _it{std::move(begin)}, _end{std::move(end)} { |
|
|
|
|
if (_it != _end) |
|
|
|
|
load_byte(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
base64_decoder end() { return {_end, _end}; } |
|
|
|
|
|
|
|
|
|
bool operator==(const base64_decoder& i) { return _it == i._it; } |
|
|
|
|
bool operator!=(const base64_decoder& i) { return _it != i._it; } |
|
|
|
|
|
|
|
|
|
base64_decoder& operator++() { |
|
|
|
|
// Discard 8 most significant bits
|
|
|
|
|
bits -= 8; |
|
|
|
|
in &= (1 << bits) - 1; |
|
|
|
|
if (++_it != _end) |
|
|
|
|
load_byte(); |
|
|
|
|
return *this; |
|
|
|
|
} |
|
|
|
|
base64_decoder operator++(int) { base64_decoder copy{*this}; ++*this; return copy; } |
|
|
|
|
|
|
|
|
|
char operator*() { |
|
|
|
|
return in >> (bits - 8); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
void load_in() { |
|
|
|
|
// We hit padding trying to read enough for a full byte, so we're done. (And since you were
|
|
|
|
|
// already supposed to have checked validity with is_base64, the padding can only be at the
|
|
|
|
|
// end).
|
|
|
|
|
auto c = static_cast<unsigned char>(*_it); |
|
|
|
|
if (c == '=') { |
|
|
|
|
_it = _end; |
|
|
|
|
bits = 0; |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
in = in << 6 |
|
|
|
|
| detail::b64_lut.from_b64(c); |
|
|
|
|
bits += 6; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void load_byte() { |
|
|
|
|
load_in(); |
|
|
|
|
if (bits && bits < 8 && ++_it != _end) |
|
|
|
|
load_in(); |
|
|
|
|
|
|
|
|
|
// If we hit the _end iterator above then we hit the end of the input (or hit padding) with
|
|
|
|
|
// fewer than 8 bits accumulated to make a full byte. For a properly encoded base64 string
|
|
|
|
|
// this should only be possible with 0, 2, or 4 bits of all 0s; these are essentially
|
|
|
|
|
// "padding" bits (e.g. encoding 2 byte (16 bits) requires 3 b64 chars (18 bits), where
|
|
|
|
|
// only the first 16 bits are significant). Ideally any padding bits should be 0, but we
|
|
|
|
|
// don't check that and rather just ignore them.
|
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not
|
|
|
|
|
/// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as
|
|
|
|
|
/// long as `out` is no later than `begin`. Trailing padding characters are permitted but not
|
|
|
|
@ -182,29 +344,10 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end
|
|
|
|
|
template <typename InputIt, typename OutputIt> |
|
|
|
|
OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) { |
|
|
|
|
static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes"); |
|
|
|
|
uint_fast16_t curr = 0; |
|
|
|
|
int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
|
|
|
|
|
while (begin != end) { |
|
|
|
|
auto c = static_cast<unsigned char>(*begin++); |
|
|
|
|
|
|
|
|
|
// padding; don't bother checking if we're at the end because is_base64 is a precondition
|
|
|
|
|
// and we're allowed UB if it isn't satisfied.
|
|
|
|
|
if (c == '=') continue; |
|
|
|
|
|
|
|
|
|
curr = curr << 6 | detail::b64_lut.from_b64(c); |
|
|
|
|
if (bits == 0) |
|
|
|
|
bits = 6; |
|
|
|
|
else { |
|
|
|
|
bits -= 2; // Added 6, removing 8
|
|
|
|
|
*out++ = static_cast<detail::byte_type_t<OutputIt>>( |
|
|
|
|
static_cast<uint8_t>(curr >> bits)); |
|
|
|
|
curr &= (1 << bits) - 1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// Don't worry about leftover bits because either they have to be 0, or they can't happen at
|
|
|
|
|
// all. See base32z.h for why: the reasoning is exactly the same (except using 6 bits per
|
|
|
|
|
// character here instead of 5).
|
|
|
|
|
|
|
|
|
|
base64_decoder it{begin, end}; |
|
|
|
|
auto bend = it.end(); |
|
|
|
|
while (it != bend) |
|
|
|
|
*out++ = static_cast<detail::byte_type_t<OutputIt>>(*it++); |
|
|
|
|
return out; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -213,8 +356,10 @@ OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) {
|
|
|
|
|
template <typename It> |
|
|
|
|
std::string from_base64(It begin, It end) { |
|
|
|
|
std::string bytes; |
|
|
|
|
if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) |
|
|
|
|
bytes.reserve(std::distance(begin, end)*6 / 8); // each digit carries 6 bits; this may overallocate by 1-2 bytes due to padding
|
|
|
|
|
if constexpr (std::is_base_of_v<std::random_access_iterator_tag, typename std::iterator_traits<It>::iterator_category>) { |
|
|
|
|
using std::distance; |
|
|
|
|
bytes.reserve(from_base64_size(distance(begin, end))); |
|
|
|
|
} |
|
|
|
|
from_base64(begin, end, std::back_inserter(bytes)); |
|
|
|
|
return bytes; |
|
|
|
|
} |
|
|
|
|