Make {to,from}_{hex/b64/b32} return output iterator

Changes the 3-iterator versions of to_hex, from_b32z, etc. to return the
final output iterator, which allows for much easier in-place "from"
conversion without needing a new string by doing something like:

    std::string data = /* some hex */;
    auto end = oxenmq::from_hex(data.begin(), data.end(), data.begin();
    data.erase(end, data.end());

Returning from the "to" converters is a bit less useful but doing it
anyway for consistency (and because it could still have some use, e.g.
if output is into some fixed buffer it lets you determine how much was
written).
This commit is contained in:
Jason Rhinelander 2021-08-20 16:08:33 -03:00
parent ae884d2f13
commit 2ac4379fa6
4 changed files with 61 additions and 12 deletions

View File

@ -74,9 +74,11 @@ static_assert(b32z_lut.from_b32z('w') == 20 && b32z_lut.from_b32z('T') == 17 &&
} // namespace detail
/// Converts bytes into a base32z encoded character sequence.
/// Converts bytes into a base32z encoded character sequence, writing them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written base32z
/// character).
template <typename InputIt, typename OutputIt>
void to_base32z(InputIt begin, InputIt end, OutputIt out) {
OutputIt to_base32z(InputIt begin, InputIt end, OutputIt out) {
static_assert(sizeof(decltype(*begin)) == 1, "to_base32z requires chars/bytes");
int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in [0, 4]
std::uint_fast16_t r = 0;
@ -100,6 +102,8 @@ void to_base32z(InputIt begin, InputIt end, OutputIt out) {
if (bits > 0) // We hit the end, but still have some unconsumed bits so need one final character to append
*out++ = detail::b32z_lut.to_b32z(r << (5 - bits));
return out;
}
/// Creates a base32z string from an iterator pair of a byte sequence.
@ -138,7 +142,8 @@ constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); }
/// valid base32z alphabet characters. It is permitted for the input and output ranges to overlap
/// as long as `out` is no later than `begin`. Note that if you pass in a sequence that could not
/// have been created by a base32z encoding of a byte sequence, we treat the excess bits as if they
/// were not provided.
/// were not provided. Returns the final value of out (that is, the iterator positioned just after
/// the last written character).
///
/// For example, "yyy" represents a 15-bit value, but a byte sequence is either 8-bit (requiring 2
/// characters) or 16-bit (requiring 4). Similarly, "yb" is an impossible encoding because it has
@ -146,7 +151,7 @@ constexpr bool is_base32z(std::string_view s) { return is_base32z<>(s); }
/// 16th or 24th or ... bit). We treat any such bits as if they were not specified (even if they
/// are): which means "yy", "yb", "yyy", "yy9", "yd", etc. all decode to the same 1-byte value "\0".
template <typename InputIt, typename OutputIt>
void from_base32z(InputIt begin, InputIt end, OutputIt out) {
OutputIt from_base32z(InputIt begin, InputIt end, OutputIt out) {
static_assert(sizeof(decltype(*begin)) == 1, "from_base32z requires chars/bytes");
uint_fast16_t curr = 0;
int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
@ -183,6 +188,8 @@ void from_base32z(InputIt begin, InputIt end, OutputIt out) {
// any 8n + {0,2,5} char output) and added a base32z character to the end. If you do that,
// well, too bad: you're giving invalid output and so we're just going to pretend that extra
// character you added isn't there by not doing anything here.
return out;
}
/// Convert a base32z sequence into a std::string of bytes. Undefined behaviour if any characters

View File

@ -76,9 +76,11 @@ static_assert(b64_lut.from_b64('/') == 63 && b64_lut.from_b64('7') == 59 && b64_
} // namespace detail
/// Converts bytes into a base64 encoded character sequence.
/// Converts bytes into a base64 encoded character sequence, writing them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written base64
/// character).
template <typename InputIt, typename OutputIt>
void to_base64(InputIt begin, InputIt end, OutputIt out) {
OutputIt to_base64(InputIt begin, InputIt end, OutputIt out) {
static_assert(sizeof(decltype(*begin)) == 1, "to_base64 requires chars/bytes");
int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4}
std::uint_fast16_t r = 0;
@ -116,6 +118,8 @@ void to_base64(InputIt begin, InputIt end, OutputIt out) {
*out++ = detail::b64_lut.to_b64(r << 2);
*out++ = '=';
}
return out;
}
/// Creates and returns a base64 string from an iterator pair of a character sequence
@ -166,7 +170,8 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end
/// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not
/// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as
/// long as `out` is no later than `begin`. Trailing padding characters are permitted but not
/// required.
/// required. Returns the final value of out (that is, the iterator positioned just after the
/// last written character).
///
/// It is possible to provide "impossible" base64 encoded values; for example "YWJja" which has 30
/// bits of data even though a base64 encoded byte string should have 24 (4 chars) or 36 (6 chars)
@ -175,7 +180,7 @@ constexpr bool is_base64(std::string_view s) { return is_base64(s.begin(), s.end
/// encoding of "abcd") and "YWJjZB", "YWJjZC", ..., "YWJjZP" all decode to the same "abcd" value:
/// the last 4 bits of the last character are essentially considered padding.
template <typename InputIt, typename OutputIt>
void from_base64(InputIt begin, InputIt end, OutputIt out) {
OutputIt from_base64(InputIt begin, InputIt end, OutputIt out) {
static_assert(sizeof(decltype(*begin)) == 1, "from_base64 requires chars/bytes");
uint_fast16_t curr = 0;
int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
@ -199,6 +204,8 @@ void from_base64(InputIt begin, InputIt end, OutputIt out) {
// Don't worry about leftover bits because either they have to be 0, or they can't happen at
// all. See base32z.h for why: the reasoning is exactly the same (except using 6 bits per
// character here instead of 5).
return out;
}
/// Converts base64 digits from a iterator pair of characters into a std::string of bytes.

View File

@ -62,15 +62,18 @@ static_assert(hex_lut.from_hex('a') == 10 && hex_lut.from_hex('F') == 15 && hex_
} // namespace detail
/// Creates hex digits from a character sequence.
/// Creates hex digits from a character sequence given by iterators, writes them starting at `out`.
/// Returns the final value of out (i.e. the iterator positioned just after the last written
/// hex character).
template <typename InputIt, typename OutputIt>
void to_hex(InputIt begin, InputIt end, OutputIt out) {
OutputIt to_hex(InputIt begin, InputIt end, OutputIt out) {
static_assert(sizeof(decltype(*begin)) == 1, "to_hex requires chars/bytes");
for (; begin != end; ++begin) {
uint8_t c = static_cast<uint8_t>(*begin);
*out++ = detail::hex_lut.to_hex(c >> 4);
*out++ = detail::hex_lut.to_hex(c & 0x0f);
}
return out;
}
/// Creates a string of hex digits from a character sequence iterator pair
@ -132,9 +135,10 @@ constexpr char from_hex_pair(unsigned char a, unsigned char b) noexcept { return
/// Converts a sequence of hex digits to bytes. Undefined behaviour if any characters are not in
/// [0-9a-fA-F] or if the input sequence length is not even: call `is_hex` first if you need to
/// check. It is permitted for the input and output ranges to overlap as long as out is no later
/// than begin.
/// than begin. Returns the final value of out (that is, the iterator positioned just after the
/// last written character).
template <typename InputIt, typename OutputIt>
void from_hex(InputIt begin, InputIt end, OutputIt out) {
OutputIt from_hex(InputIt begin, InputIt end, OutputIt out) {
using std::distance;
assert(is_hex(begin, end));
while (begin != end) {
@ -143,6 +147,7 @@ void from_hex(InputIt begin, InputIt end, OutputIt out) {
*out++ = static_cast<detail::byte_type_t<OutputIt>>(
from_hex_pair(static_cast<unsigned char>(a), static_cast<unsigned char>(b)));
}
return out;
}
/// Converts a sequence of hex digits to a string of bytes and returns it. Undefined behaviour if

View File

@ -44,6 +44,16 @@ TEST_CASE("hex encoding/decoding", "[encoding][decoding][hex]") {
std::basic_string_view<std::byte> b{bytes.data(), bytes.size()};
REQUIRE( oxenmq::to_hex(b) == "ff421234"s );
// In-place decoding and truncation via to_hex's returned iterator:
std::string some_hex = "48656c6c6f";
some_hex.erase(oxenmq::from_hex(some_hex.begin(), some_hex.end(), some_hex.begin()), some_hex.end());
REQUIRE( some_hex == "Hello" );
// Test the returned iterator from encoding
std::string hellohex;
*oxenmq::to_hex(some_hex.begin(), some_hex.end(), std::back_inserter(hellohex))++ = '!';
REQUIRE( hellohex == "48656c6c6f!" );
bytes.resize(8);
bytes[0] = std::byte{'f'}; bytes[1] = std::byte{'f'}; bytes[2] = std::byte{'4'}; bytes[3] = std::byte{'2'};
bytes[4] = std::byte{'1'}; bytes[5] = std::byte{'2'}; bytes[6] = std::byte{'3'}; bytes[7] = std::byte{'4'};
@ -99,6 +109,16 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") {
REQUIRE( pk_b32z_again == pk_b32z );
REQUIRE( pk_again == pk );
// In-place decoding and truncation via returned iterator:
std::string some_b32z = "jb1sa5dx";
some_b32z.erase(oxenmq::from_base32z(some_b32z.begin(), some_b32z.end(), some_b32z.begin()), some_b32z.end());
REQUIRE( some_b32z == "Hello" );
// Test the returned iterator from encoding
std::string hellob32z;
*oxenmq::to_base32z(some_b32z.begin(), some_b32z.end(), std::back_inserter(hellob32z))++ = '!';
REQUIRE( hellob32z == "jb1sa5dx!" );
std::vector<std::byte> bytes{{std::byte{0}, std::byte{255}}};
std::basic_string_view<std::byte> b{bytes.data(), bytes.size()};
REQUIRE( oxenmq::to_base32z(b) == "yd9o" );
@ -189,6 +209,16 @@ TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
REQUIRE( pk_b64_again == pk_b64 );
REQUIRE( pk_again == pk );
// In-place decoding and truncation via returned iterator:
std::string some_b64 = "SGVsbG8=";
some_b64.erase(oxenmq::from_base64(some_b64.begin(), some_b64.end(), some_b64.begin()), some_b64.end());
REQUIRE( some_b64 == "Hello" );
// Test the returned iterator from encoding
std::string hellob64;
*oxenmq::to_base64(some_b64.begin(), some_b64.end(), std::back_inserter(hellob64))++ = '!';
REQUIRE( hellob64 == "SGVsbG8=!" );
std::vector<std::byte> bytes{{std::byte{0}, std::byte{255}}};
std::basic_string_view<std::byte> b{bytes.data(), bytes.size()};
REQUIRE( oxenmq::to_base64(b) == "AP8=" );