mirror of https://github.com/oxen-io/oxen-mq.git
Add base64 encoder/decoder
This commit is contained in:
parent
46d007e1ac
commit
be4cbc6641
|
@ -0,0 +1,217 @@
|
|||
// Copyright (c) 2019-2020, The Loki Project
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification, are
|
||||
// permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
// conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
// of conditions and the following disclaimer in the documentation and/or other
|
||||
// materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the copyright holder nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#pragma once
|
||||
#include "string_view.h"
#include <array>
#include <cassert>
#include <cstdint>
#include <iterator>
|
||||
|
||||
namespace lokimq {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Lookup tables for base64 conversion, populated by a constexpr constructor so that they can be
/// built at compile time.
struct b64_table {
    // Decoded 0-63 value for every possible byte value.  Bytes that are not part of the base64
    // alphabet are left at 0; because 'A' also decodes to 0, a caller that wants to detect invalid
    // input has to treat 'A' as the one legitimate zero.  The table covers all 256 byte values so
    // that any unsigned char can be used as an index directly.
    char from_b64_lut[256];
    // Encoded alphabet character for every 6-bit (0-63) value.
    char to_b64_lut[64];

    // Fills both tables in a single pass over the 64 alphabet positions:
    //   0-25 -> 'A'..'Z', 26-51 -> 'a'..'z', 52-61 -> '0'..'9', 62 -> '+', 63 -> '/'.
    constexpr b64_table() noexcept : from_b64_lut{}, to_b64_lut{} {
        for (int value = 0; value < 64; ++value) {
            char symbol = '/';
            if (value < 26)
                symbol = static_cast<char>('A' + value);
            else if (value < 52)
                symbol = static_cast<char>('a' + (value - 26));
            else if (value < 62)
                symbol = static_cast<char>('0' + (value - 52));
            else if (value == 62)
                symbol = '+';

            to_b64_lut[value] = symbol;
            from_b64_lut[static_cast<unsigned char>(symbol)] = static_cast<char>(value);
        }
    }

    // Convert a b64 encoded character into its 0-63 value (0 for characters outside the alphabet).
    constexpr char from_b64(unsigned char c) const noexcept { return from_b64_lut[c]; }

    // Convert a 0-63 value into its b64 encoded character.  `b` must be below 64: the table has
    // exactly 64 entries and no range check is performed.
    constexpr char to_b64(unsigned char b) const noexcept { return to_b64_lut[b]; }
};

constexpr b64_table b64_lut{};

// The main point of these static asserts is to force the compiler to build the constexpr tables at
// compile time.
static_assert(b64_lut.from_b64('/') == 63, "");
static_assert(b64_lut.from_b64('7') == 59, "");
static_assert(b64_lut.to_b64(38) == 'm', "");
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// Converts bytes into a base64 encoded character sequence.
|
||||
template <typename InputIt, typename OutputIt>
|
||||
void to_base64(InputIt begin, InputIt end, OutputIt out) {
|
||||
static_assert(sizeof(*begin) == 1, "to_base64 requires chars/bytes");
|
||||
int bits = 0; // Tracks the number of unconsumed bits held in r, will always be in {0, 2, 4}
|
||||
std::uint_fast16_t r = 0;
|
||||
while (begin != end) {
|
||||
r = r << 8 | static_cast<unsigned char>(*begin++);
|
||||
|
||||
// we just added 8 bits, so we can *always* consume 6 to produce one character, so (net) we
|
||||
// are adding 2 bits.
|
||||
bits += 2;
|
||||
*out++ = detail::b64_lut.to_b64(r >> bits); // Right-shift off the bits we aren't consuming right now
|
||||
|
||||
// Drop the bits we don't want to keep (because we just consumed them)
|
||||
r &= (1 << bits) - 1;
|
||||
|
||||
if (bits == 6) { // We have enough bits to produce a second character (which means we had 4 before and added 8)
|
||||
bits = 0;
|
||||
*out++ = detail::b64_lut.to_b64(r);
|
||||
r = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// If bits == 0 then we ended our 6-bit outputs coinciding with 8-bit values, i.e. at a multiple
|
||||
// of 24 bits: this means we don't have anything else to output and don't need any padding.
|
||||
if (bits == 2) {
|
||||
// We finished with 2 unconsumed bits, which means we ended 1 byte past a 24-bit group (e.g.
|
||||
// 1 byte, 4 bytes, 301 bytes, etc.); since we need to always be a multiple of 4 output
|
||||
// characters that means we've produced 1: so we right-fill 0s to get the next char, then
|
||||
// add two padding ='s.
|
||||
*out++ = detail::b64_lut.to_b64(r << 4);
|
||||
*out++ = '=';
|
||||
*out++ = '=';
|
||||
} else if (bits == 4) {
|
||||
// 4 bits left means we produced 2 6-bit values from the first 2 bytes of a 3-byte group.
|
||||
// Fill 0s to get the last one, plus one padding output.
|
||||
*out++ = detail::b64_lut.to_b64(r << 2);
|
||||
*out++ = '=';
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a base64 string from an iterable, std::string-like object
|
||||
inline std::string to_base64(string_view s) {
|
||||
std::string base64;
|
||||
base64.reserve((s.size() + 2) / 3 * 4);
|
||||
to_base64(s.begin(), s.end(), std::back_inserter(base64));
|
||||
return base64;
|
||||
}
|
||||
|
||||
/// Returns the (padded) base64 encoding of the bytes of `s` as a new std::string; same as the
/// string_view overload but taking a ustring_view.
inline std::string to_base64(ustring_view s) {
    // Every 3 input bytes (rounding a partial group up) become 4 output characters.
    const auto encoded_size = (s.size() + 2) / 3 * 4;

    std::string result;
    result.reserve(encoded_size);
    to_base64(s.begin(), s.end(), std::back_inserter(result));
    return result;
}
|
||||
|
||||
/// Returns true if the range is a base64 encoded value; we allow (but do not require) '=' padding,
|
||||
/// but only at the end, only 1 or 2, and only if it pads out the total to a multiple of 4.
|
||||
template <typename It>
|
||||
constexpr bool is_base64(It begin, It end) {
|
||||
static_assert(sizeof(*begin) == 1, "is_base64 requires chars/bytes");
|
||||
using std::distance;
|
||||
using std::prev;
|
||||
|
||||
// Allow 1 or 2 padding chars *if* they pad it to a multiple of 4.
|
||||
if (begin != end && distance(begin, end) % 4 == 0) {
|
||||
auto last = prev(end);
|
||||
if (*last == '=')
|
||||
end = last--;
|
||||
if (*last == '=')
|
||||
end = last;
|
||||
}
|
||||
|
||||
for (; begin != end; ++begin) {
|
||||
auto c = *begin;
|
||||
if (detail::b64_lut.from_b64(c) == 0 && c != 'A')
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns true if the string-like value is a base64 encoded value
|
||||
constexpr bool is_base64(string_view s) { return is_base64(s.begin(), s.end()); }
|
||||
constexpr bool is_base64(ustring_view s) { return is_base64(s.begin(), s.end()); }
|
||||
|
||||
/// Converts a sequence of base64 digits to bytes. Undefined behaviour if any characters are not
|
||||
/// valid base64 alphabet characters. It is permitted for the input and output ranges to overlap as
|
||||
/// long as `out` is no earlier than `begin`. Trailing padding characters are permitted but not
|
||||
/// required.
|
||||
///
|
||||
/// It is possible to provide "impossible" base64 encoded values; for example "YWJja" which has 30
|
||||
/// bits of data even though a base64 encoded byte string should have 24 (4 chars) or 36 (6 chars)
|
||||
/// bits for a 3- and 4-byte input, respectively. We ignore any such "impossible" bits, and
|
||||
/// similarly ignore impossible bits in the bit "overhang"; that means "YWJjZA==" (the proper
|
||||
/// encoding of "abcd") and "YWJjZB", "YWJjZC", ..., "YWJjZP" all decode to the same "abcd" value:
|
||||
/// the last 4 bits of the last character are essentially considered padding.
|
||||
template <typename InputIt, typename OutputIt>
|
||||
void from_base64(InputIt begin, InputIt end, OutputIt out) {
|
||||
using Char = decltype(*begin);
|
||||
static_assert(sizeof(Char) == 1, "from_base64 requires chars/bytes");
|
||||
uint_fast16_t curr;
|
||||
int bits = 0; // number of bits we've loaded into val; we always keep this < 8.
|
||||
while (begin != end) {
|
||||
Char c = *begin++;
|
||||
|
||||
// padding; don't bother checking if we're at the end because is_base64 is a precondition
|
||||
// and we're allowed UB if it isn't satisfied.
|
||||
if (c == '=') continue;
|
||||
|
||||
curr = curr << 6 | detail::b64_lut.from_b64(c);
|
||||
if (bits == 0)
|
||||
bits = 6;
|
||||
else {
|
||||
bits -= 2; // Added 6, removing 8
|
||||
*out++ = static_cast<Char>(curr >> bits);
|
||||
curr &= (1 << bits) - 1;
|
||||
}
|
||||
}
|
||||
// Don't worry about leftover bits because either they have to be 0, or they can't happen at
|
||||
// all. See base32z.h for why: the reasoning is exactly the same (except using 6 bits per
|
||||
// character here instead of 5).
|
||||
}
|
||||
|
||||
/// Converts base64 digits from a std::string-like object into a std::string of bytes. Undefined
|
||||
/// behaviour if any characters are not valid base64 characters.
|
||||
inline std::string from_base64(string_view s) {
|
||||
std::string bytes;
|
||||
bytes.reserve(s.size()*6 / 8);
|
||||
from_base64(s.begin(), s.end(), std::back_inserter(bytes));
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/// Decodes the base64 digits in `s` and returns the resulting bytes as a std::string; same as the
/// string_view overload but taking a ustring_view.  Undefined behaviour if any characters are not
/// valid base64 characters.
inline std::string from_base64(ustring_view s) {
    // Each input character carries at most 6 bits, which bounds the number of decoded bytes.
    const auto max_decoded_size = s.size()*6 / 8;

    std::string result;
    result.reserve(max_decoded_size);
    from_base64(s.begin(), s.end(), std::back_inserter(result));
    return result;
}
|
||||
|
||||
}
|
|
@ -1,13 +1,20 @@
|
|||
#include "lokimq/hex.h"
|
||||
#include <iostream>
|
||||
#include "lokimq/base32z.h"
|
||||
#include "lokimq/base64.h"
|
||||
#include "common.h"
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
TEST_CASE("hex encoding/decoding", "[encoding][decoding][hex]") {
|
||||
REQUIRE( lokimq::to_hex("\xff\x42\x12\x34") == "ff421234"s );
|
||||
std::vector<uint8_t> chars{{1, 10, 100, 254}};
|
||||
std::array<uint8_t, 8> out;
|
||||
std::array<uint8_t, 8> expected{{'0', '1', '0', 'a', '6', '4', 'f', 'e'}};
|
||||
lokimq::to_hex(chars.begin(), chars.end(), out.begin());
|
||||
REQUIRE( out == expected );
|
||||
|
||||
REQUIRE( lokimq::from_hex("12345678ffEDbca9") == "\x12\x34\x56\x78\xff\xed\xbc\xa9"s );
|
||||
|
||||
REQUIRE( lokimq::is_hex("1234567890abcdefABCDEF1234567890abcdefABCDEF") );
|
||||
REQUIRE_FALSE( lokimq::is_hex("1234567890abcdefABCDEF1234567890aGcdefABCDEF") );
|
||||
REQUIRE_FALSE( lokimq::is_hex("1234567890abcdefABCDEF1234567890agcdefABCDEF") );
|
||||
|
@ -50,3 +57,72 @@ TEST_CASE("base32z encoding/decoding", "[encoding][decoding][base32z]") {
|
|||
// This one won't round-trip to the same value since it has ignored garbage bytes at the end
|
||||
REQUIRE( lokimq::to_base32z(lokimq::from_base32z("ybndrf4"s)) == "ybndrfa" );
|
||||
}
|
||||
|
||||
// Exercises to_base64, from_base64 and is_base64: exact-group and padded encodings, decoding with
// and without padding, tolerated "superfluous bit" inputs, and rejection of malformed padding and
// characters outside the alphabet.
TEST_CASE("base64 encoding/decoding", "[encoding][decoding][base64]") {
    // 00000000 00000000 00000000 -> 000000 000000 000000 000000
    REQUIRE( lokimq::to_base64("\0\0\0"s) == "AAAA" );
    // 00000001 00000010 00000011 -> 000000 010000 001000 000011
    REQUIRE( lokimq::to_base64("\x01\x02\x03"s) == "AQID" );
    REQUIRE( lokimq::to_base64("\0\0\0\0"s) == "AAAAAA==" );
    REQUIRE( lokimq::to_base64("a") == "YQ==" );
    REQUIRE( lokimq::to_base64("ab") == "YWI=" );
    REQUIRE( lokimq::to_base64("abc") == "YWJj" );
    REQUIRE( lokimq::to_base64("abcd") == "YWJjZA==" );
    REQUIRE( lokimq::to_base64("abcde") == "YWJjZGU=" );
    REQUIRE( lokimq::to_base64("abcdef") == "YWJjZGVm" );

    // 00000000 00000000 00000000 11111111 ->
    // 000000 000000 000000 000000 111111 110000 (pad) (pad)
    REQUIRE( lokimq::to_base64("\0\0\0\xff"s) == "AAAA/w==" );
    REQUIRE( lokimq::to_base64("\0\0\0\xff\xff"s) == "AAAA//8=" );
    REQUIRE( lokimq::to_base64("\0\0\0\xff\xff\xff"s) == "AAAA////" );
    REQUIRE( lokimq::to_base64(
                "Man is distinguished, not only by his reason, but by this singular passion from other "
                "animals, which is a lust of the mind, that by a perseverance of delight in the "
                "continued and indefatigable generation of knowledge, exceeds the short vehemence of "
                "any carnal pleasure.")
            ==
            "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz"
            "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg"
            "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu"
            "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo"
            "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=" );

    REQUIRE( lokimq::from_base64("A+/A") == "\x03\xef\xc0" );
    REQUIRE( lokimq::from_base64("YWJj") == "abc" );
    REQUIRE( lokimq::from_base64("YWJjZA==") == "abcd" );
    REQUIRE( lokimq::from_base64("YWJjZA") == "abcd" );
    // The low 4 bits of the final character are overhang and must be ignored whatever their value:
    // check both a single stray bit and all four set.
    REQUIRE( lokimq::from_base64("YWJjZB") == "abcd" ); // ignore superfluous bits
    REQUIRE( lokimq::from_base64("YWJjZP") == "abcd" ); // ignore superfluous bits
    REQUIRE( lokimq::from_base64("YWJj+") == "abc" ); // ignore superfluous bits
    REQUIRE( lokimq::from_base64("YWJjZGU=") == "abcde" );
    REQUIRE( lokimq::from_base64("YWJjZGU") == "abcde" );
    REQUIRE( lokimq::from_base64("YWJjZGVm") == "abcdef" );

    REQUIRE( lokimq::is_base64("YWJjZGVm") );
    REQUIRE( lokimq::is_base64("YWJjZGU") );
    REQUIRE( lokimq::is_base64("YWJjZGU=") );
    REQUIRE( lokimq::is_base64("YWJjZA==") );
    REQUIRE( lokimq::is_base64("YWJjZA") );
    REQUIRE( lokimq::is_base64("YWJjZB") ); // not really valid, but we explicitly accept it

    REQUIRE_FALSE( lokimq::is_base64("YWJjZ=") ); // invalid padding (padding can only be 4th or 3rd+4th of a 4-char block)
    REQUIRE_FALSE( lokimq::is_base64("YWJj=") );
    REQUIRE_FALSE( lokimq::is_base64("YWJj=A") );
    REQUIRE_FALSE( lokimq::is_base64("YWJjA===") );
    REQUIRE_FALSE( lokimq::is_base64("YWJ[") );
    REQUIRE_FALSE( lokimq::is_base64("YWJ.") );
    REQUIRE_FALSE( lokimq::is_base64("_YWJ") );

    REQUIRE( lokimq::from_base64(
                "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz"
                "IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg"
                "dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu"
                "dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo"
                "ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=" )
            ==
            "Man is distinguished, not only by his reason, but by this singular passion from other "
            "animals, which is a lust of the mind, that by a perseverance of delight in the "
            "continued and indefatigable generation of knowledge, exceeds the short vehemence of "
            "any carnal pleasure.");
}
|
||||
|
|
Loading…
Reference in New Issue