Introduced FieldData and LogLineData structs

They are used to hold the data parsed from the log lines
This commit is contained in:
Valentino Orlandi 2024-01-21 14:21:40 +01:00
parent bd62f5b706
commit 58a96a1281
Signed by: elB4RTO
GPG key ID: 1719E976DB2D4E71
2 changed files with 736 additions and 0 deletions

View file

@ -0,0 +1,577 @@
#include "../lib.h"
#include "modules/exceptions.h"
#include "modules/craplog/modules/lib.h"
#include "modules/craplog/modules/datetime.h"
#include "utilities/strings.h"
#include "utilities/vectors.h"
namespace
{

//! Returns how many whitespaces the value of a field is expected to embed
/*!
    Only the fields whose value is made of whitespace-separated tokens
    have a non-zero count
    \param fld The name of the log field
    \return The expected number of whitespaces, 0 for any other field
*/
size_t expectedWhitespaces( const std::string& fld )
{
    if ( fld == "request_full" )   { return 2ul; }
    if ( fld == "date_time_ncsa" ) { return 1ul; }
    if ( fld == "date_time_mcs" )  { return 4ul; }
    if ( fld == "date_time_gmt" )  { return 3ul; }
    return 0ul;
}

//! Converts a time-taken value into a string of whole milliseconds
/*!
    \param value The time taken, as found in the log line
    \param unit The unit of the value: "us", "s" or "s.ms", any other unit is left unscaled
    \return The number of milliseconds, truncated to an integer
    \throw std::invalid_argument, std::out_of_range Propagated from std::stof
*/
std::string timeTakenToMilliseconds( const std::string& value, const std::string& unit )
{
    float t{ std::stof( value ) };
    if ( unit == "us" ) {
        // from microseconds
        t /= 1000.0f;
    } else if ( unit == "s" || unit == "s.ms" ) {
        // from seconds
        t *= 1000.0f;
    }
    return std::to_string( static_cast<int>( t ) );
}

} // namespace


//! Parses a log line and stores the value of every considered field
/*!
    The line is split using the separators of the given format: the text
    found before the i-th separator is the value of the i-th field.
    Fields named "NONE" are skipped, as well as the empty values.
    \param line The log line to parse
    \param logs_format The format providing the initial string, the separators, the final string and the fields
    \throw LogParserException When a separator cannot be found, when a request is malformed or when a field is unknown
    \throw std::invalid_argument, std::out_of_range Propagated from std::stof when a time-taken value is not a valid number
*/
LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format)
{
    bool add_pm{ false };
    const size_t line_size{ line.size() };
    const size_t max_seps{ logs_format.separators.size() };
    size_t start{ 0ul };
    size_t stop{ logs_format.initial.size() };
    size_t sep_i{ 0ul };

    while ( sep_i <= max_seps ) {
        // split fields
        start = stop; // stop is updated at the end of the loop
        // comparing against max_seps (instead of max_seps-1) is safe with no separators at all
        const bool is_final_sep{ sep_i == max_seps };
        std::string_view sep;
        if ( ! is_final_sep ) {
            sep = logs_format.separators.at( sep_i );
            stop = line.find( sep, start );
            if ( stop == std::string::npos ) {
                throw LogParserException( "Separator not found", std::string{sep} );
            }
        } else {
            // final separator
            sep = logs_format.final;
            if ( sep.empty() ) {
                // the last field spans up to the end of the line
                stop = line_size;
            } else {
                stop = line.find( sep, start );
                if ( stop == std::string::npos ) {
                    throw LogParserException( "Final separator not found", std::string{sep} );
                }
            }
        }
        const size_t sep_size{ sep.size() };

        // get the field
        const std::string& fld = logs_format.fields.at( sep_i );
        if ( fld != "NONE" ) {
            // only parse the considered fields
            std::string fld_str{ line.substr( start, stop-start ) };

            if ( sep_i+1ul < max_seps ) {
                // not the last separator, check for mistakes
                size_t aux_stop{ stop };

                if ( sep == " " ) {
                    // check the fields with whitespace-separated values
                    const size_t n{ expectedWhitespaces( fld ) };
                    if ( n > 0ul ) {
                        size_t c{ StringOps::count( fld_str, ' ' ) };
                        if ( c < n ) {
                            // loop until the correct number of whitespaces is reached
                            // (line[size()] is a valid read and yields '\0')
                            size_t aux_start{ line[stop+1ul] == ' ' ? stop : stop+1ul };
                            while ( c < n ) {
                                aux_stop = line.find( sep, aux_start );
                                if ( aux_stop == std::string::npos ) {
                                    // not found
                                    throw LogParserException( "Separator not found", std::string{sep} );
                                }
                                aux_start = aux_stop+1ul;
                                ++c;
                            }
                        } else if ( c > n ) [[unlikely]] {
                            // should be unreachable
                            throw LogParserException( "Unexpected count for separator", std::string{sep} );
                        }
                    }

                } else if ( fld == "user_agent"
                         && !sep.empty() && sep.front() == '"'
                         && !fld_str.empty() && fld_str.back() == '\\' ) {
                    // atm the only support is for escaped quotes
                    // the found separator is not actually the separator but is part of the user-agent string
                    // keep searching until the real separator is found
                    size_t aux_start{ stop + sep_size };
                    while (true) {
                        aux_stop = line.find( sep, aux_start );
                        if ( aux_stop == std::string::npos ) {
                            // not found
                            throw LogParserException( "Separator not found", std::string{sep} );
                        } else if ( line.at( aux_stop-1ul ) != '\\' ) {
                            // non-backslashed quotes, real separator found (hopefully)
                            break;
                        }
                        aux_start = aux_stop + sep_size;
                    }
                }

                // finally update if needed
                if ( aux_stop > stop ) {
                    stop = aux_stop;
                    fld_str = line.substr( start, stop-start );
                }
            }

            if ( ! fld_str.empty() ) {
                // an unknown field is reported like any other unexpected field
                const auto id_it{ this->field2id.find( fld ) };
                if ( id_it == this->field2id.end() ) [[unlikely]] {
                    throw LogParserException( "Unexpected LogField", fld );
                }
                const int fld_id{ id_it->second };

                if ( fld_id > 0 ) {
                    // no need to process, store directly
                    // (a query made of a single dash means no query: nothing to store)
                    if ( !( fld_id == 13 && fld_str == "-" ) ) {
                        this->data( fld_id ) = FieldData( std::move(fld_str) );
                    }

                // process the date to get year, month, day, hour, minute and second
                } else if ( StringOps::startsWith( fld, "date_time" ) ) {
                    auto dt = DateTimeOps::processDateTime( fld_str, fld.substr( 10 ) ); // cut away the "date_time_" part
                    const auto store_if_any = []( FieldData& target, std::string& value ) {
                        if ( ! value.empty() ) {
                            target = FieldData( std::move(value) );
                        }
                    };
                    store_if_any( this->year,  dt.at(0) );
                    store_if_any( this->month, dt.at(1) );
                    store_if_any( this->day,   dt.at(2) );
                    if ( auto& hour{ dt.at(3) }; hour == "PM" ) {
                        // not an hour, but a marker: the hour is adjusted once the whole line is parsed
                        add_pm = true;
                    } else {
                        store_if_any( this->hour, hour );
                    }
                    store_if_any( this->minute, dt.at(4) );
                    store_if_any( this->second, dt.at(5) );

                // process the request to get the protocol, method, resource and query
                } else if ( fld == "request_full" ) {
                    // check whether the request string has the proper number of spaces
                    const size_t n_spaces{ StringOps::count( fld_str, ' ' ) };

                    if ( n_spaces == 0ul ) [[unlikely]] {
                        // no spaces
                        if ( VecOps::contains( this->valid_methods, fld_str ) ) {
                            this->method = FieldData( std::move(fld_str) );
                        } else if ( VecOps::contains( this->valid_protocols, fld_str ) ) {
                            this->protocol = FieldData( std::move(fld_str) );
                        } else {
                            this->storeUriQuery( std::move(fld_str) );
                        }

                    } else if ( n_spaces == 1ul ) [[unlikely]] {
                        // 1 field is missing
                        this->storeMalformedRequestOneSpace( std::move(fld_str) );

                    } else if ( n_spaces > 2ul ) [[unlikely]] {
                        // most likely a malicious attempt
                        if ( sep == " " ) {
                            // hard to say how to handle it properly
                            throw LogParserException( "Malformed request string", fld_str );
                        }
                        this->storeMalformedRequestMultiSpace( std::move(fld_str) );

                    } else [[likely]] {
                        // correct amount of spaces
                        const size_t aux_stop1{ fld_str.find( ' ' ) },
                                     aux_start{ aux_stop1+1ul },
                                     aux_stop2{ fld_str.find( ' ', aux_start ) };

                        if ( aux_stop1 > 0ul && aux_stop2 > aux_start ) [[likely]] {
                            std::string method{ fld_str.substr( 0ul, aux_stop1 ) };
                            std::string protocol{ fld_str.substr( aux_stop2+1ul ) };

                            if ( VecOps::contains( this->valid_methods, method )
                              && VecOps::contains( this->valid_protocols, protocol ) ) [[likely]] {
                                this->method = FieldData( std::move(method) );
                                this->protocol = FieldData( std::move(protocol) );
                                this->storeUriQuery( fld_str.substr( aux_start, aux_stop2-aux_start ) );
                            } else [[unlikely]] {
                                this->storeMalformedRequestMultiSpace( std::move(fld_str) );
                            }
                        } else [[unlikely]] {
                            // leading space or consecutive spaces
                            this->storeMalformedRequestMultiSpace( std::move(fld_str) );
                        }
                    }

                // process the request to get uri and query
                } else if ( fld == "request_uri_query" ) {
                    // search for the query
                    const size_t aux_{ fld_str.find( '?' ) };
                    if ( aux_ == std::string::npos ) {
                        // query not found, the whole (non-empty) string is the uri
                        this->uri = FieldData( std::move(fld_str) );
                    } else {
                        // only store the non-empty parts, leaving the others untouched
                        std::string uri{ fld_str.substr( 0ul, aux_ ) };
                        std::string query{ fld_str.substr( aux_+1ul ) };
                        if ( ! uri.empty() ) {
                            this->uri = FieldData( std::move(uri) );
                        }
                        if ( ! query.empty() ) {
                            this->query = FieldData( std::move(query) );
                        }
                    }

                // process the time taken to convert to milliseconds
                } else if ( StringOps::startsWith( fld, "time_taken_" ) ) {
                    this->time_taken = FieldData( timeTakenToMilliseconds( fld_str, fld.substr( 11ul ) ) ); // cut away the "time_taken_" part

                // something went wrong
                } else {
                    // hmmm.. no...
                    throw LogParserException( "Unexpected LogField", fld );
                }
            }
        }

        // update the stop for the next start
        stop += sep_size;
        ++sep_i;
        if ( stop >= line_size ) {
            // this was the final separator
            break;
        }
    }

    if ( add_pm && this->hour ) {
        try {
            // add +12 hours for PM, except for 12 PM which is already noon:
            // hours from 12 on are left untouched, to never exceed 23
            const int hour{ std::stoi( *this->hour ) };
            if ( hour < 12 ) {
                this->hour = FieldData( std::to_string( hour + 12 ) );
            }
        } catch (...) {
            // deliberate best-effort: the hour is not a number, leave it as it is
        }
    }
}
//! Stores the given string as uri and (when present) query
/*!
    The string is split at the first question mark: what precedes it
    is stored as the uri, what follows it is stored as the query.
    Without a question mark the whole string is stored as the uri.
    Nothing is stored when the string is empty.
    \param str The string holding the uri and possibly the query
*/
void LogLineData::storeUriQuery(std::string&& str)
{
    if ( str.empty() ) {
        return;
    }

    const size_t mark{ str.find( '?' ) };
    if ( mark == std::string::npos ) {
        // no query, everything is the uri
        this->uri = FieldData( std::move(str) );
        return;
    }

    this->uri   = FieldData( str.substr( 0ul, mark ) );
    this->query = FieldData( str.substr( mark+1ul ) );
}
//! Stores a request string holding only one whitespace
/*!
    The string is split at the whitespace in two tokens, then every token
    is checked against the valid methods and the valid protocols.
    The tokens which are neither a method nor a protocol are stored as uri.
    When the tokens cannot be told apart (both methods, both protocols or
    none of them recognized) the whole stripped string is stored as uri.
    \param str The request string
*/
void LogLineData::storeMalformedRequestOneSpace(std::string&& str)
{
    const size_t space_pos{ str.find( ' ' ) };
    std::string head{ str.substr( 0ul, space_pos ) };
    std::string tail{ str.substr( space_pos+1 ) };

    const bool head_is_method{   VecOps::contains( this->valid_methods,   head ) };
    const bool tail_is_method{   VecOps::contains( this->valid_methods,   tail ) };
    const bool head_is_protocol{ VecOps::contains( this->valid_protocols, head ) };
    const bool tail_is_protocol{ VecOps::contains( this->valid_protocols, tail ) };

    /**
    ***  Outcome by recognized tokens (M=method, P=protocol, U=uri,
    ***  U+ = whole string as uri, -- = impossible combination)
    ***
    ***  P\M   00  01  10  11
    ***
    ***  00    U+  UM  MU  U+
    ***  01    UP  --  MP  --
    ***  10    PU  PM  --  --
    ***  11    U+  --  --  --
    **/

    if ( head_is_method && tail_is_method ) {
        // two methods: uri = head+tail
        this->storeUriQuery( StringOps::strip( str ) );
        return;
    }

    if ( head_is_method ) {
        // method = head // protocol or uri = tail
        this->method = FieldData( std::move(head) );
        if ( tail_is_protocol ) {
            this->protocol = FieldData( std::move(tail) );
        } else {
            this->storeUriQuery( std::move(tail) );
        }
        return;
    }

    if ( tail_is_method ) {
        // protocol or uri = head // method = tail
        if ( head_is_protocol ) {
            this->protocol = FieldData( std::move(head) );
        } else {
            this->storeUriQuery( std::move(head) );
        }
        this->method = FieldData( std::move(tail) );
        return;
    }

    if ( head_is_protocol && tail_is_protocol ) {
        // two protocols: uri = head+tail
        this->storeUriQuery( StringOps::strip( str ) );
        return;
    }

    if ( head_is_protocol ) {
        // protocol = head // uri = tail
        this->protocol = FieldData( std::move(head) );
        this->storeUriQuery( std::move(tail) );
        return;
    }

    if ( tail_is_protocol ) {
        // uri = head // protocol = tail
        this->storeUriQuery( std::move(head) );
        this->protocol = FieldData( std::move(tail) );
        return;
    }

    // nothing recognized: uri = head+tail
    this->storeUriQuery( StringOps::strip( str ) );
}
//! Stores a request string holding two or more whitespaces
/*!
    The string is split in three tokens: what precedes the first whitespace,
    what follows the last whitespace and (stripped) what lies in between.
    Every token is checked against the valid methods and the valid protocols:
    the tokens which can be unambiguously recognized are stored as method
    and protocol, the remaining part of the string is stored as uri.
    When the tokens are ambiguous the whole stripped string is stored as uri.
    \param str The request string
*/
void LogLineData::storeMalformedRequestMultiSpace(std::string&& str)
{
    const size_t pos1{ str.find( ' ' ) },
                 pos2{ str.rfind( ' ' ) };
    std::string field1{ str.substr( 0ul, pos1 ) };
    std::string field2{ StringOps::strip( str.substr( pos1+1ul, pos2-pos1-1ul ) ) };
    std::string field3{ str.substr( pos2+1ul ) };
    const bool is_method1{ VecOps::contains( this->valid_methods, field1 ) },
               is_method2{ VecOps::contains( this->valid_methods, field2 ) },
               is_method3{ VecOps::contains( this->valid_methods, field3 ) },
               is_protocol1{ VecOps::contains( this->valid_protocols, field1 ) },
               is_protocol2{ VecOps::contains( this->valid_protocols, field2 ) },
               is_protocol3{ VecOps::contains( this->valid_protocols, field3 ) };

    /**
    ***  Outcome by recognized tokens (M=method, P=protocol, U=uri,
    ***  + = token merged into the uri, --- = impossible combination)
    ***
    ***  P\M   000  001  010  011  100  101  110  111
    ***
    ***  000   +U+  +UM  +U+  +U+  MU+  +U+  +U+  +U+
    ***
    ***  001   +UP  ---  UMP  ---  MUP  ---  +UP  ---
    ***
    ***  010   +U+  UPM  ---  ---  MPU  +U+  ---  ---
    ***
    ***  011   +U+  ---  ---  ---  MU+  ---  ---  ---
    ***
    ***  100   PU+  PUM  PMU  PU+  ---  ---  ---  ---
    ***
    ***  101   +U+  ---  +U+  ---  ---  ---  ---  ---
    ***
    ***  110   +U+  +UM  ---  ---  ---  ---  ---  ---
    ***
    ***  111   +U+  ---  ---  ---  ---  ---  ---  ---
    **/

    if ( is_method1 && is_method3 ) {
        // uri = 123
        this->storeUriQuery( StringOps::strip( str ) );

    } else if ( is_method1 && is_method2 ) {
        if ( is_protocol3 ) {
            // uri = 12 // protocol = 3
            this->storeUriQuery( StringOps::strip( str.substr( 0ul, pos2 ) ) );
            this->protocol = FieldData( std::move(field3) );
        } else {
            // uri = 123
            this->storeUriQuery( StringOps::strip( str ) );
        }

    } else if ( is_method2 && is_method3 ) {
        if ( is_protocol1 ) {
            // protocol = 1 // uri = 23
            this->protocol = FieldData( std::move(field1) );
            this->storeUriQuery( StringOps::strip( str.substr( pos1+1ul ) ) );
        } else {
            // uri = 123
            this->storeUriQuery( StringOps::strip( str ) );
        }

    } else if ( is_method1 ) {
        if ( is_protocol2 && !is_protocol3 ) {
            // method = 1 // protocol = 2 // uri = 3
            this->method = FieldData( std::move(field1) );
            this->protocol = FieldData( std::move(field2) );
            this->storeUriQuery( std::move(field3) );
        } else if ( is_protocol3 && !is_protocol2 ) {
            // method = 1 // uri = 2 // protocol = 3
            this->method = FieldData( std::move(field1) );
            this->storeUriQuery( std::move(field2) );
            this->protocol = FieldData( std::move(field3) );
        } else {
            // method = 1 // uri = 23
            this->method = FieldData( std::move(field1) );
            this->storeUriQuery( StringOps::strip( str.substr( pos1+1ul ) ) );
        }

    } else if ( is_method2 ) {
        if ( is_protocol1 && !is_protocol3 ) {
            // protocol = 1 // method = 2 // uri = 3
            this->protocol = FieldData( std::move(field1) );
            this->method = FieldData( std::move(field2) );
            this->storeUriQuery( std::move(field3) );
        } else if ( is_protocol3 && !is_protocol1 ) {
            // uri = 1 // method = 2 // protocol = 3
            this->storeUriQuery( std::move(field1) );
            this->method = FieldData( std::move(field2) );
            this->protocol = FieldData( std::move(field3) );
        } else {
            // uri = 123
            this->storeUriQuery( StringOps::strip( str ) );
        }

    } else if ( is_method3 ) {
        if ( is_protocol1 && !is_protocol2 ) {
            // protocol = 1 // uri = 2 // method = 3
            this->protocol = FieldData( std::move(field1) );
            this->storeUriQuery( std::move(field2) );
            this->method = FieldData( std::move(field3) );
        } else if ( is_protocol2 && !is_protocol1 ) {
            // uri = 1 // protocol = 2 // method = 3
            this->storeUriQuery( std::move(field1) );
            this->protocol = FieldData( std::move(field2) );
            this->method = FieldData( std::move(field3) );
        } else {
            // uri = 12 // method = 3
            this->storeUriQuery( StringOps::strip( str.substr( 0ul, pos2 ) ) );
            this->method = FieldData( std::move(field3) );
        }

    // from here on no token is a method: every case involving one has been handled above
    } else if ( is_protocol1 && !is_protocol2 && !is_protocol3 ) {
        // protocol = 1 // uri = 23
        this->protocol = FieldData( std::move(field1) );
        this->storeUriQuery( StringOps::strip( str.substr( pos1+1ul ) ) );

    } else if ( is_protocol3 && !is_protocol1 && !is_protocol2 ) {
        // uri = 12 // protocol = 3
        this->storeUriQuery( StringOps::strip( str.substr( 0ul, pos2 ) ) );
        this->protocol = FieldData( std::move(field3) );

    } else {
        // no protocol, a protocol in the middle only or more than one protocol
        // uri = 123
        this->storeUriQuery( StringOps::strip( str ) );
    }
}
size_t LogLineData::size() const
{
return this->year
+ this->month
+ this->day
+ this->hour
+ this->minute
+ this->second
+ this->protocol
+ this->method
+ this->uri
+ this->query
+ this->response_code
+ this->time_taken
+ this->bytes_sent
+ this->bytes_received
+ this->referrer
+ this->client
+ this->user_agent
+ this->cookie;
}
//! Returns the field identified by the given ID
/*!
    \param id The numeric ID of the field
    \return A reference to the corresponding member
    \throw LogParserException When no member is associated with the ID
*/
FieldData& LogLineData::data(const int& id)
{
    // association between an ID and the member it identifies
    struct Slot {
        int id;
        FieldData LogLineData::* member;
    };
    static constexpr Slot slots[]{
        // date and time
        {  1, &LogLineData::year },
        {  2, &LogLineData::month },
        {  3, &LogLineData::day },
        {  4, &LogLineData::hour },
        {  5, &LogLineData::minute },
        {  6, &LogLineData::second },
        // request
        { 10, &LogLineData::protocol },
        { 11, &LogLineData::method },
        { 12, &LogLineData::uri },
        { 13, &LogLineData::query },
        // server
        { 14, &LogLineData::response_code },
        { 15, &LogLineData::time_taken },
        { 16, &LogLineData::bytes_sent },
        { 17, &LogLineData::bytes_received },
        // client
        { 18, &LogLineData::referrer },
        { 20, &LogLineData::client },
        { 21, &LogLineData::user_agent },
        { 22, &LogLineData::cookie }
    };

    for ( const Slot& slot : slots ) {
        if ( slot.id == id ) {
            return this->*slot.member;
        }
    }
    throw LogParserException( "Unexpected LogField ID", std::to_string(id) );
}

View file

@ -1,8 +1,14 @@
#ifndef LOGDOCTOR__CRAPLOG__WORKERS__LIB_H
#define LOGDOCTOR__CRAPLOG__WORKERS__LIB_H
#include <QMetaType>
struct LogsFormat;
//! Signals which dialog to show
enum class WorkerDialog {
errGeneric,
errDirNotExists,
@ -17,4 +23,157 @@ enum class WorkerDialog {
Q_DECLARE_METATYPE(WorkerDialog)
//! FieldData
/*!
    Holds the data of a single field from a log line:
    the string itself and a flag telling whether the string is not empty.
    Instances can be moved but not copied.
    \see LogLineData
*/
struct FieldData
{
    //! Builds an unset field, holding no data
    FieldData() noexcept {}

    //! Builds a field taking ownership of the given string
    /*!
        The field is considered set only if the string is not empty
        \param data The string to hold
    */
    explicit FieldData(std::string&& data) noexcept
        : data{ std::move(data) }
    {
        // evaluated on the member, which now owns the string
        is_set = !this->data.empty();
    }

    ~FieldData() noexcept = default;

    FieldData(FieldData&& other) noexcept = default;
    FieldData& operator=(FieldData&& rhs) noexcept = default;
    Q_DISABLE_COPY(FieldData)

    //! Tells whether the field holds any data
    operator bool() const
    {
        return is_set;
    }

    //! Gives read-only access to the held string
    const std::string& operator *() const
    {
        return data;
    }

    //! Returns the sum of the sizes of the strings held by the two fields
    size_t operator +(const FieldData& rhs) const
    {
        const size_t own_size{ data.size() };
        return own_size + rhs.data.size();
    }

private:
    bool is_set{ false };
    std::string data;
};
//! Adds the size of the string held by a field to the given amount
/*!
    Allows to chain the sum of the sizes of several fields
    \param lhs The amount accumulated so far
    \param rhs The field whose data size has to be added
    \return The new amount
*/
inline size_t operator +(const size_t lhs, const FieldData& rhs)
{
    const std::string& held{ *rhs };
    return lhs + held.size();
}
//! LogLineData
/*!
    Holds the data of a single log line, one FieldData per log field.
    The data is parsed from the line by the constructor.
    Instances can be move-constructed, but neither copied nor assigned.
    \see FieldData
*/
struct LogLineData
{
    //! Parses the given line according to the given format
    /*!
        \param line The log line to parse
        \param logs_format The format of the logs the line belongs to
    */
    LogLineData(const std::string& line, const LogsFormat& logs_format);

    ~LogLineData() noexcept = default;

    LogLineData(LogLineData&& other) noexcept = default;
    LogLineData& operator=(LogLineData&& rhs) noexcept = delete;
    Q_DISABLE_COPY(LogLineData)

    //! Returns the total size of the data held by all the fields
    size_t size() const;

    // NOTE: the trailing numbers are the IDs used by field2id and data()

    // date and time
    FieldData year;            //  1
    FieldData month;           //  2
    FieldData day;             //  3
    FieldData hour;            //  4
    FieldData minute;          //  5
    FieldData second;          //  6
    // request
    FieldData protocol;        // 10
    FieldData method;          // 11
    FieldData uri;             // 12
    FieldData query;           // 13
    // server
    FieldData response_code;   // 14
    FieldData time_taken;      // 15
    FieldData bytes_sent;      // 16
    FieldData bytes_received;  // 17
    // client
    FieldData client;          // 20
    FieldData cookie;          // 22
    FieldData user_agent;      // 21
    FieldData referrer;        // 18

private:

    //! Returns the member identified by the given ID
    FieldData& data(const int& id);

    //! Stores a string as uri and (when present) query
    void storeUriQuery(std::string&& str);

    //! Stores a request string which holds only one whitespace
    void storeMalformedRequestOneSpace(std::string&& str);

    //! Stores a request string which holds two or more whitespaces
    void storeMalformedRequestMultiSpace(std::string&& str);

    //! Associates the name of a log field with the ID of the member storing it
    /*!
        A positive ID means the value is stored as it is,
        0 means the value has to be processed before being stored
    */
    static inline const std::unordered_map<std::string, int> field2id{
        // date-time
        {"date_time_year",         1},
        {"date_time_month",        2},
        {"date_time_day",          3},
        {"date_time_hour",         4},
        {"date_time_minute",       5},
        {"date_time_second",       6},
        {"date_time_ncsa",         0},
        {"date_time_iso",          0},
        {"date_time_mcs",          0},
        {"date_time_gmt",          0},
        {"date_time_utc_d",        0},
        {"date_time_utc_t",        0},
        {"date_time_epoch_s",      0},
        {"date_time_epoch_s.ms",   0},
        {"date_time_epoch_ms",     0},
        {"date_time_epoch_us",     0},
        {"date_time_YYYYMMDD",     0},
        {"date_time_MMDDYY",       0},
        {"date_time_MDYYYY",       0},
        {"date_time_year_short",   0},
        {"date_time_month_str",    0},
        {"date_time_clock_12",     0},
        {"date_time_clock_24",     0},
        {"date_time_clock_short",  0},
        // request
        {"request_protocol",      10},
        {"request_method",        11},
        {"request_uri",           12},
        {"request_query",         13},
        {"response_code",         14},
        {"request_full",           0},
        // performance
        {"time_taken_ms",         15},
        {"time_taken_us",          0},
        {"time_taken_s.ms",        0},
        {"time_taken_s",           0},
        {"bytes_sent",            16},
        {"bytes_received",        17},
        // referer
        {"referer",               18},
        // client data
        {"client",                20},
        {"user_agent",            21},
        {"cookie",                22}
    };

    //! The request methods recognized while parsing a request
    static inline const std::vector<std::string> valid_methods{
        "GET",
        "POST",
        "HEAD",
        "PUT",
        "DELETE",
        "OPTIONS",
        "CONNECT",
        "TRACE",
        "PATCH"
    };

    //! The request protocols recognized while parsing a request
    static inline const std::vector<std::string> valid_protocols{
        "HTTP/0.9",
        "HTTP/1.0",
        "HTTP/1.1",
        "HTTP/2",
        "HTTP/3"
    };
};
#endif // LOGDOCTOR__CRAPLOG__WORKERS__LIB_H