From 236b7eeb33318de9b08ce203be2948004a6a6cf6 Mon Sep 17 00:00:00 2001 From: Valentino Orlandi Date: Thu, 8 Feb 2024 22:25:41 +0100 Subject: [PATCH] Improvements and updates --- logdoctor/modules/craplog/craplog.cpp | 1 + logdoctor/modules/craplog/craplog.h | 2 +- .../modules/craplog/modules/datetime.cpp | 155 ++-- logdoctor/modules/craplog/modules/datetime.h | 6 +- logdoctor/modules/craplog/modules/formats.cpp | 50 +- logdoctor/modules/craplog/modules/formats.h | 662 +++++++++--------- logdoctor/modules/craplog/modules/lib.h | 41 +- .../modules/workers/impl/loglinedata.cpp | 152 ++-- .../modules/craplog/modules/workers/lib.h | 104 +-- logdoctor/tests/white_box.cpp | 96 +-- 10 files changed, 620 insertions(+), 649 deletions(-) diff --git a/logdoctor/modules/craplog/craplog.cpp b/logdoctor/modules/craplog/craplog.cpp index ee37455c..5393edc1 100644 --- a/logdoctor/modules/craplog/craplog.cpp +++ b/logdoctor/modules/craplog/craplog.cpp @@ -17,6 +17,7 @@ #include "modules/blacklists/blacklists.h" +#include "modules/craplog/modules/lib.h" #include "modules/craplog/modules/donuts.h" #include "modules/craplog/modules/logs.h" #include "modules/craplog/modules/workers/lister.h" diff --git a/logdoctor/modules/craplog/craplog.h b/logdoctor/modules/craplog/craplog.h index 3c2ad237..b3a23be9 100644 --- a/logdoctor/modules/craplog/craplog.h +++ b/logdoctor/modules/craplog/craplog.h @@ -4,13 +4,13 @@ #include -#include "modules/craplog/modules/lib.h" #include "modules/craplog/modules/hash.h" #include "modules/craplog/modules/formats.h" #include "modules/craplog/modules/workers/lib.h" +struct LogFile; struct Blacklists; class QWaitCondition; diff --git a/logdoctor/modules/craplog/modules/datetime.cpp b/logdoctor/modules/craplog/modules/datetime.cpp index 0341dea9..fa0f6b37 100644 --- a/logdoctor/modules/craplog/modules/datetime.cpp +++ b/logdoctor/modules/craplog/modules/datetime.cpp @@ -1,6 +1,8 @@ #include "datetime.h" +#include "lib.h" + #include "modules/exceptions.h" 
#include "utilities/strings.h" @@ -57,12 +59,12 @@ const std::string convertMonth( std::string_view month ) } // namespace (private) -std::vector processDateTime( std::string_view datetime_, std::string_view format ) +std::vector processDateTime( std::string_view datetime_, const LogsFormatField format ) { - std::string aux, datetime{datetime_}; + std::string datetime{datetime_}; std::string year, month, day, hour, minute, second; - if ( format == "ncsa" ) { + if ( format == date_time_ncsa ) { datetime = StringOps::strip( datetime, "[ ]" ); day = datetime.substr( 0ul, 2ul ); month = convertMonth( datetime.substr( 3ul, 3ul ) ); @@ -71,7 +73,7 @@ std::vector processDateTime( std::string_view datetime_, std::strin minute = datetime.substr( 15ul, 2ul ); second = datetime.substr( 18ul, 2ul ); - } else if ( format == "mcs" ) { + } else if ( format == date_time_mcs ) { month = convertMonth( datetime.substr( 4ul, 3ul ) ); day = datetime.substr( 8ul, 2ul ); hour = datetime.substr( 11ul, 2ul ); @@ -79,7 +81,7 @@ std::vector processDateTime( std::string_view datetime_, std::strin second = datetime.substr( 17ul, 2ul ); year = datetime.substr( datetime.size()-4 ); - } else if ( format == "gmt" ) { + } else if ( format == date_time_gmt ) { size_t start{ datetime.find( ", " ) + 2ul }; day = datetime.substr( start, 2ul ); start += 3ul; @@ -93,7 +95,7 @@ std::vector processDateTime( std::string_view datetime_, std::strin start += 3ul; second = datetime.substr( start, 2ul ); - } else if ( StringOps::startsWith( format, "iso" ) ) { + } else if ( format == date_time_iso ) { year = datetime.substr( 0ul, 4ul ); month = datetime.substr( 5ul, 2ul ); day = datetime.substr( 8ul, 2ul ); @@ -101,31 +103,36 @@ std::vector processDateTime( std::string_view datetime_, std::strin minute = datetime.substr( 14ul, 2ul ); second = datetime.substr( 17ul, 2ul ); - } else if ( StringOps::startsWith( format, "utc" ) ) { - if ( format == "utc_d" ) { + } else if ( _DATE_TIME_UTC & format ) { + if ( format 
== date_time_utc_d ) { // date year = datetime.substr( 0ul, 4ul ); month = datetime.substr( 5ul, 2ul ); day = datetime.substr( 8ul, 2ul ); - } else { + } else if ( format == date_time_utc_t ) { // time hour = datetime.substr( 0ul, 2ul ); minute = datetime.substr( 3ul, 2ul ); second = datetime.substr( 6ul, 2ul ); + } else [[unlikely]] { + // wronthing went some ... + throw DateTimeException("Unexpected DateTime UTC: "+std::string{datetime_}+" - format: "+std::to_string(format)); } - } else if ( StringOps::startsWith( format, "epoch_" ) ) { - aux = format.substr( 6ul ); + } else if ( _DATE_TIME_EPOCH & format ) { // convert to seconds - if ( aux == "us" ) { + if ( format == date_time_epoch_us ) { // from microseconds datetime.resize( datetime.size()-6ul ); - } else if ( aux == "ms" ) { + } else if ( format == date_time_epoch_ms ) { // from milliseconds datetime.resize( datetime.size()-3ul ); - } else if ( aux == "s.ms" ) { + } else if ( format == date_time_epoch_s_ms ) { // from seconds.milliseconds datetime = std::to_string( std::stoi( datetime ) ); + } else if ( format != date_time_epoch_s ) [[unlikely]] { + // wronthing went some ... 
+ throw DateTimeException("Unexpected DateTime EPOCH: "+std::string{datetime_}+" - format: "+std::to_string(format)); } // convert to iso date format const QDateTime e{ QDateTime::fromSecsSinceEpoch( std::stoi( datetime ) ) }; @@ -139,20 +146,20 @@ std::vector processDateTime( std::string_view datetime_, std::strin minute = datetime.substr( 14ul, 2ul ); second = datetime.substr( 17ul, 2ul ); - } else { - if ( format == "YYYYMMDD" ) { + } else if ( _DATE_TIME_DATE & format ) { + if ( format == date_time_yyyymmdd ) { year = datetime.substr( 0ul, 4ul ); month = datetime.substr( 5ul, 2ul ); day = datetime.substr( 8ul, 2ul ); - } else if ( format == "MMDDYY" ) { + } else if ( format == date_time_mmddyy ) { const int y{ std::stoi( datetime.substr( 6ul, 2ul ) ) }; month = datetime.substr( 0ul, 2ul ); day = datetime.substr( 3ul, 2ul ); year = (y<70) ? "20" : "19"; year += (y<10) ? "0"+std::to_string( y ) : std::to_string( y ); - } else if ( format == "MDYYYY" ) { + } else if ( format == date_time_mdyyyy ) { size_t aux_; if ( datetime.at(2) == '/' ) { month = datetime.substr( 0ul, 2ul ); @@ -170,63 +177,65 @@ std::vector processDateTime( std::string_view datetime_, std::strin } year = datetime.substr( aux_ ); - } else if ( StringOps::startsWith( format, "year" ) ) { - year = datetime; - if ( format == "year_short" ) { - const int y{ std::stoi( year ) }; - year = (y<70) ? 
"20" : "19"; - year += year; - } - - } else if ( StringOps::startsWith( format, "month" ) ) { - if ( format.size() <= 5ul ) { - month = datetime; - } else { - datetime.resize( 3ul ); // may be the full name - month = convertMonth( datetime ); - } - - } else if ( format == "day" ) { - day = datetime; - - } else if ( StringOps::startsWith( format, "clock_" ) ) { - aux = format.substr( 6ul ); - if ( aux == "24" ) { - hour = datetime.substr( 0ul, 2ul ); - minute = datetime.substr( 3ul, 2ul ); - second = datetime.substr( 6ul, 2ul ); - - } else if ( aux == "12" ) { - hour = datetime.substr( 0ul, 2ul ); - minute = datetime.substr( 3ul, 2ul ); - second = datetime.substr( 6ul, 2ul ); - if ( datetime.substr( 9ul, 2ul ) == "pm" ) { - hour = std::to_string( 12 + std::stoi(hour) ); - } - - } else if ( aux == "short" ) { - hour = datetime.substr( 0ul, 2ul ); - minute = datetime.substr( 3ul, 2ul ); - - } else if ( aux == "meridian" ) { - if ( datetime == "pm" ) { - hour = "PM"; // to mark for final update - } - } - - } else if ( format == "hour" ) { - hour = datetime; - - } else if ( format == "minute" ) { - minute = datetime; - - } else if ( format == "second" ) { - second = datetime; - - } else { + } else [[unlikely]] { // wronthing went some ... 
- throw DateTimeException("Unexpected DateTime format: "+std::string{datetime_}); + throw DateTimeException("Unexpected DateTime DATE: "+std::string{datetime_}+" - format: "+std::to_string(format)); } + + } else if ( _DATE_TIME_CLOCK & format ) { + if ( format == date_time_clock_24 ) { + hour = datetime.substr( 0ul, 2ul ); + minute = datetime.substr( 3ul, 2ul ); + second = datetime.substr( 6ul, 2ul ); + + } else if ( format == date_time_clock_12 ) { + hour = datetime.substr( 0ul, 2ul ); + minute = datetime.substr( 3ul, 2ul ); + second = datetime.substr( 6ul, 2ul ); + if ( const bool pm{ datetime.substr( 9ul, 2ul ) == "pm" }; pm != (hour == "12") ) { // only 1-11 pm and 12 am need a conversion + hour = pm ? std::to_string( 12 + std::stoi(hour) ) : "00"; // 1-11 pm -> 13-23, 12 am -> 00 (12 pm stays 12) + } + + } else if ( format == date_time_clock_short ) { + hour = datetime.substr( 0ul, 2ul ); + minute = datetime.substr( 3ul, 2ul ); + + } else [[unlikely]] { + // wronthing went some ... + throw DateTimeException("Unexpected DateTime CLOCK: "+std::string{datetime_}+" - format: "+std::to_string(format)); + } + + } else if ( _DATE_TIME_YEAR & format ) { + year = datetime; + if ( format == date_time_year_short ) { + const int y{ std::stoi( year ) }; + year = (y<70) ? "20" : "19"; + year += datetime; // century prefix followed by the original two digits (was appending the prefix to itself) + } + + } else if ( _DATE_TIME_MONTH & format ) { + if ( format == date_time_month ) { + month = datetime; + } else { + datetime.resize( 3ul ); // may be the full name + month = convertMonth( datetime ); + } + + } else if ( format == date_time_day ) { + day = datetime; + + } else if ( format == date_time_hour ) { + hour = datetime; + + } else if ( format == date_time_minute ) { + minute = datetime; + + } else if ( format == date_time_second ) { + second = datetime; + + } else [[unlikely]] { + // wronthing went some ...
+ throw DateTimeException("Unexpected DateTime: "+std::string{datetime_}+" - format: "+std::to_string(format)); } return { year, month, day, hour, minute, second }; diff --git a/logdoctor/modules/craplog/modules/datetime.h b/logdoctor/modules/craplog/modules/datetime.h index 5806853b..6b9515b7 100644 --- a/logdoctor/modules/craplog/modules/datetime.h +++ b/logdoctor/modules/craplog/modules/datetime.h @@ -2,10 +2,14 @@ #define LOGDOCTOR__CRAPLOG__MODULES__DATETIME_H +#include #include #include +enum LogsFormatField : uint32_t; + + //! DateTimeOps /*! Operations for the dates @@ -20,7 +24,7 @@ namespace DateTimeOps \throw DateTimeException \return The list of items */ -std::vector processDateTime( std::string_view datetime, std::string_view format ); +std::vector processDateTime( std::string_view datetime, const LogsFormatField format ); } // namespace DateTimeOps diff --git a/logdoctor/modules/craplog/modules/formats.cpp b/logdoctor/modules/craplog/modules/formats.cpp index e9737645..285d97e9 100644 --- a/logdoctor/modules/craplog/modules/formats.cpp +++ b/logdoctor/modules/craplog/modules/formats.cpp @@ -273,18 +273,20 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons const auto& f_map_v { this->APACHE_ALF_V }; std::string initial, final; - std::vector separators, fields; + std::vector separators; + std::vector fields; // parse the string to convert keyargs in craplog's fields format - bool is_strftime_sep; + bool is_strftime_sep{ false }; int n_fld{ 0 }; size_t start, stop{0ul}, aux, aux_start, aux_stop; const size_t max{ f_str.size()-1ul }; - std::string aux_fld, aux_fld_v, cur_fld, cur_sep; + std::string aux_fld, aux_fld_v, cur_sep; + LogsFormatField cur_fld{ _INVALID }; // find and convert any field while (true) { // start after the last found field start = stop; - if ( cur_fld == "date_time_ncsa" ) { + if ( cur_fld == date_time_ncsa ) { // NCAS time format is always enclosed inside brackets cur_sep += "]"; } @@ -381,7 +383,7 
@@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons const auto& aux_map{ f_map_v.at( aux_fld_v ) }; if ( aux_map.empty() ) { // module not considered and always giving out something, even if invalid varname is passed - cur_fld = "NONE"; + cur_fld = _DISCARDED; } else if ( aux_fld.empty() ) { // no need to check further, the dafault is used in this case @@ -393,11 +395,12 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons } else if ( aux_fld_v == "p" || aux_fld_v == "P" || aux_fld_v == "T" ) { // still not considered (except 'T'), but invalid fields get used as text // field concatenation not allowed, whole content used as varname - if ( aux_map.find( aux_fld ) != aux_map.end() ) { + if ( aux_map.contains( aux_fld ) ) { // valid varname cur_fld = aux_map.at( aux_fld ); } else { // invalid varname, use as text + cur_fld = _INVALID; cur_sep += aux_fld; start = stop = aux_stop; continue; @@ -406,15 +409,15 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons } else if ( aux_fld_v == "a" || aux_fld_v == "h" ) { // whatever the varname is (valid, invalid, empty), always returns the client // field concatenation not allowed, the entire content is used as varname - cur_fld = "client" ; + cur_fld = client; } else if ( aux_fld_v == "i" ) { // always giving a result, may the varname be valid or not ('-' if invalid/empty) // field concatenation not allowed, the entire content is used as varname - if ( aux_map.find( aux_fld ) != aux_map.end() ) { + if ( aux_map.contains( aux_fld ) ) { cur_fld = aux_map.at( aux_fld ); } else { - cur_fld = "NONE"; + cur_fld = _DISCARDED; } } else /*if ( aux_fld_v == "t" )*/ { @@ -423,12 +426,13 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons if ( aux_aux == std::string::npos ) { // no concatenation, only valid fields used, anything else used as text // whole content used as varname - if ( aux_map.find( aux_fld ) != 
aux_map.end() ) { + if ( aux_map.contains( aux_fld ) ) { // valid cur_fld = aux_map.at( aux_fld ); is_strftime_sep = true; } else { // invalid, append to current separator + cur_fld = _INVALID; cur_sep += aux_fld; start = stop = aux_stop; continue; @@ -475,7 +479,7 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons aux_aux_fld = aux_fld.substr( aux_aux, 2ul ); aux_aux_stop = aux_aux+2ul; // check if the field is valid - if ( aux_map.find( aux_aux_fld ) != aux_map.end() ) { + if ( aux_map.contains( aux_aux_fld ) ) { // valid, append cur_fld = aux_map.at( aux_aux_fld ); // append the separator @@ -517,10 +521,10 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons ++ aux_stop; } // check if the module is valid - if ( f_map.find( aux_fld ) != f_map.end() ) { + if ( f_map.contains( aux_fld ) ) { // valid cur_fld = f_map.at( aux_fld ); - if ( cur_fld == "date_time_ncsa" ) { + if ( cur_fld == date_time_ncsa ) { // apache's NCSA time format is always enclosed inside brackets cur_sep += "["; } @@ -544,7 +548,7 @@ LogsFormat FormatOps::processApacheFormatString( const std::string& f_str ) cons break; } - if ( cur_fld.empty() ) { + if ( cur_fld == _INVALID ) { // invalid field, used as text (namely, added to current separator) continue; } @@ -608,7 +612,8 @@ LogsFormat FormatOps::processNginxFormatString( const std::string& f_str ) const const auto& f_map{ this->NGINX_ALF }; std::string initial, final; - std::vector separators, fields; + std::vector separators; + std::vector fields; // parse the string to convert keyargs in craplog's fields format bool finished{ false }; size_t start, aux, stop{0ul}; @@ -654,14 +659,14 @@ LogsFormat FormatOps::processNginxFormatString( const std::string& f_str ) const } // check if the field is valid - if ( f_map.find( cur_fld ) != f_map.end() ) { + if ( const auto it{ f_map.find( cur_fld ) }; it != f_map.end() ) { // valid, append if ( start == 0ul ) { initial = 
parseNginxEscapes( cur_sep ); } else { separators.push_back( parseNginxEscapes( cur_sep ) ); } - fields.push_back( f_map.at( cur_fld ) ); + fields.push_back( it->second ); if ( finished ) { // this was the last field break; @@ -705,18 +710,19 @@ LogsFormat FormatOps::processIisFormatString( const std::string& f_str, const in { checkIisString( f_str ); std::string initial, final; - std::vector separators, fields; + std::vector separators; + std::vector fields; switch ( l_mod ) { case 2: // IIS logging module final = ","; separators = {", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", "}; - fields = {"client","NONE","date_time_MDYYYY","date_time_utc_t","NONE","NONE","NONE","time_taken_ms","bytes_received","bytes_sent","response_code","NONE","request_method","request_uri","request_query"}; + fields = {client,_DISCARDED,date_time_mdyyyy,date_time_utc_t,_DISCARDED,_DISCARDED,_DISCARDED,time_taken_ms,bytes_received,bytes_sent,response_code,_DISCARDED,request_method,request_uri,request_query}; break; case 1: // NCSA logging module separators = {" "," "," [","] \"","\" "," "}; - fields = {"client","NONE","NONE","date_time_ncsa","request_full","response_code","bytes_sent"}; + fields = {client,_DISCARDED,_DISCARDED,date_time_ncsa,request_full,response_code,bytes_sent}; break; case 0: // W3C logging module @@ -745,9 +751,9 @@ LogsFormat FormatOps::processIisFormatString( const std::string& f_str, const in ++ stop; // check if the module is valid - if ( f_map.find( cur_fld ) != f_map.end() ) { + if ( const auto it{ f_map.find( cur_fld ) }; it != f_map.end() ) { // valid, append - fields.push_back( f_map.at( cur_fld ) ); + fields.push_back( it->second ); if ( ! 
finished ) { separators.push_back( cur_sep ); } else { diff --git a/logdoctor/modules/craplog/modules/formats.h b/logdoctor/modules/craplog/modules/formats.h index c0767517..a6b3b44c 100644 --- a/logdoctor/modules/craplog/modules/formats.h +++ b/logdoctor/modules/craplog/modules/formats.h @@ -2,6 +2,8 @@ #define LOGDOCTOR__CRAPLOG__MODULES__FORMATS_H +#include "lib.h" + #include #include @@ -15,6 +17,10 @@ struct LogsFormat; */ class FormatOps final { + using simple_fields_umap_t = std::unordered_map; + using composed_fields_umap_t = std::unordered_map>; + using samples_umap_t = std::unordered_map; + public: //! Processes the given string to extrapolate the format for Apache2 @@ -79,362 +85,362 @@ private: ///////////////// //// APACHE2 //// - //!< Access logs fields formats - const std::unordered_map APACHE_ALF{ - {"a", "NONE"}, - {"A", "NONE"}, - {"b", "NONE"}, - {"B", "NONE"}, - {"C", "NONE"}, - {"D", "time_taken_ms"}, - {"e", "NONE"}, - {"f", "NONE"}, - {"h", "client"}, - {"H", "request_protocol"}, - {"i", "NONE"}, - {"I", "bytes_received"}, - {"k", "NONE"}, - {"l", "NONE"}, - {"L", "NONE"}, - {"m", "request_method"}, - {"n", "NONE"}, - {"o", "NONE"}, - {"O", "bytes_sent"}, - {"p", "NONE"}, - {"P", "NONE"}, - {"q", "request_query"}, - {"r", "request_full"}, - {"R", "NONE"}, - {"s", "response_code"}, - {"s", "response_code"}, - {"S", "NONE"}, - {"t", "date_time_ncsa"}, - {"T", "time_taken_s"}, - {"u", "NONE"}, - {"U", "request_uri"}, - {"v", "NONE"}, - {"V", "NONE"}, - {"X", "NONE"} }; + //! 
Access logs fields formats + const simple_fields_umap_t APACHE_ALF{ + {"a", _DISCARDED}, + {"A", _DISCARDED}, + {"b", _DISCARDED}, + {"B", _DISCARDED}, + {"C", _DISCARDED}, + {"D", time_taken_ms}, + {"e", _DISCARDED}, + {"f", _DISCARDED}, + {"h", client}, + {"H", request_protocol}, + {"i", _DISCARDED}, + {"I", bytes_received}, + {"k", _DISCARDED}, + {"l", _DISCARDED}, + {"L", _DISCARDED}, + {"m", request_method}, + {"n", _DISCARDED}, + {"o", _DISCARDED}, + {"O", bytes_sent}, + {"p", _DISCARDED}, + {"P", _DISCARDED}, + {"q", request_query}, + {"r", request_full}, + {"R", _DISCARDED}, + {"s", response_code}, + {"s", response_code}, + {"S", _DISCARDED}, + {"t", date_time_ncsa}, + {"T", time_taken_s}, + {"u", _DISCARDED}, + {"U", request_uri}, + {"v", _DISCARDED}, + {"V", _DISCARDED}, + {"X", _DISCARDED} }; - //!< Composed access logs fields formats - const std::unordered_map> APACHE_ALF_V{ - {"a", { {"", "client"}, // as %h (from single fields) - {"c", "client"}}}, - {"h", { {"", "client"}, // as %h (from single fields) - {"c", "client"}}}, - {"i", { {"", "NONE"}, - {"Cookie", "cookie"}, - {"Referer", "referer"}, - {"User-agent", "user_agent"}}}, - {"t", { {"", "date_time_ncsa"}, // as %t (from single fields) - //{"%%", "NONE"}, // the percent sign - //{"%n", "NONE"}, // line feed - //{"%t", "NONE"}, // horizontal tab - {"sec", "date_time_epoch_s"}, - {"msec", "date_time_epoch_ms"}, - {"usec", "date_time_epoch_us"}, - {"msec_frac", "NONE"}, // milliseconds fraction - {"usec_frac", "NONE"}, // microseconds fraction - {"%a", "NONE"}, // abbreviated weekday name - {"%A", "NONE"}, // weekday name - {"%b", "date_time_month_str"}, - {"%B", "date_time_month_str"}, - {"%c", "date_time_mcs"}, - {"%C", "NONE"}, // year (first 2 digits, aka centuries) - {"%d", "date_time_day"}, - {"%D", "date_time_MMDDYY"}, - {"%e", "date_time_day"}, - {"%F", "date_time_YYYYMMDD"}, - {"%g", "NONE"}, // weel-based year (last 2 digits) - {"%G", "NONE"}, // week-based year, namely the year which 
contains the current week - {"%h", "date_time_month_str"}, - {"%H", "date_time_hour"}, - {"%I", "NONE"}, // hour (12h format) - {"%j", "NONE"}, // day of the year number - {"%k", "date_time_hour"}, // ?!? hour (24h format) !?! no documentation ?!? - {"%l", "NONE"}, // hour (12h format) - {"%m", "date_time_month"}, - {"%M", "date_time_minute"}, - {"%p", "NONE"}, // AM or PM - {"%r", "date_time_clock_12"}, - {"%R", "date_time_clock_short"}, - {"%s", "date_time_epoch_s"}, - {"%S", "date_time_second"}, - {"%T", "date_time_clock_24"}, - {"%u", "NONE"}, // weekday number (1-7, Monday is 1) - {"%U", "NONE"}, // week of the year number, with the first Sunday as the first day of week one - {"%V", "NONE"}, // week of the year number - {"%w", "NONE"}, // weekday number (0-6, Sunday is 0) - {"%W", "NONE"}, // week of the year number, with the first Monday as the first day of week one - {"%x", "date_time_MMDDYY"}, - {"%X", "date_time_clock_24"}, - {"%y", "date_time_year_short"}, - {"%Y", "date_time_year"}, - {"%z", "NONE"}, // timezone offset from UTC (1 minute=1, 1 hour=100) - {"%Z", "NONE"}}}, // timezone name abbreviation - {"T", { {"", "time_taken_s"}, // as s - {"s", "time_taken_s"}, - {"ms", "time_taken_ms"}, - {"us", "time_taken_us"}}}, + //! 
Composed access logs fields formats + const composed_fields_umap_t APACHE_ALF_V{ + {"a", { {"", client}, // as %h (from single fields) + {"c", client}}}, + {"h", { {"", client}, // as %h (from single fields) + {"c", client}}}, + {"i", { {"", _DISCARDED}, + {"Cookie", cookie}, + {"Referer", referer}, + {"User-agent", user_agent}}}, + {"t", { {"", date_time_ncsa}, // as %t (from single fields) + //{"%%", _DISCARDED}, // the percent sign + //{"%n", _DISCARDED}, // line feed + //{"%t", _DISCARDED}, // horizontal tab + {"sec", date_time_epoch_s}, + {"msec", date_time_epoch_ms}, + {"usec", date_time_epoch_us}, + {"msec_frac", _DISCARDED}, // milliseconds fraction + {"usec_frac", _DISCARDED}, // microseconds fraction + {"%a", _DISCARDED}, // abbreviated weekday name + {"%A", _DISCARDED}, // weekday name + {"%b", date_time_month_str}, + {"%B", date_time_month_str}, + {"%c", date_time_mcs}, + {"%C", _DISCARDED}, // year (first 2 digits, aka centuries) + {"%d", date_time_day}, + {"%D", date_time_mmddyy}, + {"%e", date_time_day}, + {"%F", date_time_yyyymmdd}, + {"%g", _DISCARDED}, // weel-based year (last 2 digits) + {"%G", _DISCARDED}, // week-based year, namely the year which contains the current week + {"%h", date_time_month_str}, + {"%H", date_time_hour}, + {"%I", _DISCARDED}, // hour (12h format) + {"%j", _DISCARDED}, // day of the year number + {"%k", date_time_hour}, // ?!? hour (24h format) !?! no documentation ?!? 
+ {"%l", _DISCARDED}, // hour (12h format) + {"%m", date_time_month}, + {"%M", date_time_minute}, + {"%p", _DISCARDED}, // AM or PM + {"%r", date_time_clock_12}, + {"%R", date_time_clock_short}, + {"%s", date_time_epoch_s}, + {"%S", date_time_second}, + {"%T", date_time_clock_24}, + {"%u", _DISCARDED}, // weekday number (1-7, Monday is 1) + {"%U", _DISCARDED}, // week of the year number, with the first Sunday as the first day of week one + {"%V", _DISCARDED}, // week of the year number + {"%w", _DISCARDED}, // weekday number (0-6, Sunday is 0) + {"%W", _DISCARDED}, // week of the year number, with the first Monday as the first day of week one + {"%x", date_time_mmddyy}, + {"%X", date_time_clock_24}, + {"%y", date_time_year_short}, + {"%Y", date_time_year}, + {"%z", _DISCARDED}, // timezone offset from UTC (1 minute=1, 1 hour=100) + {"%Z", _DISCARDED}}}, // timezone name abbreviation + {"T", { {"", time_taken_s}, // as s + {"s", time_taken_s}, + {"ms", time_taken_ms}, + {"us", time_taken_us}}}, // composed not in use {"C", {}}, {"e", {}}, {"L", {}}, {"n", {}}, {"o", {}}, - {"p", {{"", "NONE"}, // as canonical - {"canonical", "NONE"}, - {"local", "NONE"}, - {"remote", "NONE"}}}, - {"P", {{"", "NONE"}, // as pid - {"pid", "NONE"}, - {"tid", "NONE"}, - {"hextid", "NONE"}}}, + {"p", {{"", _DISCARDED}, // as canonical + {"canonical", _DISCARDED}, + {"local", _DISCARDED}, + {"remote", _DISCARDED}}}, + {"P", {{"", _DISCARDED}, // as pid + {"pid", _DISCARDED}, + {"tid", _DISCARDED}, + {"hextid", _DISCARDED}}}, {"^ti", {}}, {"^to", {}} }; // Access logs fields formats samples - const std::unordered_map APACHE_ALF_SAMPLES{ - {"NONE", "DISCARDED"}, - {"date_time_epoch_s", "946771199"}, - {"date_time_epoch_ms", "946771199000"}, - {"date_time_epoch_us", "946771199000000"}, - {"date_time_ncsa", "01/Jan/2000:23:59:59 +0000"}, - {"date_time_mcs", "Sat Jan 01 23:59:59 2000"}, - {"date_time_YYYYMMDD", "2000-01-01"}, - {"date_time_MMDDYY", "01/01/00"}, - {"date_time_year", "2000"}, - 
{"date_time_year_short", "00"}, - {"date_time_month_str", "January"}, - {"date_time_month", "01"}, - {"date_time_day", "01"}, - {"date_time_clock_12", "11:59:59 pm"}, - {"date_time_clock_24", "23:59:59"}, - {"date_time_clock_short", "23:59"}, - {"date_time_hour", "23"}, - {"date_time_minute", "59"}, - {"date_time_second", "59"}, - {"request_full", "GET /index.php?query=x HTTP/1.1"}, - {"request_protocol", "HTTP/1.1"}, - {"request_method", "GET"}, - {"request_uri", "/index.php"}, - {"request_query", "query=x"}, - {"response_code", "404"}, - {"bytes_sent", "1234"}, - {"bytes_received", "123"}, - {"time_taken_s", "1"}, - {"time_taken_ms", "1000"}, - {"time_taken_us", "1000000"}, - {"referer", "http://www.referrer.site"}, - {"cookie", "aCookie=abc123"}, - {"user_agent", "UserAgent/3.0 (Details stuff) Info/123"}, - {"client", "192.168.1.123"} }; + const samples_umap_t APACHE_ALF_SAMPLES{ + {_DISCARDED, R"(DISCARDED)"}, + {date_time_epoch_s, R"(946771199)"}, + {date_time_epoch_ms, R"(946771199000)"}, + {date_time_epoch_us, R"(946771199000000)"}, + {date_time_ncsa, R"(01/Jan/2000:23:59:59 +0000)"}, + {date_time_mcs, R"(Sat Jan 01 23:59:59 2000)"}, + {date_time_yyyymmdd, R"(2000-01-01)"}, + {date_time_mmddyy, R"(01/01/00)"}, + {date_time_year, R"(2000)"}, + {date_time_year_short, R"(00)"}, + {date_time_month_str, R"(January)"}, + {date_time_month, R"(01)"}, + {date_time_day, R"(01)"}, + {date_time_clock_12, R"(11:59:59 pm)"}, + {date_time_clock_24, R"(23:59:59)"}, + {date_time_clock_short, R"(23:59)"}, + {date_time_hour, R"(23)"}, + {date_time_minute, R"(59)"}, + {date_time_second, R"(59)"}, + {request_full, R"(GET /index.php?query=x HTTP/1.1)"}, + {request_protocol, R"(HTTP/1.1)"}, + {request_method, R"(GET)"}, + {request_uri, R"(/index.php)"}, + {request_query, R"(query=x)"}, + {response_code, R"(404)"}, + {bytes_sent, R"(1234)"}, + {bytes_received, R"(123)"}, + {time_taken_s, R"(1)"}, + {time_taken_ms, R"(1000)"}, + {time_taken_us, R"(1000000)"}, + {referer, 
R"(http://www.referrer.site)"}, + {cookie, R"(aCookie=abc123)"}, + {user_agent, R"(UserAgent/3.0 (Details stuff) Info/123)"}, + {client, R"(192.168.1.123)"} }; /////////////// //// NGINX //// - //!< Access logs fields formats - const std::unordered_map NGINX_ALF{ - {"remote_addr", "client"}, - {"realip_remote_addr", "client"}, - {"time_local", "date_time_ncsa"}, - {"time_iso8601", "date_time_iso"}, - {"date_gmt", "date_time_gmt"}, - {"msec", "date_time_epoch_s.ms"}, - {"request", "request_full"}, - {"server_protocol", "request_protocol"}, - {"request_method", "request_method"}, - {"request_uri", "request_uri_query"}, - {"uri", "request_uri"}, - {"query_string", "request_query"}, - {"status", "response_code"}, - {"bytes_sent", "bytes_sent"}, - {"request_length", "bytes_received"}, - {"request_time", "time_taken_s.ms"}, - {"http_referer", "referer"}, - {"cookie_", "cookie"}, - {"http_user_agent", "user_agent"}, + //! Access logs fields formats + const simple_fields_umap_t NGINX_ALF{ + {"remote_addr", client}, + {"realip_remote_addr", client}, + {"time_local", date_time_ncsa}, + {"time_iso8601", date_time_iso}, + {"date_gmt", date_time_gmt}, + {"msec", date_time_epoch_s_ms}, + {"request", request_full}, + {"server_protocol", request_protocol}, + {"request_method", request_method}, + {"request_uri", request_uri_query}, + {"uri", request_uri}, + {"query_string", request_query}, + {"status", response_code}, + {"bytes_sent", bytes_sent}, + {"request_length", bytes_received}, + {"request_time", time_taken_s_ms}, + {"http_referer", referer}, + {"cookie_", cookie}, + {"http_user_agent", user_agent}, // not in use, will be discarded - {"ancient_browser", "NONE"}, - {"arg_", "NONE"}, - {"args", "NONE"}, - {"binary_remote_addr", "NONE"}, - {"body_bytes_sent", "NONE"}, - {"connection", "NONE"}, - {"connection_requests", "NONE"}, - {"connections_active", "NONE"}, - {"connections_reading", "NONE"}, - {"connections_waiting", "NONE"}, - {"connections_writing", "NONE"}, - 
{"content_length", "NONE"}, - {"content_type", "NONE"}, - {"date_local", "NONE"}, - {"document_root", "NONE"}, - {"document_uri", "NONE"}, - {"fastcgi_path_info", "NONE"}, - {"fastcgi_script_name", "NONE"}, - {"geoip_area_code", "NONE"}, - {"geoip_city", "NONE"}, - {"geoip_city_continent_code", "NONE"}, - {"geoip_city_country_code", "NONE"}, - {"geoip_city_country_code3", "NONE"}, - {"geoip_city_country_name", "NONE"}, - {"geoip_country_code", "NONE"}, - {"geoip_country_code3", "NONE"}, - {"geoip_country_name", "NONE"}, - {"geoip_dma_code", "NONE"}, - {"geoip_latitude", "NONE"}, - {"geoip_longitude", "NONE"}, - {"geoip_org", "NONE"}, - {"geoip_postal_code", "NONE"}, - {"geoip_region", "NONE"}, - {"geoip_region_name", "NONE"}, - {"gzip_ratio", "NONE"}, - {"host", "NONE"}, - {"hostname", "NONE"}, - {"http2", "NONE"}, - {"http_", "NONE"}, - {"https", "NONE"}, - {"invalid_referer", "NONE"}, - {"is_args", "NONE"}, - {"limit_rate", "NONE"}, - {"memcached_key", "NONE"}, - {"modern_browser", "NONE"}, - {"msie", "NONE"}, - {"nginx_version", "NONE"}, - {"pid", "NONE"}, - {"pipe", "NONE"}, - {"proxy_add_x_forwarded_for", "NONE"}, - {"proxy_host", "NONE"}, - {"proxy_port", "NONE"}, - {"proxy_protocol_addr", "NONE"}, - {"proxy_protocol_port", "NONE"}, - {"realip_remote_port", "NONE"}, - {"realpath_root", "NONE"}, - {"remote_port", "NONE"}, - {"remote_user", "NONE"}, - {"request_body", "NONE"}, - {"request_body_file", "NONE"}, - {"request_completion", "NONE"}, - {"request_filename", "NONE"}, - {"request_id", "NONE"}, - {"scheme", "NONE"}, - {"secure_link", "NONE"}, - {"secure_link_expires", "NONE"}, - {"sent_http_", "NONE"}, - {"server_addr", "NONE"}, - {"server_name", "NONE"}, - {"server_port", "NONE"}, - {"session_log_binary_id", "NONE"}, - {"session_log_id", "NONE"}, - {"slice_range", "NONE"}, - {"spdy", "NONE"}, - {"spdy_request_priority", "NONE"}, - {"ssl_cipher", "NONE"}, - {"ssl_client_cert", "NONE"}, - {"ssl_client_fingerprint", "NONE"}, - {"ssl_client_i_dn", "NONE"}, - 
{"ssl_client_raw_cert", "NONE"}, - {"ssl_client_s_dn", "NONE"}, - {"ssl_client_serial", "NONE"}, - {"ssl_client_verify", "NONE"}, - {"ssl_protocol", "NONE"}, - {"ssl_server_name", "NONE"}, - {"ssl_session_id", "NONE"}, - {"ssl_session_reused", "NONE"}, - {"tcpinfo_rtt", "NONE"}, - {"tcpinfo_rttvar", "NONE"}, - {"tcpinfo_snd_cwnd", "NONE"}, - {"tcpinfo_rcv_space", "NONE"}, - {"uid_got", "NONE"}, - {"uid_reset", "NONE"}, - {"uid_set", "NONE"}, - {"upstream_addr", "NONE"}, - {"upstream_cache_status", "NONE"}, - {"upstream_connect_time", "NONE"}, - {"upstream_cookie_", "NONE"}, - {"upstream_header_time", "NONE"}, - {"upstream_http_", "NONE"}, - {"upstream_response_length", "NONE"}, - {"upstream_response_time", "NONE"}, - {"upstream_status", "NONE"} }; + {"ancient_browser", _DISCARDED}, + {"arg_", _DISCARDED}, + {"args", _DISCARDED}, + {"binary_remote_addr", _DISCARDED}, + {"body_bytes_sent", _DISCARDED}, + {"connection", _DISCARDED}, + {"connection_requests", _DISCARDED}, + {"connections_active", _DISCARDED}, + {"connections_reading", _DISCARDED}, + {"connections_waiting", _DISCARDED}, + {"connections_writing", _DISCARDED}, + {"content_length", _DISCARDED}, + {"content_type", _DISCARDED}, + {"date_local", _DISCARDED}, + {"document_root", _DISCARDED}, + {"document_uri", _DISCARDED}, + {"fastcgi_path_info", _DISCARDED}, + {"fastcgi_script_name", _DISCARDED}, + {"geoip_area_code", _DISCARDED}, + {"geoip_city", _DISCARDED}, + {"geoip_city_continent_code", _DISCARDED}, + {"geoip_city_country_code", _DISCARDED}, + {"geoip_city_country_code3", _DISCARDED}, + {"geoip_city_country_name", _DISCARDED}, + {"geoip_country_code", _DISCARDED}, + {"geoip_country_code3", _DISCARDED}, + {"geoip_country_name", _DISCARDED}, + {"geoip_dma_code", _DISCARDED}, + {"geoip_latitude", _DISCARDED}, + {"geoip_longitude", _DISCARDED}, + {"geoip_org", _DISCARDED}, + {"geoip_postal_code", _DISCARDED}, + {"geoip_region", _DISCARDED}, + {"geoip_region_name", _DISCARDED}, + {"gzip_ratio", _DISCARDED}, + 
{"host", _DISCARDED}, + {"hostname", _DISCARDED}, + {"http2", _DISCARDED}, + {"http_", _DISCARDED}, + {"https", _DISCARDED}, + {"invalid_referer", _DISCARDED}, + {"is_args", _DISCARDED}, + {"limit_rate", _DISCARDED}, + {"memcached_key", _DISCARDED}, + {"modern_browser", _DISCARDED}, + {"msie", _DISCARDED}, + {"nginx_version", _DISCARDED}, + {"pid", _DISCARDED}, + {"pipe", _DISCARDED}, + {"proxy_add_x_forwarded_for", _DISCARDED}, + {"proxy_host", _DISCARDED}, + {"proxy_port", _DISCARDED}, + {"proxy_protocol_addr", _DISCARDED}, + {"proxy_protocol_port", _DISCARDED}, + {"realip_remote_port", _DISCARDED}, + {"realpath_root", _DISCARDED}, + {"remote_port", _DISCARDED}, + {"remote_user", _DISCARDED}, + {"request_body", _DISCARDED}, + {"request_body_file", _DISCARDED}, + {"request_completion", _DISCARDED}, + {"request_filename", _DISCARDED}, + {"request_id", _DISCARDED}, + {"scheme", _DISCARDED}, + {"secure_link", _DISCARDED}, + {"secure_link_expires", _DISCARDED}, + {"sent_http_", _DISCARDED}, + {"server_addr", _DISCARDED}, + {"server_name", _DISCARDED}, + {"server_port", _DISCARDED}, + {"session_log_binary_id", _DISCARDED}, + {"session_log_id", _DISCARDED}, + {"slice_range", _DISCARDED}, + {"spdy", _DISCARDED}, + {"spdy_request_priority", _DISCARDED}, + {"ssl_cipher", _DISCARDED}, + {"ssl_client_cert", _DISCARDED}, + {"ssl_client_fingerprint", _DISCARDED}, + {"ssl_client_i_dn", _DISCARDED}, + {"ssl_client_raw_cert", _DISCARDED}, + {"ssl_client_s_dn", _DISCARDED}, + {"ssl_client_serial", _DISCARDED}, + {"ssl_client_verify", _DISCARDED}, + {"ssl_protocol", _DISCARDED}, + {"ssl_server_name", _DISCARDED}, + {"ssl_session_id", _DISCARDED}, + {"ssl_session_reused", _DISCARDED}, + {"tcpinfo_rtt", _DISCARDED}, + {"tcpinfo_rttvar", _DISCARDED}, + {"tcpinfo_snd_cwnd", _DISCARDED}, + {"tcpinfo_rcv_space", _DISCARDED}, + {"uid_got", _DISCARDED}, + {"uid_reset", _DISCARDED}, + {"uid_set", _DISCARDED}, + {"upstream_addr", _DISCARDED}, + {"upstream_cache_status", _DISCARDED}, + 
{"upstream_connect_time", _DISCARDED}, + {"upstream_cookie_", _DISCARDED}, + {"upstream_header_time", _DISCARDED}, + {"upstream_http_", _DISCARDED}, + {"upstream_response_length", _DISCARDED}, + {"upstream_response_time", _DISCARDED}, + {"upstream_status", _DISCARDED} }; // Access logs fields formats samples - const std::unordered_map NGINX_ALF_SAMPLES{ - {"NONE", "DISCARDED"}, - {"date_time_epoch_s.ms", "946771199.000"}, - {"date_time_ncsa", "01/Jan/2000:23:59:59 +0000"}, - {"date_time_iso", "2000-01-01T23:59:59+00:00"}, - {"date_time_gmt", "Saturday, 01-Jan-2000 23:59:59 UTC"}, - {"request_full", "GET /index.php?query=x HTTP/1.1"}, - {"request_protocol", "HTTP/1.1"}, - {"request_method", "GET"}, - {"request_uri_query", "/index.php?query=x"}, - {"request_uri", "/index.php"}, - {"request_query", "query=x"}, - {"response_code", "404"}, - {"bytes_sent", "1234"}, - {"bytes_received", "123"}, - {"time_taken_s.ms", "1.000"}, - {"referer", "http://www.referrer.site"}, - {"cookie", "aCookie=abc123"}, - {"user_agent", "UserAgent/3.0 (Details stuff) Info/123"}, - {"client", "192.168.1.123"} }; + const samples_umap_t NGINX_ALF_SAMPLES{ + {_DISCARDED, R"(DISCARDED)"}, + {date_time_epoch_s_ms, R"(946771199.000)"}, + {date_time_ncsa, R"(01/Jan/2000:23:59:59 +0000)"}, + {date_time_iso, R"(2000-01-01T23:59:59+00:00)"}, + {date_time_gmt, R"(Saturday, 01-Jan-2000 23:59:59 UTC)"}, + {request_full, R"(GET /index.php?query=x HTTP/1.1)"}, + {request_protocol, R"(HTTP/1.1)"}, + {request_method, R"(GET)"}, + {request_uri_query, R"(/index.php?query=x)"}, + {request_uri, R"(/index.php)"}, + {request_query, R"(query=x)"}, + {response_code, R"(404)"}, + {bytes_sent, R"(1234)"}, + {bytes_received, R"(123)"}, + {time_taken_s_ms, R"(1.000)"}, + {referer, R"(http://www.referrer.site)"}, + {cookie, R"(aCookie=abc123)"}, + {user_agent, R"(UserAgent/3.0 (Details stuff) Info/123)"}, + {client, R"(192.168.1.123)"} }; ///////////// //// IIS //// - //!< Access logs fields formats (W3C) - const 
std::unordered_map IIS_ALF{ - {"date", "date_time_utc_d"}, - {"time", "date_time_utc_t"}, - {"cs-version", "request_protocol"}, - {"cs-method", "request_method"}, - {"cs-uri-stem", "request_uri"}, - {"cs-uri-query", "request_query"}, - {"sc-status", "response_code"}, - {"sc-bytes", "bytes_sent"}, - {"cs-bytes", "bytes_received"}, - {"time-taken", "time_taken_ms"}, - {"cs(Referer)", "referer"}, - {"cs(Cookie)", "cookie"}, - {"cs(User-Agent)", "user_agent"}, - {"c-ip", "client"}, + //! Access logs fields formats (W3C) + const simple_fields_umap_t IIS_ALF{ + {"date", date_time_utc_d}, + {"time", date_time_utc_t}, + {"cs-version", request_protocol}, + {"cs-method", request_method}, + {"cs-uri-stem", request_uri}, + {"cs-uri-query", request_query}, + {"sc-status", response_code}, + {"sc-bytes", bytes_sent}, + {"cs-bytes", bytes_received}, + {"time-taken", time_taken_ms}, + {"cs(Referer)", referer}, + {"cs(Cookie)", cookie}, + {"cs(User-Agent)", user_agent}, + {"c-ip", client}, // not in use, will be discarded - {"s-sitename", "NONE"}, - {"s-computername", "NONE"}, - {"s-ip", "NONE"}, - {"s-port", "NONE"}, - {"cs-username", "NONE"}, - {"cs-host", "NONE"}, - {"sc-substatus", "NONE"}, - {"sc-win32-status", "NONE"}, - {"streamid", "NONE"} }; + {"s-sitename", _DISCARDED}, + {"s-computername", _DISCARDED}, + {"s-ip", _DISCARDED}, + {"s-port", _DISCARDED}, + {"cs-username", _DISCARDED}, + {"cs-host", _DISCARDED}, + {"sc-substatus", _DISCARDED}, + {"sc-win32-status", _DISCARDED}, + {"streamid", _DISCARDED} }; // Access logs fields formats samples - const std::unordered_map IIS_ALF_SAMPLES{ - {"NONE", "DISCARDED"}, - {"date_time_ncsa", "01/Jan/2000:23:59:59 +0000"}, - {"date_time_MDYYYY", "1/1/2000"}, - {"date_time_utc_d", "2000-01-01"}, - {"date_time_utc_t", "23:59:59"}, - {"request_full", "GET /index.php?query=x HTTP/1.1"}, - {"request_protocol", "HTTP/1.1"}, - {"request_method", "GET"}, - {"request_uri", "/index.php"}, - {"request_query", "query=x"}, - {"response_code", 
"404"}, - {"bytes_sent", "1234"}, - {"bytes_received", "123"}, - {"time_taken_ms", "1000"}, - {"referer", "http://www.referrer.site"}, - {"cookie", "aCookie=abc123"}, - {"user_agent", "UserAgent/3.0+(Details+stuff)+Info/123"}, - {"client", "192.168.1.123"} }; + const samples_umap_t IIS_ALF_SAMPLES{ + {_DISCARDED, R"(DISCARDED)"}, + {date_time_ncsa, R"(01/Jan/2000:23:59:59 +0000)"}, + {date_time_mdyyyy, R"(1/1/2000)"}, + {date_time_utc_d, R"(2000-01-01)"}, + {date_time_utc_t, R"(23:59:59)"}, + {request_full, R"(GET /index.php?query=x HTTP/1.1)"}, + {request_protocol, R"(HTTP/1.1)"}, + {request_method, R"(GET)"}, + {request_uri, R"(/index.php)"}, + {request_query, R"(query=x)"}, + {response_code, R"(404)"}, + {bytes_sent, R"(1234)"}, + {bytes_received, R"(123)"}, + {time_taken_ms, R"(1000)"}, + {referer, R"(http://www.referrer.site)"}, + {cookie, R"(aCookie=abc123)"}, + {user_agent, R"(UserAgent/3.0+(Details+stuff)+Info/123)"}, + {client, R"(192.168.1.123)"} }; }; diff --git a/logdoctor/modules/craplog/modules/lib.h b/logdoctor/modules/craplog/modules/lib.h index 36dddd47..cf5315a4 100644 --- a/logdoctor/modules/craplog/modules/lib.h +++ b/logdoctor/modules/craplog/modules/lib.h @@ -36,30 +36,31 @@ struct LogFile final LogFile& operator=(LogFile&& other) noexcept = default; LogFile(const LogFile& other) noexcept = default; LogFile& operator=(const LogFile& other) noexcept = default; - //! Wheter the file has been selected to be used or not + + //! Returns whether the file has been selected to be used or not inline bool isSelected() const noexcept - { return this->selected; } + { return this->selected; } //! Sets the file as selected inline void setSelected() noexcept - { this->selected |= true; } + { this->selected |= true; } //! Sets the file as unselected inline void setUnselected() noexcept - { this->selected &= false; } - //! Wheter the file has been used already or not + { this->selected &= false; } + //! 
Returns whether the file has been used already or not inline bool hasBeenUsed() const noexcept - { return this->used_already; } - //! The size of the file + { return this->used_already; } + //! Returns the size of the file inline size_t size() const noexcept - { return this->size_; } - //! The name of the file, to be displayed in the list + { return this->size_; } + //! Returns the name of the file, to be displayed in the list inline const QString& name() const noexcept - { return this->name_; } - //! The sha256 hash of the content + { return this->name_; } + //! Returns the sha256 hash of the content inline const std::string& hash() const noexcept - { return this->hash_; } - //! The path of the file, including the file name + { return this->hash_; } + //! Returns the path of the file, including the file name inline const std::string& path() const noexcept - { return this->path_; } + { return this->path_; } private: bool selected; bool used_already; @@ -149,16 +150,16 @@ enum LogsFormatField : uint32_t { //! 
Holds informations about a log format struct LogsFormat final { - explicit LogsFormat() noexcept = default; - explicit LogsFormat - (const std::string& str,std::string&& itl,std::string&& fnl,std::vector&& seps,std::vector&& flds,const unsigned nl) noexcept - :string{str},initial{std::move(itl)},final{std::move(fnl)},separators{std::move(seps)},fields{std::move(flds)},new_lines{nl}{} std::string string; //!< The logs format string std::string initial; //!< The initial separator std::string final; //!< The final separator std::vector separators; //!< The separators in the middle - std::vector fields; //!< The fields - size_t new_lines; //!< The number of new lines + std::vector fields; //!< The logged fields + size_t new_lines; //!< The number of new lines within the string + + explicit LogsFormat() noexcept = default; + explicit LogsFormat(const std::string& str,std::string&& itl,std::string&& fnl,std::vector&& seps,std::vector&& flds,const size_t nl) noexcept + :string{str},initial{std::move(itl)},final{std::move(fnl)},separators{std::move(seps)},fields{std::move(flds)},new_lines{nl}{} }; diff --git a/logdoctor/modules/craplog/modules/workers/impl/loglinedata.cpp b/logdoctor/modules/craplog/modules/workers/impl/loglinedata.cpp index 7fe99d8e..1c4ae0c7 100644 --- a/logdoctor/modules/craplog/modules/workers/impl/loglinedata.cpp +++ b/logdoctor/modules/craplog/modules/workers/impl/loglinedata.cpp @@ -13,7 +13,8 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) { - bool add_pm{false}; + using F = LogsFormatField; + size_t start, stop{logs_format.initial.size()}, sep_i{0}; const size_t line_size{ line.size()-1ul }, @@ -46,11 +47,11 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) // should be unreachable throw ("Unexpected section reached"); } - const size_t sep_size = sep.size(); + const size_t sep_size{ sep.size() }; // get the field - const std::string& fld = logs_format.fields.at( sep_i ); - if ( 
fld != "NONE" ) { + const F fld{ logs_format.fields.at( sep_i ) }; + if ( _DISCARDED | fld ) { // only parse the considered fields std::string fld_str{ line.substr(start, stop-start) }; @@ -59,34 +60,30 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) // not the last separator, check for mistakes size_t aux_stop = stop; - if ( sep == " " ) { + if ( (_MAY_HAVE_SPACES & fld) && sep == " " ) { // check the fields with whitespace-separated values - const size_t n{ fld == "request_full" ? 2ul - : fld == "date_time_ncsa" ? 1ul - : fld == "date_time_mcs" ? 4ul - : fld == "date_time_gmt" ? 3ul - : 0ul }; - if ( n > 0ul ) { - size_t c{ StringOps::count( fld_str, ' ' ) }; - if ( c < n ) { - // loop until the correct number of whitespaces is reached - size_t aux_start = line[stop+1ul] == ' ' ? stop : stop+1ul; - while ( c < n ) { - aux_stop = line.find( sep, aux_start ); - if ( aux_stop == std::string::npos ) { - // not found - throw LogParserException( "Separator not found", std::string{sep} ); - } - aux_start = aux_stop+1ul; - ++c; + const size_t n{ fld == request_full ? 2ul + : fld & _COUNT_SPACES }; + + size_t c{ StringOps::count( fld_str, ' ' ) }; + if ( c < n ) { + // loop until the correct number of whitespaces is reached + size_t aux_start = line[stop+1ul] == ' ' ? 
stop : stop+1ul; + while ( c < n ) { + aux_stop = line.find( sep, aux_start ); + if ( aux_stop == std::string::npos ) { + // not found + throw LogParserException( "Separator not found", std::string{sep} ); } - } else if ( c > n ) [[unlikely]] { - // should be unreachable - throw LogParserException( "Unexpected count for separator", std::string{sep} ); + aux_start = aux_stop+1ul; + ++c; } + } else if ( c > n ) [[unlikely]] { + // should be unreachable + throw LogParserException( "Unexpected count for separator", std::string{sep} ); } - } else if ( sep.front() == '"' && fld == "user_agent" ) { + } else if ( sep.front() == '"' && fld == F::user_agent ) { // atm the only support is for escaped quotes if ( fld_str.back() == '\\' ) { // the found separator is not actually the separator but is part of the user-agent string @@ -115,20 +112,19 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) if ( ! fld_str.empty() ) { // process the field - const int& fld_id{ this->field2id.at(fld) }; - if ( fld_id > 0 ) { + if ( _NO_PARSE_NEEDED & fld ) { // no need to process, append directly if non-empty - if ( fld_id == 13 && fld_str == "-" ) { + if ( fld == request_query && fld_str == "-" ) { continue; } - this->data( fld_id ) = FieldData( std::move(fld_str) ); + this->data( fld ) = FieldData( std::move(fld_str) ); } else { // process the field // process the date to get year, month, day, hour and minute - if ( StringOps::startsWith( fld, "date_time" ) ) { - auto dt = DateTimeOps::processDateTime( fld_str, fld.substr( 10 ) ); // cut away the "date_time_" part + if ( _DATE_TIME & fld ) { + auto dt = DateTimeOps::processDateTime( fld_str, fld ); // the field identifier selects the date-time format if ( auto& year{ dt.at(0) }; !year.empty() ) { // year this->year = FieldData( std::move(year) ); @@ -143,11 +139,7 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) } if ( auto& hour{ dt.at(3) }; !hour.empty() ) { // hour - if ( hour == "PM" ) 
{ - add_pm |= true; - } else { - this->hour = FieldData( std::move(hour) ); - } + this->hour = FieldData( std::move(hour) ); } if ( auto& minute{ dt.at(4) }; !minute.empty() ) { // minute @@ -159,8 +151,21 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) } + // process the time taken to convert to milliseconds + } else if ( _TIME_TAKEN & fld ) { + float t{ std::stof( fld_str ) }; + if ( fld == time_taken_us ) { + // from microseconds + t /= 1000.0f; + } else if ( fld & time_taken_s ) { + // from seconds or seconds.milliseconds + t *= 1000.0f; + } + this->time_taken = FieldData( std::to_string( static_cast( t ) ) ); + + // process the request to get the protocol, method, resource and query - } else if ( fld == "request_full" ) { + } else if ( fld == request_full ) { // check whether the request string has the proper number of spaces const size_t n_spaces{ StringOps::count( fld_str, ' ' ) }; @@ -212,7 +217,7 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) // process the request to get uri and query - } else if ( fld == "request_uri_query" ) { + } else if ( fld == request_uri_query ) { // search for the query std::string uri, query; const size_t aux_{ fld_str.find( '?' ) }; @@ -231,24 +236,10 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) } - // process the time taken to convert to milliseconds - } else if ( fld.rfind("time_taken_",0ul) == 0ul ) { - float t{ std::stof( fld_str ) }; - const std::string u{ fld.substr( 11ul ) }; - if ( u == "us" ) { - // from microseconds - t /= 1000.0f; - } else if ( u == "s" || u == "s.ms" ) { - // from seconds - t *= 1000.0f; - } - this->time_taken = FieldData( std::to_string( static_cast( t ) ) ); - - // something went wrong } else { // hmmm.. no... 
- throw LogParserException( "Unexpected LogField", fld ); + throw LogParserException( "Unexpected LogFormatField", std::to_string(fld) ); } } } @@ -263,15 +254,6 @@ LogLineData::LogLineData(const std::string& line, const LogsFormat& logs_format) } } - - if ( add_pm ) { - try { - // add +12 hours for PM - this->hour = FieldData( std::to_string( 12 + std::stoi(*this->hour) ) ); - } catch (...) { - // no hour data - } - } } void LogLineData::storeUriQuery(std::string&& str) noexcept @@ -532,46 +514,48 @@ size_t LogLineData::size() const noexcept + this->cookie; } -FieldData& LogLineData::data(const int& id) +FieldData& LogLineData::data(const LogsFormatField id) { + using F = LogsFormatField; + switch (id) { - case 1: + case F::date_time_year: return this->year; - case 2: + case F::date_time_month: return this->month; - case 3: + case F::date_time_day: return this->day; - case 4: + case F::date_time_hour: return this->hour; - case 5: + case F::date_time_minute: return this->minute; - case 6: + case F::date_time_second: return this->second; - case 10: + case F::request_protocol: return this->protocol; - case 11: + case F::request_method: return this->method; - case 12: + case F::request_uri: return this->uri; - case 13: + case F::request_query: return this->query; - case 14: + case F::response_code: return this->response_code; - case 15: + case F::time_taken_s: return this->time_taken; - case 16: + case F::bytes_sent: return this->bytes_sent; - case 17: + case F::bytes_received: return this->bytes_received; - case 18: + case F::referer: return this->referrer; - case 20: + case F::client: return this->client; - case 21: + case F::user_agent: return this->user_agent; - case 22: + case F::cookie: return this->cookie; default: - throw LogParserException( "Unexpected LogField ID", std::to_string(id) ); + throw LogParserException( "Unexpected LogFormatField", std::to_string(id) ); } } diff --git a/logdoctor/modules/craplog/modules/workers/lib.h 
b/logdoctor/modules/craplog/modules/workers/lib.h index 0f96fc54..99ce437e 100644 --- a/logdoctor/modules/craplog/modules/workers/lib.h +++ b/logdoctor/modules/craplog/modules/workers/lib.h @@ -5,6 +5,8 @@ #include +enum LogsFormatField : uint32_t; + struct LogsFormat; @@ -44,13 +46,17 @@ struct FieldData final FieldData& operator=(FieldData&& rhs) noexcept = default; Q_DISABLE_COPY(FieldData) + //! Returns whether the field contains data inline operator bool() const noexcept - { return this->is_set; } + { return this->is_set; } + //! Returns a reference to the field's data inline const std::string& operator *() const noexcept - { return this->data; } - + { return this->data; } + //! Returns the sum of the field's data size with another field's data size inline size_t operator +(const FieldData& rhs) const noexcept - { return this->data.size() + rhs.data.size(); } + { return this->data.size() + rhs.data.size(); } + //! Returns the sum of the field's data size with the given size + friend inline size_t operator +(const size_t lhs, const FieldData& rhs) noexcept; private: bool is_set; @@ -60,7 +66,7 @@ private: inline size_t operator +(const size_t lhs, const FieldData& rhs) noexcept { - return lhs + (*rhs).size(); + return lhs + rhs.data.size(); } @@ -77,87 +83,41 @@ struct LogLineData final LogLineData& operator=(LogLineData&& rhs) noexcept = delete; Q_DISABLE_COPY(LogLineData) + //! 
Returns the total size of all the fields' data size_t size() const noexcept; // date and time - FieldData year; // 1 - FieldData month; // 2 - FieldData day; // 3 - FieldData hour; // 4 - FieldData minute; // 5 - FieldData second; // 6 + FieldData year; + FieldData month; + FieldData day; + FieldData hour; + FieldData minute; + FieldData second; // request - FieldData protocol; // 10 - FieldData method; // 11 - FieldData uri; // 12 - FieldData query; // 13 + FieldData protocol; + FieldData method; + FieldData uri; + FieldData query; // server - FieldData response_code; // 14 - FieldData time_taken; // 15 - FieldData bytes_sent; // 16 - FieldData bytes_received; // 17 + FieldData response_code; + FieldData time_taken; + FieldData bytes_sent; + FieldData bytes_received; // client - FieldData client; // 20 - FieldData cookie; // 22 - FieldData user_agent; // 21 - FieldData referrer; // 18 + FieldData client; + FieldData cookie; + FieldData user_agent; + FieldData referrer; private: - FieldData& data(const int& id); + //! 
Returns a reference to the field data corresponding to the given field identifier + FieldData& data(const LogsFormatField id); void storeUriQuery(std::string&& str) noexcept; void storeMalformedRequestOneSpace(std::string&& str) noexcept; void storeMalformedRequestMultiSpace(std::string&& str) noexcept; - inline static const std::unordered_map field2id{ - // date-time - {"date_time_year", 1}, - {"date_time_month", 2}, - {"date_time_day", 3}, - {"date_time_hour", 4}, - {"date_time_minute", 5}, - {"date_time_second", 6}, - {"date_time_ncsa", 0}, - {"date_time_iso", 0}, - {"date_time_mcs", 0}, - {"date_time_gmt", 0}, - {"date_time_utc_d", 0}, - {"date_time_utc_t", 0}, - {"date_time_epoch_s", 0}, - {"date_time_epoch_s.ms", 0}, - {"date_time_epoch_ms", 0}, - {"date_time_epoch_us", 0}, - {"date_time_YYYYMMDD", 0}, - {"date_time_MMDDYY", 0}, - {"date_time_MDYYYY", 0}, - {"date_time_year_short", 0}, - {"date_time_month_str", 0}, - {"date_time_clock_12", 0}, - {"date_time_clock_24", 0}, - {"date_time_clock_short", 0}, - // request - {"request_protocol", 10}, - {"request_method", 11}, - {"request_uri", 12}, - {"request_query", 13}, - {"response_code", 14}, - {"request_full", 0}, - // performance - {"time_taken_ms", 15}, - {"time_taken_us", 0}, - {"time_taken_s.ms", 0}, - {"time_taken_s", 0}, - {"bytes_sent", 16}, - {"bytes_received", 17}, - // referer - {"referer", 18}, - // client data - {"client", 20}, - {"user_agent", 21}, - {"cookie", 22} - }; - inline static const std::vector valid_methods{ "GET", "POST", diff --git a/logdoctor/tests/white_box.cpp b/logdoctor/tests/white_box.cpp index 468deff6..e03afa7d 100644 --- a/logdoctor/tests/white_box.cpp +++ b/logdoctor/tests/white_box.cpp @@ -392,39 +392,39 @@ void testCraplogModules() const std::string e_str{ std::to_string( e ) }; const std::string epochs[4]{ e_str, e_str+".000", e_str+"000", e_str+"000000" }; std::vector target{"2000","01","01","23","59","59"}; - assert( DateTimeOps::processDateTime("[01/Jan/2000:23:59:59 
+0000]", "ncsa") == target ); - assert( DateTimeOps::processDateTime("Sat Jan 01 23:59:59 2000", "mcs") == target ); - assert( DateTimeOps::processDateTime("Saturday, 01-Jan-2000 23:59:59 UTC", "gmt") == target ); - assert( DateTimeOps::processDateTime("2000-01-01T23:59:59+00:00", "iso") == target ); - assert( DateTimeOps::processDateTime(epochs[0], "epoch_s") == target ); - assert( DateTimeOps::processDateTime(epochs[1], "epoch_s.ms") == target ); - assert( DateTimeOps::processDateTime(epochs[2], "epoch_ms") == target ); - assert( DateTimeOps::processDateTime(epochs[3], "epoch_us") == target ); + assert( DateTimeOps::processDateTime("[01/Jan/2000:23:59:59 +0000]", date_time_ncsa) == target ); + assert( DateTimeOps::processDateTime("Sat Jan 01 23:59:59 2000", date_time_mcs) == target ); + assert( DateTimeOps::processDateTime("Saturday, 01-Jan-2000 23:59:59 UTC", date_time_gmt) == target ); + assert( DateTimeOps::processDateTime("2000-01-01T23:59:59+00:00", date_time_iso) == target ); + assert( DateTimeOps::processDateTime(epochs[0], date_time_epoch_s) == target ); + assert( DateTimeOps::processDateTime(epochs[1], date_time_epoch_s_ms) == target ); + assert( DateTimeOps::processDateTime(epochs[2], date_time_epoch_ms) == target ); + assert( DateTimeOps::processDateTime(epochs[3], date_time_epoch_us) == target ); target = {"2000","01","01","","",""}; - assert( DateTimeOps::processDateTime("2000-01-01", "utc_d") == target ); - assert( DateTimeOps::processDateTime("2000-01-01", "YYYYMMDD") == target ); - assert( DateTimeOps::processDateTime("01/01/00", "MMDDYY") == target ); - assert( DateTimeOps::processDateTime("1/1/2000", "MDYYYY") == target ); + assert( DateTimeOps::processDateTime("2000-01-01", date_time_utc_d) == target ); + assert( DateTimeOps::processDateTime("2000-01-01", date_time_yyyymmdd) == target ); + assert( DateTimeOps::processDateTime("01/01/00", date_time_mmddyy) == target ); + assert( DateTimeOps::processDateTime("1/1/2000", date_time_mdyyyy) == 
target ); target = {"","","","23","59","59"}; - assert( DateTimeOps::processDateTime("23:59:59", "utc_t") == target ); - assert( DateTimeOps::processDateTime("11:59:59 pm", "clock_12") == target ); - assert( DateTimeOps::processDateTime("23:59:59", "clock_24") == target ); + assert( DateTimeOps::processDateTime("23:59:59", date_time_utc_t) == target ); + assert( DateTimeOps::processDateTime("11:59:59 pm", date_time_clock_12) == target ); + assert( DateTimeOps::processDateTime("23:59:59", date_time_clock_24) == target ); target = {"","","","23","59",""}; - assert( DateTimeOps::processDateTime("23:59", "clock_short") == target ); - target = {"","","","PM","",""}; - assert( DateTimeOps::processDateTime("pm", "clock_meridian") == target ); + assert( DateTimeOps::processDateTime("23:59", date_time_clock_short) == target ); + /*target = {"","","","PM","",""}; + assert( DateTimeOps::processDateTime("pm", date_time_clock_meridian) == target );*/ target = {"2000","","","","",""}; - assert( DateTimeOps::processDateTime("2000", "year") == target ); + assert( DateTimeOps::processDateTime("2000", date_time_year) == target ); target = {"","01","","","",""}; - assert( DateTimeOps::processDateTime("01", "month") == target ); + assert( DateTimeOps::processDateTime("01", date_time_month) == target ); target = {"","","01","","",""}; - assert( DateTimeOps::processDateTime("01", "day") == target ); + assert( DateTimeOps::processDateTime("01", date_time_day) == target ); target = {"","","","23","",""}; - assert( DateTimeOps::processDateTime("23", "hour") == target ); + assert( DateTimeOps::processDateTime("23", date_time_hour) == target ); target = {"","","","","59",""}; - assert( DateTimeOps::processDateTime("59", "minute") == target ); + assert( DateTimeOps::processDateTime("59", date_time_minute) == target ); target = {"","","","","","59"}; - assert( DateTimeOps::processDateTime("59", "second") == target ); + assert( DateTimeOps::processDateTime("59", date_time_second) == target ); } 
T_PRINT("DateTimeOps::processDateTime"); @@ -435,11 +435,11 @@ void testCraplogModules() FormatOps fo; LogsFormat lf; std::string format_string; - std::vector fields; + std::vector fields; std::vector separators; // test the default string format_string = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""; - fields = {"client","NONE","NONE","date_time_ncsa","request_full","response_code","NONE","referer","user_agent"}; + fields = {client,_DISCARDED,_DISCARDED,date_time_ncsa,request_full,response_code,_DISCARDED,referer,user_agent}; separators = {" "," "," [","] \"","\" "," "," \"","\" \""}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -448,7 +448,7 @@ void testCraplogModules() assert( lf.final == "\"" ); // test all simple fields format_string = "%%%h %% %t\t%r\n%H %m [%U%%%q} <%s> %s %O %I %T %D %a %A %b %B %e %f %k %l %L %p %P %R %S %u %v %V %% %X%%"; - fields = {"client","date_time_ncsa","request_full","request_protocol","request_method","request_uri","request_query","response_code","response_code","response_code","bytes_sent","bytes_received","time_taken_s","time_taken_ms","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE"}; + fields = {client,date_time_ncsa,request_full,request_protocol,request_method,request_uri,request_query,response_code,response_code,response_code,bytes_sent,bytes_received,time_taken_s,time_taken_ms,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" % [","]\t","\n"," "," [","%","} <","> "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," % "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial == "%" ); @@ -470,7 +470,7 @@ void testCraplogModules() } // test client related composed fields format_string = "%{}a %{c}a 
%{}h %{c}h %{Cookie}i %200{Cookie}i %{User-agent}i %302,400{User-agent}i %!200{Referer}i %,200{Referer}i %{Referer}i"; - fields = {"client","client","client","client","cookie","cookie","user_agent","user_agent","referer","referer","referer"}; + fields = {client,client,client,client,cookie,cookie,user_agent,user_agent,referer,referer,referer}; separators = {" "," "," "," "," "," "," "," "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -479,7 +479,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test unexisting/unsupported client related composed fields format_string = "%{ }a %{x}a %{NOPE}a %{ }h %{y}h %{NOPE}h %{}i %{ }i %{Random}i %{Cookies}i"; - fields = {"client","client","client","client","client","client","NONE","NONE","NONE","NONE"}; + fields = {client,client,client,client,client,client,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," "," "," "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -488,7 +488,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test date-time composed fields format_string = "%{%%}t %{%n}t %{%t}t %{}t %{sec}t %{msec}t %{usec}t %{msec_frac}t %{usec_frac}t %{%a}t %{%A}t %{%b}t %{%B}t %{%c}t %{%C}t %{%d}t %{%D}t %{%e}t %{%F}t %{%g}t %{%G}t %{%h}t %{%H}t %{%I}t %{%j}t %{%k}t %{%m}t %{%M}t %{%p}t %{%r}t %{%R}t %{%S}t %{%T}t %{%u}t %{%U}t %{%V}t %{%w}t %{%W}t %{%x}t %{%X}t %{%y}t %{%Y}t %{%z}t %{%Z}t"; - fields = 
{"date_time_ncsa","date_time_epoch_s","date_time_epoch_ms","date_time_epoch_us","NONE","NONE","NONE","NONE","date_time_month_str","date_time_month_str","date_time_mcs","NONE","date_time_day","date_time_MMDDYY","date_time_day","date_time_YYYYMMDD","NONE","NONE","date_time_month_str","date_time_hour","NONE","NONE","date_time_hour","date_time_month","date_time_minute","NONE","date_time_clock_12","date_time_clock_short","date_time_second","date_time_clock_24","NONE","NONE","NONE","NONE","NONE","date_time_MMDDYY","date_time_clock_24","date_time_year_short","date_time_year","NONE","NONE"}; + fields = {date_time_ncsa,date_time_epoch_s,date_time_epoch_ms,date_time_epoch_us,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,date_time_month_str,date_time_month_str,date_time_mcs,_DISCARDED,date_time_day,date_time_mmddyy,date_time_day,date_time_yyyymmdd,_DISCARDED,_DISCARDED,date_time_month_str,date_time_hour,_DISCARDED,_DISCARDED,date_time_hour,date_time_month,date_time_minute,_DISCARDED,date_time_clock_12,date_time_clock_short,date_time_second,date_time_clock_24,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,date_time_mmddyy,date_time_clock_24,date_time_year_short,date_time_year,_DISCARDED,_DISCARDED}; separators = {"] "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial == "% \n \t [" ); @@ -497,7 +497,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test date-time composed fields, with one field only format_string = "%{}t"; - fields = {"date_time_ncsa"}; + fields = {date_time_ncsa}; lf = fo.processApacheFormatString(format_string); assert( lf.initial == "[" ); assert( lf.fields == fields ); @@ -505,7 +505,7 @@ void testCraplogModules() assert( lf.final == "]" ); // test date-time composed fields, with many aggreagated fields format_string = "%{%%%Y_%m_%e%t%H@%M@%S%%}t"; - fields = 
{"date_time_year","date_time_month","date_time_day","date_time_hour","date_time_minute","date_time_second"}; + fields = {date_time_year,date_time_month,date_time_day,date_time_hour,date_time_minute,date_time_second}; separators = {"_","_","\t","@","@"}; lf = fo.processApacheFormatString(format_string); assert( lf.initial == "%" ); @@ -514,7 +514,7 @@ void testCraplogModules() assert( lf.final == "%" ); // test date-time composed fields, with all fields aggeregated in one format_string = "%{%% %n %t %a %A %b %B %c %C %d %D %e %F %g %G %h %H %I %j %k %m %M %p %r %R %S %T %u %U %V %w %W %x %X %y %Y %z %Z}t"; - fields = {"NONE","NONE","date_time_month_str","date_time_month_str","date_time_mcs","NONE","date_time_day","date_time_MMDDYY","date_time_day","date_time_YYYYMMDD","NONE","NONE","date_time_month_str","date_time_hour","NONE","NONE","date_time_hour","date_time_month","date_time_minute","NONE","date_time_clock_12","date_time_clock_short","date_time_second","date_time_clock_24","NONE","NONE","NONE","NONE","NONE","date_time_MMDDYY","date_time_clock_24","date_time_year_short","date_time_year","NONE","NONE"}; + fields = {_DISCARDED,_DISCARDED,date_time_month_str,date_time_month_str,date_time_mcs,_DISCARDED,date_time_day,date_time_mmddyy,date_time_day,date_time_yyyymmdd,_DISCARDED,_DISCARDED,date_time_month_str,date_time_hour,_DISCARDED,_DISCARDED,date_time_hour,date_time_month,date_time_minute,_DISCARDED,date_time_clock_12,date_time_clock_short,date_time_second,date_time_clock_24,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,date_time_mmddyy,date_time_clock_24,date_time_year_short,date_time_year,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial == "% \n \t " ); @@ -543,7 +543,7 @@ void testCraplogModules() } // test time taken related composed fields format_string = 
"%{}T %{s}T %{ms}T %{us}T"; - fields = {"time_taken_s","time_taken_s","time_taken_ms","time_taken_us"}; + fields = {time_taken_s,time_taken_s,time_taken_ms,time_taken_us}; separators = {" "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -560,7 +560,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test unused composed fields format_string = "%{}C %{}e %{}L %{}n %{}o %{}p %{canonical}p %{local}p %{remote}p %{}P %{pid}P %{tid}P %{hextid}P %{}^ti %{}^to"; - fields = {"NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE"}; + fields = {_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -569,7 +569,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test unused composed fields, with random content format_string = "%{TEST}C %{TEST}e %{TEST}L %{TEST}n %{TEST}o %{TEST}p %{TEST}P %{TEST}^ti %{TEST}^to"; - fields = {"NONE","NONE","NONE","NONE","NONE","NONE","NONE"}; + fields = {_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," TEST TEST "," "}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -578,7 +578,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test the default string with dumb logging: no characters to enclose the full request format_string = "%h %l %u %t %r %>s %b \"%{Referer}i\" \"%{User-agent}i\""; - fields = {"client","NONE","NONE","date_time_ncsa","request_full","response_code","NONE","referer","user_agent"}; + fields = {client,_DISCARDED,_DISCARDED,date_time_ncsa,request_full,response_code,_DISCARDED,referer,user_agent}; separators = {" "," "," [","] "," "," "," \"","\" 
\""}; lf = fo.processApacheFormatString(format_string); assert( lf.initial.empty() ); @@ -599,11 +599,11 @@ void testCraplogModules() FormatOps fo; LogsFormat lf; std::string format_string; - std::vector fields; + std::vector fields; std::vector separators; // test the default string format_string = "$remote_addr - $remote_user [$time_local] \"$request\" $status $bytes_sent \"$http_referer\" \"$http_user_agent\""; - fields = {"client","NONE","date_time_ncsa","request_full","response_code","bytes_sent","referer","user_agent"}; + fields = {client,_DISCARDED,date_time_ncsa,request_full,response_code,bytes_sent,referer,user_agent}; separators = {" - "," [","] \"","\" "," "," \"","\" \""}; lf = fo.processNginxFormatString(format_string); assert( lf.initial.empty() ); @@ -612,7 +612,7 @@ void testCraplogModules() assert( lf.final == "\"" ); // test all the considered fields format_string = "$remote_addr $realip_remote_addr $time_local $time_iso8601 $date_gmt $msec $request $server_protocol $request_method $request_uri $uri $query_string $status $bytes_sent $request_length $request_time $http_referer $cookie_ $http_user_agent"; - fields = {"client","client","date_time_ncsa","date_time_iso","date_time_gmt","date_time_epoch_s.ms","request_full","request_protocol","request_method","request_uri_query","request_uri","request_query","response_code","bytes_sent","bytes_received","time_taken_s.ms","referer","cookie","user_agent"}; + fields = {client,client,date_time_ncsa,date_time_iso,date_time_gmt,date_time_epoch_s_ms,request_full,request_protocol,request_method,request_uri_query,request_uri,request_query,response_code,bytes_sent,bytes_received,time_taken_s_ms,referer,cookie,user_agent}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processNginxFormatString(format_string); assert( lf.initial.empty() ); @@ -621,7 +621,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test all the non-considered fields format_string = 
"$ancient_browser $arg_ $args $binary_remote_addr $body_bytes_sent $connection $connection_requests $connections_active $connections_reading $connections_waiting $connections_writing $content_length $content_type $date_local $document_root $document_uri $fastcgi_path_info $fastcgi_script_name $geoip_area_code $geoip_city $geoip_city_continent_code $geoip_city_country_code $geoip_city_country_code3 $geoip_city_country_name $geoip_country_code $geoip_country_code3 $geoip_country_name $geoip_dma_code $geoip_latitude $geoip_longitude $geoip_org $geoip_postal_code $geoip_region $geoip_region_name $gzip_ratio $host $hostname $http2 $http_ $https $invalid_referer $is_args $limit_rate $memcached_key $modern_browser $msie $nginx_version $pid $pipe $proxy_add_x_forwarded_for $proxy_host $proxy_port $proxy_protocol_addr $proxy_protocol_port $realip_remote_port $realpath_root $remote_port $remote_user $request_body $request_body_file $request_completion $request_filename $request_id $scheme $secure_link $secure_link_expires $sent_http_ $server_addr $server_name $server_port $session_log_binary_id $session_log_id $slice_range $spdy $spdy_request_priority $ssl_cipher $ssl_client_cert $ssl_client_fingerprint $ssl_client_i_dn $ssl_client_raw_cert $ssl_client_s_dn $ssl_client_serial $ssl_client_verify $ssl_protocol $ssl_server_name $ssl_session_id $ssl_session_reused $tcpinfo_rtt $tcpinfo_rttvar $tcpinfo_snd_cwnd $tcpinfo_rcv_space $uid_got $uid_reset $uid_set $upstream_addr $upstream_cache_status $upstream_connect_time $upstream_cookie_ $upstream_header_time $upstream_http_ $upstream_response_length $upstream_response_time $upstream_status"; - fields = 
{"NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE"}; + fields = {_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," 
"," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processNginxFormatString(format_string); assert( lf.initial.empty() ); @@ -642,11 +642,11 @@ void testCraplogModules() FormatOps fo; LogsFormat lf; std::string format_string; - std::vector fields; + std::vector fields; std::vector separators; // test the default string for the W3C module format_string = "date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) cs(Referer) sc-status sc-substatus sc-win32-status time-taken"; - fields = {"date_time_utc_d","date_time_utc_t","NONE","request_method","request_uri","request_query","NONE","NONE","client","user_agent","referer","response_code","NONE","NONE","time_taken_ms"}; + fields = {date_time_utc_d,date_time_utc_t,_DISCARDED,request_method,request_uri,request_query,_DISCARDED,_DISCARDED,client,user_agent,referer,response_code,_DISCARDED,_DISCARDED,time_taken_ms}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processIisFormatString(format_string, 0); assert( lf.initial.empty() ); @@ -655,7 +655,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test all the considered fields for the W3C module format_string = "date time cs-version cs-method cs-uri-stem cs-uri-query sc-status sc-bytes cs-bytes time-taken cs(Referer) cs(Cookie) cs(User-Agent) c-ip"; - fields = {"date_time_utc_d","date_time_utc_t","request_protocol","request_method","request_uri","request_query","response_code","bytes_sent","bytes_received","time_taken_ms","referer","cookie","user_agent","client"}; + fields = 
{date_time_utc_d,date_time_utc_t,request_protocol,request_method,request_uri,request_query,response_code,bytes_sent,bytes_received,time_taken_ms,referer,cookie,user_agent,client}; separators = {" "," "," "," "," "," "," "," "," "," "," "," "," "}; lf = fo.processIisFormatString(format_string, 0); assert( lf.initial.empty() ); @@ -664,7 +664,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test all the non-considered fields for the W3C module format_string = "s-sitename s-computername s-ip s-port cs-username cs-host sc-substatus sc-win32-status streamid"; - fields = {"NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE","NONE"}; + fields = {_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED,_DISCARDED}; separators = {" "," "," "," "," "," "," "," "}; lf = fo.processIisFormatString(format_string, 0); assert( lf.initial.empty() ); @@ -680,7 +680,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test the the NCSA module format_string = "some random useless text"; - fields = {"client","NONE","NONE","date_time_ncsa","request_full","response_code","bytes_sent"}; + fields = {client,_DISCARDED,_DISCARDED,date_time_ncsa,request_full,response_code,bytes_sent}; separators = {" "," "," [","] \"","\" "," "}; lf = fo.processIisFormatString(format_string, 1); assert( lf.initial.empty() ); @@ -696,7 +696,7 @@ void testCraplogModules() assert( lf.final.empty() ); // test the the IIS module format_string = "some random useless text"; - fields = {"client","NONE","date_time_MDYYYY","date_time_utc_t","NONE","NONE","NONE","time_taken_ms","bytes_received","bytes_sent","response_code","NONE","request_method","request_uri","request_query"}; + fields = {client,_DISCARDED,date_time_mdyyyy,date_time_utc_t,_DISCARDED,_DISCARDED,_DISCARDED,time_taken_ms,bytes_received,bytes_sent,response_code,_DISCARDED,request_method,request_uri,request_query}; separators = {", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", ",", "}; lf 
= fo.processIisFormatString(format_string, 2); assert( lf.initial.empty() ); @@ -717,7 +717,7 @@ void testCraplogModules() //// LOGS TYPE //// { - LogsFormat lf{ "","","]",{" ","_"},{"","",""},0 }; + LogsFormat lf{ "","","]",{" ","_"},{_DISCARDED,_DISCARDED,_DISCARDED},0 }; assert( LogOps::defineFileType({"ok ok_ok]","a a_a]","TEST TEST_TEST]"}, lf) == LogType::Access ); assert( LogOps::defineFileType({"no no no!","some thing wrong","with this file!"}, lf) == LogType::Discarded ); assert( LogOps::defineFileType({}, lf) == LogType::Failed );