Removed the log-parsing and performance-tracking code which is now handled by Craplog's worker.
This commit is contained in:
Valentino Orlandi 2023-01-27 02:35:36 +01:00
parent 3ba11c342d
commit d744f346fb
Signed by: elB4RTO
GPG Key ID: 1719E976DB2D4E71
2 changed files with 0 additions and 448 deletions

View File

@ -13,12 +13,6 @@ LogOps::LogOps()
}
void LogOps::setMutex( std::mutex* craplog_mutex )
{
this->mutex = craplog_mutex;
}
const LogOps::LogType LogOps::defineFileType( const std::vector<std::string>& lines, const FormatOps::LogsFormat& format ) const
{
if ( lines.size() == 0 ) {
@ -142,338 +136,3 @@ const bool LogOps::deepTypeCheck( const std::string& line, const FormatOps::Logs
return result;
}
//! Removes commented lines from the given list
/*!
    A line is considered commented when it starts with "#".
    The retained lines keep their relative order.
    \param lines The lines to clean, filtered in place
*/
void LogOps::cleanLines( std::vector<std::string>& lines ) const
{
    std::vector<std::string> kept;
    kept.reserve( lines.size() );
    for ( std::string& line : lines ) {
        if ( !StringOps::startsWith( line, "#" ) ) {
            // not a commented line: move it instead of deep-copying it,
            // the source vector gets replaced right below anyway
            kept.push_back( std::move( line ) );
        }
    }
    lines = std::move( kept );
}
//! Parses a line to extract data
/*!
    Splits the line using the initial/separators/final strings of the format and
    stores every considered field (any field not named "NONE") in the returned map,
    keyed by the IDs of \c field2id.
    Fields whose ID is 0 are not stored directly: date-times, full requests,
    uri+query pairs and the time taken are broken down / converted first.
    The item with key 99 (the warning mark) is always added with value "0".
    The performance counters are updated while holding the shared mutex.
    \param line The log line to parse
    \param format The logs format to use
    \return A data collection item
    \throw LogParserException When a separator is not found or a field is not expected
    \note Exceptions raised by \c std::stof and by the \c at() lookups are not caught here
*/
const std::unordered_map<int, std::string> LogOps::parseLine( const std::string& line, const FormatOps::LogsFormat& format )
{
    std::unordered_map<int, std::string> data;
    std::string sep, fld, fld_str;
    bool add_pm=false;
    // line_size holds the index of the last char, not the size
    // NOTE: being unsigned, it wraps around when the line is empty
    size_t start, stop=0, aux_start, aux_stop,
           line_size = line.size()-1;
    int i=0, n_sep=format.separators.size()-1;

    // add the initial chars
    stop = format.initial.size();

    while (true) {
        // split fields
        start = stop; // stop updated at the end of the loop
        if ( i <= n_sep ) {
            sep = format.separators.at( i );
            stop = line.find( sep, start );
        } else if ( i == n_sep+1 ) {
            // final separator
            sep = format.final;
            if ( sep == "" ) {
                stop = line_size+1;
            } else {
                stop = line.find( sep, start );
                if ( stop == std::string::npos ) {
                    stop = line_size +1;
                }
            }
        } else {
            // no more separators
            break;
        }
        if ( stop == std::string::npos ) {
            // separator not found, abort
            throw LogParserException( "Separator not found", sep );
        }

        // get the field
        fld = format.fields.at( i );
        if ( fld != "NONE" ) {
            // only parse the considered fields
            fld_str = StringOps::strip( line.substr(start, stop-start), " " );

            if ( i+1 <= n_sep ) {
                // not the last separator, check for mistakes
                bool ok = true;
                aux_stop = stop;

                if ( sep == " " ) {
                    // whitespace-separated-values fields
                    int c = StringOps::count( fld_str, sep ),
                        n = 0;
                    if ( fld == "request_full" ) {
                        n = 2;
                    } else if ( fld == "date_time_mcs" ) {
                        n = 4;
                    } else if ( fld == "date_time_ncsa" ) {
                        n = 1;
                    } else if ( fld == "date_time_gmt" ) {
                        n = 3;
                    }
                    if ( n > 0 && c < n ) {
                        // loop until the correct number of whitespaces is reached
                        aux_start = stop + 1;
                        while ( c < n ) {
                            aux_stop = line.find( sep, aux_start );
                            if ( aux_stop == std::string::npos ) {
                                // not found
                                ok = false;
                                break;
                            }
                            aux_start = aux_stop + 1;
                            c++;
                        }
                    }

                } else if ( fld == "user_agent" && StringOps::startsWith( sep, "\"" ) ) {
                    // atm the only support is for escaped quotes
                    // an empty field has no last char to inspect
                    if ( !fld_str.empty() && fld_str.back() == '\\' ) {
                        aux_start = stop + sep.size();
                        while (true) {
                            aux_stop = line.find( sep, aux_start );
                            if ( aux_stop == std::string::npos ) {
                                // not found: keep the original stop, otherwise
                                // it would become npos and wrap around below
                                ok = false;
                                break;
                            } else if ( line.at( aux_stop-1 ) != '\\' ) {
                                // non-backslashed quotes
                                break;
                            }
                            aux_start = aux_stop + sep.size();
                        }
                    }
                }

                // finally update if needed
                if ( ok && aux_stop >= stop ) {
                    stop = aux_stop;
                    fld_str = StringOps::strip( line.substr(start, stop-start), " " );
                }
            }

            // process the field
            {
                std::unique_lock<std::mutex> lock( *this->mutex );
                this->parsed_size += fld_str.size();
            }

            if ( fld_str != "" ) {
                int fld_id = this->field2id.at(fld);
                if ( fld_id > 0 ) {
                    // no need to process, append directly if non-empty
                    data.emplace( fld_id, fld_str );

                } else {
                    // process the field

                    // process the date to get year, month, day, hour and minute
                    if ( StringOps::startsWith( fld, "date_time" ) ) {
                        const std::vector<std::string> dt = DateTimeOps::processDateTime( fld_str, fld.substr( 10 ) ); // cut away the "date_time_" part which is useless from now on
                        if ( dt.at( 0 ) != "" ) {
                            // year
                            data.emplace( this->field2id.at("date_time_year"), dt.at( 0 ) );
                        }
                        if ( dt.at( 1 ) != "" ) {
                            // month
                            data.emplace( this->field2id.at("date_time_month"), dt.at( 1 ) );
                        }
                        if ( dt.at( 2 ) != "" ) {
                            // day
                            data.emplace( this->field2id.at("date_time_day"), dt.at( 2 ) );
                        }
                        if ( dt.at( 3 ) != "" ) {
                            // hour
                            if ( dt.at( 3 ) == "PM" ) {
                                add_pm = true;
                            } else {
                                data.emplace( this->field2id.at("date_time_hour"), dt.at( 3 ) );
                            }
                        }
                        if ( dt.at( 4 ) != "" ) {
                            // minute
                            data.emplace( this->field2id.at("date_time_minute"), dt.at( 4 ) );
                        }
                        if ( dt.at( 5 ) != "" ) {
                            // second
                            data.emplace( this->field2id.at("date_time_second"), dt.at( 5 ) );
                        }

                    // process the request to get the protocol, method, resource and query
                    } else if ( fld == "request_full" ) {
                        size_t aux;
                        std::string aux_fld, protocol="", method="", page="", query="";
                        aux_fld = fld_str;
                        // method
                        aux = aux_fld.find( ' ' );
                        if ( aux != std::string::npos ) {
                            method  = aux_fld.substr( 0, aux );
                            aux_fld = StringOps::lstrip( aux_fld.substr( aux ) );

                            // page & query
                            aux = aux_fld.find( ' ' );
                            if ( aux != std::string::npos ) {
                                std::string aux_str = aux_fld.substr( 0, aux );
                                // search for the query
                                const size_t aux_ = aux_str.find( '?' );
                                if ( aux_ != std::string::npos ) {
                                    page  = aux_str.substr( 0, aux_ );
                                    query = aux_str.substr( aux_+1 );
                                } else {
                                    // query not found
                                    page = aux_str;
                                }
                                aux_fld = StringOps::lstrip( aux_fld.substr( aux ) );

                                // protocol
                                protocol = aux_fld;
                            }
                        }
                        // append non-empty data
                        if ( protocol != "" ) {
                            data.emplace( this->field2id.at("request_protocol"), protocol );
                        }
                        if ( method != "" ) {
                            data.emplace( this->field2id.at("request_method"), method );
                        }
                        if ( page != "" ) {
                            data.emplace( this->field2id.at("request_uri"), page );
                        }
                        if ( query != "" ) {
                            data.emplace( this->field2id.at("request_query"), query );
                        }

                    // process the request to get uri and query
                    } else if ( fld == "request_uri_query" ) {
                        // search for the query
                        std::string page, query;
                        size_t aux_ = fld_str.find( '?' );
                        if ( aux_ != std::string::npos ) {
                            page  = fld_str.substr( 0, aux_ );
                            query = fld_str.substr( aux_+1 );
                        } else {
                            // query not found
                            page = fld_str;
                        }
                        if ( page != "" ) {
                            data.emplace( this->field2id.at("request_uri"), page );
                        }
                        if ( query != "" ) {
                            data.emplace( this->field2id.at("request_query"), query );
                        }

                    // process the time taken to convert to milliseconds
                    } else if ( StringOps::startsWith( fld, "time_taken_" ) ) {
                        float t = std::stof( fld_str );
                        fld = fld.substr( 11 ); // keep only the unit suffix
                        if ( fld == "us" ) {
                            // from microseconds
                            t /= 1000;
                        } else if ( fld == "s" || fld == "s.ms" ) {
                            // from seconds
                            t *= 1000;
                        }
                        data.emplace( this->field2id.at("time_taken"), std::to_string( static_cast<int>(t) ) );

                    // something went wrong
                    } else {
                        // hmmm.. no...
                        throw LogParserException( "Unexpected LogField", fld );
                    }
                }
            }
        }

        // update the stop for the next start
        stop += sep.size();
        i++;
        if ( stop > line_size ) {
            // this was the final separator
            break;
        }
    }

    if ( add_pm ) {
        try {
            // add +12 hours for PM
            // NOTE(review): an hour of 12 would become 24 — confirm what DateTimeOps::processDateTime provides
            const int hour_id = this->field2id.at("date_time_hour");
            data.at( hour_id ) = std::to_string( 12 + std::stoi(data.at( hour_id )) );
        } catch (...) {
            // no hour data
        }
    }

    // set the default warning mark ( 0=false ) to default status
    data.emplace( 99, "0" );

    {
        std::unique_lock<std::mutex> lock( *this->mutex );
        this->total_size += line_size;
        this->parsed_lines ++;
    }

    return data;
}
//! Parses log lines to extract data
/*!
    The collection is emptied first, then filled with one item per log entry.
    When the format declares that an entry spans additional lines
    (\c format.new_lines greater than 0), that many following lines are appended
    to the first one, joined by a new-line character, before parsing.
    A trailing group without enough lines to form a whole entry is not parsed.
    A non-positive \c format.new_lines is treated as one line per entry.
    \param data The data collection which will hold the data
    \param lines The list of lines to parse
    \param format The logs format to use
    \throw LogParserException
    \see parseLine()
*/
void LogOps::parseLines( std::vector<std::unordered_map<int, std::string>>& data, const std::vector<std::string>& lines, const FormatOps::LogsFormat& format )
{
    data.clear();
    data.shrink_to_fit();
    const size_t n_lines = lines.size();
    const int nl = format.new_lines;
    if ( nl <= 0 ) {
        // one line per entry
        data.reserve( n_lines );
        for ( const std::string& line : lines ) {
            data.push_back( this->parseLine( line, format ) );
        }
    } else {
        // the amount of lines is what bounds the loop,
        // not the parsed_size performance counter
        const size_t extra = static_cast<size_t>( nl );
        data.reserve( n_lines / (extra+1) );
        size_t i = 0;
        // only start an entry when all of its lines are available
        while ( i + extra < n_lines ) {
            std::string line = lines.at( i );
            for ( size_t n=0; n<extra; n++ ) {
                i++;
                line += '\n';
                line += lines.at( i );
            }
            data.push_back( this->parseLine( line, format ) );
            i++;
        }
    }
    if ( data.size() < data.capacity() ) {
        data.shrink_to_fit();
    }
}
void LogOps::resetPerfData()
{
this->total_size = 0;
this->parsed_size = 0;
this->parsed_lines = 0;
}
//! Returns the total size of the logs lines, read while holding the shared mutex
const unsigned LogOps::getTotalSize()
{
    // the guard releases the mutex when going out of scope
    const std::lock_guard<std::mutex> guard( *this->mutex );
    const unsigned size = this->total_size;
    return size;
}
//! Returns the size of the parsed logs, read while holding the shared mutex
const unsigned LogOps::getParsedSize()
{
    // the guard releases the mutex when going out of scope
    const std::lock_guard<std::mutex> guard( *this->mutex );
    const unsigned size = this->parsed_size;
    return size;
}
//! Returns the number of parsed log lines, read while holding the shared mutex
const unsigned LogOps::getParsedLines()
{
    // the guard releases the mutex when going out of scope
    const std::lock_guard<std::mutex> guard( *this->mutex );
    const unsigned count = this->parsed_lines;
    return count;
}

View File

@ -19,13 +19,6 @@ public:
explicit LogOps();
//! Receives the mutex to be shared with Craplog
/*!
The pointer is stored as received; it defaults to a null pointer when omitted.
\param craplog_mutex The mutex from Craplog
*/
void setMutex( std::mutex* craplog_mutex=nullptr );
//! Enumerates log file types
/*!
File types used to decide whether a file should be considered valid or not
@ -49,87 +42,9 @@ public:
const FormatOps::LogsFormat& format
) const;
//! Removes commented lines from the given list
/*!
A line is considered commented when it starts with "#".
\param lines The lines to clean, modified in place
*/
void cleanLines(
std::vector<std::string>& lines
) const;
//! Parses log lines to extract data
/*!
The data collection is cleared before being filled.
When the format's new_lines is not 0, consecutive lines are joined
by a new-line character before being parsed as a single entry.
\param data The data collection which will hold the data
\param lines The list of lines to parse
\param format The logs format to use
\throw LogParserException
\see parseLine(), Craplog::parseLogLines(), FormatOps::LogsFormat
*/
void parseLines(
std::vector<std::unordered_map<int, std::string>>& data,
const std::vector<std::string>& lines,
const FormatOps::LogsFormat& format
);
//! Resets the performance data
/*!
Sets total_size, parsed_size and parsed_lines back to 0
*/
void resetPerfData();
// share perf data with craplog
// each getter locks the shared mutex while reading its counter
const unsigned getTotalSize(); //!< Returns the total size of the logs lines. \see total_size
const unsigned getParsedSize(); //!< Returns the parsed logs size. \see parsed_size
const unsigned getParsedLines(); //!< Returns the number of parsed log lines. \see parsed_lines
private:
// Map to convert log fields to field IDs
// Fields with a positive ID are stored as they are by parseLine();
// fields mapped to 0 have no ID of their own: parseLine() processes them
// and stores the result under the positive IDs
const std::unordered_map<std::string, int> field2id = {
// date-time
{"date_time_year", 1},
{"date_time_month", 2},
{"date_time_day", 3},
{"date_time_hour", 4},
{"date_time_minute", 5},
{"date_time_second", 6},
{"date_time_ncsa", 0},
{"date_time_iso", 0},
{"date_time_mcs", 0},
{"date_time_gmt", 0},
{"date_time_utc_d", 0},
{"date_time_utc_t", 0},
{"date_time_epoch_s", 0},
{"date_time_epoch_s.ms", 0},
{"date_time_epoch_ms", 0},
{"date_time_epoch_us", 0},
{"date_time_YYYYMMDD", 0},
{"date_time_MMDDYY", 0},
{"date_time_MDYY", 0},
{"date_time_year_short", 0},
{"date_time_month_str", 0},
{"date_time_clock_12", 0},
{"date_time_clock_24", 0},
{"date_time_clock_short", 0},
// request
{"request_protocol", 10},
{"request_method", 11},
{"request_uri", 12},
{"request_query", 13},
{"response_code", 14},
{"request_full", 0},
// performance
{"time_taken_ms", 15},
{"time_taken_us", 0},
{"time_taken_s.ms", 0},
{"time_taken_s", 0},
{"bytes_sent", 16},
{"bytes_received", 17},
// referer
{"referer", 18},
// client data
{"client", 20},
{"user_agent", 21},
{"cookie", 22}
};
//! Parse the given line using the given format
/*!
\param line The log line to check
@ -142,28 +57,6 @@ private:
const FormatOps::LogsFormat& format
) const;
//! Parses a line to extract data
/*!
The returned item maps field IDs to the values found in the line
and always holds the warning mark, stored with key 99 and value "0".
The performance counters are updated while parsing.
\param line The log line to parse
\param format The logs format to use
\return A data collection item
\throw LogParserException
\see parseLines(), Craplog::data_collection, FormatOps::LogsFormat
*/
const std::unordered_map<int, std::string> parseLine(
const std::string& line,
const FormatOps::LogsFormat& format
);
// a mutex shared with craplog
// set through setMutex(), locked when parseLine() updates the counters below
// and when the getters read them
std::mutex* mutex = nullptr;
// temporary vars
unsigned total_size=0; //!< Total size of the parsed logs. \see getTotalSize()
unsigned parsed_size=0; //!< Size of the parsed logs. \see getParsedSize()
unsigned parsed_lines=0; //!< Number of parsed logs lines. \see getParsedLines()
};
#endif // LOGS_H