Update
Removed the code which is now handled by Craplog's worker.
This commit is contained in:
parent
3ba11c342d
commit
d744f346fb
|
@ -13,12 +13,6 @@ LogOps::LogOps()
|
|||
}
|
||||
|
||||
|
||||
void LogOps::setMutex( std::mutex* craplog_mutex )
|
||||
{
|
||||
this->mutex = craplog_mutex;
|
||||
}
|
||||
|
||||
|
||||
const LogOps::LogType LogOps::defineFileType( const std::vector<std::string>& lines, const FormatOps::LogsFormat& format ) const
|
||||
{
|
||||
if ( lines.size() == 0 ) {
|
||||
|
@ -142,338 +136,3 @@ const bool LogOps::deepTypeCheck( const std::string& line, const FormatOps::Logs
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
void LogOps::cleanLines( std::vector<std::string>& lines ) const
|
||||
{
|
||||
std::vector<std::string> aux;
|
||||
for ( const std::string& line : lines ) {
|
||||
if ( !StringOps::startsWith( line, "#" ) ) {
|
||||
// not a commented line
|
||||
aux.push_back( line );
|
||||
}
|
||||
}
|
||||
lines = std::move( aux );
|
||||
}
|
||||
|
||||
|
||||
|
||||
//! Parses a single log line to extract data
/*!
    Walks the line following the separators of the given format and stores
    every considered (non "NONE") and non-empty field in the returned map,
    keyed by field ID. Fields which have no ID of their own (date-times,
    full requests, URI+query, time taken in other units) are broken down
    or converted into the fields which do have one.
    The warning mark (ID 99) is always added with value "0".
    The performance counters are updated while holding the shared mutex.
    \param line The log line to parse
    \param format The logs format to use
    \return The map of field IDs and their values
    \throw LogParserException When a separator is not found or a field is not recognized
*/
const std::unordered_map<int, std::string> LogOps::parseLine( const std::string& line, const FormatOps::LogsFormat& format )
{
    std::unordered_map<int, std::string> data;
    const size_t line_len = line.size();
    const int last_sep = static_cast<int>( format.separators.size() ) - 1;
    bool add_pm = false;
    std::string sep;

    // stores a value under the ID of the named field, skipping empty values
    const auto store = [&]( const char* name, const std::string& value ) {
        if ( !value.empty() ) {
            data.emplace( this->field2id.at( name ), value );
        }
    };

    // skip the initial chars
    size_t stop = format.initial.size();

    for ( int i=0; ; i++ ) {
        // split fields
        const size_t start = stop; // stop is updated at the end of the loop
        if ( i <= last_sep ) {
            sep = format.separators.at( i );
            stop = line.find( sep, start );
        } else if ( i == last_sep+1 ) {
            // final separator: when empty or missing, the field reaches the end of the line
            sep = format.final;
            stop = sep.empty() ? std::string::npos : line.find( sep, start );
            if ( stop == std::string::npos ) {
                stop = line_len;
            }
        } else {
            // no more separators
            break;
        }
        if ( stop == std::string::npos ) {
            // separator not found, abort
            throw LogParserException( "Separator not found", sep );
        }

        // get the field
        const std::string fld = format.fields.at( i );
        if ( fld != "NONE" ) {
            // only parse the considered fields
            std::string fld_str = StringOps::strip( line.substr( start, stop-start ), " " );

            if ( i < last_sep ) {
                // not the last separator, check whether the field contains the separator itself
                bool ok = true;
                size_t aux_stop = stop;

                if ( sep == " " ) {
                    // whitespace-separated-values fields
                    int c = StringOps::count( fld_str, sep );
                    // number of whitespaces expected inside the field
                    int n = 0;
                    if ( fld == "request_full" ) {
                        n = 2;
                    } else if ( fld == "date_time_mcs" ) {
                        n = 4;
                    } else if ( fld == "date_time_ncsa" ) {
                        n = 1;
                    } else if ( fld == "date_time_gmt" ) {
                        n = 3;
                    }
                    if ( n > 0 && c < n ) {
                        // loop until the correct number of whitespaces is reached
                        size_t aux_start = stop + 1;
                        while ( c < n ) {
                            aux_stop = line.find( sep, aux_start );
                            if ( aux_stop == std::string::npos ) {
                                // not found, keep the field as it was
                                ok = false;
                                break;
                            }
                            aux_start = aux_stop + 1;
                            c++;
                        }
                    }

                } else if ( fld == "user_agent" && StringOps::startsWith( sep, "\"" ) ) {
                    // atm the only support is for escaped quotes
                    // (the emptiness check avoids calling back() on an empty string)
                    if ( !fld_str.empty() && fld_str.back() == '\\' ) {
                        size_t aux_start = stop + sep.size();
                        while (true) {
                            aux_stop = line.find( sep, aux_start );
                            if ( aux_stop == std::string::npos ) {
                                // not found: keep the first match, otherwise 'stop'
                                // would become npos and wrap around when advanced
                                ok = false;
                                break;
                            }
                            if ( line.at( aux_stop-1 ) != '\\' ) {
                                // non-backslashed quotes
                                break;
                            }
                            aux_start = aux_stop + sep.size();
                        }
                    }
                }

                // finally update if needed
                if ( ok && aux_stop >= stop ) {
                    stop = aux_stop;
                    fld_str = StringOps::strip( line.substr( start, stop-start ), " " );
                }
            }

            // update the perf data
            {
                std::unique_lock<std::mutex> lock( *this->mutex );
                this->parsed_size += fld_str.size();
            }

            if ( !fld_str.empty() ) {
                // fields without a positive ID (or not listed at all) need to be processed,
                // unknown ones end up in the final 'else' and raise a LogParserException
                const auto known = this->field2id.find( fld );
                const int fld_id = ( known != this->field2id.end() ) ? known->second : 0;

                if ( fld_id > 0 ) {
                    // no need to process, append directly
                    data.emplace( fld_id, fld_str );

                // process the date to get year, month, day, hour, minute and second
                } else if ( StringOps::startsWith( fld, "date_time_" ) ) {
                    // cut away the "date_time_" part which is useless from now on
                    const std::vector<std::string> dt = DateTimeOps::processDateTime( fld_str, fld.substr( 10 ) );
                    store( "date_time_year", dt.at( 0 ) );
                    store( "date_time_month", dt.at( 1 ) );
                    store( "date_time_day", dt.at( 2 ) );
                    if ( dt.at( 3 ) == "PM" ) {
                        // not an hour: the hour will be shifted once the whole line is parsed
                        add_pm = true;
                    } else {
                        store( "date_time_hour", dt.at( 3 ) );
                    }
                    store( "date_time_minute", dt.at( 4 ) );
                    store( "date_time_second", dt.at( 5 ) );

                // process the request to get the protocol, method, resource and query
                } else if ( fld == "request_full" ) {
                    std::string protocol, method, page, query;
                    std::string rest = fld_str;
                    // method
                    size_t pos = rest.find( ' ' );
                    if ( pos != std::string::npos ) {
                        method = rest.substr( 0, pos );
                        rest = StringOps::lstrip( rest.substr( pos ) );
                        // page & query
                        pos = rest.find( ' ' );
                        if ( pos != std::string::npos ) {
                            const std::string target = rest.substr( 0, pos );
                            const size_t mark = target.find( '?' );
                            if ( mark != std::string::npos ) {
                                page = target.substr( 0, mark );
                                query = target.substr( mark+1 );
                            } else {
                                // query not found
                                page = target;
                            }
                            // protocol
                            protocol = StringOps::lstrip( rest.substr( pos ) );
                        }
                    }
                    // append non-empty data
                    store( "request_protocol", protocol );
                    store( "request_method", method );
                    store( "request_uri", page );
                    store( "request_query", query );

                // process the request to get uri and query
                } else if ( fld == "request_uri_query" ) {
                    const size_t mark = fld_str.find( '?' );
                    if ( mark != std::string::npos ) {
                        store( "request_uri", fld_str.substr( 0, mark ) );
                        store( "request_query", fld_str.substr( mark+1 ) );
                    } else {
                        // query not found
                        store( "request_uri", fld_str );
                    }

                // process the time taken to convert to milliseconds
                } else if ( StringOps::startsWith( fld, "time_taken_" ) ) {
                    // NOTE(review): std::stof throws std::invalid_argument / std::out_of_range
                    // on non-numeric input and is not translated to a LogParserException here
                    float t = std::stof( fld_str );
                    const std::string unit = fld.substr( 11 );
                    if ( unit == "us" ) {
                        // from microseconds
                        t /= 1000;
                    } else if ( unit == "s" || unit == "s.ms" ) {
                        // from seconds
                        t *= 1000;
                    }
                    // the converted value is stored under the milliseconds field,
                    // which is the only time-taken field having an ID
                    data.emplace( this->field2id.at("time_taken_ms"), std::to_string( static_cast<int>( t ) ) );

                // something went wrong
                } else {
                    throw LogParserException( "Unexpected LogField", fld );
                }
            }
        }

        // update the stop for the next start
        stop += sep.size();
        if ( stop >= line_len ) {
            // this was the final separator
            break;
        }
    }

    if ( add_pm ) {
        try {
            // add +12 hours for PM
            // NOTE(review): an hour of "12" would become 24, confirm what processDateTime returns for 12-hours clocks
            std::string& hour = data.at( this->field2id.at("date_time_hour") );
            hour = std::to_string( 12 + std::stoi( hour ) );
        } catch (...) {
            // no (usable) hour data, nothing to shift
        }
    }

    // set the warning mark ( 0=false ) to its default status
    data.emplace( 99, "0" );
    {
        std::unique_lock<std::mutex> lock( *this->mutex );
        // NOTE(review): one less than the line's size is added, as before; confirm this is intended.
        // An empty line adds nothing instead of wrapping around.
        this->total_size += ( line_len > 0 ) ? line_len-1 : 0;
        this->parsed_lines ++;
    }

    return data;
}
|
||||
|
||||
//! Parses log lines to extract data
/*!
    Empties the given collection, then fills it with one item per log entry.
    When the format declares additional new-lines, every entry is built by
    joining that many following lines to the first one, using a new-line character.
    Only complete entries are parsed: trailing lines which are not enough
    to build an entry are left out.
    \param data The data collection which will hold the data
    \param lines The list of lines to parse
    \param format The logs format to use
    \throw LogParserException
*/
void LogOps::parseLines( std::vector<std::unordered_map<int, std::string>>& data, const std::vector<std::string>& lines, const FormatOps::LogsFormat& format )
{
    data.clear();
    data.shrink_to_fit();
    const size_t n_lines = lines.size();
    const int nl = format.new_lines;
    if ( nl <= 0 ) {
        // one line per entry
        data.reserve( n_lines );
        for ( const std::string& line : lines ) {
            data.push_back( this->parseLine( line, format ) );
        }
    } else {
        // every entry spans over nl+1 lines
        const size_t entry_lines = static_cast<size_t>( nl ) + 1;
        data.reserve( n_lines / entry_lines );
        for ( size_t first=0; first+entry_lines <= n_lines; first+=entry_lines ) {
            std::string line = lines[ first ];
            for ( size_t k=1; k<entry_lines; k++ ) {
                line += '\n';
                line += lines[ first+k ];
            }
            data.push_back( this->parseLine( line, format ) );
        }
    }
    // release the memory which has been reserved but not used
    if ( data.size() < data.capacity() ) {
        data.shrink_to_fit();
    }
}
|
||||
|
||||
|
||||
void LogOps::resetPerfData()
|
||||
{
|
||||
this->total_size = 0;
|
||||
this->parsed_size = 0;
|
||||
this->parsed_lines = 0;
|
||||
}
|
||||
const unsigned LogOps::getTotalSize()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock( *this->mutex );
|
||||
return this->total_size;
|
||||
}
|
||||
const unsigned LogOps::getParsedSize()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock( *this->mutex );
|
||||
return this->parsed_size;
|
||||
}
|
||||
const unsigned LogOps::getParsedLines()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock( *this->mutex );
|
||||
return this->parsed_lines;
|
||||
}
|
||||
|
|
|
@ -19,13 +19,6 @@ public:
|
|||
explicit LogOps();
|
||||
|
||||
|
||||
//! Receives the mutex to be shared with Craplog
/*!
    \param craplog_mutex Pointer to the mutex owned by Craplog, a null pointer when omitted
*/
void setMutex(
    std::mutex* craplog_mutex = nullptr
);
|
||||
|
||||
|
||||
//! Enumerates log file types
|
||||
/*!
|
||||
File types used to decide whether a file should be considered valid or not
|
||||
|
@ -49,87 +42,9 @@ public:
|
|||
const FormatOps::LogsFormat& format
|
||||
) const;
|
||||
|
||||
//! Removes commented lines from the given list
|
||||
/*!
|
||||
\param lines The lines to clean
|
||||
*/
|
||||
void cleanLines(
|
||||
std::vector<std::string>& lines
|
||||
) const;
|
||||
|
||||
//! Parses log lines to extract data
|
||||
/*!
|
||||
\param data The data collection which will hold the data
|
||||
\param lines The list of lines to parse
|
||||
\param format The logs format to use
|
||||
\throw LogParserException
|
||||
\see parseLine(), Craplog::parseLogLines(), FormatOps::LogsFormat
|
||||
*/
|
||||
void parseLines(
|
||||
std::vector<std::unordered_map<int, std::string>>& data,
|
||||
const std::vector<std::string>& lines,
|
||||
const FormatOps::LogsFormat& format
|
||||
);
|
||||
|
||||
//! Resets the performance data
/*!
    \see total_size, parsed_size, parsed_lines
*/
void resetPerfData();
|
||||
|
||||
// accessors used to share the performance data with Craplog

//! Returns the total size of the logs lines
/*! \see total_size */
const unsigned getTotalSize();

//! Returns the parsed logs size
/*! \see parsed_size */
const unsigned getParsedSize();

//! Returns the number of parsed log lines
/*! \see parsed_lines */
const unsigned getParsedLines();
|
||||
|
||||
private:
|
||||
|
||||
// Map to convert log fields to field IDs
// Fields having a positive ID are stored as they are by LogOps::parseLine,
// fields mapped to 0 have no ID of their own and must be processed
// (split or converted) into the fields which do have one
const std::unordered_map<std::string, int> field2id = {
    // date-time
    {"date_time_year",        1},
    {"date_time_month",       2},
    {"date_time_day",         3},
    {"date_time_hour",        4},
    {"date_time_minute",      5},
    {"date_time_second",      6},
    {"date_time_ncsa",        0},
    {"date_time_iso",         0},
    {"date_time_mcs",         0},
    {"date_time_gmt",         0},
    {"date_time_utc_d",       0},
    {"date_time_utc_t",       0},
    {"date_time_epoch_s",     0},
    {"date_time_epoch_s.ms",  0},
    {"date_time_epoch_ms",    0},
    {"date_time_epoch_us",    0},
    {"date_time_YYYYMMDD",    0},
    {"date_time_MMDDYY",      0},
    {"date_time_MDYY",        0},
    {"date_time_year_short",  0},
    {"date_time_month_str",   0},
    {"date_time_clock_12",    0},
    {"date_time_clock_24",    0},
    {"date_time_clock_short", 0},
    // request
    {"request_protocol",      10},
    {"request_method",        11},
    {"request_uri",           12},
    {"request_query",         13},
    {"response_code",         14},
    {"request_full",          0},
    // handled by LogOps::parseLine but previously missing from this map,
    // which made the lookup fail before the field could be processed
    {"request_uri_query",     0},
    // performance
    {"time_taken_ms",         15},
    {"time_taken_us",         0},
    {"time_taken_s.ms",       0},
    {"time_taken_s",          0},
    {"bytes_sent",            16},
    {"bytes_received",        17},
    // referer
    {"referer",               18},
    // client data
    {"client",                20},
    {"user_agent",            21},
    {"cookie",                22}
};
|
||||
|
||||
//! Parse the given line using the given format
|
||||
/*!
|
||||
\param line The log line to check
|
||||
|
@ -142,28 +57,6 @@ private:
|
|||
const FormatOps::LogsFormat& format
|
||||
) const;
|
||||
|
||||
//! Parses a line to extract data
|
||||
/*!
|
||||
\param line The log line to parse
|
||||
\param format The logs format to use
|
||||
\return A data collection item
|
||||
\throw LogParserException
|
||||
\see parseLines(), Craplog::data_collection, FormatOps::LogsFormat
|
||||
*/
|
||||
const std::unordered_map<int, std::string> parseLine(
|
||||
const std::string& line,
|
||||
const FormatOps::LogsFormat& format
|
||||
);
|
||||
|
||||
//! Pointer to the mutex shared with Craplog, null until setMutex() provides one
std::mutex* mutex{ nullptr };
|
||||
|
||||
// temporary vars holding the performance data

//! Total size of the parsed logs. \see getTotalSize()
unsigned total_size{ 0 };

//! Size of the parsed logs. \see getParsedSize()
unsigned parsed_size{ 0 };

//! Number of parsed logs lines. \see getParsedLines()
unsigned parsed_lines{ 0 };
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // LOGS_H
|
||||
|
|
Loading…
Reference in New Issue