LogDoctor/logdoctor/modules/craplog/modules/workers/parser.cpp
2024-01-22 00:14:26 +01:00

546 lines
18 KiB
C++

#include "parser.h"
#include "defines/web_servers.h"
#include "utilities/checks.h"
#include "utilities/gzip.h"
#include "utilities/io.h"
#include "utilities/strings.h"
#include "utilities/vectors.h"
#include "modules/dialogs.h"
#include "modules/exceptions.h"
#include "modules/craplog/modules/workers/lib.h"
#include <QSqlDatabase>
#include <QSqlQuery>
#include <QSqlError>
/// Builds a parser worker, initializing its members from the given arguments.
///
/// @param web_server      The web server the logs belong to
/// @param dialogs_level   The verbosity level used when composing error dialogs
/// @param db_data_path    The path of the database in which the parsed data is stored
/// @param db_hashes_path  The path of the hashes database
/// @param logs_format     The format used to parse the log lines
/// @param blacklists      The blacklists to check the parsed data against
/// @param warnlists       The warnlists to check the parsed data against
/// @param log_files       The log files to work on
/// @param parent          The parent QObject
CraplogParser::CraplogParser(
    const WebServer web_server,
    const unsigned dialogs_level,
    const std::string& db_data_path,
    const std::string& db_hashes_path,
    const LogsFormat& logs_format,
    const bw_lists_t& blacklists,
    const bw_lists_t& warnlists,
    const worker_files_t& log_files,
    QObject* parent )
    : QObject        { parent }
    , web_server     { web_server }
    , dialogs_level  { dialogs_level }
    , db_data_path   { db_data_path }
    , db_hashes_path { db_hashes_path }
    , blacklists     { blacklists }
    , warnlists      { warnlists }
    , logs_format    { logs_format }
    , files_to_use   { log_files }
{
}
/// Emits the perfData signal carrying the parsed size and the number of parsed lines.
void CraplogParser::sendPerfData() noexcept
{
    emit perfData( parsed_size, parsed_lines );
}
/// Emits the chartData signal carrying the total size, the total number of lines
/// and the sizes of the warnlisted and blacklisted data.
void CraplogParser::sendChartData() noexcept
{
    emit chartData( total_size, total_lines, warnlisted_size, blacklisted_size );
}
/// Entry point of the worker: collects the log lines, parses them and stores
/// the resulting data in the database.
///
/// Every step runs only while `proceed` is still true. GenericException and
/// LogParserException are turned into a dialog and make the job fail; on
/// failure all the counters are zeroed before the final data is sent.
/// Always ends by emitting done(db_edited) and retire().
void CraplogParser::work()
{
    this->proceed   = true;
    this->db_edited = false;

    try {
        // step 1: collect the log lines
        if ( this->proceed ) [[likely]] {
            this->joinLogLines();
        }

        // step 2: parse the log lines to fill the collection
        if ( this->proceed ) [[likely]] {
            emit this->startedParsing();
            this->parseLogLines();
            emit this->finishedParsing();
        }

        // the raw lines are not needed anymore
        this->logs_lines.clear();

        // step 3: store the new data
        if ( this->proceed && !this->data_collection.empty() ) [[likely]] {
            this->storeLogLines();
            // storing may have failed: the database counts as edited only on success
            if ( this->proceed ) {
                this->db_edited = true;
            }
        }

    } catch ( GenericException& e ) {
        emit this->showDialog( WorkerDialog::errGeneric, {e.what()} );
        this->proceed = false;

    } catch ( LogParserException& e ) {
        emit this->showDialog( WorkerDialog::errFailedParsingLogs, {e.what()} );
        this->proceed = false;
    }

    // a failed job reports empty statistics
    if ( ! this->proceed ) {
        this->total_size       = 0ul;
        this->total_lines      = 0ul;
        this->parsed_size      = 0ul;
        this->parsed_lines     = 0ul;
        this->warnlisted_size  = 0ul;
        this->blacklisted_size = 0ul;
    }

    // send the final data
    this->sendPerfData();
    this->sendChartData();
    emit this->done( this->db_edited );
    emit this->retire();
}
/// Reads every file in `files_to_use` and appends its lines to `logs_lines`.
///
/// Each file is first read through GZutils::readFile; if that throws anything
/// other than a GenericException, the file is read again through
/// IOutils::readFile. `total_lines` and `total_size` are updated with the
/// content of each file (before the comment lines are removed).
/// For IIS, lines starting with '#' are dropped.
/// If no line has been collected at the end, `proceed` is set to false.
///
/// @throws GenericException If a file could not be read
void CraplogParser::joinLogLines()
{
    // removes the commented lines (the ones starting with '#')
    const auto cleanLines = []( std::vector<std::string>& lines ) {
        std::vector<std::string> kept;
        kept.reserve( lines.size() );
        for ( std::string& line : lines ) {
            // check for emptiness first: front() must not be called on an empty string.
            // an empty line is not a comment, hence it is kept
            if ( line.empty() || line.front() != '#' ) [[likely]] {
                // the source vector is replaced below, its elements can be moved from
                kept.push_back( std::move( line ) );
            }
        }
        kept.shrink_to_fit();
        lines = std::move( kept );
    };

    std::string aux;
    std::vector<std::string> content;
    for ( const auto& file : this->files_to_use ) {

        if ( ! this->proceed ) { break; }

        const std::string& file_path = std::get<0>( file );

        // collect lines
        try {
            // try reading
            content.clear();
            aux.clear();
            try {
                // try as gzip compressed archive first
                GZutils::readFile( file_path, aux );

            } catch ( const GenericException& ) {
                // no fallback for this failure (original note: failed closing file pointer)
                throw;

            } catch (...) {
                // fallback on reading as normal file, discarding any partial content
                aux.clear();
                IOutils::readFile( file_path, aux );
            }
            StringOps::splitrip( content, aux );

            this->total_lines += content.size();
            this->total_size += aux.size();

            if ( this->web_server == WS_IIS ) {
                cleanLines( content );
            }

        // every failure is re-thrown as a GenericException, which is handled in work()
        } catch ( const GenericException& ) {
            // failed closing gzip file pointer
            throw GenericException( QString("%1:\n%2").arg(
                DialogSec::tr("An error occured while reading the gzipped file"),
                QString::fromStdString( file_path )
                ).toStdString() );

        } catch ( const std::ios_base::failure& ) {
            // failed reading as text
            throw GenericException( QString("%1:\n%2").arg(
                DialogSec::tr("An error occured while reading the file"),
                QString::fromStdString( file_path )
                ).toStdString() );

        } catch (...) {
            // failed somehow
            throw GenericException( QString("%1:\n%2").arg(
                DialogSec::tr("Something failed while handling the file"),
                QString::fromStdString( file_path )
                ).toStdString() );
        }

        // append to the list
        this->logs_lines.insert( this->logs_lines.end(), content.begin(), content.end() );
    }

    // nothing to parse
    if ( this->logs_lines.empty() ) {
        this->proceed = false;
    }
}
void CraplogParser::parseLogLines()
{
const auto parseLine = [this]( const std::string& line, const LogsFormat& logs_format ) {
this->data_collection.emplace_back( LogLineData(line, logs_format) );
this->parsed_size += line.size();
++ this->parsed_lines;
};
// parse all the lines
if ( this->proceed ) {
const size_t n_lines{ this->logs_lines.size() };
const size_t nl{ this->logs_format.new_lines };
size_t send{ 0ul };
if ( nl == 0ul ) {
const size_t send_gap{ n_lines>1000ul ? n_lines/100 : n_lines>100ul ? n_lines/10 : 10 };
const LogsFormat& lf {this->logs_format};
this->data_collection.reserve( n_lines );
for ( const std::string& line : this->logs_lines ) {
parseLine( line, lf );
if (send == send_gap) {
this->sendPerfData();
send = 0ul;
}
++send;
}
} else {
const size_t real_lines{ n_lines / (nl+1ul) };
const size_t send_gap{ real_lines>1000ul ? real_lines/100 : real_lines>100ul ? real_lines/10 : 10 };
const LogsFormat& lf {this->logs_format};
this->data_collection.reserve( real_lines );
for ( size_t i{0ul}; i<n_lines; ++i ) {
std::string line = this->logs_lines.at( i );
for ( size_t n{0ul}; n<nl; ++n ) {
++i;
line += "\n" + this->logs_lines.at( i );
}
parseLine( line, lf );
if (send == send_gap) {
this->sendPerfData();
send = 0ul;
}
++send;
}
}
this->sendPerfData();
}
}
void CraplogParser::storeLogLines()
{
QString db_path{ QString::fromStdString( this->db_data_path ) };
QString db_name{ QString::fromStdString( this->db_data_path.substr( this->db_data_path.find_last_of( '/' ) + 1ul ) ) };
QSqlDatabase db{ QSqlDatabase::addDatabase("QSQLITE") };
db.setDatabaseName( db_path );
if ( ! CheckSec::checkDatabaseFile( this->db_data_path, db_name ) ) {
this->proceed &= false;
} else if ( ! db.open() ) {
// error opening database
this->proceed &= false;
QString err_msg;
if ( this->dialogs_level == 2 ) {
err_msg = db.lastError().text();
}
emit this->showDialog( WorkerDialog::errDatabaseFailedOpening,
{db_name, err_msg} );
} else {
try {
// ACID transaction
if ( ! db.transaction() ) {
// error opening database
this->proceed &= false;
QString stmt_msg, err_msg;
if ( this->dialogs_level > 0 ) {
stmt_msg = "db.transaction()";
if ( this->dialogs_level == 2 ) {
err_msg = db.lastError().text();
}
}
emit this->showDialog( WorkerDialog::errDatabaseFailedExecuting,
{db_name, stmt_msg, err_msg} );
}
if ( this->proceed ) {
this->proceed &= this->storeData( db );
}
if ( this->proceed ) {
// commit the transaction
if ( ! db.commit() ) {
// error opening database
this->proceed &= false;
QString stmt_msg, err_msg;
if ( this->dialogs_level > 0 ) {
stmt_msg = "db.commit()";
if ( this->dialogs_level == 2 ) {
err_msg= db.lastError().text();
}
}
emit this->showDialog( WorkerDialog::errDatabaseFailedExecuting,
{db_name, stmt_msg, err_msg} );
}
}
if ( ! this->proceed ) {
// rollback
throw (std::exception());
}
} catch (...) {
// wrongthing w3nt some.,.
this->proceed &= false;
bool err_shown = false;
// rollback the transaction
if ( ! db.rollback() ) {
// error rolling back commits
QString stmt_msg, err_msg;
if ( this->dialogs_level > 0 ) {
stmt_msg = "db.rollback()";
if ( this->dialogs_level == 2 ) {
err_msg = db.lastError().text();
}
}
emit this->showDialog( WorkerDialog::errDatabaseFailedExecuting,
{db_name, stmt_msg, err_msg} );
err_shown = true;
}
if ( ! err_shown ) {
// show a message
emit this->showDialog(
WorkerDialog::errGeneric,
{QString("%1\n\n%2").arg(
DialogSec::tr("An error occured while working on the database"),
DialogSec::tr("Aborting") )} );
}
}
if ( db.isOpen() ) {
db.close();
}
}
}
// Helper macros used to compose the INSERT statement in storeData().
// Each of them appends ", <value>" to a QString named `query_stmt`, which must
// be in scope where the macro is used; "NULL" is appended when LOG_FIELD
// evaluates to false. Single quotes in the value are doubled.
// Every expansion is a single compound statement and the argument is
// parenthesized, so the macros behave as one statement wherever they are used
// (with or without a trailing semicolon).

// appends the value as-is, without enclosing quotes
#define APPEND_TO_QUERY_AS_NUMBER(LOG_FIELD)\
    {\
        query_stmt += QStringLiteral(", ");\
        if ( (LOG_FIELD) ) {\
            query_stmt += QString::fromStdString( *(LOG_FIELD) ).replace("'","''");\
        } else {\
            query_stmt += QStringLiteral("NULL");\
        }\
    }

// appends the value enclosed in single quotes
#define APPEND_TO_QUERY_AS_STRING(LOG_FIELD)\
    {\
        query_stmt += QStringLiteral(", ");\
        if ( (LOG_FIELD) ) {\
            query_stmt += QString("'%1'").arg( QString::fromStdString( *(LOG_FIELD) ).replace("'","''") );\
        } else {\
            query_stmt += QStringLiteral("NULL");\
        }\
    }

// in IIS logs the user-agent is logged with '+' instead of ' ' (whitespace)
// NOTE: relies on `this->web_server`, hence usable inside member functions only
#define APPEND_TO_QUERY_USER_AGENT(LOG_FIELD)\
    {\
        query_stmt += QStringLiteral(", ");\
        if ( (LOG_FIELD) ) {\
            if ( this->web_server == WS_IIS ) {\
                query_stmt += QString("'%1'").arg( QString::fromStdString( *(LOG_FIELD) ).replace("+"," ").replace("'","''") );\
            } else {\
                query_stmt += QString("'%1'").arg( QString::fromStdString( *(LOG_FIELD) ).replace("'","''") );\
            }\
        } else {\
            query_stmt += QStringLiteral("NULL");\
        }\
    }
/// Inserts every item of `data_collection` in the table of the current web server.
///
/// Lines whose client is blacklisted are not stored: their size is added to
/// `blacklisted_size`. Lines matching a warnlist (client, user-agent, method
/// or URI) are stored with the warning flag set and their size is added to
/// `warnlisted_size`. Missing fields are stored as NULL.
///
/// @param db  The database to use for the queries
/// @return Whether all the statements have been executed successfully;
///         on failure a dialog is shown and the remaining lines are not processed
/// @throws WebServerException If the web server is not one of the expected ones
bool CraplogParser::storeData( QSqlDatabase& db )
{
    // the file name is whatever follows the last '/'
    const QString db_name{ QString::fromStdString(
        this->db_data_path.substr(
            this->db_data_path.find_last_of( '/' ) + 1ul ) ) };

    // get blacklist/warnlist items
    const auto& bl_cli = this->blacklists.at( 20 );
    const auto& wl_met = this->warnlists.at( 11 );
    const auto& wl_uri = this->warnlists.at( 12 );
    const auto& wl_cli = this->warnlists.at( 20 );
    const auto& wl_ua  = this->warnlists.at( 21 );

    const bool check_bl_cli { bl_cli.used };
    const bool check_wl_met { wl_met.used };
    const bool check_wl_uri { wl_uri.used };
    const bool check_wl_cli { wl_cli.used };
    const bool check_wl_ua  { wl_ua.used };

    // a list which is not in use is replaced by an empty one
    const std::vector<std::string> no_items;
    const std::vector<std::string>& bl_cli_list{ check_bl_cli ? bl_cli.list : no_items };
    const std::vector<std::string>& wl_met_list{ check_wl_met ? wl_met.list : no_items };
    const std::vector<std::string>& wl_req_list{ check_wl_uri ? wl_uri.list : no_items };
    const std::vector<std::string>& wl_cli_list{ check_wl_cli ? wl_cli.list : no_items };
    const std::vector<std::string>& wl_ua_list { check_wl_ua  ? wl_ua.list  : no_items };

    // pick the table to insert into
    QString table;
    switch ( this->web_server ) {
        case WS_APACHE:
            table = "apache";
            break;
        case WS_NGINX:
            table = "nginx";
            break;
        case WS_IIS:
            table = "iis";
            break;
        default:
            // wrong WebServerID, but should be unreachable because of the previous operations
            throw WebServerException( "Unexpected WebServer: " + toString(this->web_server) );
    }

    // tells whether a line matches any of the warnlists in use,
    // checking in order: client, user-agent, method, URI
    const auto isWarnlisted = [&]( const LogLineData& data ) -> bool {
        if ( check_wl_cli && data.client
          && VecOps::contains( wl_cli_list, *data.client ) ) {
            return true;
        }
        if ( check_wl_ua && data.user_agent
          && VecOps::contains( wl_ua_list, *data.user_agent ) ) {
            return true;
        }
        if ( check_wl_met && data.method
          && VecOps::contains( wl_met_list, *data.method ) ) {
            return true;
        }
        if ( check_wl_uri && data.uri
          && VecOps::contains( wl_req_list, *data.uri ) ) {
            return true;
        }
        return false;
    };

    QSqlQuery query{ db };

    // shows the dialog for a failed query operation
    const auto reportQueryFailure = [this,&query,&db_name]( const QString& failed_call ) {
        QString query_msg, err_msg;
        if ( this->dialogs_level > 0 ) {
            query_msg = failed_call;
            if ( this->dialogs_level == 2 ) {
                err_msg = query.lastError().text();
            }
        }
        emit this->showDialog( WorkerDialog::errDatabaseFailedExecuting,
                               {db_name, query_msg, err_msg} );
    };

    // the head of the statement is the same for every row
    const QString insert_head{ "INSERT INTO \""+table+"\" ("
        "\"warning\", \"year\", \"month\", \"day\", \"hour\", \"minute\", \"second\", "
        "\"protocol\", \"method\", \"uri\", \"query\", \"response\", \"time_taken\", "
        "\"bytes_sent\", \"bytes_received\", \"referrer\", \"client\", \"user_agent\", \"cookie\") "
        "VALUES (" };

    // process every row of data
    for ( const LogLineData& line_data : this->data_collection ) {

        // lines from blacklisted clients are not stored
        if ( check_bl_cli && line_data.client
          && VecOps::contains( bl_cli_list, *line_data.client ) ) {
            this->blacklisted_size += line_data.size();
            continue;
        }

        // initialize the SQL statement
        QString query_stmt = insert_head;

        // complete the statement, using NULL for the missing fields

        // warning
        if ( isWarnlisted( line_data ) ) {
            this->warnlisted_size += line_data.size();
            query_stmt += "1";
        } else {
            query_stmt += "0";
        }

        // date and time
        APPEND_TO_QUERY_AS_NUMBER(line_data.year) // 1
        APPEND_TO_QUERY_AS_NUMBER(line_data.month) // 2
        APPEND_TO_QUERY_AS_NUMBER(line_data.day) // 3
        APPEND_TO_QUERY_AS_NUMBER(line_data.hour) // 4
        APPEND_TO_QUERY_AS_NUMBER(line_data.minute) // 5
        APPEND_TO_QUERY_AS_NUMBER(line_data.second) // 6

        // request
        APPEND_TO_QUERY_AS_STRING(line_data.protocol) // 10
        APPEND_TO_QUERY_AS_STRING(line_data.method) // 11
        APPEND_TO_QUERY_AS_STRING(line_data.uri) // 12
        APPEND_TO_QUERY_AS_STRING(line_data.query) // 13
        APPEND_TO_QUERY_AS_NUMBER(line_data.response_code) // 14
        APPEND_TO_QUERY_AS_NUMBER(line_data.time_taken) // 15
        APPEND_TO_QUERY_AS_NUMBER(line_data.bytes_sent) // 16
        APPEND_TO_QUERY_AS_NUMBER(line_data.bytes_received) // 17

        // client data and referrer
        APPEND_TO_QUERY_AS_STRING(line_data.referrer) // 18
        APPEND_TO_QUERY_AS_STRING(line_data.client) // 20
        APPEND_TO_QUERY_USER_AGENT(line_data.user_agent) // 21
        APPEND_TO_QUERY_AS_STRING(line_data.cookie) // 22

        query_stmt += ");";

        // prepare the statement
        if ( ! query.prepare( query_stmt ) ) {
            reportQueryFailure( "query.prepare()" );
            return false;
        }

        // execute the statement
        if ( ! query.exec() ) {
            reportQueryFailure( "query.exec()" );
            return false;
        }

        // release the query, to get ready for the next one
        query.finish();
    }

    return true;
}