Improved Apache Format String parsing for composed formats
This commit is contained in:
parent
cde245febd
commit
27d4178616
2 changed files with 117 additions and 29 deletions
|
@ -198,7 +198,7 @@ const FormatOps::LogsFormat FormatOps::processApacheFormatString( const std::str
|
||||||
int n_fld=0,
|
int n_fld=0,
|
||||||
start, stop=0, aux, aux_start, aux_stop,
|
start, stop=0, aux, aux_start, aux_stop,
|
||||||
max=f_str.size()-1;
|
max=f_str.size()-1;
|
||||||
std::string aux_fld, aux_fld_v, cur_fld, cur_sep;
|
std::string aux_fld, aux_fld_v, cur_fld, cur_sep="";
|
||||||
// find and convert any field
|
// find and convert any field
|
||||||
while (true) {
|
while (true) {
|
||||||
// start after the last found field
|
// start after the last found field
|
||||||
|
@ -210,10 +210,17 @@ const FormatOps::LogsFormat FormatOps::processApacheFormatString( const std::str
|
||||||
// hunt the next field
|
// hunt the next field
|
||||||
aux = f_str.find_first_of( '%', stop );
|
aux = f_str.find_first_of( '%', stop );
|
||||||
// check if false positive
|
// check if false positive
|
||||||
if ( aux > 0 && aux <= max ) {
|
if ( aux >= 0 && aux <= max ) {
|
||||||
if ( f_str.at(aux-1) == '%' || f_str.at(aux-1) == '\\' ) {
|
if ( aux > 0) {
|
||||||
|
if ( f_str.at(aux-1) == '%' || f_str.at(aux-1) == '\\' ) {
|
||||||
|
// the percent sign character
|
||||||
|
stop = aux + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( f_str.at(aux+1) == '%' ) {
|
||||||
// the percent sign character
|
// the percent sign character
|
||||||
stop = aux + 1;
|
stop = aux + 2;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -243,42 +250,102 @@ const FormatOps::LogsFormat FormatOps::processApacheFormatString( const std::str
|
||||||
aux - aux_start );
|
aux - aux_start );
|
||||||
// get the module
|
// get the module
|
||||||
aux_fld_v = f_str.at( aux+1 );
|
aux_fld_v = f_str.at( aux+1 );
|
||||||
try {
|
if ( aux_fld_v == "^" ) {
|
||||||
// try if the module is valid
|
aux_stop += 2;
|
||||||
cur_fld = f_map_v->at( aux_fld_v ).at( aux_fld );
|
aux_fld_v = f_str.substr( aux+1, 3 );
|
||||||
// if here, is valid. stop hunting and append
|
}
|
||||||
stop = aux_stop;
|
if ( f_map_v->find( aux_fld_v ) == f_map_v->end() ) {
|
||||||
break;
|
|
||||||
|
|
||||||
} catch (...) {
|
|
||||||
// invalid, append all as separator and restart hunting
|
// invalid, append all as separator and restart hunting
|
||||||
cur_sep += f_str.substr( stop, aux_stop-stop );
|
cur_sep += f_str.substr( stop, aux_stop-stop );
|
||||||
stop = aux_stop;
|
start = stop = aux_stop;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
// module is valud
|
||||||
|
const auto &aux_map = f_map_v->at( aux_fld_v );
|
||||||
|
bool skip;
|
||||||
|
int aux_aux_start,
|
||||||
|
aux_aux_stop = 0,
|
||||||
|
aux_aux;
|
||||||
|
std::string aux_aux_fld;
|
||||||
|
while (true) {
|
||||||
|
// loop inside the composed field
|
||||||
|
skip = true;
|
||||||
|
aux_aux_start = aux_aux_stop;
|
||||||
|
for ( const std::string& fld : this->A_ALFs_v.at( aux_fld_v ) ) {
|
||||||
|
aux_aux = aux_fld.find( fld, aux_aux_start );
|
||||||
|
// check if has been found
|
||||||
|
if ( aux_aux >= 0 && aux_aux <= aux_fld.size() ) {
|
||||||
|
// check if false positive
|
||||||
|
if ( aux_fld.at(aux_aux) == '%' ) {
|
||||||
|
if ( aux_fld.at(aux_aux-1) == '%' || aux_fld.at(aux_aux-1) == '\\' ) {
|
||||||
|
// the percent sign character
|
||||||
|
aux_aux_start = aux_aux + 1;
|
||||||
|
continue;
|
||||||
|
} else if ( f_str.at(aux+1) == '%' ) {
|
||||||
|
// the percent sign character
|
||||||
|
aux_aux_start = aux_aux + 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
skip = false;
|
||||||
|
aux_aux_stop = aux_aux + fld.size();
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( skip == true ) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// append the current separator
|
||||||
|
cur_sep += aux_fld.substr( aux_aux_start, aux_aux-aux_aux_start );
|
||||||
|
aux_aux_fld = aux_fld.substr( aux_aux, aux_aux_stop-aux_aux );
|
||||||
|
// check if the module is valid
|
||||||
|
if ( aux_map.size() == 0 ) {
|
||||||
|
// not considered
|
||||||
|
fields.push_back( "NONE" );
|
||||||
|
// append to separators list
|
||||||
|
separators.push_back( cur_sep );
|
||||||
|
cur_sep = "";
|
||||||
|
|
||||||
|
} else if ( aux_map.find( aux_aux_fld ) != aux_map.end() ) {
|
||||||
|
// valid, append
|
||||||
|
cur_fld = aux_map.at( aux_aux_fld );
|
||||||
|
fields.push_back( cur_fld );
|
||||||
|
// append to separators list
|
||||||
|
separators.push_back( cur_sep );
|
||||||
|
cur_sep = "";
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// invalid, append as separator and keep hunting
|
||||||
|
cur_sep += aux_fld.substr( aux_aux, aux_aux_stop-aux_aux );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// items already appended as needed, next main hunting loop round
|
||||||
|
start = stop = aux_stop;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// normal
|
// normal
|
||||||
try {
|
aux_fld = f_str.substr( aux, 2 );
|
||||||
// try if the module is valid
|
aux_stop = aux+2;
|
||||||
|
if ( aux_fld == "%>" ) {
|
||||||
aux_fld = f_str.substr( aux, 3 );
|
aux_fld = f_str.substr( aux, 3 );
|
||||||
aux_stop = aux+3;
|
aux_stop = aux+3;
|
||||||
if ( aux_fld != "%>s" ) {
|
}
|
||||||
aux_fld = f_str.substr( aux, 2 );
|
// check if the module is valid
|
||||||
aux_stop = aux+2;
|
if ( f_map->find( aux_fld ) != f_map->end() ) {
|
||||||
}
|
// valid
|
||||||
cur_fld = f_map->at( aux_fld );
|
cur_fld = f_map->at( aux_fld );
|
||||||
// if here, is valid. stop hunting and append
|
|
||||||
stop = aux_stop;
|
stop = aux_stop;
|
||||||
break;
|
break;
|
||||||
|
} else {
|
||||||
} catch (...) {
|
|
||||||
// invalid, append all as separator and restart hunting
|
// invalid, append all as separator and restart hunting
|
||||||
stop ++;
|
cur_sep += aux_fld;
|
||||||
cur_sep += f_str.at( stop );
|
stop = aux_stop;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// outside hunting loop
|
// outside hunting loop
|
||||||
|
@ -307,6 +374,8 @@ const FormatOps::LogsFormat FormatOps::processApacheFormatString( const std::str
|
||||||
n_fld++;
|
n_fld++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//delete f_map;
|
||||||
|
//delete f_map_v;
|
||||||
|
|
||||||
return FormatOps::LogsFormat{
|
return FormatOps::LogsFormat{
|
||||||
.string = f_str,
|
.string = f_str,
|
||||||
|
@ -343,8 +412,8 @@ const FormatOps::LogsFormat FormatOps::processNginxFormatString( const std::stri
|
||||||
std::vector<std::string> separators, fields;
|
std::vector<std::string> separators, fields;
|
||||||
// parse the string to convert keyargs in craplog's fields format
|
// parse the string to convert keyargs in craplog's fields format
|
||||||
int n_fld=0,
|
int n_fld=0,
|
||||||
start, aux=0, stop=0,
|
start, aux, stop=0,
|
||||||
min_dist=0, max_dist=f_str.size()-1;
|
min_dist, max_dist=f_str.size()-1;
|
||||||
std::string cur_fld, cur_sep;
|
std::string cur_fld, cur_sep;
|
||||||
// find and convert any field
|
// find and convert any field
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -356,7 +425,7 @@ const FormatOps::LogsFormat FormatOps::processNginxFormatString( const std::stri
|
||||||
for ( const std::string& fld : *f_flds ) {
|
for ( const std::string& fld : *f_flds ) {
|
||||||
// run untill a valid field is found
|
// run untill a valid field is found
|
||||||
aux = f_str.find( fld, start );
|
aux = f_str.find( fld, start );
|
||||||
if ( aux < 0 | aux > min_dist ) {
|
if ( aux < 0 || aux > min_dist ) {
|
||||||
// not found, skip to the next
|
// not found, skip to the next
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -421,6 +490,8 @@ const FormatOps::LogsFormat FormatOps::processNginxFormatString( const std::stri
|
||||||
// step at the end of the current field for the next start
|
// step at the end of the current field for the next start
|
||||||
stop = min_dist + cur_fld.size();
|
stop = min_dist + cur_fld.size();
|
||||||
}
|
}
|
||||||
|
//delete f_map;
|
||||||
|
//delete f_flds;
|
||||||
|
|
||||||
return FormatOps::LogsFormat{
|
return FormatOps::LogsFormat{
|
||||||
.string = f_str,
|
.string = f_str,
|
||||||
|
@ -468,7 +539,7 @@ const FormatOps::LogsFormat FormatOps::processIisFormatString( const std::string
|
||||||
for ( const std::string& fld : f_flds ) {
|
for ( const std::string& fld : f_flds ) {
|
||||||
// run untill a valid field is found
|
// run untill a valid field is found
|
||||||
aux = f_str.find( fld, start );
|
aux = f_str.find( fld, start );
|
||||||
if ( aux < 0 | aux > min_dist ) {
|
if ( aux < 0 || aux > min_dist ) {
|
||||||
// not found, skip to the next
|
// not found, skip to the next
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,23 @@ private:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// list of access logs fields formats
|
// list of access logs fields formats
|
||||||
|
const std::unordered_map<std::string, std::vector<std::string>> A_ALFs_v = {
|
||||||
|
{"h", {"c"} },
|
||||||
|
{"t", {"sec","msec","usec","\%b","\%B","\%c","\%d","\%D","\%e","\%F","\%h","\%H","\%m","\%M","\%r","\%R","\%S","\%T","\%x","\%X","\%y","\%Y"} },
|
||||||
|
{"i", {"Cookie","User-agent","Referer"} },
|
||||||
|
{"T", {"s","ms","us"} },
|
||||||
|
// composed not in use
|
||||||
|
{"a", {}},
|
||||||
|
{"c", {}},
|
||||||
|
{"e", {}},
|
||||||
|
{"n", {}},
|
||||||
|
{"o", {}},
|
||||||
|
{"p", {}},
|
||||||
|
{"P", {}},
|
||||||
|
{"^ti", {}},
|
||||||
|
{"^to", {}}
|
||||||
|
};
|
||||||
|
|
||||||
const std::vector<std::string> N_ALFs = {"$remote_addr", "$time_local", "$time_iso8601", "$msec", "$request", "$status", "$bytes_sent", "$request_length", "$request_time", "$http_referer", "$http_user_agent",
|
const std::vector<std::string> N_ALFs = {"$remote_addr", "$time_local", "$time_iso8601", "$msec", "$request", "$status", "$bytes_sent", "$request_length", "$request_time", "$http_referer", "$http_user_agent",
|
||||||
/*NOT IN USE->*/"$remote_user", "$gzip_ratio", "$connection", "$connection_requests", "$pipe"};
|
/*NOT IN USE->*/"$remote_user", "$gzip_ratio", "$connection", "$connection_requests", "$pipe"};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue