1
1
Fork 0
mirror of https://github.com/oxen-io/lokinet synced 2023-12-14 06:53:00 +01:00

new http parser

This commit is contained in:
Rick V 2019-09-09 13:23:33 -05:00
parent 8fd13577ba
commit 70dea71db6
No known key found for this signature in database
GPG key ID: C0EDC8723FDC3465
7 changed files with 599 additions and 0 deletions

View file

@ -0,0 +1,79 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Transition table for the chunk-size parser, indexed by state * 4 + class,
 * where class is 0 (any other character), 1 (LF), 2 (CR) or 3 (hex digit).
 * The low nibble of each entry is the next state. The full entry value picks
 * the action in http_parse_chunked(): 0xC1 is an error, 0x01/0x81 accumulate
 * a hex digit, 0x83 ends the size line and 0xC0 ends the CRLF that follows a
 * chunk body.
 */
static const unsigned char http_chunk_state[] = {
/*     *    LF    CR   HEX */
    0xC1, 0xC1, 0xC1,    1, /* s0: initial hex char */
    0xC1, 0xC1,    2, 0x81, /* s1: additional hex chars, followed by CR */
    0xC1, 0x83, 0xC1, 0xC1, /* s2: trailing LF */
    0xC1, 0xC1,    4, 0xC1, /* s3: CR after chunk block */
    0xC1, 0xC0, 0xC1, 0xC1, /* s4: LF after chunk block */
};

/*
 * Feeds one character of a chunked-encoding size line to the parser.
 *
 * state - parser state; zero before the first call, otherwise the value left
 *         by the previous call.
 * size  - receives the decoded chunk size; set to -1 on error.
 * ch    - next input character.
 *
 * Returns non-zero while more input is needed and zero once a size line has
 * been completed or an error was detected (*size == -1). A size that does not
 * fit in an int, or a state outside the table, is reported as an error.
 */
int http_parse_chunked(int* state, int *size, char ch)
{
    /* Largest int value, spelled without <limits.h> (assumes int has no padding bits). */
    const int size_max = (int)(~0u >> 1);
    int newstate, code = 0;
    int digit;

    /* Never index the table with a state it does not contain. */
    if (*state < 0 || *state > 4) {
        *state = 1;
        *size = -1;
        return 0;
    }

    switch (ch) {
    case '\n': code = 1; break;
    case '\r': code = 2; break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
    case '8': case '9': case 'a': case 'b':
    case 'c': case 'd': case 'e': case 'f':
    case 'A': case 'B': case 'C': case 'D':
    case 'E': case 'F': code = 3; break;
    default: break;
    }

    newstate = http_chunk_state[*state * 4 + code];
    *state = (newstate & 0xF);

    switch (newstate) {
    case 0xC0: /* LF after a chunk body: finished only after the zero-size chunk */
        return *size != 0;
    case 0xC1: /* error */
        *size = -1;
        return 0;
    case 0x01: /* initial char */
        *size = 0;
        /* fallthrough */
    case 0x81: /* size char */
        if (ch >= 'a')
            digit = ch - 'a' + 10;
        else if (ch >= 'A')
            digit = ch - 'A' + 10;
        else
            digit = ch - '0';
        /* Refuse sizes that would overflow instead of invoking undefined behaviour. */
        if (*size < 0 || *size > (size_max - digit) / 16) {
            *size = -1;
            return 0;
        }
        *size = *size * 16 + digit;
        break;
    case 0x83: /* LF ending the size line: a zero size still needs the final CRLF */
        return *size == 0;
    default:
        break;
    }
    return 1;
}

View file

@ -0,0 +1,48 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTTP_CHUNK_H
#define HTTP_CHUNK_H
#if defined(__cplusplus)
extern "C" {
#endif
/**
* Parses the size out of a chunk-encoded HTTP response, one character per
* call. Returns non-zero if it needs more data. Returns zero on success or
* on error. On error, size == -1. On success, size = size of the following
* chunk data excluding the trailing \r\n. The user is expected to process or
* otherwise seek past the chunk data up to the trailing \r\n. The state
* parameter is used for internal state and should be initialized to zero
* before the first call.
*/
int http_parse_chunked(int* state, int *size, char ch);
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,19 @@
/*
* Copyright (C) 2019 Rick V. All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* HTTP client for loki-msgr
*/

View file

@ -0,0 +1,72 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "header.h"
/*
 * Transition table for the response-header parser, indexed by
 * state * 8 + class. The class is 0 for any other character, then 1..6 for
 * tab, LF, CR, space, ',' and ':'; the eighth column is padding. The low
 * nibble of an entry is the next state; the full entry value selects the
 * status reported by http_parse_header_char(). The table is read-only.
 */
static const unsigned char http_header_state[] = {
/*     *    \t    \n    \r   ' '     ,     :   PAD */
    0x80,    1, 0xC1, 0xC1,    1, 0x80, 0x80, 0xC1, /* state 0: HTTP version */
    0x81,    2, 0xC1, 0xC1,    2,    1,    1, 0xC1, /* state 1: Response code */
    0x82, 0x82,    4,    3, 0x82, 0x82, 0x82, 0xC1, /* state 2: Response reason */
    0xC1, 0xC1,    4, 0xC1, 0xC1, 0xC1, 0xC1, 0xC1, /* state 3: HTTP version newline */
    0x84, 0xC1, 0xC0,    5, 0xC1, 0xC1,    6, 0xC1, /* state 4: Start of header field */
    0xC1, 0xC1, 0xC0, 0xC1, 0xC1, 0xC1, 0xC1, 0xC1, /* state 5: Last CR before end of header */
    0x87,    6, 0xC1, 0xC1,    6, 0x87, 0x87, 0xC1, /* state 6: leading whitespace before header value */
    0x87, 0x87, 0xC4,   10, 0x87, 0x88, 0x87, 0xC1, /* state 7: header field value */
    0x87, 0x88,    6,    9, 0x88, 0x88, 0x87, 0xC1, /* state 8: Split value field value */
    0xC1, 0xC1,    6, 0xC1, 0xC1, 0xC1, 0xC1, 0xC1, /* state 9: CR after split value field */
    0xC1, 0xC1, 0xC4, 0xC1, 0xC1, 0xC1, 0xC1, 0xC1, /* state 10:CR after header value */
};

/*
 * Classifies one character of an HTTP response header stream.
 *
 * state - parser state; zero before the first call, otherwise the value left
 *         by the previous call.
 * ch    - next input character.
 *
 * Returns an http_header_status value describing the character. When
 * http_header_status_done is returned, *state is zero after a complete header
 * section and non-zero after a parse error. A state outside the table is
 * treated as a parse error.
 */
int http_parse_header_char(int* state, char ch)
{
    int newstate, code = 0;

    /* Never index the table with a state it does not contain. */
    if (*state < 0 || *state > 10) {
        *state = 1;
        return http_header_status_done;
    }

    switch (ch) {
    case '\t': code = 1; break;
    case '\n': code = 2; break;
    case '\r': code = 3; break;
    case ' ': code = 4; break;
    case ',': code = 5; break;
    case ':': code = 6; break;
    default: break;
    }

    newstate = http_header_state[*state * 8 + code];
    *state = (newstate & 0xF);

    switch (newstate) {
    case 0xC0: return http_header_status_done; /* end of headers, state becomes 0 */
    case 0xC1: return http_header_status_done; /* error, state becomes 1 */
    case 0xC4: return http_header_status_store_keyvalue;
    case 0x80: return http_header_status_version_character;
    case 0x81: return http_header_status_code_character;
    case 0x82: return http_header_status_status_character;
    case 0x84: return http_header_status_key_character;
    case 0x87: return http_header_status_value_character;
    case 0x88: return http_header_status_value_character;
    default: break;
    }
    /* Pure state transition: the character carries no payload. */
    return http_header_status_continue;
}

View file

@ -0,0 +1,61 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTTP_HEADER_H
#define HTTP_HEADER_H
#if defined(__cplusplus)
extern "C" {
#endif
/* Meaning of a character as reported by http_parse_header_char(). */
enum http_header_status
{
/* Parsing stopped: either the header section ended or an error occurred. */
http_header_status_done,
/* Character was consumed as a separator/transition and carries no payload. */
http_header_status_continue,
/* Character belongs to the HTTP version token of the status line. */
http_header_status_version_character,
/* Character belongs to the response code of the status line. */
http_header_status_code_character,
/* Character belongs to the response reason of the status line. */
http_header_status_status_character,
/* Character belongs to a header field name. */
http_header_status_key_character,
/* Character belongs to a header field value. */
http_header_status_value_character,
/* End of a header line: the collected key/value pair is complete. */
http_header_status_store_keyvalue
};
/**
* Parses a single character of an HTTP header stream. The state parameter is
* used as internal state and should be initialized to zero for the first call.
* The return value is a value from the http_header_status enumeration
* specifying the semantics of the character. If an error is encountered,
* http_header_status_done will be returned with a non-zero state parameter. On
* success http_header_status_done is returned with the state parameter set to
* zero.
*/
int http_parse_header_char(int* state, char ch);
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,227 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "http.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include "header.h"
#include "chunk.h"
/* Hands ndata bytes of response body to the user's body callback. */
static void append_body(struct http_roundtripper* rt, const char* data, int ndata)
{
    void* const user = rt->opaque;

    rt->funcs.body(user, data, ndata);
}
/*
 * Makes sure the scratch buffer can hold at least `size` bytes. When it has
 * to grow, the new capacity is the largest of 64, `size` and 1.5 times the
 * current capacity; the buffer is obtained from the realloc_scratch callback.
 */
static void grow_scratch(struct http_roundtripper* rt, int size)
{
    int wanted;
    int grown;

    if (size <= rt->nscratch)
        return;

    wanted = (size < 64) ? 64 : size;
    grown = (rt->nscratch * 3) / 2;
    if (grown < wanted)
        grown = wanted;

    rt->scratch = (char*)rt->funcs.realloc_scratch(rt->opaque, rt->scratch, grown);
    rt->nscratch = grown;
}
/* Returns the smaller of two ints. */
static int min(int a, int b)
{
    if (b < a)
        return b;
    return a;
}
/* Phases of response processing, stored in http_roundtripper.state. */
enum http_roundtripper_state {
/* Reading the status line and header fields. */
http_roundtripper_header,
/* Reading a chunk size line (or the CRLF that follows a chunk body). */
http_roundtripper_chunk_header,
/* Forwarding the bytes of the current chunk to the body callback. */
http_roundtripper_chunk_data,
/* Forwarding a body whose length came from Content-Length. */
http_roundtripper_raw_data,
/* Forwarding a body of unknown length (no Content-Length, not chunked). */
http_roundtripper_unknown_data,
/* Response fully received. */
http_roundtripper_close,
/* Malformed response; parsing stopped. */
http_roundtripper_error,
};
/*
 * Prepares a roundtripper for a new response: stores the callbacks and the
 * user pointer, clears the scratch buffer bookkeeping and puts the parser at
 * the start of the header section with no known content length.
 */
void http_init(struct http_roundtripper* rt, struct http_funcs funcs, void* opaque)
{
    /* user supplied hooks */
    rt->funcs = funcs;
    rt->opaque = opaque;

    /* scratch buffer starts out unallocated */
    rt->scratch = 0;
    rt->nscratch = 0;
    rt->nkey = 0;
    rt->nvalue = 0;

    /* parser starts at the status line */
    rt->state = http_roundtripper_header;
    rt->parsestate = 0;
    rt->code = 0;
    rt->chunked = 0;
    rt->contentlength = -1; /* -1: no Content-Length seen */
}
/*
 * Releases the scratch buffer, if any, by asking the realloc_scratch callback
 * for a size of zero. Safe to call when no buffer is held.
 */
void http_free(struct http_roundtripper* rt)
{
    if (rt->scratch) {
        rt->funcs.realloc_scratch(rt->opaque, rt->scratch, 0);
        rt->scratch = 0;
    }
    /*
     * Keep the recorded capacity in step with the buffer: with a stale
     * capacity grow_scratch() would skip the allocation and the next write
     * would go through a null pointer.
     */
    rt->nscratch = 0;
}
/*
 * Stored in contentlength when a Content-Length value is malformed. Any value
 * below -1 makes the end-of-headers transition select the error state.
 */
enum { http_contentlength_invalid = -2 };

/*
 * Decodes a Content-Length value of nvalue bytes (not NUL-terminated): one or
 * more decimal digits, optionally followed by spaces or tabs. Returns the
 * length, or http_contentlength_invalid when the text is not of that form or
 * does not fit in an int.
 */
static int parse_content_length(const char* value, int nvalue)
{
    int length = 0;
    int ii = 0;

    while (ii < nvalue && isdigit((unsigned char)value[ii])) {
        const int digit = value[ii] - '0';
        if (length > (INT_MAX - digit) / 10)
            return http_contentlength_invalid;
        length = length * 10 + digit;
        ++ii;
    }
    if (ii == 0)
        return http_contentlength_invalid;

    /* The header parser keeps trailing whitespace in the value; accept it. */
    while (ii < nvalue && (value[ii] == ' ' || value[ii] == '\t'))
        ++ii;

    return ii == nvalue ? length : http_contentlength_invalid;
}

/*
 * Feeds a block of response bytes to the parser.
 *
 * rt   - roundtripper previously set up with http_init().
 * data - bytes received from the server.
 * size - number of bytes in data; a non-positive size consumes nothing.
 * read - receives the number of bytes consumed from data.
 *
 * Header fields, the status code and body bytes are reported through the
 * callbacks in rt->funcs. Returns zero once the response is complete or an
 * error was detected (the scratch buffer is released at that point), and
 * non-zero when more data is needed.
 */
int http_data(struct http_roundtripper* rt, const char* data, int size, int* read)
{
    const int initial_size = size;

    while (size > 0) {
        switch (rt->state) {
        case http_roundtripper_header:
            switch (http_parse_header_char(&rt->parsestate, *data)) {
            case http_header_status_done:
                rt->funcs.code(rt->opaque, rt->code);
                if (rt->parsestate != 0)
                    rt->state = http_roundtripper_error;
                else if (rt->chunked) {
                    rt->contentlength = 0;
                    rt->state = http_roundtripper_chunk_header;
                } else if (rt->contentlength == 0)
                    rt->state = http_roundtripper_close;
                else if (rt->contentlength > 0)
                    rt->state = http_roundtripper_raw_data;
                else if (rt->contentlength == -1)
                    rt->state = http_roundtripper_unknown_data;
                else
                    rt->state = http_roundtripper_error;
                break;
            case http_header_status_code_character: {
                const int digit = *data - '0';
                /* Only decimal digits that keep the code within int range are valid. */
                if (digit < 0 || digit > 9 || rt->code > (INT_MAX - digit) / 10)
                    rt->state = http_roundtripper_error;
                else
                    rt->code = rt->code * 10 + digit;
                break;
            }
            case http_header_status_key_character:
                grow_scratch(rt, rt->nkey + 1);
                /* tolower() requires an unsigned char value; plain char may be negative. */
                rt->scratch[rt->nkey] = (char)tolower((unsigned char)*data);
                ++rt->nkey;
                break;
            case http_header_status_value_character:
                /* The value is stored directly after the key in the scratch buffer. */
                grow_scratch(rt, rt->nkey + rt->nvalue + 1);
                rt->scratch[rt->nkey + rt->nvalue] = *data;
                ++rt->nvalue;
                break;
            case http_header_status_store_keyvalue:
                if (rt->nkey == 17 && 0 == strncmp(rt->scratch, "transfer-encoding", (size_t)rt->nkey))
                    rt->chunked = (rt->nvalue == 7 && 0 == strncmp(rt->scratch + rt->nkey, "chunked", (size_t)rt->nvalue));
                else if (rt->nkey == 14 && 0 == strncmp(rt->scratch, "content-length", (size_t)rt->nkey))
                    rt->contentlength = parse_content_length(rt->scratch + rt->nkey, rt->nvalue);
                rt->funcs.header(rt->opaque, rt->scratch, rt->nkey, rt->scratch + rt->nkey, rt->nvalue);
                rt->nkey = 0;
                rt->nvalue = 0;
                break;
            default:
                /* version/reason characters and separators need no handling */
                break;
            }
            --size;
            ++data;
            break;

        case http_roundtripper_chunk_header:
            if (!http_parse_chunked(&rt->parsestate, &rt->contentlength, *data)) {
                /* Any negative size is unusable, not just the -1 error marker. */
                if (rt->contentlength < 0)
                    rt->state = http_roundtripper_error;
                else if (rt->contentlength == 0)
                    rt->state = http_roundtripper_close;
                else
                    rt->state = http_roundtripper_chunk_data;
            }
            --size;
            ++data;
            break;

        case http_roundtripper_chunk_data: {
            const int chunksize = min(size, rt->contentlength);
            append_body(rt, data, chunksize);
            rt->contentlength -= chunksize;
            size -= chunksize;
            data += chunksize;
            if (rt->contentlength == 0) {
                /*
                 * Go back to the chunk parser for the CRLF after the chunk
                 * body; a non-zero size tells it this was not the last chunk.
                 */
                rt->contentlength = 1;
                rt->state = http_roundtripper_chunk_header;
            }
            break;
        }

        case http_roundtripper_raw_data: {
            const int chunksize = min(size, rt->contentlength);
            append_body(rt, data, chunksize);
            rt->contentlength -= chunksize;
            size -= chunksize;
            data += chunksize;
            if (rt->contentlength == 0)
                rt->state = http_roundtripper_close;
            break;
        }

        case http_roundtripper_unknown_data:
            /* No length is known: everything that arrives is body data. */
            append_body(rt, data, size);
            data += size;
            size = 0;
            break;

        case http_roundtripper_close:
        case http_roundtripper_error:
            break;

        default:
            /* Unknown state: stop instead of spinning on the same byte. */
            rt->state = http_roundtripper_error;
            break;
        }

        if (rt->state == http_roundtripper_error || rt->state == http_roundtripper_close) {
            if (rt->scratch) {
                rt->funcs.realloc_scratch(rt->opaque, rt->scratch, 0);
                rt->scratch = 0;
            }
            rt->nscratch = 0; /* capacity follows the released buffer */
            *read = initial_size - size;
            return 0;
        }
    }

    *read = initial_size - size;
    return 1;
}
/* Reports whether the roundtripper stopped in the error state (1) or not (0). */
int http_iserror(struct http_roundtripper* rt)
{
    if (rt->state == http_roundtripper_error)
        return 1;
    return 0;
}

View file

@ -0,0 +1,93 @@
/*-
* Copyright 2012 Matthew Endsley
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted providing that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTTP_HTTP_H
#define HTTP_HTTP_H
#if defined(__cplusplus)
extern "C" {
#endif
/**
* Callbacks for handling response data. Every callback receives the opaque
* pointer that was given to http_init as its first argument.
* realloc_scratch - reallocate memory, cannot fail. There will only
* be one scratch buffer. Implementation may take
* advantage of this fact. Called with size 0 to
* release the buffer.
* body - handle HTTP response body data
* header - handle an HTTP header key/value pair; key and value are
* passed with explicit lengths and are not NUL-terminated.
* The key is lower-cased.
* code - handle the HTTP status code for the response
*/
struct http_funcs {
void* (*realloc_scratch)(void* opaque, void* ptr, int size);
void (*body)(void* opaque, const char* data, int size);
void (*header)(void* opaque, const char* key, int nkey, const char* value, int nvalue);
void (*code)(void* opqaue, int code);
};
/* Parser state for one HTTP response. Initialize with http_init. */
struct http_roundtripper {
/* user callbacks */
struct http_funcs funcs;
/* user pointer handed back as the first argument of every callback */
void *opaque;
/* buffer holding the current header key immediately followed by its value */
char *scratch;
/* status code accumulated from the status line */
int code;
/* state of the character-level header / chunk-size parser */
int parsestate;
/* -1 until a length is known; then the body or chunk bytes still expected */
int contentlength;
/* current processing phase of the response */
int state;
/* capacity of scratch in bytes */
int nscratch;
/* length of the key stored in scratch */
int nkey;
/* length of the value stored in scratch after the key */
int nvalue;
/* non-zero when the response uses "Transfer-Encoding: chunked" */
int chunked;
};
/**
* Initializes a roundtripper with the specified response functions. This must
* be called before the rt object is used. The opaque pointer is passed back
* as the first argument of every callback.
*/
void http_init(struct http_roundtripper* rt, struct http_funcs, void* opaque);
/**
* Frees any scratch memory allocated during parsing. The memory is released
* through the realloc_scratch callback with a size of zero.
*/
void http_free(struct http_roundtripper* rt);
/**
* Parses a block of HTTP response data. Returns zero if the parser reached the
* end of the response, or an error was encountered. Use http_iserror to check
* for the presence of an error. Returns non-zero if more data is required for
* the response. On return, *read holds the number of bytes of data that were
* consumed.
*/
int http_data(struct http_roundtripper* rt, const char* data, int size, int* read);
/**
* Returns non-zero if a completed parser encountered an error. Only
* meaningful once http_data has returned zero; until then no error has been
* recorded and this function returns zero.
*/
int http_iserror(struct http_roundtripper* rt);
#if defined(__cplusplus)
}
#endif
#endif