summaryrefslogtreecommitdiff
path: root/http_parser.c
diff options
context:
space:
mode:
author Mikhail Burakov <mburakov@mailbox.org> 2022-07-26 12:20:41 +0200
committer Mikhail Burakov <mburakov@mailbox.org> 2022-07-26 12:20:41 +0200
commit 5366104ae61e531fbaa7291ba44822b6b38b8b3d (patch)
tree 5aedc1ec5e434365180a6421a9dc335b2bc60f58 /http_parser.c
Import existing toolbox components
Diffstat (limited to 'http_parser.c')
-rw-r--r-- http_parser.c 308
1 files changed, 308 insertions, 0 deletions
diff --git a/http_parser.c b/http_parser.c
new file mode 100644
index 0000000..b668a92
--- /dev/null
+++ b/http_parser.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2022 Mikhail Burakov. This file is part of toolbox.
+ *
+ * toolbox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * toolbox is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with toolbox. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "http_parser.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Identity macro: expands to exactly the arguments it is given.
+// NOTE(review): presumably it exists so that each 16-entry row of the lookup
+// tables below stays grouped as one unit in the source - confirm.
+#define _(...) __VA_ARGS__
+
+/**
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | |.0|.1|.2|.3|.4|.5|.6|.7|.8|.9|.a|.b|.c|.d|.e|.f|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |2.| | !| "| #| $| %| &| '| (| )| *| +| ,| -| .| /|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |3.| 0| 1| 2| 3| 4| 5| 6| 7| 8| 9| :| ;| <| =| >| ?|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |4.| @| A| B| C| D| E| F| G| H| I| J| K| L| M| N| O|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |5.| P| Q| R| S| T| U| V| W| X| Y| Z| [| \| ]| ^| _|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |6.| `| a| b| c| d| e| f| g| h| i| j| k| l| m| n| o|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |7.| p| q| r| s| t| u| v| w| x| y| z| {| || }| ~| |
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ */
+
+// mburakov: RFC9110 5.6.2 Tokens
+// Returns nonzero when |item| is a token character: an ASCII letter, a digit,
+// or one of ! # $ % & ' * + - . ^ _ ` | ~. Every other octet, including all
+// octets with the high bit set, yields zero.
+static inline _Bool IsTchar(char item) {
+  // One flag per octet value; every row covers 16 consecutive codes.
+  static const _Bool kTcharTable[256] = {
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x00-0x0f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x10-0x1f: none
+      _(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0),  // ! # $ % & ' * + - .
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),  // 0-9
+      _(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),  // A-O
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1),  // P-Z ^ _
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),  // ` a-o
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0),  // p-z | ~
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x80-0x8f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x90-0x9f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xa0-0xaf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xb0-0xbf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xc0-0xcf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xd0-0xdf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xe0-0xef: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xf0-0xff: none
+  };
+  // Index through uint8_t so a negative char cannot address before the table.
+  const uint8_t octet = (uint8_t)item;
+  return kTcharTable[octet];
+}
+
+// mburakov: RFC3986 3.3 Path
+// Returns nonzero when |item| is accepted inside a request target: an ASCII
+// letter, a digit, or one of ! $ % & ' ( ) * + , - . / : ; = ? @ _ ~.
+// Every other octet, including all octets with the high bit set, yields zero.
+// NOTE(review): '/' and '?' are accepted in addition to the pchar set proper,
+// presumably so a whole path-with-query can be scanned with this one
+// predicate - confirm.
+static inline _Bool IsPchar(char item) {
+  // One flag per octet value; every row covers 16 consecutive codes.
+  static const _Bool kPcharTable[256] = {
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x00-0x0f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x10-0x1f: none
+      _(0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),  // ! $ % & ' ( ) * + , - . /
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1),  // 0-9 : ; = ?
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),  // @ A-O
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1),  // P-Z _
+      _(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),  // a-o
+      _(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0),  // p-z ~
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x80-0x8f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0x90-0x9f: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xa0-0xaf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xb0-0xbf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xc0-0xcf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xd0-0xdf: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xe0-0xef: none
+      _(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),  // 0xf0-0xff: none
+  };
+  // Index through uint8_t so a negative char cannot address before the table.
+  const uint8_t octet = (uint8_t)item;
+  return kPcharTable[octet];
+}
+
+// mburakov: RFC9110 5.6.3 Whitespace
+// Returns nonzero when |item| is a space or a horizontal tab, zero otherwise.
+static inline _Bool IsOws(char item) {
+  switch (item) {
+    case ' ':
+    case '\t':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+// mburakov: RFC9110 5.5 Field Values
+// Returns nonzero when |item| may appear inside a field value: a visible
+// ASCII character (0x21-0x7e) or an obs-text octet (0x80-0xff). Space, tab,
+// the remaining control characters and DEL (0x7f) yield zero.
+static inline _Bool IsVchar(char item) {
+  // Compare as an unsigned octet so bytes >= 0x80 are not seen as negative.
+  const uint8_t octet = (uint8_t)item;
+  const _Bool is_visible_ascii = octet >= 0x21 && octet <= 0x7e;
+  const _Bool is_obs_text = octet >= 0x80;
+  return is_visible_ascii || is_obs_text;
+}
+
+// Forward declarations of the parsing stages. A stage stores the stage that
+// must run next in state->stage, so every stage has to be declared before the
+// first definition that refers to it.
+static enum HttpParser_Result ParseMethod(
+ struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+ const struct HttpParser_Callbacks* callbacks, void* user);
+static enum HttpParser_Result ParseTarget(
+ struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+ const struct HttpParser_Callbacks* callbacks, void* user);
+static enum HttpParser_Result ParseVersion(
+ struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+ const struct HttpParser_Callbacks* callbacks, void* user);
+static enum HttpParser_Result ParseFieldName(
+ struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+ const struct HttpParser_Callbacks* callbacks, void* user);
+static enum HttpParser_Result ParseFieldValue(
+ struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+ const struct HttpParser_Callbacks* callbacks, void* user);
+
+// Stage: scans the request method, a non-empty run of token characters
+// (see IsTchar) terminated by a single space.
+//
+// Scanning starts at state->parsing_offset, which is kept in |state|, so a
+// later call resumes where this one stopped. |callbacks| and |user| are not
+// used by this stage.
+//
+// Returns:
+//   kHttpParser_ResultWantMore - |buffer| ended before the space was seen.
+//   kHttpParser_ResultFinished - the method length (measured from
+//       state->first_offset) is stored in state->first_size, the target start
+//       in state->second_offset, and the next stage is ParseTarget.
+//   kHttpParser_ResultError    - an unexpected character was met, or the
+//       method is empty; state->stage is cleared.
+static enum HttpParser_Result ParseMethod(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  (void)callbacks;
+  (void)user;
+  const char* data = buffer;
+  for (;;) {
+    if (state->parsing_offset == buffer_size) {
+      return kHttpParser_ResultWantMore;
+    }
+    const char current = data[state->parsing_offset];
+    if (IsTchar(current)) {
+      state->parsing_offset++;
+      continue;
+    }
+    if (current == ' ') {
+      // A method is a token and therefore needs at least one character; a
+      // request line that starts with the separator is malformed.
+      state->first_size = state->parsing_offset - state->first_offset;
+      if (state->first_size) {
+        state->stage = ParseTarget;
+        state->parsing_offset++;
+        state->second_offset = state->parsing_offset;
+        return kHttpParser_ResultFinished;
+      }
+    }
+    state->stage = NULL;
+    return kHttpParser_ResultError;
+  }
+}
+
+// Stage: scans the request target, a non-empty run of characters accepted by
+// IsPchar and terminated by a single space.
+//
+// Scanning starts at state->parsing_offset, which is kept in |state|, so a
+// later call resumes where this one stopped. |callbacks| and |user| are not
+// used by this stage.
+//
+// Returns:
+//   kHttpParser_ResultWantMore - |buffer| ended before the space was seen.
+//   kHttpParser_ResultFinished - the target length (measured from
+//       state->second_offset) is stored in state->second_size, the version
+//       start in state->first_offset, and the next stage is ParseVersion.
+//   kHttpParser_ResultError    - an unexpected character was met, or the
+//       target is empty; state->stage is cleared.
+static enum HttpParser_Result ParseTarget(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  (void)callbacks;
+  (void)user;
+  const char* data = buffer;
+  for (;;) {
+    if (state->parsing_offset == buffer_size) {
+      return kHttpParser_ResultWantMore;
+    }
+    const char current = data[state->parsing_offset];
+    if (IsPchar(current)) {
+      state->parsing_offset++;
+      continue;
+    }
+    if (current == ' ') {
+      // Two separators in a row would mean an empty target, which no form of
+      // request target allows.
+      state->second_size = state->parsing_offset - state->second_offset;
+      if (state->second_size) {
+        state->stage = ParseVersion;
+        state->parsing_offset++;
+        state->first_offset = state->parsing_offset;
+        return kHttpParser_ResultFinished;
+      }
+    }
+    state->stage = NULL;
+    return kHttpParser_ResultError;
+  }
+}
+
+// Stage: matches the protocol version together with the line terminator,
+// i.e. the ten octets "HTTP/1.1\r\n" or "HTTP/1.0\r\n".
+//
+// Each incoming octet is compared, position by position, against both
+// reference strings; the position is derived from state->parsing_offset and
+// state->first_offset, so a later call resumes where this one stopped.
+//
+// Once all ten octets matched, callbacks->on_request (if set) receives the
+// method, taken from the very start of |buffer| with length
+// state->first_size, and the target located by state->second_offset and
+// state->second_size. The next stage is ParseFieldName.
+//
+// Returns kHttpParser_ResultFinished on a full match,
+// kHttpParser_ResultWantMore when |buffer| ends first, and
+// kHttpParser_ResultError (clearing state->stage) on the first mismatch.
+static enum HttpParser_Result ParseVersion(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  enum { kVersionLineSize = 10 };
+  static const char kHttp11[kVersionLineSize] = {'H', 'T', 'T', 'P', '/',
+                                                 '1', '.', '1', '\r', '\n'};
+  static const char kHttp10[kVersionLineSize] = {'H', 'T', 'T', 'P', '/',
+                                                 '1', '.', '0', '\r', '\n'};
+  const char* data = buffer;
+  size_t matched = state->parsing_offset - state->first_offset;
+  while (matched != kVersionLineSize) {
+    if (state->parsing_offset == buffer_size) {
+      return kHttpParser_ResultWantMore;
+    }
+    const char current = data[state->parsing_offset];
+    if (current != kHttp11[matched] && current != kHttp10[matched]) {
+      state->stage = NULL;
+      return kHttpParser_ResultError;
+    }
+    state->parsing_offset++;
+    matched = state->parsing_offset - state->first_offset;
+  }
+  if (callbacks && callbacks->on_request) {
+    callbacks->on_request(user, buffer, state->first_size,
+                          data + state->second_offset, state->second_size);
+  }
+  // Prepare the bookkeeping for the first header line.
+  state->stage = ParseFieldName;
+  state->first_offset = state->parsing_offset;
+  state->second_size = 0;
+  return kHttpParser_ResultFinished;
+}
+
+// Stage: scans either a field name terminated by ':' or the empty line
+// ("\r\n") that ends the header section.
+//
+// state->first_offset marks the start of the current line. A '\r' is only
+// accepted at that position and must be followed directly by '\n'; in that
+// case callbacks->on_finished (if set) receives the offset just past the
+// '\n' and state->stage is cleared. Otherwise token characters (see IsTchar)
+// are consumed up to ':'; a non-empty name has its length stored in
+// state->first_size and the next stage becomes ParseFieldValue.
+//
+// Returns kHttpParser_ResultFinished in both successful cases,
+// kHttpParser_ResultWantMore when |buffer| ends first, and
+// kHttpParser_ResultError (clearing state->stage) for an empty name, a '\r'
+// not followed by '\n', or any other unexpected character.
+static enum HttpParser_Result ParseFieldName(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  const char* data = buffer;
+  for (;;) {
+    if (state->parsing_offset == buffer_size) {
+      return kHttpParser_ResultWantMore;
+    }
+    const char current = data[state->parsing_offset];
+
+    // A CR in the first column may open the terminating empty line.
+    if (current == '\r' && state->parsing_offset == state->first_offset) {
+      state->maybe_eol = 1;
+      state->parsing_offset++;
+      continue;
+    }
+
+    // After that CR the only acceptable continuation is the matching LF.
+    if (state->maybe_eol) {
+      state->stage = NULL;
+      if (current != '\n' ||
+          state->parsing_offset != state->first_offset + 1) {
+        return kHttpParser_ResultError;
+      }
+      if (callbacks && callbacks->on_finished) {
+        callbacks->on_finished(user, state->parsing_offset + 1);
+      }
+      return kHttpParser_ResultFinished;
+    }
+
+    if (IsTchar(current)) {
+      state->parsing_offset++;
+      continue;
+    }
+
+    if (current == ':') {
+      state->stage = ParseFieldValue;
+      state->first_size = state->parsing_offset - state->first_offset;
+      state->parsing_offset++;
+      if (state->first_size) {
+        return kHttpParser_ResultFinished;
+      }
+      // An empty name falls through to the error exit below.
+    }
+
+    state->stage = NULL;
+    return kHttpParser_ResultError;
+  }
+}
+
+// Stage: scans a field value up to and including its "\r\n" terminator.
+//
+// Leading spaces and tabs are skipped; the value starts at the first other
+// octet (state->second_offset). state->second_size is updated only when a
+// value character (see IsVchar) is consumed, so spaces and tabs inside the
+// value are covered while trailing ones are left out. A '\r' seen before any
+// value character is an error, which means empty field values are rejected.
+//
+// On the terminating '\n', callbacks->on_field (if set) receives the name
+// (state->first_offset / state->first_size) and the value
+// (state->second_offset / state->second_size), both pointing into |buffer|;
+// the state is then prepared for the next line and the next stage is
+// ParseFieldName.
+//
+// Returns kHttpParser_ResultFinished after a complete line,
+// kHttpParser_ResultWantMore when |buffer| ends first, and
+// kHttpParser_ResultError (clearing state->stage) for an empty value, a '\n'
+// without a preceding '\r', a '\r' followed by anything but '\n' (a second
+// '\r' included), or an octet that is neither a value character nor
+// whitespace.
+static enum HttpParser_Result ParseFieldValue(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  const char* data = buffer;
+  for (;;) {
+    if (state->parsing_offset == buffer_size) {
+      return kHttpParser_ResultWantMore;
+    }
+    const char current = data[state->parsing_offset];
+
+    // A zero second_size means the value has not started yet.
+    if (!state->second_size) {
+      if (IsOws(current)) {
+        state->parsing_offset++;
+        continue;
+      }
+      state->second_offset = state->parsing_offset;
+    }
+
+    if (current == '\r') {
+      // Reject a CR before any value character, and reject a CR that follows
+      // another CR: only a single CR directly before LF ends the line.
+      if (!state->second_size || state->maybe_eol) {
+        state->stage = NULL;
+        return kHttpParser_ResultError;
+      }
+      state->maybe_eol = 1;
+      state->parsing_offset++;
+      continue;
+    }
+
+    if (current == '\n') {
+      if (!state->maybe_eol) {
+        state->stage = NULL;
+        return kHttpParser_ResultError;
+      }
+      if (callbacks && callbacks->on_field) {
+        callbacks->on_field(user, data + state->first_offset, state->first_size,
+                            data + state->second_offset, state->second_size);
+      }
+      // Reset the per-line bookkeeping for the next header line.
+      state->stage = ParseFieldName;
+      state->maybe_eol = 0;
+      state->parsing_offset++;
+      state->first_offset = state->parsing_offset;
+      state->second_size = 0;
+      return kHttpParser_ResultFinished;
+    }
+
+    // A CR was consumed but neither CR nor LF follows it.
+    if (state->maybe_eol) {
+      state->stage = NULL;
+      return kHttpParser_ResultError;
+    }
+
+    if (IsVchar(current)) {
+      state->parsing_offset++;
+      state->second_size = state->parsing_offset - state->second_offset;
+      continue;
+    }
+
+    // Whitespace after the value started is consumed without growing
+    // second_size; it only counts if another value character follows.
+    if (IsOws(current)) {
+      state->parsing_offset++;
+      continue;
+    }
+
+    state->stage = NULL;
+    return kHttpParser_ResultError;
+  }
+}
+
+// Puts |state| back into its initial condition: every member is zeroed except
+// the stage, which is set to ParseMethod.
+void HttpParser_Reset(struct HttpParser_State* state) {
+  *state = (struct HttpParser_State){.stage = ParseMethod};
+}
+
+// Runs the parsing stages stored in state->stage one after another over
+// |buffer|, passing |callbacks| and |user| through to each of them.
+//
+// The loop stops as soon as a stage returns something other than
+// kHttpParser_ResultFinished, and that value is returned. When state->stage
+// is (or becomes) null, kHttpParser_ResultFinished is returned; in
+// particular, a call made with a null stage returns it without examining
+// |buffer| at all.
+enum HttpParser_Result HttpParser_Parse(
+    struct HttpParser_State* state, const void* buffer, size_t buffer_size,
+    const struct HttpParser_Callbacks* callbacks, void* user) {
+  while (state->stage) {
+    const enum HttpParser_Result outcome =
+        (state->stage)(state, buffer, buffer_size, callbacks, user);
+    if (outcome != kHttpParser_ResultFinished) {
+      return outcome;
+    }
+  }
+  return kHttpParser_ResultFinished;
+}