|
|
@@ -0,0 +1,228 @@ |
|
|
|
#define _GNU_SOURCE |
|
|
|
#include <ctype.h> |
|
|
|
#include <stdbool.h> |
|
|
|
#include <stdio.h> |
|
|
|
#include <stdlib.h> |
|
|
|
#include <string.h> |
|
|
|
|
|
|
|
// Size in bytes of one read from stdin. The working buffer in main() is twice
// this size. Can be overridden at compile time (e.g. -DBUFSIZE=65536).
#if !defined(BUFSIZE)
#define BUFSIZE 1048576
#endif
|
|
|
|
|
|
|
// Parser states used by main(). The numeric values are unchanged; they are
// what the "State: %d" debug trace prints.
enum {
	STATE_HEADERS = 0,        // Expecting a status line and header block
	STATE_BODY = 1,           // Body with a Content-Length header
	STATE_CHUNK_LINE = 2,     // Expecting a chunk-size line (chunked transfer encoding)
	STATE_CHUNK_CONTENTS = 3  // Inside the payload of a chunk
};
|
|
|
|
|
|
|
// Diagnostic tracing. Without DEBUG the macro expands to an empty statement and
// its arguments are never evaluated; with DEBUG it forwards them to
// fprintf(stderr, ...). Both forms are a single statement, so the macro is safe
// inside unbraced if/else and for bodies.
#ifndef DEBUG
#define DEBUG_PRINTF(...) do { } while (0)
#else
#define DEBUG_PRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#endif
|
|
|
|
|
|
|
int main(int argc, char* argv[]) { |
|
|
|
//TODO --meta or a similar way to get something like that? |
|
|
|
|
|
|
|
// Read stdin, decode HTTP responses, dump all bodies to stdout. |
|
|
|
// One LF is inserted at the end of each response to ensure that a new response always begins on a new line. |
|
|
|
// Headers and chunk lines must fit into BUFSIZE. |
|
|
|
// Does not fully comply with the HTTP spec. For example, headers must be capitalised canonically, and continuation lines are unsupported. |
|
|
|
char buf[2 * BUFSIZE]; |
|
|
|
size_t n; |
|
|
|
int state = STATE_HEADERS; |
|
|
|
char* bufp; |
|
|
|
char* m0; |
|
|
|
char* m1; |
|
|
|
char* eoh; |
|
|
|
size_t nscan; |
|
|
|
size_t bytes_read; |
|
|
|
size_t length; |
|
|
|
|
|
|
|
while ((n = fread(buf, 1, BUFSIZE, stdin)) > 0) { |
|
|
|
bufp = buf; |
|
|
|
checkstate: |
|
|
|
DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)bufp); |
|
|
|
DEBUG_PRINTF("Beginning of buffer: "); |
|
|
|
for (int i = 0; i < 64; ++i) DEBUG_PRINTF(isprint(*(bufp + i)) ? "%c" : "\\x%02x", *(bufp + i) & 0xFF); |
|
|
|
DEBUG_PRINTF("\n"); |
|
|
|
if (n == 0) { |
|
|
|
break; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("State: %d\n", state); |
|
|
|
if (state == STATE_HEADERS) { |
|
|
|
if (n < 9) { |
|
|
|
fprintf(stderr, "Error: too little data before HTTP headers\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (memcmp(bufp, "HTTP/1.1 ", 9) == 0) { |
|
|
|
// Got some headers; find transfer encoding, content length, and end of headers |
|
|
|
eoh = memmem(bufp, n, "\r\n\r\n", 4); |
|
|
|
if (!eoh) { |
|
|
|
fprintf(stderr, "Error: end of headers not found\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
eoh += 4; |
|
|
|
DEBUG_PRINTF("Response body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp); |
|
|
|
|
|
|
|
m0 = memmem(bufp, n, "\r\nContent-Length:", 17); |
|
|
|
if (m0 && m0 < eoh) { |
|
|
|
DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp); |
|
|
|
m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2); |
|
|
|
if (!m1) { |
|
|
|
fprintf(stderr, "Error: CRLF after Content-Length missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 17; |
|
|
|
while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (!sscanf(m0, "%zu%n", &length, &nscan)) { |
|
|
|
fprintf(stderr, "Error: invalid Content-Length\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (nscan > n - (m0 - bufp)) { |
|
|
|
fprintf(stderr, "Error: buffer overread\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += nscan; |
|
|
|
while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (m0 != m1) { |
|
|
|
fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Content length: %zu\n", length); |
|
|
|
|
|
|
|
state = STATE_BODY; |
|
|
|
} else { |
|
|
|
m0 = memmem(bufp, n, "\r\nTransfer-Encoding:", 20); |
|
|
|
if (!m0 || m0 >= eoh) { |
|
|
|
fprintf(stderr, "Error: Content-Length and Transfer-Encoding missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Found Transfer-Encoding header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp); |
|
|
|
m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2); |
|
|
|
if (!m1 || m1 >= eoh - 2) { |
|
|
|
fprintf(stderr, "Error: CRLF after Transfer-Encoding missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 20; |
|
|
|
while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (memcmp(m0, "chunked", 7) != 0) { |
|
|
|
fprintf(stderr, "Error: unsupported Transfer-Encoding\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 7; |
|
|
|
while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (m0 != m1) { |
|
|
|
fprintf(stderr, "Error: unsupported Transfer-Encoding\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Chunked transfer encoding\n"); |
|
|
|
|
|
|
|
state = STATE_CHUNK_LINE; |
|
|
|
} |
|
|
|
|
|
|
|
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp); |
|
|
|
n = n - (eoh - bufp); |
|
|
|
bufp = eoh; |
|
|
|
bytes_read = 0; |
|
|
|
goto checkstate; |
|
|
|
} else { |
|
|
|
fprintf(stderr, "Error: expected header line, got something else\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
} else if (state == STATE_BODY || state == STATE_CHUNK_CONTENTS) { |
|
|
|
if (length + 2 - bytes_read > n) { |
|
|
|
// Only got part of the body |
|
|
|
DEBUG_PRINTF("Partial body\n"); |
|
|
|
DEBUG_PRINTF("Copying %zu bytes to stdout\n", n); |
|
|
|
fwrite(bufp, 1, n, stdout); |
|
|
|
bytes_read += n; |
|
|
|
DEBUG_PRINTF("%zu of %zu bytes from this response written\n", bytes_read, length); |
|
|
|
} else { |
|
|
|
// Remainder of the response is in the buffer. Same logic as above for small records fitting in the buffer with the headers. |
|
|
|
DEBUG_PRINTF("Full body\n"); |
|
|
|
DEBUG_PRINTF("Copying %zu bytes to stdout\n", length - bytes_read); |
|
|
|
fwrite(bufp, 1, length - bytes_read, stdout); |
|
|
|
fprintf(stdout, "\n"); |
|
|
|
if (state == STATE_CHUNK_CONTENTS && *(bufp + length - bytes_read) == '\r') { |
|
|
|
// Stupid hack to enforce the CRLF |
|
|
|
++length; |
|
|
|
} |
|
|
|
if (memcmp(bufp + length - bytes_read, "\n", 1) != 0) { |
|
|
|
fprintf(stderr, "Error: end of HTTP body not found\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", length + 1 - bytes_read); |
|
|
|
n = n - (length + 1 - bytes_read); |
|
|
|
bufp = bufp + length + 1 - bytes_read; |
|
|
|
if (n < BUFSIZE) { |
|
|
|
DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n); |
|
|
|
memmove(buf, bufp, n); |
|
|
|
bufp = buf; |
|
|
|
n += fread(buf + n, 1, BUFSIZE, stdin); |
|
|
|
} |
|
|
|
if (state == STATE_BODY) { |
|
|
|
state = STATE_HEADERS; |
|
|
|
} else { |
|
|
|
state = STATE_CHUNK_LINE; |
|
|
|
} |
|
|
|
goto checkstate; |
|
|
|
} |
|
|
|
} else if (state == STATE_CHUNK_LINE) { |
|
|
|
m1 = memmem(bufp, n, "\r\n", 2); |
|
|
|
if (!m1) { |
|
|
|
fprintf(stderr, "Error: chunk line EOL missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 = bufp; |
|
|
|
while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (!sscanf(m0, "%x%n", &length, &nscan)) { |
|
|
|
fprintf(stderr, "Error: invalid chunk length\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (nscan > n - (m0 - bufp)) { |
|
|
|
fprintf(stderr, "Error: buffer overread\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += nscan; |
|
|
|
while (m0 < m1 && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (*m0 != ';' && m0 != m1) { |
|
|
|
fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Chunk length: %zu bytes\n", length); |
|
|
|
|
|
|
|
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m1 + 2 - bufp); |
|
|
|
n = n - (m1 + 2 - bufp); |
|
|
|
bufp = m1 + 2; |
|
|
|
|
|
|
|
if (length == 0) { |
|
|
|
// End of response, must be followed by CRLF + LF |
|
|
|
if (n < 3) { |
|
|
|
fprintf(stderr, "Error: buffer exhausted while looking for empty chunk CRLF\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (*(m1 + 2) != '\r' || *(m1 + 3) != '\n' || *(m1 + 4) != '\n') { |
|
|
|
fprintf(stderr, "Error: end of HTTP body not found\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
n -= 3; |
|
|
|
bufp += 3; |
|
|
|
state = STATE_HEADERS; |
|
|
|
} else { |
|
|
|
state = STATE_CHUNK_CONTENTS; |
|
|
|
} |
|
|
|
|
|
|
|
if (n < BUFSIZE) { |
|
|
|
DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n); |
|
|
|
memmove(buf, bufp, n); |
|
|
|
bufp = buf; |
|
|
|
n += fread(buf + n, 1, BUFSIZE, stdin); |
|
|
|
} |
|
|
|
goto checkstate; |
|
|
|
} |
|
|
|
} |
|
|
|
if (state != STATE_HEADERS) { |
|
|
|
fprintf(stderr, "Error: incomplete body at the end of input\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
} |