diff --git a/http-response-bodies.c b/http-response-bodies.c
index f3952de..07e0223 100644
--- a/http-response-bodies.c
+++ b/http-response-bodies.c
@@ -20,13 +20,40 @@
#define DEBUG_PRINTF(...) do {} while (false)
#endif
/*
 * Case-insensitive (ASCII) counterpart of memmem(): locate the first
 * occurrence of needle inside haystack, ignoring the case of haystack bytes.
 *
 * haystack, haystacklen: buffer to search and its length in bytes.
 * needle, needlelen:     pattern and its length in bytes. The pattern must
 *                        already be lower-case; only haystack bytes are
 *                        folded with tolower().
 *
 * Returns a pointer to the first byte of the match inside haystack, or NULL
 * if there is no match. An empty needle matches at the start of haystack,
 * mirroring memmem().
 */
char* memcasemem(char* haystack, size_t haystacklen, char* needle, size_t needlelen) {
	if (needlelen > haystacklen) {
		// A needle longer than the haystack can never be in there.
		return NULL;
	}
	if (needlelen == 0) {
		// Nothing to compare; avoid reading needle[0].
		return haystack;
	}

	// Last offset at which a full needle still fits. The bound is inclusive so
	// that a match ending on the final haystack byte is found; it also
	// guarantees start + i below never indexes past the haystack.
	size_t last_start = haystacklen - needlelen;
	for (size_t start = 0; start <= last_start; ++start) {
		size_t i = 0;
		// Compare as unsigned char on both sides: tolower() returns a value in
		// the unsigned char range, whereas plain char may be signed.
		while (i < needlelen &&
		       tolower((unsigned char)haystack[start + i]) == (unsigned char)needle[i]) {
			++i;
		}
		if (i == needlelen) {
			// Full match
			return haystack + start;
		}
	}

	// Explicit "not found" result; callers test the returned pointer.
	return NULL;
}
+
int main(int argc, char* argv[]) {
// Read stdin, decode HTTP responses, dump all bodies to stdout.
// stdin may contain an extra 'URL LENGTH\n' line before each response (--meta output from warc-dump-responses).
// One LF is inserted at the end of each response to ensure that a new response always begins on a new line.
// If --html-fake-base is provided and the input contains URL data, every HTML response (Content-Type: text/html header) is prefixed with one line containing a fake tag: . The line is terminated with a LF.
// Headers and chunk lines must fit into BUFSIZE.
- // Does not fully comply with the HTTP spec. For example, headers must be capitalised canonically, and continuation lines are unsupported.
+ // Does not fully comply with the HTTP spec. For example, continuation lines are unsupported.
char buf[2 * BUFSIZE];
size_t n;
int state = STATE_HEADERS;
@@ -119,7 +146,7 @@ checkstate:
eoh += 4;
DEBUG_PRINTF("Response body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp);
- m0 = memmem(bufp, n, "\r\nContent-Length:", 17);
+ m0 = memcasemem(bufp, n, "\r\ncontent-length:", 17);
if (m0 && m0 < eoh) {
DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp);
m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2);
@@ -151,7 +178,7 @@ checkstate:
state = STATE_BODY;
} else {
- m0 = memmem(bufp, n, "\r\nTransfer-Encoding:", 20);
+ m0 = memcasemem(bufp, n, "\r\ntransfer-encoding:", 20);
if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: Content-Length and Transfer-Encoding missing\n");
return 1;
@@ -180,7 +207,7 @@ checkstate:
}
if (html_fake_base) {
- m0 = memmem(bufp, n, "\r\nContent-Type:", 15);
+ m0 = memcasemem(bufp, n, "\r\ncontent-type:", 15);
if (m0 && m0 < eoh) {
DEBUG_PRINTF("Found Content-Type header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp);
m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2);