|
|
@@ -42,49 +42,49 @@ checkstate: |
|
|
|
DEBUG_PRINTF("State: %d\n", state); |
|
|
|
if (state == STATE_BEFORE_RECORD) { |
|
|
|
if (n < 10) { |
|
|
|
fprintf(stderr, "Error: too little data before WARC headers"); |
|
|
|
fprintf(stderr, "Error: too little data before WARC headers\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (memcmp(buf, "WARC/1.0\r\n", 10) == 0 || memcmp(buf, "WARC/1.1\r\n", 10) == 0) { |
|
|
|
// Got some headers; find the record type, content length, and end of headers |
|
|
|
m0 = memmem(buf, n, "\r\nContent-Length:", 17); |
|
|
|
if (!m0) { |
|
|
|
fprintf(stderr, "Error: Content-Length missing"); |
|
|
|
fprintf(stderr, "Error: Content-Length missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - buf); |
|
|
|
m1 = memmem(m0 + 1, n - (m0 + 1 - buf), "\r\n", 2); |
|
|
|
if (!m1) { |
|
|
|
fprintf(stderr, "Error: CRLF after Content-Length missing"); |
|
|
|
fprintf(stderr, "Error: CRLF after Content-Length missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 17; |
|
|
|
while (m0 < buf + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (!sscanf(m0, "%zu%n", &record_length, &nscan)) { |
|
|
|
fprintf(stderr, "Error: invalid Content-Length"); |
|
|
|
fprintf(stderr, "Error: invalid Content-Length\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
if (nscan > n - (m0 - buf)) { |
|
|
|
fprintf(stderr, "Error: buffer overread"); |
|
|
|
fprintf(stderr, "Error: buffer overread\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += nscan; |
|
|
|
while (m0 < buf + n && (*m0 == ' ' || *m0 == '\t')) ++m0; |
|
|
|
if (m0 != m1) { |
|
|
|
fprintf(stderr, "Error: invalid Content-Length (noise before EOL)"); |
|
|
|
fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Record body length: %zu\n", record_length); |
|
|
|
|
|
|
|
m0 = memmem(buf, n, "\r\nWARC-Type:", 12); |
|
|
|
if (!m0) { |
|
|
|
fprintf(stderr, "Error: WARC-Type missing"); |
|
|
|
fprintf(stderr, "Error: WARC-Type missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Found WARC-Type header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - buf); |
|
|
|
m1 = memmem(m0, n - (m0 - buf), "\r\n", 2); |
|
|
|
if (!m1) { |
|
|
|
fprintf(stderr, "Error: CRLF after WARC-Type missing"); |
|
|
|
fprintf(stderr, "Error: CRLF after WARC-Type missing\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 12; |
|
|
@@ -99,7 +99,7 @@ checkstate: |
|
|
|
|
|
|
|
m0 = memmem(buf, n, "\r\n\r\n", 4); |
|
|
|
if (!m0) { |
|
|
|
fprintf(stderr, "Error: end of headers not found"); |
|
|
|
fprintf(stderr, "Error: end of headers not found\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
m0 += 4; |
|
|
@@ -163,7 +163,7 @@ checkstate: |
|
|
|
fprintf(stdout, "\n"); |
|
|
|
} |
|
|
|
if (memcmp(buf + record_length - record_bytes_read, "\r\n\r\n", 4) != 0) { |
|
|
|
fprintf(stderr, "Error: end of block not found"); |
|
|
|
fprintf(stderr, "Error: end of block not found\n"); |
|
|
|
return 1; |
|
|
|
} |
|
|
|
DEBUG_PRINTF("Moving %zu bytes backwards by %zu\n", n - (record_length + 4 - record_bytes_read), record_length + 4 - record_bytes_read); |
|
|
|