|
@@ -31,6 +31,7 @@ int main(int argc, char* argv[]) { |
|
|
char* bufp; |
|
|
char* bufp; |
|
|
char* m0; |
|
|
char* m0; |
|
|
char* m1; |
|
|
char* m1; |
|
|
|
|
|
char* eoh; |
|
|
size_t record_bytes_read; |
|
|
size_t record_bytes_read; |
|
|
size_t record_length; |
|
|
size_t record_length; |
|
|
size_t nscan; |
|
|
size_t nscan; |
|
@@ -58,8 +59,16 @@ checkstate: |
|
|
} |
|
|
} |
|
|
if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) { |
|
|
if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) { |
|
|
// Got some headers; find the record type, content length, and end of headers |
|
|
// Got some headers; find the record type, content length, and end of headers |
|
|
|
|
|
eoh = memmem(bufp, n, "\r\n\r\n", 4); |
|
|
|
|
|
if (!eoh) { |
|
|
|
|
|
fprintf(stderr, "Error: end of headers not found\n"); |
|
|
|
|
|
return 1; |
|
|
|
|
|
} |
|
|
|
|
|
eoh += 4; |
|
|
|
|
|
DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp); |
|
|
|
|
|
|
|
|
m0 = memmem(bufp, n, "\r\nContent-Length:", 17); |
|
|
m0 = memmem(bufp, n, "\r\nContent-Length:", 17); |
|
|
if (!m0) { |
|
|
|
|
|
|
|
|
if (!m0 || m0 >= eoh) { |
|
|
fprintf(stderr, "Error: Content-Length missing\n"); |
|
|
fprintf(stderr, "Error: Content-Length missing\n"); |
|
|
return 1; |
|
|
return 1; |
|
|
} |
|
|
} |
|
@@ -88,7 +97,7 @@ checkstate: |
|
|
DEBUG_PRINTF("Record body length: %zu\n", record_length); |
|
|
DEBUG_PRINTF("Record body length: %zu\n", record_length); |
|
|
|
|
|
|
|
|
m0 = memmem(bufp, n, "\r\nWARC-Type:", 12); |
|
|
m0 = memmem(bufp, n, "\r\nWARC-Type:", 12); |
|
|
if (!m0) { |
|
|
|
|
|
|
|
|
if (!m0 || m0 >= eoh) { |
|
|
fprintf(stderr, "Error: WARC-Type missing\n"); |
|
|
fprintf(stderr, "Error: WARC-Type missing\n"); |
|
|
return 1; |
|
|
return 1; |
|
|
} |
|
|
} |
|
@@ -110,7 +119,7 @@ checkstate: |
|
|
|
|
|
|
|
|
if (meta && state == STATE_RESPONSE_RECORD) { |
|
|
if (meta && state == STATE_RESPONSE_RECORD) { |
|
|
m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18); |
|
|
m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18); |
|
|
if (!m0) { |
|
|
|
|
|
|
|
|
if (!m0 || m0 >= eoh) { |
|
|
fprintf(stderr, "Error: WARC-Target-URI missing\n"); |
|
|
fprintf(stderr, "Error: WARC-Target-URI missing\n"); |
|
|
return 1; |
|
|
return 1; |
|
|
} |
|
|
} |
|
@@ -134,17 +143,9 @@ checkstate: |
|
|
fprintf(stdout, " %zu\n", record_length); |
|
|
fprintf(stdout, " %zu\n", record_length); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
m0 = memmem(bufp, n, "\r\n\r\n", 4); |
|
|
|
|
|
if (!m0) { |
|
|
|
|
|
fprintf(stderr, "Error: end of headers not found\n"); |
|
|
|
|
|
return 1; |
|
|
|
|
|
} |
|
|
|
|
|
m0 += 4; |
|
|
|
|
|
DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - bufp); |
|
|
|
|
|
|
|
|
|
|
|
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m0 - bufp); |
|
|
|
|
|
n = n - (m0 - bufp); |
|
|
|
|
|
bufp = m0; |
|
|
|
|
|
|
|
|
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp); |
|
|
|
|
|
n = n - (eoh - bufp); |
|
|
|
|
|
bufp = eoh; |
|
|
record_bytes_read = 0; |
|
|
record_bytes_read = 0; |
|
|
goto checkstate; |
|
|
goto checkstate; |
|
|
} else { |
|
|
} else { |
|
|