From a432631d9bed72f95330984aa71d0ae490f238c8 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sun, 1 Jan 2023 23:59:27 +0000 Subject: [PATCH] Replace memmove with pointer arithmetic --- warc-dump-responses.c | 51 +++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/warc-dump-responses.c b/warc-dump-responses.c index 2a91a2b..5ec9b5e 100644 --- a/warc-dump-responses.c +++ b/warc-dump-responses.c @@ -27,6 +27,7 @@ int main(int argc, char* argv[]) { char buf[2 * BUFSIZE]; size_t n; int state = STATE_BEFORE_RECORD; + char* bufp; char* m0; char* m1; size_t record_bytes_read; @@ -34,8 +35,9 @@ int main(int argc, char* argv[]) { size_t nscan; while ((n = fread(buf, 1, BUFSIZE, stdin)) > 0) { + bufp = buf; checkstate: - DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)&buf); + DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)bufp); if (n == 0) { break; } @@ -45,50 +47,50 @@ checkstate: fprintf(stderr, "Error: too little data before WARC headers\n"); return 1; } - if (memcmp(buf, "WARC/1.0\r\n", 10) == 0 || memcmp(buf, "WARC/1.1\r\n", 10) == 0) { + if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) { // Got some headers; find the record type, content length, and end of headers - m0 = memmem(buf, n, "\r\nContent-Length:", 17); + m0 = memmem(bufp, n, "\r\nContent-Length:", 17); if (!m0) { fprintf(stderr, "Error: Content-Length missing\n"); return 1; } - DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - buf); - m1 = memmem(m0 + 1, n - (m0 + 1 - buf), "\r\n", 2); + DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp); + m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2); if (!m1) { fprintf(stderr, "Error: CRLF after Content-Length missing\n"); return 1; } m0 += 17; - while (m0 < buf + n && (*m0 == ' ' || *m0 == '\t')) ++m0; + while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; if (!sscanf(m0, "%zu%n", &record_length, &nscan)) { fprintf(stderr, "Error: invalid Content-Length\n"); return 1; } - if (nscan > n - (m0 - buf)) { + if (nscan > n - (m0 - bufp)) { fprintf(stderr, "Error: buffer overread\n"); return 1; } m0 += nscan; - while (m0 < buf + n && (*m0 == ' ' || *m0 == '\t')) ++m0; + while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; if (m0 != m1) { fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n"); return 1; } DEBUG_PRINTF("Record body length: %zu\n", record_length); - m0 = memmem(buf, n, "\r\nWARC-Type:", 12); + m0 = memmem(bufp, n, "\r\nWARC-Type:", 12); if (!m0) { fprintf(stderr, "Error: WARC-Type missing\n"); return 1; } - DEBUG_PRINTF("Found WARC-Type header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - buf); - m1 = memmem(m0, n - (m0 - buf), "\r\n", 2); + DEBUG_PRINTF("Found WARC-Type header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp); + m1 = memmem(m0, n - (m0 - bufp), "\r\n", 2); if (!m1) { fprintf(stderr, "Error: CRLF after WARC-Type missing\n"); return 1; } m0 += 12; - while (m0 < buf + n && (*m0 == ' ' || *m0 == '\t')) ++m0; + while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0; if (memcmp(m0, "response", 8) == 0) { DEBUG_PRINTF("Response record\n"); state = STATE_RESPONSE_RECORD; @@ -97,18 +99,17 @@ checkstate: state = STATE_OTHER_RECORD; } - m0 = memmem(buf, n, "\r\n\r\n", 4); + m0 = memmem(bufp, n, "\r\n\r\n", 4); if (!m0) { fprintf(stderr, "Error: end of headers not found\n"); return 1; } m0 += 4; - DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - buf); + DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - bufp); - //TODO Replace all the memmove business with pointer logic to avoid needless constant memory copying (is more wrooom). - DEBUG_PRINTF("Moving %zu bytes backwards by %zu\n", n - (m0 - buf), m0 - buf); - memmove(buf, m0, n - (m0 - buf)); - n = n - (m0 - buf); + DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m0 - bufp); + n = n - (m0 - bufp); + bufp = m0; record_bytes_read = 0; goto checkstate; } else { @@ -121,7 +122,7 @@ checkstate: DEBUG_PRINTF("Partial record\n"); if (state == STATE_RESPONSE_RECORD) { DEBUG_PRINTF("Copying %zu bytes to stdout\n", n); - fwrite(buf, 1, n, stdout); + fwrite(bufp, 1, n, stdout); } record_bytes_read += n; DEBUG_PRINTF("%zu of %zu bytes from this record written\n", record_bytes_read, record_length); @@ -130,18 +131,20 @@ checkstate: DEBUG_PRINTF("Full record\n"); if (state == STATE_RESPONSE_RECORD) { DEBUG_PRINTF("Copying %zu bytes to stdout\n", record_length - record_bytes_read); - fwrite(buf, 1, record_length - record_bytes_read, stdout); + fwrite(bufp, 1, record_length - record_bytes_read, stdout); fprintf(stdout, "\n"); } - if (memcmp(buf + record_length - record_bytes_read, "\r\n\r\n", 4) != 0) { + if (memcmp(bufp + record_length - record_bytes_read, "\r\n\r\n", 4) != 0) { fprintf(stderr, "Error: end of block not found\n"); return 1; } - DEBUG_PRINTF("Moving %zu bytes backwards by %zu\n", n - (record_length + 4 - record_bytes_read), record_length + 4 - record_bytes_read); - memmove(buf, buf + record_length + 4 - record_bytes_read, n - (record_length + 4 - record_bytes_read)); + DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", record_length + 4 - record_bytes_read); n = n - (record_length + 4 - record_bytes_read); + bufp = bufp + record_length + 4 - record_bytes_read; if (n < BUFSIZE) { - DEBUG_PRINTF("Refilling buffer\n"); + DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n); + memmove(buf, bufp, n); + bufp = buf; n += fread(buf + n, 1, BUFSIZE, stdin); } state = STATE_BEFORE_RECORD;