diff --git a/warc-tiny b/warc-tiny
index 796519b..f5ccf75 100755
--- a/warc-tiny
+++ b/warc-tiny
@@ -7,14 +7,34 @@
 # With --meta, prefix every line with the filename, record offset, record ID, and target URI; e.g. 'file.warc.gz:123::: foobar'
 # The record offset may be -1 if it is not known.
 # The filename is wrapped in angled brackets if it contains a colon; the target URI is always wrapped in angled brackets (since it virtually always contains a colon).
+# warc-tiny scrape [-u|--urls] FILES -- extract all links and page requisites from the records; produces lines of filename, record offset, record ID, record URI, link type, inline and linked flags, and URL as JSONL
+# With --urls, only the URL is printed.
+# wpull's scrapers are used for the extraction.
 # warc-tiny verify FILES -- verify the integrity of a WARC by comparing the digests
 
 import base64
 import gzip
 import hashlib
+import json
 import sys
+import tempfile
 import zlib
 
+try:
+	import wpull.body
+	import wpull.document.htmlparse.lxml_
+	try:
+		import wpull.protocol.http.request as wpull_protocol_http_request # wpull 2.x
+	except ImportError:
+		import wpull.http.request as wpull_protocol_http_request # wpull 1.x
+	import wpull.scraper.base
+	import wpull.scraper.css
+	import wpull.scraper.html
+	import wpull.scraper.javascript
+	import wpull.scraper.sitemap
+except ImportError:
+	wpull = None
+
 
 def GzipDecompressor():
 	return zlib.decompressobj(16 + zlib.MAX_WBITS)
@@ -52,6 +72,15 @@ class BeginOfRecord(Event):
 		return self._rawData
 
 
+class HTTPHeaders(Event):
+	def __init__(self, headers):
+		self._headers = headers
+
+	@property
+	def headers(self):
+		return self._headers
+
+
 class _DataChunk(Event):
 	def __init__(self, data):
 		self._data = data
@@ -153,6 +182,7 @@ def iter_warc(f):
 			gzipped = b'gzip' in transferEncodings
 
 			yield WARCBlockChunk(httpHeaders + b'\r\n\r\n', isHttpHeader = True)
+			yield HTTPHeaders(httpHeaderLines)
 			yield WARCBlockChunk(httpBody, isHttpHeader = False)
 			yield RawHTTPBodyChunk(httpBody)
 
@@ -404,8 +434,86 @@ class ColourMode(ProcessMode):
 		elif type(event) is EndOfRecord:
 			sys.stdout.buffer.write(b'\n\n')
 
+
+class ScrapeMode(ProcessMode):
+	@classmethod
+	def split_args(cls, args):
+		if args[0] == '-u' or args[0] == '--urls':
+			return (True,), args[1:]
+		return (False,), args
+
+	def __init__(self, urlsOnly):
+		self._urlsOnly = urlsOnly
+
+		assert wpull is not None, 'Scrape mode requires wpull'
+		htmlParser = wpull.document.htmlparse.lxml_.HTMLParser()
+		elementWalker = wpull.scraper.html.ElementWalker()
+		scrapers = []
+		scrapers.append(wpull.scraper.html.HTMLScraper(htmlParser, elementWalker))
+		scrapers.append(wpull.scraper.css.CSSScraper())
+		elementWalker.css_scraper = scrapers[-1]
+		scrapers.append(wpull.scraper.javascript.JavaScriptScraper())
+		elementWalker.javascript_scraper = scrapers[-1]
+		scrapers.append(wpull.scraper.sitemap.SitemapScraper(htmlParser))
+		self._scraper = wpull.scraper.base.DemuxDocumentScraper(scrapers)
+
+		self._isResponse = None
+		self._body = None
+		self._recordURI = None
+		self._statusCode = None
+		self._statusReason = None
+		if not self._urlsOnly:
+			self._filename = None
+			self._recordID = None
+
+	def process_event(self, event):
+		if type(event) is NewFile and not self._urlsOnly:
+			self._filename = event.filename
+		elif type(event) is BeginOfRecord:
+			warcContentType = next(x[1] for x in event.warcHeaders if x[0] == b'Content-Type')
+			warcType = next(x[1] for x in event.warcHeaders if x[0] == b'WARC-Type')
+			self._isResponse = warcContentType in (b'application/http;msgtype=response', b'application/http; msgtype=response') and warcType == b'response'
+			if self._isResponse:
+				self._body = wpull.body.Body(file = tempfile.SpooledTemporaryFile(max_size = 10485760)) # Up to 10 MiB in memory
+			self._printEOR = False
+			if not self._urlsOnly:
+				# Both of these are URIs, and per RFC 3986, those can only contain ASCII characters.
+				self._recordID = next(x[1] for x in event.warcHeaders if x[0] == b'WARC-Record-ID').decode('ascii')
+				self._recordURI = next((x[1] for x in event.warcHeaders if x[0] == b'WARC-Target-URI'), b'').decode('ascii')
+		elif type(event) is HTTPHeaders and self._isResponse:
+			assert len(event.headers[0]) == 1 and event.headers[0][0].startswith(b'HTTP/'), 'malformed HTTP response'
+			_, statusCode, reason = event.headers[0][0].decode('ascii').split(' ', 2)
+			self._statusCode = int(statusCode)
+			self._statusReason = reason
+		elif type(event) is HTTPBodyChunk and self._isResponse:
+			self._body.write(event.data)
+		elif type(event) is EndOfRecord and self._isResponse:
+			request = wpull_protocol_http_request.Request(self._recordURI)
+			response = wpull_protocol_http_request.Response(self._statusCode, self._statusReason)
+			response.body = self._body
+			response.body.seek(0)
+			for scraper, scrapeResult in self._scraper.scrape_info(request, response).items():
+				if not scrapeResult:
+					continue
+				for linkContext in scrapeResult.link_contexts:
+					if self._urlsOnly:
+						print(linkContext.link)
+						continue
+					o = {
+						'filename': self._filename,
+						'recordOffset': None,
+						'recordID': self._recordID,
+						'recordURI': self._recordURI,
+						'linkType': linkContext.link_type,
+						'inline': bool(linkContext.inline), # Needs manual casting; https://github.com/ArchiveTeam/wpull/issues/458
+						'linked': bool(linkContext.linked),
+						'url': linkContext.link,
+					}
+					print(json.dumps(o))
+
+
 
 def main():
-	processorMap = {'verify': VerifyMode, 'dump-responses': DumpResponsesMode, 'colour': ColourMode}
+	processorMap = {'verify': VerifyMode, 'dump-responses': DumpResponsesMode, 'colour': ColourMode, 'scrape': ScrapeMode}
 	assert len(sys.argv) - 1 >= 2
 	mode = sys.argv[1]
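
Usage sketch (not part of the patch; the file name, record ID, and URL values below are made up for illustration): once the diff is applied, the new mode can be invoked as

    ./warc-tiny scrape example.warc.gz

and, for every HTTP response record, it should print one JSON object per extracted link, roughly of the form

    {"filename": "example.warc.gz", "recordOffset": null, "recordID": "<urn:uuid:...>", "recordURI": "https://example.org/", "linkType": "css", "inline": true, "linked": false, "url": "https://example.org/style.css"}

With -u/--urls, only the bare URL is printed on each line instead of the JSON object. The record offset is always null here since this code does not track offsets.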