A framework for quick web archiving
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

112 строки
3.8 KiB

  1. import aiohttp
  2. import aiohttp.client_proto
  3. import aiohttp.connector
  4. import functools
  5. import itertools
  6. import time
  7. # aiohttp does not expose the raw data sent over the wire, so we need to get a bit creative...
  8. # The ResponseHandler handles received data; the writes are done directly on the underlying transport.
  9. # So ResponseHandler is replaced with a class which keeps all received data in a list, and the transport's write method is replaced with one which sends back all written data to the ResponseHandler.
  10. # Because the ResponseHandler instance disappears when the connection is closed (ClientResponse.{_response_eof,close,release}), ClientResponse copies the references to the data objects in the ResponseHandler.
  11. # aiohttp also does connection pooling/reuse, so ClientRequest resets the raw data when the request is sent. (This would not work with pipelining, but aiohttp does not support pipelining: https://github.com/aio-libs/aiohttp/issues/1740 )
  12. # This code has been developed for aiohttp version 2.3.10.
  class RawData:
      """Holds the raw bytes and timestamps captured for one request/response exchange.

      Attributes:
          requestTimestamp: initially None.
          requestData: list that collects request chunks, initially empty.
          responseTimestamp: initially None.
          responseData: list that collects response chunks, initially empty.
      """

      def __init__(self):
          # Same four attributes, created in the same order, for each direction:
          # a timestamp slot (unset) followed by a fresh, unshared chunk list.
          for direction in ('request', 'response'):
              setattr(self, direction + 'Timestamp', None)
              setattr(self, direction + 'Data', [])
  class ResponseHandler(aiohttp.client_proto.ResponseHandler):
      """Response handler that additionally keeps a copy of every received chunk.

      Attributes:
          rawData: the RawData object currently being filled, or None until
              reset_raw_data() has been called for the first time.
          remoteAddress: None after construction; assigned from outside this class.
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          self.rawData = None
          self.remoteAddress = None

      def data_received(self, data):
          """Hand `data` to the parent handler, then append it to the current record.

          Empty chunks are not recorded. The response timestamp is set once, when
          the first non-empty chunk of the current record arrives.
          """
          super().data_received(data)
          if not data:
              return
          rawData = self.rawData
          if rawData is None:
              # reset_raw_data() has not been called yet, so there is no record to
              # append to. Skip recording instead of failing with AttributeError.
              return
          if rawData.responseTimestamp is None:
              rawData.responseTimestamp = time.time()
          rawData.responseData.append(data)

      def reset_raw_data(self):
          """Start a fresh, empty record; objects holding the previous one keep it."""
          self.rawData = RawData()
  def make_transport_write(transport, protocol):
      """Build a replacement `write` for `transport` that records outgoing data.

      The transport's current `write` is stored on it as `_real_write`. The returned
      function is unbound (it takes the transport as `self`), so the caller is
      expected to bind it to the transport before assigning it as `transport.write`.

      Args:
          transport: object with a `write(data)` method; gains a `_real_write` attribute.
          protocol: object with a `rawData` attribute, which is either None or an
              object with `requestTimestamp` and `requestData`.

      Returns:
          A function `write(self, data)` which appends `data` to
          `protocol.rawData.requestData`, sets `requestTimestamp` on the first
          recorded write, and then forwards `data` to `self._real_write`.
      """
      transport._real_write = transport.write

      def write(self, data):
          # rawData is looked up on every call because the protocol may swap in a
          # new record object between writes.
          rawData = protocol.rawData
          if rawData is not None:
              if rawData.requestTimestamp is None:
                  rawData.requestTimestamp = time.time()
              rawData.requestData.append(data)
          # else: no record exists yet; still send the data rather than raising
          # AttributeError on None.
          self._real_write(data)

      return write
  class TCPConnector(aiohttp.connector.TCPConnector):
      """TCP connector whose connections use the recording ResponseHandler and a recording transport write."""

      def __init__(self, *args, loop=None, **kwargs):
          super().__init__(*args, loop=loop, **kwargs)
          # Replace the protocol factory so new connections get the recording handler.
          # NOTE(review): `loop` is forwarded exactly as given, so the handler
          # receives loop=None when the caller passed none -- confirm this is intended.
          handlerFactory = functools.partial(ResponseHandler, loop=loop)
          self._factory = handlerFactory

      async def _wrap_create_connection(self, protocolFactory, host, port, *args, **kwargs):  # FIXME: Uses internal API
          """Create the connection via the parent, then patch its transport and tag its protocol.

          Returns the (transport, protocol) pair, with `transport.write` replaced by
          the recording wrapper and `protocol.remoteAddress` set to `(host, port)`.
          """
          connection = await super()._wrap_create_connection(protocolFactory, host, port, *args, **kwargs)
          transport, protocol = connection
          # Bind the plain function to this transport instance so it behaves like a
          # method: https://stackoverflow.com/a/28127947
          recordingWrite = make_transport_write(transport, protocol)
          transport.write = recordingWrite.__get__(transport, type(transport))
          protocol.remoteAddress = (host, port)
          return (transport, protocol)
  class ClientRequest(aiohttp.client_reqrep.ClientRequest):
      """Client request that starts a fresh raw-data record just before it is sent."""

      def send(self, connection):
          """Reset the raw-data record on the connection's protocol, then send via the parent class."""
          handler = connection.protocol
          handler.reset_raw_data()
          return super().send(connection)
  class ClientResponse(aiohttp.client_reqrep.ClientResponse):
      """Client response exposing the raw bytes, timestamps and remote address of its exchange.

      The raw-data record and the remote address are copied from the connection's
      protocol in start(). Until then the raw* properties cannot be read, because
      the record is still None.
      """

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          self._rawData = None
          self._remoteAddress = None

      async def start(self, connection, readUntilEof):
          """Keep references to the protocol's current record and address, then start via the parent."""
          self._rawData = connection.protocol.rawData
          self._remoteAddress = connection.protocol.remoteAddress
          return (await super().start(connection, readUntilEof))

      @property
      def rawRequestTimestamp(self):
          """Timestamp stored in the record for the request (None if never set)."""
          return self._rawData.requestTimestamp

      @property
      def rawRequestData(self):
          """All recorded request chunks joined into a single bytes object."""
          return b''.join(self._rawData.requestData)

      @property
      def rawResponseTimestamp(self):
          """Timestamp stored in the record for the response (None if never set)."""
          return self._rawData.responseTimestamp

      @property
      def rawResponseData(self):
          """All recorded response chunks joined into a single bytes object."""
          return b''.join(self._rawData.responseData)

      @property
      def remoteAddress(self):
          """The remote address copied from the protocol in start()."""
          return self._remoteAddress

      def set_history(self, history):
          """Overwrite the response history."""
          self._history = history  # FIXME: Uses private attribute of aiohttp.client_reqrep.ClientResponse

      def iter_all(self):
          """Iterate over every response in the history, followed by this response."""
          return itertools.chain(self.history, (self,))

      async def release(self):
          """Give the protocol a fresh record if still open, then release via the parent.

          This response keeps its own reference to the old record, so the reset
          does not discard anything already captured for it.
          """
          if not self.closed:
              # reset_raw_data() is a method of the protocol, not of the connection
              # object, so it has to be reached through `.protocol` (as send() and
              # start() do). Guard against a missing connection or protocol.
              connection = self.connection
              protocol = connection.protocol if connection is not None else None
              if protocol is not None:
                  protocol.reset_raw_data()
          await super().release()