You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

779 lines
34 KiB

  1. import aiohttp
  2. import aiohttp.web
  3. import asyncio
  4. import base64
  5. import collections
  6. import concurrent.futures
  7. import importlib.util
  8. import inspect
  9. import itertools
  10. import logging
  11. import os.path
  12. import signal
  13. import ssl
  14. import string
  15. import sys
  16. import toml
  17. logger = logging.getLogger('http2irc')
  18. SSL_CONTEXTS = {'yes': True, 'no': False, 'insecure': ssl.SSLContext()}
  19. class InvalidConfig(Exception):
  20. '''Error in configuration file'''
  21. def is_valid_pem(path, withCert):
  22. '''Very basic check whether something looks like a valid PEM certificate'''
  23. try:
  24. with open(path, 'rb') as fp:
  25. contents = fp.read()
  26. # All of these raise exceptions if something's wrong...
  27. if withCert:
  28. assert contents.startswith(b'-----BEGIN CERTIFICATE-----\n')
  29. endCertPos = contents.index(b'-----END CERTIFICATE-----\n')
  30. base64.b64decode(contents[28:endCertPos].replace(b'\n', b''), validate = True)
  31. assert contents[endCertPos + 26:].startswith(b'-----BEGIN PRIVATE KEY-----\n')
  32. else:
  33. assert contents.startswith(b'-----BEGIN PRIVATE KEY-----\n')
  34. endCertPos = -26 # Please shoot me.
  35. endKeyPos = contents.index(b'-----END PRIVATE KEY-----\n')
  36. base64.b64decode(contents[endCertPos + 26 + 28: endKeyPos].replace(b'\n', b''), validate = True)
  37. assert contents[endKeyPos + 26:] == b''
  38. return True
  39. except: # Yes, really
  40. return False
  41. class Config(dict):
  42. def __init__(self, filename):
  43. super().__init__()
  44. self._filename = filename
  45. with open(self._filename, 'r') as fp:
  46. obj = toml.load(fp)
  47. # Sanity checks
  48. if any(x not in ('logging', 'irc', 'web', 'maps') for x in obj.keys()):
  49. raise InvalidConfig('Unknown sections found in base object')
  50. if any(not isinstance(x, collections.abc.Mapping) for x in obj.values()):
  51. raise InvalidConfig('Invalid section type(s), expected objects/dicts')
  52. if 'logging' in obj:
  53. if any(x not in ('level', 'format') for x in obj['logging']):
  54. raise InvalidConfig('Unknown key found in log section')
  55. if 'level' in obj['logging'] and obj['logging']['level'] not in ('DEBUG', 'INFO', 'WARNING', 'ERROR'):
  56. raise InvalidConfig('Invalid log level')
  57. if 'format' in obj['logging']:
  58. if not isinstance(obj['logging']['format'], str):
  59. raise InvalidConfig('Invalid log format')
  60. try:
  61. #TODO: Replace with logging.Formatter's validate option (3.8+); this test does not cover everything that could be wrong (e.g. invalid format spec or conversion)
  62. # This counts the number of replacement fields. Formatter.parse yields tuples whose second value is the field name; if it's None, there is no field (e.g. literal text).
  63. assert sum(1 for x in string.Formatter().parse(obj['logging']['format']) if x[1] is not None) > 0
  64. except (ValueError, AssertionError) as e:
  65. raise InvalidConfig('Invalid log format: parsing failed') from e
  66. if 'irc' in obj:
  67. if any(x not in ('host', 'port', 'ssl', 'nick', 'real', 'certfile', 'certkeyfile') for x in obj['irc']):
  68. raise InvalidConfig('Unknown key found in irc section')
  69. if 'host' in obj['irc'] and not isinstance(obj['irc']['host'], str): #TODO: Check whether it's a valid hostname
  70. raise InvalidConfig('Invalid IRC host')
  71. if 'port' in obj['irc'] and (not isinstance(obj['irc']['port'], int) or not 1 <= obj['irc']['port'] <= 65535):
  72. raise InvalidConfig('Invalid IRC port')
  73. if 'ssl' in obj['irc'] and obj['irc']['ssl'] not in ('yes', 'no', 'insecure'):
  74. raise InvalidConfig(f'Invalid IRC SSL setting: {obj["irc"]["ssl"]!r}')
  75. if 'nick' in obj['irc'] and not isinstance(obj['irc']['nick'], str): #TODO: Check whether it's a valid nickname
  76. raise InvalidConfig('Invalid IRC nick')
  77. if len(IRCClientProtocol.nick_command(obj['irc']['nick'])) > 510:
  78. raise InvalidConfig('Invalid IRC nick: NICK command too long')
  79. if 'real' in obj['irc'] and not isinstance(obj['irc']['real'], str):
  80. raise InvalidConfig('Invalid IRC realname')
  81. if len(IRCClientProtocol.user_command(obj['irc']['nick'], obj['irc']['real'])) > 510:
  82. raise InvalidConfig('Invalid IRC nick/realname combination: USER command too long')
  83. if ('certfile' in obj['irc']) != ('certkeyfile' in obj['irc']):
  84. raise InvalidConfig('Invalid IRC cert config: needs both certfile and certkeyfile')
  85. if 'certfile' in obj['irc']:
  86. if not isinstance(obj['irc']['certfile'], str):
  87. raise InvalidConfig('Invalid certificate file: not a string')
  88. obj['irc']['certfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certfile']))
  89. if not os.path.isfile(obj['irc']['certfile']):
  90. raise InvalidConfig('Invalid certificate file: not a regular file')
  91. if not is_valid_pem(obj['irc']['certfile'], True):
  92. raise InvalidConfig('Invalid certificate file: not a valid PEM cert')
  93. if 'certkeyfile' in obj['irc']:
  94. if not isinstance(obj['irc']['certkeyfile'], str):
  95. raise InvalidConfig('Invalid certificate key file: not a string')
  96. obj['irc']['certkeyfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certkeyfile']))
  97. if not os.path.isfile(obj['irc']['certkeyfile']):
  98. raise InvalidConfig('Invalid certificate key file: not a regular file')
  99. if not is_valid_pem(obj['irc']['certkeyfile'], False):
  100. raise InvalidConfig('Invalid certificate key file: not a valid PEM key')
  101. if 'web' in obj:
  102. if any(x not in ('host', 'port') for x in obj['web']):
  103. raise InvalidConfig('Unknown key found in web section')
  104. if 'host' in obj['web'] and not isinstance(obj['web']['host'], str): #TODO: Check whether it's a valid hostname (must resolve I guess?)
  105. raise InvalidConfig('Invalid web hostname')
  106. if 'port' in obj['web'] and (not isinstance(obj['web']['port'], int) or not 1 <= obj['web']['port'] <= 65535):
  107. raise InvalidConfig('Invalid web port')
  108. if 'maps' in obj:
  109. seenWebPaths = {}
  110. for key, map_ in obj['maps'].items():
  111. if not isinstance(key, str) or not key:
  112. raise InvalidConfig(f'Invalid map key {key!r}')
  113. if not isinstance(map_, collections.abc.Mapping):
  114. raise InvalidConfig(f'Invalid map for {key!r}')
  115. if any(x not in ('webpath', 'ircchannel', 'auth', 'module', 'moduleargs', 'overlongmode') for x in map_):
  116. raise InvalidConfig(f'Unknown key(s) found in map {key!r}')
  117. if 'webpath' not in map_:
  118. map_['webpath'] = f'/{key}'
  119. if not isinstance(map_['webpath'], str):
  120. raise InvalidConfig(f'Invalid map {key!r} web path: not a string')
  121. if not map_['webpath'].startswith('/'):
  122. raise InvalidConfig(f'Invalid map {key!r} web path: does not start at the root')
  123. if map_['webpath'] in seenWebPaths:
  124. raise InvalidConfig(f'Invalid map {key!r} web path: collides with map {seenWebPaths[map_["webpath"]]!r}')
  125. seenWebPaths[map_['webpath']] = key
  126. if 'ircchannel' not in map_:
  127. map_['ircchannel'] = f'#{key}'
  128. if not isinstance(map_['ircchannel'], str):
  129. raise InvalidConfig(f'Invalid map {key!r} IRC channel: not a string')
  130. if not map_['ircchannel'].startswith('#') and not map_['ircchannel'].startswith('&'):
  131. raise InvalidConfig(f'Invalid map {key!r} IRC channel: does not start with # or &')
  132. if any(x in map_['ircchannel'][1:] for x in (' ', '\x00', '\x07', '\r', '\n', ',')):
  133. raise InvalidConfig(f'Invalid map {key!r} IRC channel: contains forbidden characters')
  134. if len(map_['ircchannel']) > 200:
  135. raise InvalidConfig(f'Invalid map {key!r} IRC channel: too long')
  136. if 'auth' in map_:
  137. if map_['auth'] is not False and not isinstance(map_['auth'], str):
  138. raise InvalidConfig(f'Invalid map {key!r} auth: must be false or a string')
  139. if isinstance(map_['auth'], str) and ':' not in map_['auth']:
  140. raise InvalidConfig(f'Invalid map {key!r} auth: must contain a colon')
  141. if 'module' in map_:
  142. # If the path is relative, try to evaluate it relative to either the config file or this file; some modules are in the repo, but this also allows overriding them.
  143. for basePath in (os.path.dirname(self._filename), os.path.dirname(__file__)):
  144. if os.path.isfile(os.path.join(basePath, map_['module'])):
  145. map_['module'] = os.path.abspath(os.path.join(basePath, map_['module']))
  146. break
  147. else:
  148. raise InvalidConfig(f'Module {map_["module"]!r} in map {key!r} is not a file')
  149. if 'moduleargs' in map_:
  150. if not isinstance(map_['moduleargs'], list):
  151. raise InvalidConfig(f'Invalid module args for {key!r}: not an array')
  152. if 'module' not in map_:
  153. raise InvalidConfig(f'Module args cannot be specified without a module for {key!r}')
  154. if 'overlongmode' in map_:
  155. if not isinstance(map_['overlongmode'], str):
  156. raise InvalidConfig(f'Invalid map {key!r} overlongmode: not a string')
  157. if map_['overlongmode'] not in ('split', 'truncate'):
  158. raise InvalidConfig(f'Invalid map {key!r} overlongmode: unsupported value')
  159. # Default values
  160. finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'h2ibot', 'real': 'I am an http2irc bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'maps': {}}
  161. # Fill in default values for the maps
  162. for key, map_ in obj['maps'].items():
  163. # webpath is already set above for duplicate checking
  164. # ircchannel is set above for validation
  165. if 'auth' not in map_:
  166. map_['auth'] = False
  167. if 'module' not in map_:
  168. map_['module'] = None
  169. if 'moduleargs' not in map_:
  170. map_['moduleargs'] = []
  171. if 'overlongmode' not in map_:
  172. map_['overlongmode'] = 'split'
  173. # Load modules
  174. modulePaths = {} # path: str -> (extraargs: int, key: str)
  175. for key, map_ in obj['maps'].items():
  176. if map_['module'] is not None:
  177. if map_['module'] not in modulePaths:
  178. modulePaths[map_['module']] = (len(map_['moduleargs']), key)
  179. elif modulePaths[map_['module']][0] != len(map_['moduleargs']):
  180. raise InvalidConfig(f'Module {map_["module"]!r} process function extra argument inconsistency between {key!r} and {modulePaths[map_["module"]][1]!r}')
  181. modules = {} # path: str -> module: module
  182. for i, (path, (extraargs, _)) in enumerate(modulePaths.items()):
  183. try:
  184. # Build a name that is virtually guaranteed to be unique across a process.
  185. # Although importlib does not seem to perform any caching as of CPython 3.8, this is not guaranteed by spec.
  186. spec = importlib.util.spec_from_file_location(f'http2irc-module-{id(self)}-{i}', path)
  187. module = importlib.util.module_from_spec(spec)
  188. spec.loader.exec_module(module)
  189. except Exception as e: # This is ugly, but exec_module can raise virtually any exception
  190. raise InvalidConfig(f'Loading module {path!r} failed: {e!s}')
  191. if not hasattr(module, 'process'):
  192. raise InvalidConfig(f'Module {path!r} does not have a process function')
  193. if not inspect.iscoroutinefunction(module.process):
  194. raise InvalidConfig(f'Module {path!r} process attribute is not a coroutine function')
  195. nargs = len(inspect.signature(module.process).parameters)
  196. if nargs != 1 + extraargs:
  197. raise InvalidConfig(f'Module {path!r} process function takes {nargs} parameter{"s" if nargs > 1 else ""}, not {1 + extraargs}')
  198. modules[path] = module
  199. # Replace module value in maps
  200. for map_ in obj['maps'].values():
  201. if 'module' in map_ and map_['module'] is not None:
  202. map_['module'] = modules[map_['module']]
  203. # Merge in what was read from the config file and set keys on self
  204. for key in ('logging', 'irc', 'web', 'maps'):
  205. if key in obj:
  206. finalObj[key].update(obj[key])
  207. self[key] = finalObj[key]
  208. def __repr__(self):
  209. return f'<Config(logging={self["logging"]!r}, irc={self["irc"]!r}, web={self["web"]!r}, maps={self["maps"]!r})>'
  210. def reread(self):
  211. return Config(self._filename)
  212. class MessageQueue:
  213. # An object holding onto the messages received from nodeping
  214. # This is effectively a reimplementation of parts of asyncio.Queue with some specific additional code.
  215. # Unfortunately, asyncio.Queue's extensibility (_init, _put, and _get methods) is undocumented, so I don't want to rely on that.
  216. # Differences to asyncio.Queue include:
  217. # - No maxsize
  218. # - No put coroutine (not necessary since the queue can never be full)
  219. # - Only one concurrent getter
  220. # - putleft_nowait to put to the front of the queue (so that the IRC client can put a message back when delivery fails)
  221. logger = logging.getLogger('http2irc.MessageQueue')
  222. def __init__(self):
  223. self._getter = None # None | asyncio.Future
  224. self._queue = collections.deque()
  225. async def get(self):
  226. if self._getter is not None:
  227. raise RuntimeError('Cannot get concurrently')
  228. if len(self._queue) == 0:
  229. self._getter = asyncio.get_running_loop().create_future()
  230. self.logger.debug('Awaiting getter')
  231. try:
  232. await self._getter
  233. except asyncio.CancelledError:
  234. self.logger.debug('Cancelled getter')
  235. self._getter = None
  236. raise
  237. self.logger.debug('Awaited getter')
  238. self._getter = None
  239. # For testing the cancellation/putting back onto the queue
  240. #self.logger.debug('Delaying message queue get')
  241. #await asyncio.sleep(3)
  242. #self.logger.debug('Done delaying')
  243. return self.get_nowait()
  244. def get_nowait(self):
  245. if len(self._queue) == 0:
  246. raise asyncio.QueueEmpty
  247. return self._queue.popleft()
  248. def put_nowait(self, item):
  249. self._queue.append(item)
  250. if self._getter is not None and not self._getter.cancelled():
  251. self._getter.set_result(None)
  252. def putleft_nowait(self, *item):
  253. self._queue.extendleft(reversed(item))
  254. if self._getter is not None and not self._getter.cancelled():
  255. self._getter.set_result(None)
  256. def qsize(self):
  257. return len(self._queue)
  258. class IRCClientProtocol(asyncio.Protocol):
  259. logger = logging.getLogger('http2irc.IRCClientProtocol')
  260. def __init__(self, messageQueue, connectionClosedEvent, loop, config, channels):
  261. self.messageQueue = messageQueue
  262. self.connectionClosedEvent = connectionClosedEvent
  263. self.loop = loop
  264. self.config = config
  265. self.buffer = b''
  266. self.connected = False
  267. self.channels = channels # Currently joined/supposed-to-be-joined channels; set(str)
  268. self.unconfirmedMessages = []
  269. self.pongReceivedEvent = asyncio.Event()
  270. self.sasl = bool(self.config['irc']['certfile'] and self.config['irc']['certkeyfile'])
  271. self.authenticated = False
  272. self.usermask = None
  273. @staticmethod
  274. def nick_command(nick: str):
  275. return b'NICK ' + nick.encode('utf-8')
  276. @staticmethod
  277. def user_command(nick: str, real: str):
  278. nickb = nick.encode('utf-8')
  279. return b'USER ' + nickb + b' ' + nickb + b' ' + nickb + b' :' + real.encode('utf-8')
  280. def _maybe_set_usermask(self, usermask):
  281. if b'@' in usermask and b'!' in usermask.split(b'@')[0] and all(x not in usermask for x in (b' ', b'*', b'#', b'&')):
  282. self.usermask = usermask
  283. self.logger.debug(f'Usermask is now {usermask!r}')
  284. def connection_made(self, transport):
  285. self.logger.info('IRC connected')
  286. self.transport = transport
  287. self.connected = True
  288. if self.sasl:
  289. self.send(b'CAP REQ :sasl')
  290. self.send(self.nick_command(self.config['irc']['nick']))
  291. self.send(self.user_command(self.config['irc']['nick'], self.config['irc']['real']))
  292. def _send_join_part(self, command, channels):
  293. '''Split a JOIN or PART into multiple messages as necessary'''
  294. # command: b'JOIN' or b'PART'; channels: set[str]
  295. channels = [x.encode('utf-8') for x in channels]
  296. if len(command) + sum(1 + len(x) for x in channels) <= 510: # Total length = command + (separator + channel name for each channel, where the separator is a space for the first and then a comma)
  297. # Everything fits into one command.
  298. self.send(command + b' ' + b','.join(channels))
  299. return
  300. # List too long, need to split.
  301. limit = 510 - len(command)
  302. lengths = [1 + len(x) for x in channels] # separator + channel name
  303. chanLengthAcceptable = [l <= limit for l in lengths]
  304. if not all(chanLengthAcceptable):
  305. # There are channel names that are too long to even fit into one message on their own; filter them out and warn about them.
  306. # This should never happen since the config reader would already filter it out.
  307. tooLongChannels = [x for x, a in zip(channels, chanLengthAcceptable) if not a]
  308. channels = [x for x, a in zip(channels, chanLengthAcceptable) if a]
  309. lengths = [l for l, a in zip(lengths, chanLengthAcceptable) if a]
  310. for channel in tooLongChannels:
  311. self.logger.warning(f'Cannot {command} {channel}: name too long')
  312. runningLengths = list(itertools.accumulate(lengths)) # entry N = length of all entries up to and including channel N, including separators
  313. offset = 0
  314. while channels:
  315. i = next((x[0] for x in enumerate(runningLengths) if x[1] - offset > limit), -1)
  316. if i == -1: # Last batch
  317. i = len(channels)
  318. self.send(command + b' ' + b','.join(channels[:i]))
  319. offset = runningLengths[i-1]
  320. channels = channels[i:]
  321. runningLengths = runningLengths[i:]
  322. def update_channels(self, channels: set):
  323. channelsToPart = self.channels - channels
  324. channelsToJoin = channels - self.channels
  325. self.channels = channels
  326. if self.connected:
  327. if channelsToPart:
  328. self._send_join_part(b'PART', channelsToPart)
  329. if channelsToJoin:
  330. self._send_join_part(b'JOIN', channelsToJoin)
  331. def send(self, data):
  332. self.logger.debug(f'Send: {data!r}')
  333. if len(data) > 510:
  334. raise RuntimeError(f'IRC message too long ({len(data)} > 510): {data!r}')
  335. self.transport.write(data + b'\r\n')
  336. async def _get_message(self):
  337. self.logger.debug(f'Message queue {id(self.messageQueue)} length: {self.messageQueue.qsize()}')
  338. messageFuture = asyncio.create_task(self.messageQueue.get())
  339. done, pending = await asyncio.wait((messageFuture, self.connectionClosedEvent.wait()), return_when = concurrent.futures.FIRST_COMPLETED)
  340. if self.connectionClosedEvent.is_set():
  341. if messageFuture in pending:
  342. self.logger.debug('Cancelling messageFuture')
  343. messageFuture.cancel()
  344. try:
  345. await messageFuture
  346. except asyncio.CancelledError:
  347. self.logger.debug('Cancelled messageFuture')
  348. pass
  349. else:
  350. # messageFuture is already done but we're stopping, so put the result back onto the queue
  351. self.messageQueue.putleft_nowait(messageFuture.result())
  352. return None, None
  353. assert messageFuture in done, 'Invalid state: messageFuture not in done futures'
  354. return messageFuture.result()
  355. async def send_messages(self):
  356. while self.connected:
  357. self.logger.debug(f'Trying to get a message')
  358. channel, message, overlongmode = await self._get_message()
  359. self.logger.debug(f'Got message: {message!r}')
  360. if message is None:
  361. break
  362. channelB = channel.encode('utf-8')
  363. messageB = message.encode('utf-8')
  364. usermaskPrefixLength = 1 + (len(self.usermask) if self.usermask else 100) + 1
  365. if usermaskPrefixLength + len(b'PRIVMSG ' + channelB + b' :' + messageB) > 510:
  366. # Message too long, need to split or truncate. First try to split on spaces, then on codepoints. Ideally, would use graphemes between, but that's too complicated.
  367. self.logger.debug(f'Message too long, overlongmode = {overlongmode}')
  368. prefix = b'PRIVMSG ' + channelB + b' :'
  369. prefixLength = usermaskPrefixLength + len(prefix) # Need to account for the origin prefix included by the ircd when sending to others
  370. maxMessageLength = 510 - prefixLength # maximum length of the message part within each line
  371. if overlongmode == 'truncate':
  372. maxMessageLength -= 3 # Make room for an ellipsis at the end
  373. messages = []
  374. while message:
  375. if overlongmode == 'truncate' and messages:
  376. break # Only need the first message on truncation
  377. if len(messageB) <= maxMessageLength:
  378. messages.append(message)
  379. break
  380. spacePos = messageB.rfind(b' ', 0, maxMessageLength + 1)
  381. if spacePos != -1:
  382. messages.append(messageB[:spacePos].decode('utf-8'))
  383. messageB = messageB[spacePos + 1:]
  384. message = messageB.decode('utf-8')
  385. continue
  386. # No space found, need to search for a suitable codepoint location.
  387. pMessage = message[:maxMessageLength] # at most 510 codepoints which expand to at least 510 bytes
  388. pLengths = [len(x.encode('utf-8')) for x in pMessage] # byte size of each codepoint
  389. pRunningLengths = list(itertools.accumulate(pLengths)) # byte size up to each codepoint
  390. if pRunningLengths[-1] <= maxMessageLength: # Special case: entire pMessage is short enough
  391. messages.append(pMessage)
  392. message = message[maxMessageLength:]
  393. messageB = message.encode('utf-8')
  394. continue
  395. cutoffIndex = next(x[0] for x in enumerate(pRunningLengths) if x[1] > maxMessageLength)
  396. messages.append(message[:cutoffIndex])
  397. message = message[cutoffIndex:]
  398. messageB = message.encode('utf-8')
  399. if overlongmode == 'split':
  400. for msg in reversed(messages):
  401. self.messageQueue.putleft_nowait((channel, msg, overlongmode))
  402. elif overlongmode == 'truncate':
  403. self.messageQueue.putleft_nowait((channel, messages[0] + '…', overlongmode))
  404. else:
  405. self.logger.info(f'Sending {message!r} to {channel!r}')
  406. self.unconfirmedMessages.append((channel, message, overlongmode))
  407. self.send(b'PRIVMSG ' + channelB + b' :' + messageB)
  408. await asyncio.sleep(1) # Rate limit
  409. async def confirm_messages(self):
  410. while self.connected:
  411. await asyncio.wait((asyncio.sleep(60), self.connectionClosedEvent.wait()), return_when = concurrent.futures.FIRST_COMPLETED) # Confirm once per minute
  412. if not self.connected: # Disconnected while sleeping, can't confirm unconfirmed messages, requeue them directly
  413. self.messageQueue.putleft_nowait(*self.unconfirmedMessages)
  414. self.unconfirmedMessages = []
  415. break
  416. if not self.unconfirmedMessages:
  417. self.logger.debug('No messages to confirm')
  418. continue
  419. self.logger.debug('Trying to confirm message delivery')
  420. self.pongReceivedEvent.clear()
  421. self.send(b'PING :42')
  422. await asyncio.wait((asyncio.sleep(5), self.pongReceivedEvent.wait()), return_when = concurrent.futures.FIRST_COMPLETED)
  423. self.logger.debug(f'Message delivery successful: {self.pongReceivedEvent.is_set()}')
  424. if not self.pongReceivedEvent.is_set():
  425. # No PONG received in five seconds, assume connection's dead
  426. self.logger.warning(f'Message delivery confirmation failed, putting {len(self.unconfirmedMessages)} messages back into the queue')
  427. self.messageQueue.putleft_nowait(*self.unconfirmedMessages)
  428. self.transport.close()
  429. self.unconfirmedMessages = []
  430. def data_received(self, data):
  431. self.logger.debug(f'Data received: {data!r}')
  432. # Split received data on CRLF. If there's any data left in the buffer, prepend it to the first message and process that.
  433. # Then, process all messages except the last one (since data might not end on a CRLF) and keep the remainder in the buffer.
  434. # If data does end with CRLF, all messages will have been processed and the buffer will be empty again.
  435. messages = data.split(b'\r\n')
  436. if self.buffer:
  437. self.message_received(self.buffer + messages[0])
  438. messages = messages[1:]
  439. for message in messages[:-1]:
  440. self.message_received(message)
  441. self.buffer = messages[-1]
  442. def message_received(self, message):
  443. self.logger.debug(f'Message received: {message!r}')
  444. rawMessage = message
  445. if message.startswith(b':') and b' ' in message:
  446. # Prefixed message, extract command + parameters (the prefix cannot contain a space)
  447. message = message.split(b' ', 1)[1]
  448. # PING/PONG
  449. if message.startswith(b'PING '):
  450. self.send(b'PONG ' + message[5:])
  451. elif message.startswith(b'PONG '):
  452. self.pongReceivedEvent.set()
  453. # SASL
  454. elif message.startswith(b'CAP ') and self.sasl:
  455. if message[message.find(b' ', 4) + 1:] == b'ACK :sasl':
  456. self.send(b'AUTHENTICATE EXTERNAL')
  457. else:
  458. self.logger.error(f'Received unexpected CAP reply {message!r}, terminating connection')
  459. self.transport.close()
  460. elif message == b'AUTHENTICATE +':
  461. self.send(b'AUTHENTICATE +')
  462. elif message.startswith(b'900 '): # "You are now logged in", includes the usermask
  463. words = message.split(b' ')
  464. if len(words) >= 3 and b'!' in words[2] and b'@' in words[2]:
  465. if b'!~' not in words[2]:
  466. # At least Charybdis seems to always return the user without a tilde, even if identd failed. Assume no identd and account for that extra tilde.
  467. words[2] = words[2].replace(b'!', b'!~', 1)
  468. self._maybe_set_usermask(words[2])
  469. elif message.startswith(b'903 '): # SASL auth successful
  470. self.authenticated = True
  471. self.send(b'CAP END')
  472. elif any(message.startswith(x) for x in (b'902 ', b'904 ', b'905 ', b'906 ', b'908 ')):
  473. self.logger.error('SASL error, terminating connection')
  474. self.transport.close()
  475. # NICK errors
  476. elif any(message.startswith(x) for x in (b'431 ', b'432 ', b'433 ', b'436 ')):
  477. self.logger.error(f'Failed to set nickname: {message!r}, terminating connection')
  478. self.transport.close()
  479. # USER errors
  480. elif any(message.startswith(x) for x in (b'461 ', b'462 ')):
  481. self.logger.error(f'Failed to register: {message!r}, terminating connection')
  482. self.transport.close()
  483. # JOIN errors
  484. elif any(message.startswith(x) for x in (b'405 ', b'471 ', b'473 ', b'474 ', b'475 ')):
  485. self.logger.error(f'Failed to join channel: {message!r}, terminating connection')
  486. self.transport.close()
  487. # PART errors
  488. elif message.startswith(b'442 '):
  489. self.logger.error(f'Failed to part channel: {message!r}')
  490. # JOIN/PART errors
  491. elif message.startswith(b'403 '):
  492. self.logger.error(f'Failed to join or part channel: {message!r}')
  493. # PRIVMSG errors
  494. elif any(message.startswith(x) for x in (b'401 ', b'404 ', b'407 ', b'411 ', b'412 ', b'413 ', b'414 ')):
  495. self.logger.error(f'Failed to send message: {message!r}')
  496. # Connection registration reply
  497. elif message.startswith(b'001 '):
  498. self.logger.info('IRC connection registered')
  499. if self.sasl and not self.authenticated:
  500. self.logger.error('IRC connection registered but not authenticated, terminating connection')
  501. self.transport.close()
  502. return
  503. self._send_join_part(b'JOIN', self.channels)
  504. asyncio.create_task(self.send_messages())
  505. asyncio.create_task(self.confirm_messages())
  506. # JOIN success
  507. elif message.startswith(b'JOIN ') and not self.usermask:
  508. # If this is my own join message, it should contain the usermask in the prefix
  509. if rawMessage.startswith(b':' + self.config['irc']['nick'].encode('utf-8') + b'!') and b' ' in rawMessage:
  510. usermask = rawMessage.split(b' ', 1)[0][1:]
  511. self._maybe_set_usermask(usermask)
  512. # Services host change
  513. elif message.startswith(b'396 '):
  514. words = message.split(b' ')
  515. if len(words) >= 3:
  516. # Sanity check inspired by irssi src/irc/core/irc-servers.c
  517. if not any(x in words[2] for x in (b'*', b'?', b'!', b'#', b'&', b' ')) and not any(words[2].startswith(x) for x in (b'@', b':', b'-')) and words[2][-1:] != b'-':
  518. if b'@' in words[2]: # user@host
  519. self._maybe_set_usermask(self.config['irc']['nick'].encode('utf-8') + b'!' + words[2])
  520. else: # host (get user from previous mask or settings)
  521. if self.usermask:
  522. user = self.usermask.split(b'@')[0].split(b'!')[1]
  523. else:
  524. user = b'~' + self.config['irc']['nick'].encode('utf-8')
  525. self._maybe_set_usermask(self.config['irc']['nick'].encode('utf-8') + b'!' + user + b'@' + words[2])
  526. def connection_lost(self, exc):
  527. self.logger.info('IRC connection lost')
  528. self.connected = False
  529. self.connectionClosedEvent.set()
  530. class IRCClient:
  531. logger = logging.getLogger('http2irc.IRCClient')
  532. def __init__(self, messageQueue, config):
  533. self.messageQueue = messageQueue
  534. self.config = config
  535. self.channels = {map_['ircchannel'] for map_ in config['maps'].values()}
  536. self._transport = None
  537. self._protocol = None
  538. def update_config(self, config):
  539. needReconnect = self.config['irc'] != config['irc']
  540. self.config = config
  541. if self._transport: # if currently connected:
  542. if needReconnect:
  543. self._transport.close()
  544. else:
  545. self.channels = {map_['ircchannel'] for map_ in config['maps'].values()}
  546. self._protocol.update_channels(self.channels)
  547. def _get_ssl_context(self):
  548. ctx = SSL_CONTEXTS[self.config['irc']['ssl']]
  549. if self.config['irc']['certfile'] and self.config['irc']['certkeyfile']:
  550. if ctx is True:
  551. ctx = ssl.create_default_context()
  552. if isinstance(ctx, ssl.SSLContext):
  553. ctx.load_cert_chain(self.config['irc']['certfile'], keyfile = self.config['irc']['certkeyfile'])
  554. return ctx
  555. async def run(self, loop, sigintEvent):
  556. connectionClosedEvent = asyncio.Event()
  557. while True:
  558. connectionClosedEvent.clear()
  559. try:
  560. self._transport, self._protocol = await loop.create_connection(lambda: IRCClientProtocol(self.messageQueue, connectionClosedEvent, loop, self.config, self.channels), self.config['irc']['host'], self.config['irc']['port'], ssl = self._get_ssl_context())
  561. try:
  562. await asyncio.wait((connectionClosedEvent.wait(), sigintEvent.wait()), return_when = concurrent.futures.FIRST_COMPLETED)
  563. finally:
  564. self._transport.close() #TODO BaseTransport.close is asynchronous and then triggers the protocol's connection_lost callback; need to wait for connectionClosedEvent again perhaps to correctly handle ^C?
  565. except (ConnectionRefusedError, asyncio.TimeoutError) as e:
  566. self.logger.error(str(e))
  567. await asyncio.wait((asyncio.sleep(5), sigintEvent.wait()), return_when = concurrent.futures.FIRST_COMPLETED)
  568. if sigintEvent.is_set():
  569. break
  570. class WebServer:
  571. logger = logging.getLogger('http2irc.WebServer')
  572. def __init__(self, messageQueue, config):
  573. self.messageQueue = messageQueue
  574. self.config = config
  575. self._paths = {} # '/path' => ('#channel', auth, module, moduleargs) where auth is either False (no authentication) or the HTTP header value for basic auth
  576. self._app = aiohttp.web.Application()
  577. self._app.add_routes([aiohttp.web.post('/{path:.+}', self.post)])
  578. self.update_config(config)
  579. self._configChanged = asyncio.Event()
  580. def update_config(self, config):
  581. self._paths = {map_['webpath']: (map_['ircchannel'], f'Basic {base64.b64encode(map_["auth"].encode("utf-8")).decode("utf-8")}' if map_['auth'] else False, map_['module'], map_['moduleargs'], map_['overlongmode']) for map_ in config['maps'].values()}
  582. needRebind = self.config['web'] != config['web']
  583. self.config = config
  584. if needRebind:
  585. self._configChanged.set()
  586. async def run(self, stopEvent):
  587. while True:
  588. runner = aiohttp.web.AppRunner(self._app)
  589. await runner.setup()
  590. site = aiohttp.web.TCPSite(runner, self.config['web']['host'], self.config['web']['port'])
  591. await site.start()
  592. await asyncio.wait((stopEvent.wait(), self._configChanged.wait()), return_when = concurrent.futures.FIRST_COMPLETED)
  593. await runner.cleanup()
  594. if stopEvent.is_set():
  595. break
  596. self._configChanged.clear()
  597. async def post(self, request):
  598. self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r} with body {(await request.read())!r}')
  599. try:
  600. channel, auth, module, moduleargs, overlongmode = self._paths[request.path]
  601. except KeyError:
  602. self.logger.info(f'Bad request {id(request)}: no path {request.path!r}')
  603. raise aiohttp.web.HTTPNotFound()
  604. if auth:
  605. authHeader = request.headers.get('Authorization')
  606. if not authHeader or authHeader != auth:
  607. self.logger.info(f'Bad request {id(request)}: authentication failed: {authHeader!r} != {auth}')
  608. raise aiohttp.web.HTTPForbidden()
  609. if module is not None:
  610. self.logger.debug(f'Processing request {id(request)} using {module!r}')
  611. try:
  612. message = await module.process(request, *moduleargs)
  613. except aiohttp.web.HTTPException as e:
  614. raise e
  615. except Exception as e:
  616. self.logger.error(f'Bad request {id(request)}: exception in module process function: {type(e).__module__}.{type(e).__name__}: {e!s}')
  617. raise aiohttp.web.HTTPBadRequest()
  618. if '\r' in message or '\n' in message:
  619. self.logger.error(f'Bad request {id(request)}: module process function returned message with linebreaks: {message!r}')
  620. raise aiohttp.web.HTTPBadRequest()
  621. else:
  622. self.logger.debug(f'Processing request {id(request)} using default processor')
  623. message = await self._default_process(request)
  624. self.logger.info(f'Accepted request {id(request)}, putting message {message!r} for {channel} into message queue')
  625. self.messageQueue.put_nowait((channel, message, overlongmode))
  626. raise aiohttp.web.HTTPOk()
  627. async def _default_process(self, request):
  628. try:
  629. message = await request.text()
  630. except Exception as e:
  631. self.logger.info(f'Bad request {id(request)}: exception while reading request data: {e!s}')
  632. raise aiohttp.web.HTTPBadRequest() # Yes, it's always the client's fault. :-)
  633. self.logger.debug(f'Request {id(request)} payload: {message!r}')
  634. # Strip optional [CR] LF at the end of the payload
  635. if message.endswith('\r\n'):
  636. message = message[:-2]
  637. elif message.endswith('\n'):
  638. message = message[:-1]
  639. if '\r' in message or '\n' in message:
  640. self.logger.info('Bad request {id(request)}: linebreaks in message')
  641. raise aiohttp.web.HTTPBadRequest()
  642. return message
  643. def configure_logging(config):
  644. #TODO: Replace with logging.basicConfig(..., force = True) (Py 3.8+)
  645. root = logging.getLogger()
  646. root.setLevel(getattr(logging, config['logging']['level']))
  647. root.handlers = [] #FIXME: Undocumented attribute of logging.Logger
  648. formatter = logging.Formatter(config['logging']['format'], style = '{')
  649. stderrHandler = logging.StreamHandler()
  650. stderrHandler.setFormatter(formatter)
  651. root.addHandler(stderrHandler)
  652. async def main():
  653. if len(sys.argv) != 2:
  654. print('Usage: http2irc.py CONFIGFILE', file = sys.stderr)
  655. sys.exit(1)
  656. configFile = sys.argv[1]
  657. config = Config(configFile)
  658. configure_logging(config)
  659. loop = asyncio.get_running_loop()
  660. messageQueue = MessageQueue()
  661. irc = IRCClient(messageQueue, config)
  662. webserver = WebServer(messageQueue, config)
  663. sigintEvent = asyncio.Event()
  664. def sigint_callback():
  665. global logger
  666. nonlocal sigintEvent
  667. logger.info('Got SIGINT, stopping')
  668. sigintEvent.set()
  669. loop.add_signal_handler(signal.SIGINT, sigint_callback)
  670. def sigusr1_callback():
  671. global logger
  672. nonlocal config, irc, webserver
  673. logger.info('Got SIGUSR1, reloading config')
  674. try:
  675. newConfig = config.reread()
  676. except InvalidConfig as e:
  677. logger.error(f'Config reload failed: {e!s} (old config remains active)')
  678. return
  679. config = newConfig
  680. configure_logging(config)
  681. irc.update_config(config)
  682. webserver.update_config(config)
  683. loop.add_signal_handler(signal.SIGUSR1, sigusr1_callback)
  684. await asyncio.gather(irc.run(loop, sigintEvent), webserver.run(sigintEvent))
  685. if __name__ == '__main__':
  686. asyncio.run(main())