Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 

811 řádky
36 KiB

  1. import aiohttp
  2. import aiohttp.web
  3. import asyncio
  4. import base64
  5. import collections
  6. import datetime
  7. import importlib.util
  8. import inspect
  9. import ircstates
  10. import irctokens
  11. import itertools
  12. import logging
  13. import os.path
  14. import signal
  15. import ssl
  16. import string
  17. import sys
  18. import tempfile
  19. import time
  20. import toml
  21. logger = logging.getLogger('irclog')
  22. SSL_CONTEXTS = {'yes': True, 'no': False, 'insecure': ssl.SSLContext()}
  23. messageConnectionClosed = object() # Signals that the connection was closed by either the bot or the server
  24. messageEOF = object() # Special object to signal the end of messages to Storage
  25. def get_month_str(ts = None):
  26. dt = datetime.datetime.utcfromtimestamp(ts).replace(tzinfo = datetime.timezone.utc) if ts is not None else datetime.datetime.utcnow()
  27. return dt.strftime('%Y-%m')
  28. class InvalidConfig(Exception):
  29. '''Error in configuration file'''
  30. def is_valid_pem(path, withCert):
  31. '''Very basic check whether something looks like a valid PEM certificate'''
  32. try:
  33. with open(path, 'rb') as fp:
  34. contents = fp.read()
  35. # All of these raise exceptions if something's wrong...
  36. if withCert:
  37. assert contents.startswith(b'-----BEGIN CERTIFICATE-----\n')
  38. endCertPos = contents.index(b'-----END CERTIFICATE-----\n')
  39. base64.b64decode(contents[28:endCertPos].replace(b'\n', b''), validate = True)
  40. assert contents[endCertPos + 26:].startswith(b'-----BEGIN PRIVATE KEY-----\n')
  41. else:
  42. assert contents.startswith(b'-----BEGIN PRIVATE KEY-----\n')
  43. endCertPos = -26 # Please shoot me.
  44. endKeyPos = contents.index(b'-----END PRIVATE KEY-----\n')
  45. base64.b64decode(contents[endCertPos + 26 + 28: endKeyPos].replace(b'\n', b''), validate = True)
  46. assert contents[endKeyPos + 26:] == b''
  47. return True
  48. except: # Yes, really
  49. return False
  50. class Config(dict):
  51. def __init__(self, filename):
  52. super().__init__()
  53. self._filename = filename
  54. with open(self._filename, 'r') as fp:
  55. obj = toml.load(fp)
  56. # Sanity checks
  57. if any(x not in ('logging', 'storage', 'irc', 'web', 'channels') for x in obj.keys()):
  58. raise InvalidConfig('Unknown sections found in base object')
  59. if any(not isinstance(x, collections.abc.Mapping) for x in obj.values()):
  60. raise InvalidConfig('Invalid section type(s), expected objects/dicts')
  61. if 'logging' in obj:
  62. if any(x not in ('level', 'format') for x in obj['logging']):
  63. raise InvalidConfig('Unknown key found in log section')
  64. if 'level' in obj['logging'] and obj['logging']['level'] not in ('DEBUG', 'INFO', 'WARNING', 'ERROR'):
  65. raise InvalidConfig('Invalid log level')
  66. if 'format' in obj['logging']:
  67. if not isinstance(obj['logging']['format'], str):
  68. raise InvalidConfig('Invalid log format')
  69. try:
  70. #TODO: Replace with logging.Formatter's validate option (3.8+); this test does not cover everything that could be wrong (e.g. invalid format spec or conversion)
  71. # This counts the number of replacement fields. Formatter.parse yields tuples whose second value is the field name; if it's None, there is no field (e.g. literal text).
  72. assert sum(1 for x in string.Formatter().parse(obj['logging']['format']) if x[1] is not None) > 0
  73. except (ValueError, AssertionError) as e:
  74. raise InvalidConfig('Invalid log format: parsing failed') from e
  75. if 'storage' in obj:
  76. if any(x != 'path' for x in obj['storage']):
  77. raise InvalidConfig('Unknown key found in storage section')
  78. if 'path' in obj['storage']:
  79. obj['storage']['path'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['storage']['path']))
  80. try:
  81. #TODO This doesn't seem to work correctly; doesn't fail when the dir is -w
  82. f = tempfile.TemporaryFile(dir = obj['storage']['path'])
  83. f.close()
  84. except (OSError, IOError) as e:
  85. raise InvalidConfig('Invalid storage path: not writable') from e
  86. if 'irc' in obj:
  87. if any(x not in ('host', 'port', 'ssl', 'nick', 'real', 'certfile', 'certkeyfile') for x in obj['irc']):
  88. raise InvalidConfig('Unknown key found in irc section')
  89. if 'host' in obj['irc'] and not isinstance(obj['irc']['host'], str): #TODO: Check whether it's a valid hostname
  90. raise InvalidConfig('Invalid IRC host')
  91. if 'port' in obj['irc'] and (not isinstance(obj['irc']['port'], int) or not 1 <= obj['irc']['port'] <= 65535):
  92. raise InvalidConfig('Invalid IRC port')
  93. if 'ssl' in obj['irc'] and obj['irc']['ssl'] not in ('yes', 'no', 'insecure'):
  94. raise InvalidConfig(f'Invalid IRC SSL setting: {obj["irc"]["ssl"]!r}')
  95. if 'nick' in obj['irc'] and not isinstance(obj['irc']['nick'], str): #TODO: Check whether it's a valid nickname, username, etc.
  96. raise InvalidConfig('Invalid IRC nick')
  97. if len(IRCClientProtocol.nick_command(obj['irc']['nick'])) > 510:
  98. raise InvalidConfig('Invalid IRC nick: NICK command too long')
  99. if 'real' in obj['irc'] and not isinstance(obj['irc']['real'], str):
  100. raise InvalidConfig('Invalid IRC realname')
  101. if len(IRCClientProtocol.user_command(obj['irc']['nick'], obj['irc']['real'])) > 510:
  102. raise InvalidConfig('Invalid IRC nick/realname combination: USER command too long')
  103. if ('certfile' in obj['irc']) != ('certkeyfile' in obj['irc']):
  104. raise InvalidConfig('Invalid IRC cert config: needs both certfile and certkeyfile')
  105. if 'certfile' in obj['irc']:
  106. if not isinstance(obj['irc']['certfile'], str):
  107. raise InvalidConfig('Invalid certificate file: not a string')
  108. obj['irc']['certfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certfile']))
  109. if not os.path.isfile(obj['irc']['certfile']):
  110. raise InvalidConfig('Invalid certificate file: not a regular file')
  111. if not is_valid_pem(obj['irc']['certfile'], True):
  112. raise InvalidConfig('Invalid certificate file: not a valid PEM cert')
  113. if 'certkeyfile' in obj['irc']:
  114. if not isinstance(obj['irc']['certkeyfile'], str):
  115. raise InvalidConfig('Invalid certificate key file: not a string')
  116. obj['irc']['certkeyfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certkeyfile']))
  117. if not os.path.isfile(obj['irc']['certkeyfile']):
  118. raise InvalidConfig('Invalid certificate key file: not a regular file')
  119. if not is_valid_pem(obj['irc']['certkeyfile'], False):
  120. raise InvalidConfig('Invalid certificate key file: not a valid PEM key')
  121. if 'web' in obj:
  122. if any(x not in ('host', 'port') for x in obj['web']):
  123. raise InvalidConfig('Unknown key found in web section')
  124. if 'host' in obj['web'] and not isinstance(obj['web']['host'], str): #TODO: Check whether it's a valid hostname (must resolve I guess?)
  125. raise InvalidConfig('Invalid web hostname')
  126. if 'port' in obj['web'] and (not isinstance(obj['web']['port'], int) or not 1 <= obj['web']['port'] <= 65535):
  127. raise InvalidConfig('Invalid web port')
  128. if 'channels' in obj:
  129. seenChannels = {}
  130. for key, channel in obj['channels'].items():
  131. if not isinstance(key, str) or not key:
  132. raise InvalidConfig(f'Invalid channel key {key!r}')
  133. if not isinstance(channel, collections.abc.Mapping):
  134. raise InvalidConfig(f'Invalid channel for {key!r}')
  135. if any(x not in ('ircchannel', 'auth', 'active') for x in channel):
  136. raise InvalidConfig(f'Unknown key(s) found in channel {key!r}')
  137. if 'ircchannel' not in channel:
  138. channel['ircchannel'] = f'#{key}'
  139. if not isinstance(channel['ircchannel'], str):
  140. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: not a string')
  141. if not channel['ircchannel'].startswith('#') and not channel['ircchannel'].startswith('&'):
  142. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: does not start with # or &')
  143. if any(x in channel['ircchannel'][1:] for x in (' ', '\x00', '\x07', '\r', '\n', ',')):
  144. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: contains forbidden characters')
  145. if len(channel['ircchannel']) > 200:
  146. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: too long')
  147. if channel['ircchannel'] in seenChannels:
  148. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: collides with channel {seenWebPaths[channel["ircchannel"]]!r}')
  149. seenChannels[channel['ircchannel']] = key
  150. if 'auth' in channel:
  151. if channel['auth'] is not False and not isinstance(channel['auth'], str):
  152. raise InvalidConfig(f'Invalid channel {key!r} auth: must be false or a string')
  153. if isinstance(channel['auth'], str) and ':' not in channel['auth']:
  154. raise InvalidConfig(f'Invalid channel {key!r} auth: must contain a colon')
  155. else:
  156. channel['auth'] = False
  157. if 'active' in channel:
  158. if channel['active'] is not True and channel['active'] is not False:
  159. raise InvalidConfig(f'Invalid channel {key!r} active: must be true or false')
  160. else:
  161. channel['active'] = True
  162. # Default values
  163. finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'storage': {'path': os.path.abspath(os.path.dirname(self._filename))}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'irclogbot', 'real': 'I am an irclog bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'channels': {}}
  164. # Default values for channels are already set above.
  165. # Merge in what was read from the config file and set keys on self
  166. for key in ('logging', 'storage', 'irc', 'web', 'channels'):
  167. if key in obj:
  168. finalObj[key].update(obj[key])
  169. self[key] = finalObj[key]
  170. def __repr__(self):
  171. return f'<Config(logging={self["logging"]!r}, storage={self["storage"]!r}, irc={self["irc"]!r}, web={self["web"]!r}, channels={self["channels"]!r})>'
  172. def reread(self):
  173. return Config(self._filename)
  174. class IRCClientProtocol(asyncio.Protocol):
  175. logger = logging.getLogger('irclog.IRCClientProtocol')
  176. def __init__(self, messageQueue, connectionClosedEvent, loop, config, channels):
  177. self.messageQueue = messageQueue
  178. self.connectionClosedEvent = connectionClosedEvent
  179. self.loop = loop
  180. self.config = config
  181. self.buffer = b''
  182. self.connected = False
  183. self.channels = channels # Currently joined/supposed-to-be-joined channels; set(str)
  184. self.userChannels = collections.defaultdict(set) # List of which channels a user is known to be in; nickname:str -> {channel:str, ...}
  185. self.sasl = bool(self.config['irc']['certfile'] and self.config['irc']['certkeyfile'])
  186. self.authenticated = False
  187. self.server = ircstates.Server(self.config['irc']['host'])
  188. self.capReqsPending = set() # Capabilities requested from the server but not yet ACKd or NAKd
  189. self.caps = set() # Capabilities acknowledged by the server
  190. self.whoxQueue = collections.deque() # Names of channels that were joined successfully but for which no WHO (WHOX) query was sent yet
  191. self.whoxChannel = None # Name of channel for which a WHO query is currently running
  192. self.whoxReply = [] # List of (nickname, account) tuples from the currently running WHO query
  193. @staticmethod
  194. def nick_command(nick: str):
  195. return b'NICK ' + nick.encode('utf-8')
  196. @staticmethod
  197. def user_command(nick: str, real: str):
  198. nickb = nick.encode('utf-8')
  199. return b'USER ' + nickb + b' ' + nickb + b' ' + nickb + b' :' + real.encode('utf-8')
  200. @staticmethod
  201. def valid_channel(channel: str):
  202. return channel[0] in ('#', '&') and not any(x in channel for x in (' ', '\x00', '\x07', '\r', '\n', ','))
  203. @staticmethod
  204. def valid_nick(nick: str):
  205. # According to RFC 1459, a nick must be '<letter> { <letter> | <number> | <special> }'. This is obviously not true in practice because <special> doesn't include underscores, for example.
  206. # So instead, just do a sanity check similar to the channel one to disallow obvious bullshit.
  207. return not any(x in nick for x in (' ', '\x00', '\x07', '\r', '\n', ','))
  208. @staticmethod
  209. def prefix_to_nick(prefix: str):
  210. nick = prefix[1:]
  211. if '!' in nick:
  212. nick = nick.split('!', 1)[0]
  213. if '@' in nick: # nick@host is also legal
  214. nick = nick.split('@', 1)[0]
  215. return nick
  216. def connection_made(self, transport):
  217. self.logger.info('IRC connected')
  218. self.transport = transport
  219. self.connected = True
  220. caps = [b'userhost-in-names', b'away-notify', b'account-notify', b'extended-join']
  221. if self.sasl:
  222. caps.append(b'sasl')
  223. for cap in caps:
  224. self.capReqsPending.add(cap.decode('ascii'))
  225. self.send(b'CAP REQ :' + cap)
  226. self.send(self.nick_command(self.config['irc']['nick']))
  227. self.send(self.user_command(self.config['irc']['nick'], self.config['irc']['real']))
  228. def _send_join_part(self, command, channels):
  229. '''Split a JOIN or PART into multiple messages as necessary'''
  230. # command: b'JOIN' or b'PART'; channels: set[str]
  231. channels = [x.encode('utf-8') for x in channels]
  232. if len(command) + sum(1 + len(x) for x in channels) <= 510: # Total length = command + (separator + channel name for each channel, where the separator is a space for the first and then a comma)
  233. # Everything fits into one command.
  234. self.send(command + b' ' + b','.join(channels))
  235. return
  236. # List too long, need to split.
  237. limit = 510 - len(command)
  238. lengths = [1 + len(x) for x in channels] # separator + channel name
  239. chanLengthAcceptable = [l <= limit for l in lengths]
  240. if not all(chanLengthAcceptable):
  241. # There are channel names that are too long to even fit into one message on their own; filter them out and warn about them.
  242. # This should never happen since the config reader would already filter it out.
  243. tooLongChannels = [x for x, a in zip(channels, chanLengthAcceptable) if not a]
  244. channels = [x for x, a in zip(channels, chanLengthAcceptable) if a]
  245. lengths = [l for l, a in zip(lengths, chanLengthAcceptable) if a]
  246. for channel in tooLongChannels:
  247. self.logger.warning(f'Cannot {command} {channel}: name too long')
  248. runningLengths = list(itertools.accumulate(lengths)) # entry N = length of all entries up to and including channel N, including separators
  249. offset = 0
  250. while channels:
  251. i = next((x[0] for x in enumerate(runningLengths) if x[1] - offset > limit), -1)
  252. if i == -1: # Last batch
  253. i = len(channels)
  254. self.send(command + b' ' + b','.join(channels[:i]))
  255. offset = runningLengths[i-1]
  256. channels = channels[i:]
  257. runningLengths = runningLengths[i:]
  258. def update_channels(self, channels: set):
  259. channelsToPart = self.channels - channels
  260. channelsToJoin = channels - self.channels
  261. self.channels = channels
  262. if self.connected:
  263. if channelsToPart:
  264. self._send_join_part(b'PART', channelsToPart)
  265. if channelsToJoin:
  266. self._send_join_part(b'JOIN', channelsToJoin)
  267. def send(self, data):
  268. self.logger.debug(f'Send: {data!r}')
  269. if len(data) > 510:
  270. raise RuntimeError(f'IRC message too long ({len(data)} > 510): {data!r}')
  271. time_ = time.time()
  272. self.transport.write(data + b'\r\n')
  273. self.messageQueue.put_nowait((time_, b'> ' + data, None, None))
  274. def data_received(self, data):
  275. time_ = time.time()
  276. self.logger.debug(f'Data received: {data!r}')
  277. # Split received data on CRLF. If there's any data left in the buffer, prepend it to the first message and process that.
  278. # Then, process all messages except the last one (since data might not end on a CRLF) and keep the remainder in the buffer.
  279. # If data does end with CRLF, all messages will have been processed and the buffer will be empty again.
  280. messages = data.split(b'\r\n')
  281. if self.buffer:
  282. messages[0] = self.buffer + messages[0]
  283. for message in messages[:-1]:
  284. lines = self.server.recv(message + b'\r\n')
  285. assert len(lines) == 1
  286. self.message_received(time_, message, lines[0])
  287. self.server.parse_tokens(lines[0])
  288. self.buffer = messages[-1]
  289. def message_received(self, time_, message, line):
  290. self.logger.debug(f'Message received at {time_}: {message!r}')
  291. # Queue message for storage
  292. # Note: WHOX is queued further down
  293. self.messageQueue.put_nowait((time_, b'< ' + message, None, None))
  294. for command, channel, logMessage in self.render_message(line):
  295. self.messageQueue.put_nowait((time_, logMessage, command, channel))
  296. maybeTriggerWhox = False
  297. # PING/PONG
  298. if line.command == 'PING':
  299. self.send(irctokens.build('PONG', line.params).format().encode('utf-8'))
  300. # IRCv3 and SASL
  301. elif line.command == 'CAP':
  302. if line.params[1] == 'ACK':
  303. for cap in line.params[2].split(' '):
  304. self.logger.debug('CAP ACK: {cap}')
  305. self.caps.add(cap)
  306. if cap == 'sasl' and self.sasl:
  307. self.send(b'AUTHENTICATE EXTERNAL')
  308. else:
  309. self.capReqsPending.remove(cap)
  310. elif line.params[1] == 'NAK':
  311. self.logger.warning(f'Failed to activate CAP(s): {line.params[2]}')
  312. for cap in line.params[2].split(' '):
  313. self.capReqsPending.remove(cap)
  314. if len(self.capReqsPending) == 0:
  315. self.send(b'CAP END')
  316. elif line.command == 'AUTHENTICATE' and line.params == ['+']:
  317. self.send(b'AUTHENTICATE +')
  318. elif line.command == ircstates.numerics.RPL_SASLSUCCESS:
  319. self.authenticated = True
  320. self.capReqsPending.remove('sasl')
  321. if len(self.capReqsPending) == 0:
  322. self.send(b'CAP END')
  323. elif line.command in ('902', ircstates.numerics.ERR_SASLFAIL, ircstates.numerics.ERR_SASLTOOLONG, ircstates.numerics.ERR_SASLABORTED, ircstates.numerics.RPL_SASLMECHS):
  324. self.logger.error('SASL error, terminating connection')
  325. self.transport.close()
  326. # NICK errors
  327. elif line.command in ('431', ircstates.numerics.ERR_ERRONEUSNICKNAME, ircstates.numerics.ERR_NICKNAMEINUSE, '436'):
  328. self.logger.error(f'Failed to set nickname: {message!r}, terminating connection')
  329. self.transport.close()
  330. # USER errors
  331. elif line.command in ('461', '462'):
  332. self.logger.error(f'Failed to register: {message!r}, terminating connection')
  333. self.transport.close()
  334. # JOIN errors
  335. elif line.command in (ircstates.numerics.ERR_TOOMANYCHANNELS, ircstates.numerics.ERR_CHANNELISFULL, ircstates.numerics.ERR_INVITEONLYCHAN, ircstates.numerics.ERR_BANNEDFROMCHAN, ircstates.numerics.ERR_BADCHANNELKEY):
  336. self.logger.error(f'Failed to join channel: {message!r}, terminating connection')
  337. self.transport.close()
  338. # PART errors
  339. elif line.command == '442':
  340. self.logger.error(f'Failed to part channel: {message!r}')
  341. # JOIN/PART errors
  342. elif line.command == ircstates.numerics.ERR_NOSUCHCHANNEL:
  343. self.logger.error(f'Failed to join or part channel: {message!r}')
  344. # Connection registration reply
  345. elif line.command == ircstates.numerics.RPL_WELCOME:
  346. self.logger.info('IRC connection registered')
  347. if self.sasl and not self.authenticated:
  348. self.logger.error('IRC connection registered but not authenticated, terminating connection')
  349. self.transport.close()
  350. return
  351. self._send_join_part(b'JOIN', self.channels)
  352. # Bot getting KICKed
  353. elif line.command == 'KICK' and line.source and self.server.casefold(line.params[1]) == self.server.casefold(self.server.nickname):
  354. self.logger.warning(f'Got kicked from {line.params[0]}')
  355. kickedChannel = self.server.casefold(line.params[0])
  356. for channel in self.channels:
  357. if self.server.casefold(channel) == kickedChannel:
  358. self.channels.remove(channel)
  359. break
  360. # WHOX on successful JOIN if supported to fetch account information
  361. elif line.command == 'JOIN' and self.server.isupport.whox and line.source and self.server.casefold(line.hostmask.nickname) == self.server.casefold(self.server.nickname):
  362. self.whoxQueue.extend(line.params[0].split(','))
  363. maybeTriggerWhox = True
  364. # WHOX response
  365. elif line.command == ircstates.numerics.RPL_WHOSPCRPL and line.params[1] == '042':
  366. self.whoxReply.append((line.params[2], line.params[3] if line.params[3] != '0' else None))
  367. # End of WHOX response
  368. elif line.command == ircstates.numerics.RPL_ENDOFWHO:
  369. self.messageQueue.put_nowait((time_, self.render_whox(), 'WHOX', self.whoxChannel))
  370. self.whoxChannel = None
  371. self.whoxReply = []
  372. maybeTriggerWhox = True
  373. # General fatal ERROR
  374. elif line.command == 'ERROR':
  375. self.logger.error(f'Server sent ERROR: {message!r}')
  376. self.transport.close()
  377. # Send next WHOX if appropriate
  378. if maybeTriggerWhox and self.whoxChannel is None and self.whoxQueue:
  379. self.whoxChannel = self.whoxQueue.popleft()
  380. self.whoxReply = []
  381. self.send(b'WHO ' + self.whoxChannel.encode('utf-8') + b' c%nat,042')
  382. def get_mode_char(self, channelUser):
  383. if channelUser is None:
  384. return ''
  385. prefix = self.server.isupport.prefix
  386. if any(x in prefix.modes for x in channelUser.modes):
  387. return prefix.prefixes[min(prefix.modes.index(x) for x in channelUser.modes if x in prefix.modes)]
  388. return ''
  389. def render_nick_with_mode(self, channelUser, nickname):
  390. return f'{self.get_mode_char(channelUser)}{nickname}'
  391. def render_message(self, line):
  392. if line.source:
  393. sourceUser = self.server.users.get(self.server.casefold(line.hostmask.nickname)) if line.source else None
  394. get_mode_nick = lambda channel, nick = line.hostmask.nickname: self.render_nick_with_mode(self.server.channels[self.server.casefold(channel)].users.get(self.server.casefold(nick)), nick)
  395. if line.command == 'JOIN':
  396. # Although servers SHOULD NOT send multiple channels in one message per the modern IRC docs <https://modern.ircdocs.horse/#join-message>, let's do the safe thing...
  397. channels = [line.params[0]] if ',' not in line.params[0] else line.params[0].split(',')
  398. account = f' ({line.params[-2]})' if 'extended-join' in self.caps and line.params[-2] != '*' else ''
  399. for channel in channels:
  400. # There can't be a mode set yet on the JOIN, so no need to use get_mode_nick (which would complicate the self-join).
  401. yield 'JOIN', channel, f'{line.hostmask.nickname}{account} joins {channel}'
  402. elif line.command in ('PRIVMSG', 'NOTICE'):
  403. channel = line.params[0]
  404. if channel not in self.server.channels:
  405. return
  406. yield line.command, channel, f'<{get_mode_nick(channel)}> {line.params[1]}'
  407. elif line.command == 'PART':
  408. channels = [line.params[0]] if ',' not in line.params[0] else line.params[0].split(',')
  409. reason = f' [{line.params[1]}]' if len(line.params) == 2 else ''
  410. for channel in channels:
  411. yield 'PART', channel, f'{get_mode_nick(channel)} leaves {channel}'
  412. elif line.command in ('QUIT', 'NICK', 'ACCOUNT'):
  413. if line.hostmask.nickname == self.server.nickname:
  414. channels = self.channels
  415. elif sourceUser is not None:
  416. channels = sourceUser.channels
  417. else:
  418. return
  419. for channel in channels:
  420. if line.command == 'QUIT':
  421. message = f'{get_mode_nick(channel)} quits [{line.params[0]}]'
  422. elif line.command == 'NICK':
  423. newMode = self.get_mode_char(self.server.channels[self.server.casefold(channel)].users[self.server.casefold(line.hostmask.nickname)])
  424. message = f'{get_mode_nick(channel)} is now known as {newMode}{line.params[0]}'
  425. elif line.command == 'ACCOUNT':
  426. message = f'{get_mode_nick(channel)} is now authenticated as {line.params[0]}'
  427. yield line.command, channel, message
  428. elif line.command == 'MODE' and line.params[0][0] in ('#', '&'):
  429. yield 'MODE', line.params[0], f'{get_mode_nick(line.params[0])} sets mode: {" ".join(line.params[1:])}'
  430. elif line.command == 'KICK':
  431. channel = line.params[0]
  432. reason = f' [{line.params[2]}]' if len(line.params) == 3 else ''
  433. yield 'KICK', channel, f'{get_mode_nick(channel, line.params[1])} is kicked from {channel} by {get_mode_nick(channel)}{reason}'
  434. elif line.command == 'TOPIC':
  435. channel = line.params[0]
  436. if line.params[1] == '':
  437. yield 'TOPIC', channel, f'{get_mode_nick(channel)} unsets the topic of {channel}'
  438. else:
  439. yield 'TOPIC', channel, f'{get_mode_nick(channel)} sets the topic of {channel} to: {line.params[1]}'
  440. elif line.command == ircstates.numerics.RPL_TOPIC:
  441. channel = line.params[1]
  442. yield 'TOPIC', channel, f'Topic of {channel}: {line.params[2]}'
  443. elif line.command == ircstates.numerics.RPL_TOPICWHOTIME:
  444. yield 'TOPICWHO', line.params[1], f'Topic set by {irctokens.hostmask(line.params[2]).nickname} at {datetime.datetime.utcfromtimestamp(int(line.params[3])).replace(tzinfo = datetime.timezone.utc):%Y-%m-%d %H:%M:%SZ}'
  445. elif line.command == ircstates.numerics.RPL_ENDOFNAMES:
  446. channel = line.params[1]
  447. users = self.server.channels[self.server.casefold(channel)].users
  448. yield 'NAMES', channel, f'Currently in {channel}: {", ".join(self.render_nick_with_mode(u, u.nickname) for u in users.values())}'
  449. def render_whox(self):
  450. users = []
  451. for nickname, account in self.whoxReply:
  452. accountStr = f' ({account})' if account is not None else ''
  453. users.append(f'{self.render_nick_with_mode(self.server.channels[self.server.casefold(self.whoxChannel)].users.get(self.server.casefold(nickname)), nickname)}{accountStr}')
  454. return f'Currently in {self.whoxChannel}: {", ".join(users)}'
  455. async def quit(self):
  456. # The server acknowledges a QUIT by sending an ERROR and closing the connection. The latter triggers connection_lost, so just wait for the closure event.
  457. self.logger.info('Quitting')
  458. self.send(b'QUIT :Bye')
  459. await self.connectionClosedEvent.wait()
  460. self.transport.close()
  461. def connection_lost(self, exc):
  462. time_ = time.time()
  463. self.logger.info('IRC connection lost')
  464. self.connected = False
  465. self.connectionClosedEvent.set()
  466. self.messageQueue.put_nowait((time_, b'- Connection closed.', None, None))
  467. for channel in self.channels:
  468. self.messageQueue.put_nowait((time_, 'Connection closed.', '_CONNCLOSED', channel))
  469. class IRCClient:
  470. logger = logging.getLogger('irclog.IRCClient')
  471. def __init__(self, messageQueue, config):
  472. self.messageQueue = messageQueue
  473. self.config = config
  474. self.channels = {channel['ircchannel'] for channel in config['channels'].values()}
  475. self._transport = None
  476. self._protocol = None
  477. def update_config(self, config):
  478. needReconnect = self.config['irc'] != config['irc']
  479. self.config = config
  480. if self._transport: # if currently connected:
  481. if needReconnect:
  482. self._transport.close()
  483. else:
  484. self.channels = {channel['ircchannel'] for channel in config['channels'].values()}
  485. self._protocol.update_channels(self.channels)
  486. def _get_ssl_context(self):
  487. ctx = SSL_CONTEXTS[self.config['irc']['ssl']]
  488. if self.config['irc']['certfile'] and self.config['irc']['certkeyfile']:
  489. if ctx is True:
  490. ctx = ssl.create_default_context()
  491. if isinstance(ctx, ssl.SSLContext):
  492. ctx.load_cert_chain(self.config['irc']['certfile'], keyfile = self.config['irc']['certkeyfile'])
  493. return ctx
  494. async def run(self, loop, sigintEvent):
  495. connectionClosedEvent = asyncio.Event()
  496. while True:
  497. connectionClosedEvent.clear()
  498. try:
  499. self._transport, self._protocol = await loop.create_connection(lambda: IRCClientProtocol(self.messageQueue, connectionClosedEvent, loop, self.config, self.channels), self.config['irc']['host'], self.config['irc']['port'], ssl = self._get_ssl_context())
  500. try:
  501. await asyncio.wait((connectionClosedEvent.wait(), sigintEvent.wait()), return_when = asyncio.FIRST_COMPLETED)
  502. finally:
  503. if not connectionClosedEvent.is_set():
  504. await self._protocol.quit()
  505. except (ConnectionRefusedError, ssl.SSLError, asyncio.TimeoutError) as e:
  506. self.logger.error(str(e))
  507. await asyncio.wait((asyncio.sleep(5), sigintEvent.wait()), return_when = asyncio.FIRST_COMPLETED)
  508. if sigintEvent.is_set():
  509. self.logger.debug('Got SIGINT, putting EOF and breaking')
  510. self.messageQueue.put_nowait(messageEOF)
  511. break
  512. class Storage:
  513. logger = logging.getLogger('irclog.Storage')
  514. def __init__(self, messageQueue, config):
  515. self.messageQueue = messageQueue
  516. self.config = config
  517. self.files = {} # channel|None -> (filename, fileobj); None = general raw log
  518. def update_config(self, config):
  519. channelsOld = {channel['ircchannel'] for channel in self.config['channels'].values()}
  520. channelsNew = {channel['ircchannel'] for channel in config['channels'].values()}
  521. channelsRemoved = channelsOld - channelsNew
  522. self.config = config
  523. for channel in channelsRemoved:
  524. if channel in self.files:
  525. self.files[channel][1].close()
  526. del self.files[channel]
  527. def ensure_file_open(self, time_, channel):
  528. fn = f'{get_month_str(time_)}.log'
  529. if channel in self.files and fn == self.files[channel][0]:
  530. return
  531. if channel in self.files:
  532. self.files[channel][1].close()
  533. dn = channel if channel is not None else 'general'
  534. mode = 'a' if channel is not None else 'ab'
  535. os.makedirs(os.path.join(self.config['storage']['path'], dn), exist_ok = True)
  536. self.files[channel] = (fn, open(os.path.join(self.config['storage']['path'], dn, fn), mode))
  537. async def run(self, loop, sigintEvent):
  538. self.update_config(self.config) # Ensure that files are open etc.
  539. flushExitEvent = asyncio.Event()
  540. storageTask = asyncio.create_task(self.store_messages(sigintEvent))
  541. flushTask = asyncio.create_task(self.flush_files(flushExitEvent))
  542. await sigintEvent.wait()
  543. self.logger.debug('Got SIGINT, waiting for remaining messages to be stored')
  544. await storageTask # Wait until everything's stored
  545. flushExitEvent.set()
  546. self.logger.debug('Waiting for flush task')
  547. await flushTask
  548. self.close()
  549. async def store_messages(self, sigintEvent):
  550. while True:
  551. self.logger.debug('Waiting for message')
  552. res = await self.messageQueue.get()
  553. self.logger.debug(f'Got {res!r} from message queue')
  554. if res is messageEOF:
  555. self.logger.debug('Message EOF, breaking store_messages loop')
  556. break
  557. self.store_message(*res)
  558. def store_message(self, time_, message, command, channel):
  559. # Sanity check
  560. if channel is None and (not isinstance(message, bytes) or message[0:1] not in (b'<', b'>', b'-') or message[1:2] != b' ' or command is not None):
  561. self.logger.warning(f'Dropping invalid store_message arguments: {time_}, {message!r}, {command!r}, {channel!r}')
  562. return
  563. elif channel is not None and (not isinstance(message, str) or command is None):
  564. self.logger.warning(f'Dropping invalid store_message arguments: {time_}, {message!r}, {command!r}, {channel!r}')
  565. return
  566. self.logger.debug(f'Logging {message!r} ({command}) at {time_} for {channel!r}')
  567. self.ensure_file_open(time_, channel)
  568. if channel is None:
  569. self.files[None][1].write(str(time_).encode('ascii') + b' ' + message + b'\n')
  570. else:
  571. self.files[channel][1].write(f'{time_} {command} {message}\n')
  572. async def flush_files(self, flushExitEvent):
  573. while True:
  574. await asyncio.wait((flushExitEvent.wait(), asyncio.sleep(60)), return_when = asyncio.FIRST_COMPLETED)
  575. self.logger.debug('Flushing files')
  576. for _, f in self.files.values():
  577. f.flush()
  578. self.logger.debug('Flushing done')
  579. if flushExitEvent.is_set():
  580. break
  581. self.logger.debug('Exiting flush_files')
  582. def close(self):
  583. for _, f in self.files.values():
  584. f.close()
  585. self.files = {}
  586. class WebServer:
  587. logger = logging.getLogger('irclog.WebServer')
  588. def __init__(self, config):
  589. self.config = config
  590. self._paths = {} # '/path' => ('#channel', auth, module, moduleargs) where auth is either False (no authentication) or the HTTP header value for basic auth
  591. self._app = aiohttp.web.Application()
  592. self._app.add_routes([aiohttp.web.post('/{path:.+}', self.post)])
  593. self.update_config(config)
  594. self._configChanged = asyncio.Event()
  595. def update_config(self, config):
  596. # self._paths = {channel['webpath']: (channel['ircchannel'], f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False) for channel in config['channels'].values()}
  597. needRebind = self.config['web'] != config['web'] #TODO only if there are changes to web.host or web.port; everything else can be updated without rebinding
  598. self.config = config
  599. if needRebind:
  600. self._configChanged.set()
  601. async def run(self, stopEvent):
  602. while True:
  603. runner = aiohttp.web.AppRunner(self._app)
  604. await runner.setup()
  605. site = aiohttp.web.TCPSite(runner, self.config['web']['host'], self.config['web']['port'])
  606. await site.start()
  607. await asyncio.wait((stopEvent.wait(), self._configChanged.wait()), return_when = asyncio.FIRST_COMPLETED)
  608. await runner.cleanup()
  609. if stopEvent.is_set():
  610. break
  611. self._configChanged.clear()
  612. # https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.asyncio.subprocess.Process
  613. # https://stackoverflow.com/questions/1180606/using-subprocess-popen-for-process-with-large-output
  614. # -> https://stackoverflow.com/questions/57730010/python-asyncio-subprocess-write-stdin-and-read-stdout-stderr-continuously
  615. async def post(self, request):
  616. self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r} with body {(await request.read())!r}')
  617. try:
  618. channel, auth, module, moduleargs, overlongmode = self._paths[request.path]
  619. except KeyError:
  620. self.logger.info(f'Bad request {id(request)}: no path {request.path!r}')
  621. raise aiohttp.web.HTTPNotFound()
  622. if auth:
  623. authHeader = request.headers.get('Authorization')
  624. if not authHeader or authHeader != auth:
  625. self.logger.info(f'Bad request {id(request)}: authentication failed: {authHeader!r} != {auth}')
  626. raise aiohttp.web.HTTPForbidden()
  627. if module is not None:
  628. self.logger.debug(f'Processing request {id(request)} using {module!r}')
  629. try:
  630. message = await module.process(request, *moduleargs)
  631. except aiohttp.web.HTTPException as e:
  632. raise e
  633. except Exception as e:
  634. self.logger.error(f'Bad request {id(request)}: exception in module process function: {type(e).__module__}.{type(e).__name__}: {e!s}')
  635. raise aiohttp.web.HTTPBadRequest()
  636. if '\r' in message or '\n' in message:
  637. self.logger.error(f'Bad request {id(request)}: module process function returned message with linebreaks: {message!r}')
  638. raise aiohttp.web.HTTPBadRequest()
  639. else:
  640. self.logger.debug(f'Processing request {id(request)} using default processor')
  641. message = await self._default_process(request)
  642. self.logger.info(f'Accepted request {id(request)}, putting message {message!r} for {channel} into message queue')
  643. self.messageQueue.put_nowait((channel, message, overlongmode))
  644. raise aiohttp.web.HTTPOk()
  645. async def _default_process(self, request):
  646. try:
  647. message = await request.text()
  648. except Exception as e:
  649. self.logger.info(f'Bad request {id(request)}: exception while reading request data: {e!s}')
  650. raise aiohttp.web.HTTPBadRequest() # Yes, it's always the client's fault. :-)
  651. self.logger.debug(f'Request {id(request)} payload: {message!r}')
  652. # Strip optional [CR] LF at the end of the payload
  653. if message.endswith('\r\n'):
  654. message = message[:-2]
  655. elif message.endswith('\n'):
  656. message = message[:-1]
  657. if '\r' in message or '\n' in message:
  658. self.logger.info(f'Bad request {id(request)}: linebreaks in message')
  659. raise aiohttp.web.HTTPBadRequest()
  660. return message
  661. def configure_logging(config):
  662. #TODO: Replace with logging.basicConfig(..., force = True) (Py 3.8+)
  663. root = logging.getLogger()
  664. root.setLevel(getattr(logging, config['logging']['level']))
  665. root.handlers = [] #FIXME: Undocumented attribute of logging.Logger
  666. formatter = logging.Formatter(config['logging']['format'], style = '{')
  667. stderrHandler = logging.StreamHandler()
  668. stderrHandler.setFormatter(formatter)
  669. root.addHandler(stderrHandler)
  670. async def main():
  671. if len(sys.argv) != 2:
  672. print('Usage: irclog.py CONFIGFILE', file = sys.stderr)
  673. sys.exit(1)
  674. configFile = sys.argv[1]
  675. config = Config(configFile)
  676. configure_logging(config)
  677. loop = asyncio.get_running_loop()
  678. messageQueue = asyncio.Queue()
  679. # tuple(time: float, message: bytes or str, command: str or None, channel: str or None)
  680. # command is an identifier of the type of message.
  681. # For raw message logs, message is bytes and command and channel are None. For channel-specific formatted messages, message, command, and channel are all strs.
  682. # The queue can also contain messageEOF, which signals to the storage layer to stop logging.
  683. irc = IRCClient(messageQueue, config)
  684. webserver = WebServer(config)
  685. storage = Storage(messageQueue, config)
  686. sigintEvent = asyncio.Event()
  687. def sigint_callback():
  688. global logger
  689. nonlocal sigintEvent
  690. logger.info('Got SIGINT, stopping')
  691. sigintEvent.set()
  692. loop.add_signal_handler(signal.SIGINT, sigint_callback)
  693. def sigusr1_callback():
  694. global logger
  695. nonlocal config, irc, webserver, storage
  696. logger.info('Got SIGUSR1, reloading config')
  697. try:
  698. newConfig = config.reread()
  699. except InvalidConfig as e:
  700. logger.error(f'Config reload failed: {e!s} (old config remains active)')
  701. return
  702. config = newConfig
  703. configure_logging(config)
  704. irc.update_config(config)
  705. webserver.update_config(config)
  706. storage.update_config(config)
  707. loop.add_signal_handler(signal.SIGUSR1, sigusr1_callback)
  708. await asyncio.gather(irc.run(loop, sigintEvent), webserver.run(sigintEvent), storage.run(loop, sigintEvent))
# Script entry point: when executed directly (not imported), run the main() coroutine via asyncio.run.
if __name__ == '__main__':
    asyncio.run(main())