From e8946a4efcede314f764b946ea9535b61dd0b2bc Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Sat, 19 Dec 2020 04:49:46 +0000
Subject: [PATCH] Switch to bytes because lots of the old things are not valid UTF-8

---
Reviewer notes (free-form text between the three-dash marker and the diff is
not part of the commit message):

What the patch does: the log file is opened in binary mode and every line is
matched and re-emitted as bytes through sys.stdout.buffer, so the payload is
never decoded. Only the HH:MM timestamp is decoded (as ASCII) because it is
passed to int().

Fixes folded into the added lines during review:
 * Every record now ends with b'\n'. print() used to supply the newline;
   sys.stdout.buffer.write() does not, and the input newline is removed by
   line[8:-1], so without this all records run together on one line.
 * The "Quits:" branch emitted words[0] (the literal "Quits:") as the nick;
   it now uses words[1] like the "Joins:" and "Parts:" branches.
 * The empty-reason tests indexed past the end of `words` when the reason
   token was missing, and the "has quit IRC" test could never be false
   (len(words) > 4 holds whenever words[4] exists). They now compare the
   reason slice against [] and the empty marker instead.

NOTE(review): line breaks and indentation were reconstructed from a flattened
copy of this patch. Tab indentation, the two blank context lines and the
nesting of the trailing `else:` are assumptions to confirm against the target
file. The blob hashes on the "index" line are those of the original commit.

 efnet-irclogger-convert.py | 71 +++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/efnet-irclogger-convert.py b/efnet-irclogger-convert.py
index 2f28c65..dd48cc8 100644
--- a/efnet-irclogger-convert.py
+++ b/efnet-irclogger-convert.py
@@ -7,47 +7,48 @@ filename = sys.argv[1]
 date = filename.rsplit('/', 1)[-1][:10]
 
 
-with open(filename, 'r') as fp:
+with open(filename, 'rb') as fp:
 	for line in fp:
 		origLine = line
-		if not (line[0] == '[' and line[3] == ':' and line[6:8] == '] ' and line[-1] == '\n'):
+		if not (line[0:1] == b'[' and line[3:4] == b':' and line[6:8] == b'] ' and line[-1:] == b'\n'):
 			print(f'MALFORMED LINE: {line!r}', file = sys.stderr)
 			continue
-		time = line[1:6]
+		time = line[1:6].decode('ascii')
 		line = line[8:-1]
 		ts = datetime.datetime(int(date[:4]), int(date[5:7]), int(date[8:]), int(time[:2]), int(time[3:]), 0).replace(tzinfo = datetime.timezone.utc).timestamp()
-		if line.startswith('<'): #PRIVMSG
-			print(f'{ts} PRIVMSG {line}')
-		elif line.startswith('* '): #ACTION
-			print(f'{ts} ACTION {line[2:]}')
-		elif line.startswith('*** '):
-			words = line.split(' ')[1:]
-			if words[1:3] == ['has', 'joined']: # JOIN
-				print(f'{ts} JOIN {words[0]} joins')
-			elif words[0] == 'Joins:': # JOIN
-				print(f'{ts} JOIN {words[1]} joins')
-			elif words[1:3] == ['has', 'left']: # PART
-				reason = f' [{" ".join(words[3:])}]' if len(words) > 4 or words[3] != '' else ''
-				print(f'{ts} PART {words[0]} leaves{reason}')
-			elif words[0] == 'Parts:': # PART
-				reason = f' [{" ".join(words[3:])[1:-1]}]' if len(words) > 4 or words[3] != '()' else ''
-				print(f'{ts} PART {words[1]} leaves{reason}')
-			elif words[1:4] == ['has', 'quit', 'IRC']: # QUIT
-				print(f'{ts} QUIT {words[0]} quits [{" ".join(words[4:])[1:-1]}]')
-			elif words[0] == 'Quits:': # QUIT
-				reason = f' [{" ".join(words[3:])[1:-1]}]' if len(words) > 4 or words[3] != '()' else ''
-				print(f'{ts} QUIT {words[0]} quits{reason}')
-			elif words[1:4] == ['was', 'kicked', 'by']: # KICK
-				print(f'{ts} KICK {words[0]} is kicked by {words[4]} [{" ".join(words[5:])[1:-1]}]')
-			elif words[1:3] == ['sets', 'mode:']: # MODE
-				print(f'{ts} MODE {line[4:]}')
-			elif words[1:4] == ['changes', 'topic', 'to:']: # TOPIC
-				print(f'{ts} TOPIC {words[0]} sets the topic to: {" ".join(words[4:])}')
-			elif words[1:4] == ['changes', 'topic', 'to']: # TOPIC
-				print(f'{ts} TOPIC {words[0]} sets the topic to: {" ".join(words[4:])[1:-1]}')
-			elif words[1:5] == ['is', 'now', 'known', 'as']: # NICK
-				print(f'{ts} NICK {line[4:]}')
-			elif words[1:3] == ['starts', 'logging']: # Silently ignore (there's already a JOIN)
+		if line.startswith(b'<'): #PRIVMSG
+			sys.stdout.buffer.write(f'{ts} PRIVMSG '.encode('ascii') + line + b'\n')
+		elif line.startswith(b'* '): #ACTION
+			sys.stdout.buffer.write(f'{ts} ACTION '.encode('ascii') + line[2:] + b'\n')
+		elif line.startswith(b'*** '):
+			words = line.split(b' ')[1:]
+			if words[1:3] == [b'has', b'joined']: # JOIN
+				sys.stdout.buffer.write(f'{ts} JOIN '.encode('ascii') + words[0] + b' joins\n')
+			elif words[0] == b'Joins:': # JOIN
+				sys.stdout.buffer.write(f'{ts} JOIN '.encode('ascii') + words[1] + b' joins\n')
+			elif words[1:3] == [b'has', b'left']: # PART
+				reason = (b' [' + b' '.join(words[3:]) + b']') if words[3:] not in ([], [b'']) else b''
+				sys.stdout.buffer.write(f'{ts} PART '.encode('ascii') + words[0] + b' leaves' + reason + b'\n')
+			elif words[0] == b'Parts:': # PART
+				reason = (b' [' + b' '.join(words[3:])[1:-1] + b']') if words[3:] not in ([], [b'()']) else b''
+				sys.stdout.buffer.write(f'{ts} PART '.encode('ascii') + words[1] + b' leaves' + reason + b'\n')
+			elif words[1:4] == [b'has', b'quit', b'IRC']: # QUIT
+				reason = (b' [' + b' '.join(words[4:])[1:-1] + b']') if words[4:] not in ([], [b'()']) else b''
+				sys.stdout.buffer.write(f'{ts} QUIT '.encode('ascii') + words[0] + b' quits' + reason + b'\n')
+			elif words[0] == b'Quits:': # QUIT
+				reason = (b' [' + b' '.join(words[3:])[1:-1] + b']') if words[3:] not in ([], [b'()']) else b''
+				sys.stdout.buffer.write(f'{ts} QUIT '.encode('ascii') + words[1] + b' quits' + reason + b'\n')
+			elif words[1:4] == [b'was', b'kicked', b'by']: # KICK
+				sys.stdout.buffer.write(f'{ts} KICK '.encode('ascii') + words[0] + b' is kicked by ' + words[4] + b' [' + b' '.join(words[5:])[1:-1] + b']\n')
+			elif words[1:3] == [b'sets', b'mode:']: # MODE
+				sys.stdout.buffer.write(f'{ts} MODE '.encode('ascii') + line[4:] + b'\n')
+			elif words[1:4] == [b'changes', b'topic', b'to:']: # TOPIC
+				sys.stdout.buffer.write(f'{ts} TOPIC '.encode('ascii') + words[0] + b' sets the topic to: ' + b' '.join(words[4:]) + b'\n')
+			elif words[1:4] == [b'changes', b'topic', b'to']: # TOPIC
+				sys.stdout.buffer.write(f'{ts} TOPIC '.encode('ascii') + words[0] + b' sets the topic to: ' + b' '.join(words[4:])[1:-1] + b'\n')
+			elif words[1:5] == [b'is', b'now', b'known', b'as']: # NICK
+				sys.stdout.buffer.write(f'{ts} NICK '.encode('ascii') + line[4:] + b'\n')
+			elif words[1:3] == [b'starts', b'logging']: # Silently ignore (there's already a JOIN)
 				pass
 			else:
 				print(f'MALFORMED LINE: {origLine!r}', file = sys.stderr)