Browse Source

Switch to bytes because lots of the old things are not valid UTF-8

master
JustAnotherArchivist 3 years ago
parent
commit
e8946a4efc
1 changed file with 36 additions and 35 deletions
  1. +36
    -35
      efnet-irclogger-convert.py

+ 36
- 35
efnet-irclogger-convert.py View File

@@ -7,47 +7,48 @@ filename = sys.argv[1]

def _format_reason(tail, empty, strip_parens = True):
    """Render the trailing words of an event as b' [reason]', or b'' if there is none.

    tail:         list of bytes words following the fixed part of the event.
    empty:        the single word that denotes "no reason" for this log flavour
                  (b'' for 'has left', b'()' for the parenthesised flavours).
    strip_parens: drop the first and last byte of the joined text, i.e. the
                  surrounding '(' and ')'.
    """
    # An absent tail is treated like an explicit empty marker; indexing into the
    # tail directly would raise IndexError on truncated lines.
    if not tail or tail == [empty]:
        return b''
    text = b' '.join(tail)
    if strip_parens:
        text = text[1:-1]
    return b' [' + text + b']'


def convert_line(date, line):
    """Convert one raw log line (bytes) into one output record (bytes).

    Everything stays in bytes because the message text is not necessarily valid
    UTF-8; only the HH:MM clock field is decoded.

    date: 'YYYY-MM-DD' string; a ValueError from an invalid date propagates.
    line: one input line, expected to look like b'[HH:MM] <text>\\n'.

    Returns the record including its trailing newline, b'' for lines that are
    recognised but deliberately produce no output, or None if the line is
    malformed or of an unknown kind.
    """
    # Slices instead of indexing: indexing bytes yields ints, and slices do not
    # raise on lines that are too short.
    if not (line[0:1] == b'[' and line[3:4] == b':' and line[6:8] == b'] ' and line[-1:] == b'\n'):
        return None
    day = datetime.date(int(date[:4]), int(date[5:7]), int(date[8:]))
    try:
        clock = line[1:6].decode('ascii')
        moment = datetime.time(int(clock[:2]), int(clock[3:]), tzinfo = datetime.timezone.utc)
    except ValueError:
        # Non-ASCII, non-numeric or out-of-range clock field (UnicodeDecodeError is a ValueError)
        return None
    ts = datetime.datetime.combine(day, moment).timestamp()
    prefix = f'{ts} '.encode('ascii')
    line = line[8:-1]

    def record(*parts):
        # The input newline was stripped above, so it has to be re-added here.
        return prefix + b''.join(parts) + b'\n'

    if line.startswith(b'<'): # PRIVMSG
        return record(b'PRIVMSG ', line)
    if line.startswith(b'* '): # ACTION
        return record(b'ACTION ', line[2:])
    if not line.startswith(b'*** '):
        return None

    # The line starts with b'*** ', so words always has at least one element.
    words = line.split(b' ')[1:]
    if words[1:3] == [b'has', b'joined']: # JOIN
        return record(b'JOIN ', words[0], b' joins')
    if len(words) >= 2 and words[0] == b'Joins:': # JOIN
        return record(b'JOIN ', words[1], b' joins')
    if words[1:3] == [b'has', b'left']: # PART
        return record(b'PART ', words[0], b' leaves', _format_reason(words[3:], b'', strip_parens = False))
    if len(words) >= 2 and words[0] == b'Parts:': # PART
        return record(b'PART ', words[1], b' leaves', _format_reason(words[3:], b'()'))
    if words[1:4] == [b'has', b'quit', b'IRC']: # QUIT
        return record(b'QUIT ', words[0], b' quits', _format_reason(words[4:], b'()'))
    if len(words) >= 2 and words[0] == b'Quits:': # QUIT
        # words[0] is the literal b'Quits:'; the nick is the following word.
        return record(b'QUIT ', words[1], b' quits', _format_reason(words[3:], b'()'))
    if len(words) >= 5 and words[1:4] == [b'was', b'kicked', b'by']: # KICK
        return record(b'KICK ', words[0], b' is kicked by ', words[4], b' [', b' '.join(words[5:])[1:-1], b']')
    if words[1:3] == [b'sets', b'mode:']: # MODE
        return record(b'MODE ', line[4:])
    if words[1:4] == [b'changes', b'topic', b'to:']: # TOPIC
        return record(b'TOPIC ', words[0], b' sets the topic to: ', b' '.join(words[4:]))
    if words[1:4] == [b'changes', b'topic', b'to']: # TOPIC
        # This flavour wraps the topic in one delimiter byte on each side; drop both.
        return record(b'TOPIC ', words[0], b' sets the topic to: ', b' '.join(words[4:])[1:-1])
    if words[1:5] == [b'is', b'now', b'known', b'as']: # NICK
        return record(b'NICK ', line[4:])
    if words[1:3] == [b'starts', b'logging']: # Silently ignore (there's already a JOIN)
        return b''
    return None


if __name__ == '__main__':
    # The first ten characters of the file's basename are the YYYY-MM-DD date of the log.
    date = filename.rsplit('/', 1)[-1][:10]

    with open(filename, 'rb') as fp:
        for line in fp:
            converted = convert_line(date, line)
            if converted is None:
                print(f'MALFORMED LINE: {line!r}', file = sys.stderr)
            elif converted:
                sys.stdout.buffer.write(converted)


Loading…
Cancel
Save