Browse Source

Initial commit

master
JustAnotherArchivist 6 years ago
commit
65254119d6
1 changed files with 293 additions and 0 deletions
  1. +293
    -0
      parse.py

+ 293
- 0
parse.py View File

@@ -0,0 +1,293 @@
import enum
import functools
import operator
import re


class ParsingError(Exception):
    '''Raised by parse() when the expression string is malformed.'''


class EvaluationError(Exception):
    '''Raised when a parsed item cannot be evaluated, e.g. for an unrecognised modifier pattern.'''


class StrEnum(enum.Enum):
    '''Enum base class whose members use their str() form as their repr().'''

    def __repr__(self):
        # Delegate to str() so members print as e.g. 'Type.group' inside containers as well.
        return '{!s}'.format(self)


class Type(StrEnum):
    '''The kinds of Item that parse() produces.'''

    group = 1     # a parenthesised sub-expression: ( ... )
    brackets = 2  # an empty pair of square brackets: []


class Modifier(StrEnum):
    '''Unary prefix operators that parse() attaches to an Item.'''

    negate = 1  # the ! character
    plus = 2    # a + character that is not a binary addition


class JSBool:
    '''
    A JavaScript-like boolean value.

    Supports +, - and * against JSBool, JSInt and JSString operands with the coercions
    noted on each branch below. Instances compare equal only to other JSBool instances
    holding the same value, and hash consistently with that equality so they can be used
    in sets and as dict keys; treat them as immutable once created.
    '''

    def __init__(self, value):
        # Any object is accepted; its Python truthiness (bool(value)) is stored.
        self.value = bool(value)

    def __add__(self, other):
        if isinstance(other, (JSBool, JSInt)):  # bool + bool/int -> int (addition)
            return JSInt(self) + JSInt(other)
        elif isinstance(other, JSString):  # bool + string -> string (concatenation)
            return JSString(self) + other
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # bool - bool/int/string -> int (subtraction)
            return JSInt(self) - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # bool * bool/int/string -> int (multiplication)
            return JSInt(self) * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSBool) and self.value == other.value

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make instances unhashable.
        # The class is part of the key because equality is type-sensitive as well.
        return hash((JSBool, self.value))

    def __bool__(self):
        return self.value

    def __int__(self):
        return int(self.value)

    def __str__(self):
        return str(self.value).lower()  # 'True' in Python is 'true' in JS

    def __repr__(self):
        return 'JSBool({!r})'.format(self.value)


class JSInt:
    '''
    A JavaScript-like integer value.

    Supports +, - and * against JSInt, JSBool and JSString operands with the coercions
    noted on each branch below. Instances compare equal only to other JSInt instances
    holding the same value, and hash consistently with that equality so they can be used
    in sets and as dict keys; treat them as immutable once created.
    '''

    def __init__(self, value):
        # Accepts anything int() accepts, including objects that define __int__.
        self.value = int(value)

    def __add__(self, other):
        if isinstance(other, JSInt):  # int + int -> int (addition)
            return JSInt(self.value + other.value)
        elif isinstance(other, JSBool):  # int + bool -> int (addition)
            return self + JSInt(other)
        elif isinstance(other, JSString):  # int + string -> string (concatenation)
            return JSString(self) + other
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, JSInt):  # int - int -> int (subtraction)
            return JSInt(self.value - other.value)
        elif isinstance(other, (JSBool, JSString)):  # int - bool/string -> int (subtraction)
            return self - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, JSInt):  # int * int -> int (multiplication)
            return JSInt(self.value * other.value)
        elif isinstance(other, (JSBool, JSString)):  # int * bool/string -> int (multiplication)
            return self * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSInt) and self.value == other.value

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make instances unhashable.
        # The class is part of the key because equality is type-sensitive as well.
        return hash((JSInt, self.value))

    def __bool__(self):
        return self.value != 0  # Any value other than zero is considered 'true'

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return 'JSInt({!r})'.format(self.value)


class JSString:
    '''
    A JavaScript-like string value.

    + concatenates (coercing JSInt/JSBool operands to strings), while - and * coerce both
    operands to integers first. Instances compare equal only to other JSString instances
    holding the same value, and hash consistently with that equality so they can be used
    in sets and as dict keys; treat them as immutable once created.
    '''

    def __init__(self, value):
        # Accepts any object; its str() form is stored.
        self.value = str(value)

    def __add__(self, other):
        if isinstance(other, JSString):  # string + string -> string (concatenation)
            return JSString(self.value + other.value)
        elif isinstance(other, (JSInt, JSBool)):  # string + int/bool -> string (concatenation)
            return self + JSString(other)
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # string - bool/int/string -> int (subtraction)
            return JSInt(self) - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # string * bool/int/string -> int (multiplication)
            return JSInt(self) * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSString) and self.value == other.value

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make instances unhashable.
        # The class is part of the key because equality is type-sensitive as well.
        return hash((JSString, self.value))

    def __bool__(self):
        return self.value != ''  # Any non-empty string is considered 'true'

    def __int__(self):
        # JS numeric coercion turns an empty or whitespace-only string into 0, whereas
        # Python's int() raises ValueError for both.
        if not self.value.strip():
            return 0
        # Strings that are not integer literals still raise ValueError here (JS would yield NaN).
        return int(self.value)

    def __str__(self):
        return self.value

    def __repr__(self):
        return 'JSString({!r})'.format(self.value)


# Value of a brackets item for each supported modifier sequence. Keys are tuples of
# Modifiers in source order (leftmost operator first); the trailing comment on each entry
# shows the JavaScript expression it stands for. Sequences longer than three modifiers are
# not covered.
_itemModifierToResultMapping = {
    # No modifiers
    (): JSString(''),                                                      # []
    # One modifier
    (Modifier.plus,): JSInt(0),                                            # +[]
    (Modifier.negate,): JSBool(False),                                     # ![]
    # Two modifiers
    #(Modifier.plus, Modifier.plus): syntax error
    (Modifier.plus, Modifier.negate): JSInt(0),                            # +![]
    (Modifier.negate, Modifier.plus): JSBool(True),                        # !+[]
    (Modifier.negate, Modifier.negate): JSBool(True),                      # !![]
    # Three modifiers
    #(Modifier.plus, Modifier.plus, Modifier.plus): syntax error
    #(Modifier.plus, Modifier.plus, Modifier.negate): syntax error
    (Modifier.plus, Modifier.negate, Modifier.plus): JSInt(1),             # +!+[]
    (Modifier.plus, Modifier.negate, Modifier.negate): JSInt(1),           # +!![]
    #(Modifier.negate, Modifier.plus, Modifier.plus): syntax error
    (Modifier.negate, Modifier.plus, Modifier.negate): JSBool(True),       # !+![]
    (Modifier.negate, Modifier.negate, Modifier.plus): JSBool(False),      # !!+[]
    (Modifier.negate, Modifier.negate, Modifier.negate): JSBool(False),    # !!![]
}


class Item:
    '''
    A node of the tree produced by parse().

    Attributes:
      type: Type.group for a parenthesised sub-expression, Type.brackets for []
      modifiers: list of Modifiers that precede the item, in source order
      values: list of child Items for a group item, None for a brackets item
    '''

    def __init__(self, type, modifiers, values = None):
        '''
        Arguments:
          type: Type.group or Type.brackets
          modifiers: iterable of Modifier.negate / Modifier.plus
          values: child Items; required for group items, must be None for brackets items

        Raises: ValueError on any violation of the above, TypeError if modifiers is not iterable
        '''
        if type not in (Type.group, Type.brackets):
            raise ValueError('type must be Type.group or Type.brackets')
        # Materialise once. This raises TypeError for non-iterables, and keeps one-shot
        # iterables (e.g. generators) from being exhausted by the validation below, which
        # would otherwise leave the item with no modifiers at all.
        modifiers = list(modifiers)
        if not all(x in (Modifier.negate, Modifier.plus) for x in modifiers):
            raise ValueError('modifiers must be an iterable that can only contain Modifier.negate or Modifier.plus')
        if values is not None and type != Type.group:
            raise ValueError('values can only be specified for group items')
        if type == Type.group and values is None:
            raise ValueError('values are required for group items')
        self.type = type
        self.modifiers = modifiers
        self.values = values

    def evaluate(self, evaluateFunction = None):
        '''
        Evaluate this item.

        Argument: evaluateFunction (callable), required for group items only; it is called
        as evaluateFunction(values, modifiers) and its return value is returned unchanged

        Returns: for brackets items, the value looked up in _itemModifierToResultMapping

        Raises: ValueError if a group item is evaluated without a callable evaluateFunction,
        EvaluationError if a brackets item has an unrecognised modifier pattern
        '''
        if self.type == Type.group:
            if not callable(evaluateFunction):  # also covers the default of None
                raise ValueError('must specify a callable evaluateFunction when evaluating a group item')
            return evaluateFunction(self.values, self.modifiers)
        try:
            return _itemModifierToResultMapping[tuple(self.modifiers)]
        except KeyError:
            # The KeyError is an implementation detail of the table lookup; do not chain it.
            raise EvaluationError('Unrecognised modifier pattern {!r}'.format(self.modifiers)) from None

    def __eq__(self, other):
        return isinstance(other, Item) and self.type == other.type and self.modifiers == other.modifiers and self.values == other.values

    def __repr__(self):
        return 'Item({!r}, {!r}{})'.format(self.type, self.modifiers, ', values = {!r}'.format(self.values) if self.values is not None else '')


def parse(s):
    '''
    Parse expression s into a tree of Items.

    Only the characters + ! ( ) [ ] are interpreted; any other character is skipped.

    Argument: s (string), the expression to evaluate

    Returns: tree (list of Items)

    Raises: ParsingError if a ) has no matching (, if a ( is never closed, or if a [ is not
    immediately followed by ]
    '''

    # One entry per nesting level; the last entry belongs to the innermost open group.
    itemStack = [[]]
    modifierStack = [[]]  # Modifiers seen at each level that are not yet attached to an item
    finishedItem = False  # True right after ] or ), i.e. when a following + is a binary addition
    pos = 0
    length = len(s)
    while pos < length:
        char = s[pos]
        if char == '+':
            if not finishedItem:
                modifierStack[-1].append(Modifier.plus)
            # else: addition, nothing to do
        elif char == '!':
            finishedItem = False
            modifierStack[-1].append(Modifier.negate)
        elif char == '(':
            finishedItem = False
            itemStack.append([])
            modifierStack.append([])
        elif char == ')':
            if len(itemStack) == 1:
                raise ParsingError('Encountered ) without matching (')
            values = itemStack.pop()
            modifierStack.pop()  # Modifiers left dangling inside the group are discarded
            itemStack[-1].append(Item(type = Type.group, modifiers = modifierStack[-1], values = values))
            modifierStack[-1] = []
            finishedItem = True
        elif char == '[':
            if pos + 1 >= length:
                # Without this check the lookahead below would raise IndexError
                raise ParsingError('Unexpected end of input after position {}; expected ]'.format(pos))
            if s[pos + 1] != ']':
                raise ParsingError('Invalid byte found at position {}; expected ] but got {}'.format(pos + 1, s[pos + 1]))
            # End of modifier sequence
            itemStack[-1].append(Item(type = Type.brackets, modifiers = modifierStack[-1]))
            # Reset the pending modifiers of this level *in the stack*, not merely in a local
            # alias; otherwise a later group at the same level would inherit the modifiers
            # that were already consumed by this item and lose its own.
            modifierStack[-1] = []
            pos += 1  # Skip over closing bracket
            finishedItem = True
        pos += 1
    if len(itemStack) > 1:
        # Mirror the check for a stray ) above instead of silently dropping the open group
        raise ParsingError('Encountered ( without matching )')
    return itemStack[0]


def evaluate(tree, modifiers = None):
    '''
    Evaluate a tree of Items as produced by parse().

    Arguments:
      tree (list of Items), the items to evaluate and combine with +
      modifiers (list of Modifiers or None), unary operators to apply to the combined
        result, in source order (leftmost operator first)

    Returns: the resulting JSBool, JSInt or JSString

    Raises: EvaluationError if tree is empty or modifiers contains an unknown value
    '''
    # Evaluate eagerly rather than through a lazy map(): a StopIteration escaping from a
    # nested evaluation would otherwise be mistaken by reduce() for the end of the sequence.
    values = [item.evaluate(evaluate) for item in tree]
    if not values:
        raise EvaluationError('Cannot evaluate an empty expression')
    # Concatenation or addition, but this is all handled in the JS* classes.
    # With a single value, reduce() returns it unchanged.
    result = functools.reduce(operator.add, values)
    # Unary operators bind right to left, so the modifier closest to the group is applied first.
    # NOTE(review): a group wrapping nothing but a bare [] evaluates to JSString(''), which is
    # falsy here although an array is truthy in JS; negating such a group is not modelled.
    for modifier in reversed(list(modifiers or ())):
        if modifier == Modifier.plus:
            result = JSInt(result)
        elif modifier == Modifier.negate:
            result = JSBool(not result)
        else:
            raise EvaluationError('Unrecognised modifier {!r}'.format(modifier))
    return result


def crack(url, html):
    '''
    Compute the value that the challenge script in html assigns to the 'jschl-answer' field.

    Arguments:
      url (string), the URL of the page; must start with http:// or https://
      html (string), the page source

    Returns: the answer (JSInt), or None if html does not contain a recognised challenge script

    Raises: ValueError if a challenge was found but no host can be extracted from url
    '''
    m = re.search(r'setTimeout\(function\(\)\{\s+var\s+s,t,o,p,b,r,e,a,k,i,n,g,f,\s*(?P<parent>[a-zA-Z]+)=\{"(?P<child>[a-zA-Z]+)":(?P<initialExpression>[^}]+)\};' +
                  r'\s*t\s*=\s*document\.createElement\(\'div\'\);' +
                  r'\s*t\.innerHTML="<a href=\'/\'>x</a>";' +
                  r'\s*t\s*=\s*t\.firstChild\.href;\s*r\s*=\s*t\.match\(/https\?:\\/\\//\)\[0\];' +
                  r'\s*t\s*=\s*t\.substr\(r\.length\);\s*t\s*=\s*t\.substr\(0,t\.length-1\);' +
                  r'\s*a\s*=\s*document\.getElementById\(\'jschl-answer\'\);' +
                  r'\s*f\s*=\s*document\.getElementById\(\'challenge-form\'\);' +
                  r'\s*;((?P=parent)\.(?P=child)\s*[*+-]=\s*[^;]+\s*;\s*)+a\.value\s*=\s*parseInt\((?P=parent)\.(?P=child),\s*10\)\s*\+\s*t\.length;\s*\';\s*121\'' +
                  r'\s*f\.action\s*\+=\s*location\.hash;' +
                  r'\s*f\.submit\(\);' +
                  r'\s*\},\s*4000\);', html)
    if not m:
        return None
    d = m.groupdict()
    operators = {'*': operator.mul, '+': operator.add, '-': operator.sub}
    result = evaluate(parse(d['initialExpression']))
    # Replay the compound assignments in order. Only the matched script is scanned, so that
    # look-alike text elsewhere on the page cannot be picked up as an extra step.
    for step in re.finditer(d['parent'] + r'\.' + d['child'] + r'\s*(?P<operator>[*+-])=\s*(?P<expression>[^;]+)', m.group(0)):
        result = operators[step.group('operator')](result, evaluate(parse(step.group('expression'))))
    # The script adds the length of the page's host (its own URL minus protocol and trailing
    # slash). The slash after the host is optional here so that e.g. 'https://example.com' works.
    hostMatch = re.search(r'^https?://([^/?#]+)', url)
    if not hostMatch:
        raise ValueError('Cannot extract host from URL {!r}'.format(url))
    # The script runs parseInt() on the value before adding, so coerce to an integer first;
    # otherwise a string result would be concatenated with the length instead of added to it.
    return JSInt(result) + JSInt(len(hostMatch.group(1)))

Loading…
Cancel
Save