Evaluate math equations from unsafe user input in Python Evaluate math equations from unsafe user input in Python python python

Evaluate math equations from unsafe user input in Python


Disclaimer: I'm the Alexer mentioned in the code in the other answer. To be honest, I kind of suggested the bytecode parsing approach only half-jokingly, since I happened to have 99% of the code lying around for an unrelated project and so could whip together a POC in like a couple of minutes. That said, there shouldn't be anything wrong with it, per se; it's just that it's a more complex machinery that is needed for this task. In fact, you should be able to get away with just disassembling the code [checking the opcodes against a whitelist], checking that the constants and names are valid, and executing it with plain, evil eval after that. You should just lose the ability to insert paranoid extra checks all over the execution. (Another disclaimer: I still wouldn't feel comfortable enough to do it with eval)

Anyway, I had a boring moment, so I wrote some code to do this the smart way; using the AST instead of the bytecode. It's just an extra flag to compile(). (Or just ast.parse(), since you'll want the types from the module anyway)

import astimport operator_operations = {        ast.Add: operator.add,        ast.Sub: operator.sub,        ast.Mult: operator.mul,        ast.Div: operator.div,        ast.Pow: operator.pow,}def _safe_eval(node, variables, functions):        if isinstance(node, ast.Num):                return node.n        elif isinstance(node, ast.Name):                return variables[node.id] # KeyError -> Unsafe variable        elif isinstance(node, ast.BinOp):                op = _operations[node.op.__class__] # KeyError -> Unsafe operation                left = _safe_eval(node.left, variables, functions)                right = _safe_eval(node.right, variables, functions)                if isinstance(node.op, ast.Pow):                        assert right < 100                return op(left, right)        elif isinstance(node, ast.Call):                assert not node.keywords and not node.starargs and not node.kwargs                assert isinstance(node.func, ast.Name), 'Unsafe function derivation'                func = functions[node.func.id] # KeyError -> Unsafe function                args = [_safe_eval(arg, variables, functions) for arg in node.args]                return func(*args)        assert False, 'Unsafe operation'def safe_eval(expr, variables={}, functions={}):        node = ast.parse(expr, '<string>', 'eval').body        return _safe_eval(node, variables, functions)if __name__ == '__main__':        import math        print safe_eval('sin(a*pi/b)', dict(a=1, b=2, pi=math.pi), dict(sin=math.sin))

The same thing applies to this as to the bytecode version; if you check the operations against a whitelist and check that the names and values are valid, you should be able to get away with calling eval on the AST. (But again, I still wouldn't do it. Because paranoid. And paranoia is good when eval is concerned)


There is a relatively easy of doing this in Python without third party packages.

  • Using compile() to prepare a single-line Python expression to be bytecode for eval()

  • Not running the bytecode through eval(), but instead run it in your custom opcode loop and only implement opcodes which you really need. E.g. no built-ins, no attribute access, so the sandbox cannot escaped.

However there are some gotchas, like preparing for CPU exhaustion and memory exhaustion, which are not specific to this method and are issue on other approaches too.

Here is a full blog post about the topic. Here is a related gist. Below is shortened sample code.

""""    The orignal author: Alexer / #python.fi"""import opcodeimport disimport sysimport multiprocessingimport time# Python 3 requiredassert sys.version_info[0] == 3, "No country for old snakes"class UnknownSymbol(Exception):    """ There was a function or constant in the expression we don't support. """class BadValue(Exception):    """ The user tried to input dangerously big value. """    MAX_ALLOWED_VALUE = 2**63class BadCompilingInput(Exception):    """ The user tried to input something which might cause compiler to slow down. """def disassemble(co):    """ Loop through Python bytecode and match instructions  with our internal opcodes.    :param co: Python code object    """    code = co.co_code    n = len(code)    i = 0    extended_arg = 0    result = []    while i < n:        op = code[i]        curi = i        i = i+1        if op >= dis.HAVE_ARGUMENT:            # Python 2            # oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg            oparg = code[i] + code[i+1] * 256 + extended_arg            extended_arg = 0            i = i+2            if op == dis.EXTENDED_ARG:                # Python 2                #extended_arg = oparg*65536L                extended_arg = oparg*65536        else:            oparg = None        # print(opcode.opname[op])        opv = globals()[opcode.opname[op].replace('+', '_')](co, curi, i, op, oparg)        result.append(opv)    return result# For the opcodes see dis.py# (Copy-paste)# https://docs.python.org/2/library/dis.htmlclass Opcode:    """ Base class for out internal opcodes. """    args = 0    pops = 0    pushes = 0    def __init__(self, co, i, nexti, op, oparg):        self.co = co        self.i = i        self.nexti = nexti        self.op = op        self.oparg = oparg    def get_pops(self):        return self.pops    def get_pushes(self):        return self.pushes    def touch_value(self, stack, frame):        assert self.pushes == 0        for i in range(self.pops):            stack.pop()class OpcodeArg(Opcode):    args = 1class OpcodeConst(OpcodeArg):    def get_arg(self):        return self.co.co_consts[self.oparg]class OpcodeName(OpcodeArg):    def get_arg(self):        return self.co.co_names[self.oparg]class POP_TOP(Opcode):    """Removes the top-of-stack (TOS) item."""    pops = 1    def touch_value(self, stack, frame):        stack.pop()class DUP_TOP(Opcode):    """Duplicates the reference on top of the stack."""    # XXX: +-1    pops = 1    pushes = 2    def touch_value(self, stack, frame):        stack[-1:] = 2 * stack[-1:]class ROT_TWO(Opcode):    """Swaps the two top-most stack items."""    pops = 2    pushes = 2    def touch_value(self, stack, frame):        stack[-2:] = stack[-2:][::-1]class ROT_THREE(Opcode):    """Lifts second and third stack item one position up, moves top down to position three."""    pops = 3    pushes = 3    direct = True    def touch_value(self, stack, frame):        v3, v2, v1 = stack[-3:]        stack[-3:] = [v1, v3, v2]class ROT_FOUR(Opcode):    """Lifts second, third and forth stack item one position up, moves top down to position four."""    pops = 4    pushes = 4    direct = True    def touch_value(self, stack, frame):        v4, v3, v2, v1 = stack[-3:]        stack[-3:] = [v1, v4, v3, v2]class UNARY(Opcode):    """Unary Operations take the top of the stack, apply the operation, and push the result back on the stack."""    pops = 1    pushes = 1class UNARY_POSITIVE(UNARY):    """Implements TOS = +TOS."""    def touch_value(self, stack, frame):        stack[-1] = +stack[-1]class UNARY_NEGATIVE(UNARY):    """Implements TOS = -TOS."""    def touch_value(self, stack, frame):        stack[-1] = -stack[-1]class BINARY(Opcode):    """Binary operations remove the top of the stack (TOS) and the second top-most stack item (TOS1) from the stack. They perform the operation, and put the result back on the stack."""    pops = 2    pushes = 1class BINARY_POWER(BINARY):    """Implements TOS = TOS1 ** TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        print(TOS1, TOS)        if abs(TOS1) > BadValue.MAX_ALLOWED_VALUE or abs(TOS) > BadValue.MAX_ALLOWED_VALUE:            raise BadValue("The value for exponent was too big")        stack[-2:] = [TOS1 ** TOS]class BINARY_MULTIPLY(BINARY):    """Implements TOS = TOS1 * TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 * TOS]class BINARY_DIVIDE(BINARY):    """Implements TOS = TOS1 / TOS when from __future__ import division is not in effect."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 / TOS]class BINARY_MODULO(BINARY):    """Implements TOS = TOS1 % TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 % TOS]class BINARY_ADD(BINARY):    """Implements TOS = TOS1 + TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 + TOS]class BINARY_SUBTRACT(BINARY):    """Implements TOS = TOS1 - TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 - TOS]class BINARY_FLOOR_DIVIDE(BINARY):    """Implements TOS = TOS1 // TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 // TOS]class BINARY_TRUE_DIVIDE(BINARY):    """Implements TOS = TOS1 / TOS when from __future__ import division is in effect."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 / TOS]class BINARY_LSHIFT(BINARY):    """Implements TOS = TOS1 << TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 << TOS]class BINARY_RSHIFT(BINARY):    """Implements TOS = TOS1 >> TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 >> TOS]class BINARY_AND(BINARY):    """Implements TOS = TOS1 & TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 & TOS]class BINARY_XOR(BINARY):    """Implements TOS = TOS1 ^ TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 ^ TOS]class BINARY_OR(BINARY):    """Implements TOS = TOS1 | TOS."""    def touch_value(self, stack, frame):        TOS1, TOS = stack[-2:]        stack[-2:] = [TOS1 | TOS]class RETURN_VALUE(Opcode):    """Returns with TOS to the caller of the function."""    pops = 1    final = True    def touch_value(self, stack, frame):        value = stack.pop()        return valueclass LOAD_CONST(OpcodeConst):    """Pushes co_consts[consti] onto the stack.""" # consti    pushes = 1    def touch_value(self, stack, frame):        # XXX moo: Validate type        value = self.get_arg()        assert isinstance(value, (int, float))        stack.append(value)class LOAD_NAME(OpcodeName):    """Pushes the value associated with co_names[namei] onto the stack.""" # namei    pushes = 1    def touch_value(self, stack, frame):        # XXX moo: Get name from dict of valid variables/functions        name = self.get_arg()        if name not in frame:            raise UnknownSymbol("Does not know symbol {}".format(name))        stack.append(frame[name])class CALL_FUNCTION(OpcodeArg):    """Calls a function. The low byte of argc indicates the number of positional parameters, the high byte the number of keyword parameters. On the stack, the opcode finds the keyword parameters first. For each keyword argument, the value is on top of the key. Below the keyword parameters, the positional parameters are on the stack, with the right-most parameter on top. Below the parameters, the function object to call is on the stack. Pops all function arguments, and the function itself off the stack, and pushes the return value.""" # argc    pops = None    pushes = 1    def get_pops(self):        args = self.oparg & 0xff        kwargs = (self.oparg >> 8) & 0xff        return 1 + args + 2 * kwargs    def touch_value(self, stack, frame):        argc = self.oparg & 0xff        kwargc = (self.oparg >> 8) & 0xff        assert kwargc == 0        if argc > 0:            args = stack[-argc:]            stack[:] = stack[:-argc]        else:            args = []        func = stack.pop()        assert func in frame.values(), "Uh-oh somebody injected bad function. This does not happen."        result = func(*args)        stack.append(result)def check_for_pow(expr):    """ Python evaluates power operator during the compile time if its on constants.    You can do CPU / memory burning attack with ``2**999999999999999999999**9999999999999``.    We mainly care about memory now, as we catch timeoutting in any case.    We just disable pow and do not care about it.    """    if "**" in expr:        raise BadCompilingInput("Power operation is not allowed")def _safe_eval(expr, functions_and_constants={}, check_compiling_input=True):    """ Evaluate a Pythonic math expression and return the output as a string.    The expr is limited to 1024 characters / 1024 operations    to prevent CPU burning or memory stealing.    :param functions_and_constants: Supplied "built-in" data for evaluation    """    # Some safety checks    assert len(expr) < 1024    # Check for potential bad compiler input    if check_compiling_input:        check_for_pow(expr)    # Compile Python source code to Python code for eval()    code = compile(expr, '', 'eval')    # Dissect bytecode back to Python opcodes    ops = disassemble(code)    assert len(ops) < 1024    stack = []    for op in ops:        value = op.touch_value(stack, functions_and_constants)    return value