Pythonic way to handle arguments with values in Union - python-3.x

In the code below, print_pos accepts one argument which can be of
three different types.
from typing import List, Tuple, Union
pos_t = Tuple[int, int]
anchor_t = Tuple[str, str]
anchor_pos_t = Tuple[anchor_t, pos_t]


def print_pos(
    pos: Union[
        pos_t,
        anchor_pos_t,
        List[Union[pos_t, anchor_pos_t]]
    ]
) -> None:
    """Print a position, an anchored position, or a list of either.

    Args:
        pos: A tuple whose first item is an int (printed as ``xy``), a tuple
            whose first item is itself a tuple (printed as ``anchor`` then
            ``xy``), or a list whose items are printed recursively.

    Raises:
        ValueError: If ``pos`` matches none of the shapes above, including
            an empty tuple.
    """
    # ``pos and ...`` guards the pos[0] lookup: an empty tuple would
    # otherwise escape as IndexError instead of ValueError.
    if isinstance(pos, tuple) and pos and isinstance(pos[0], int):
        print('xy =', pos)
    elif isinstance(pos, tuple) and pos and isinstance(pos[0], tuple):
        print('anchor =', pos[0])
        print('xy =', pos[1])
    elif isinstance(pos, list):
        print('[')
        for p in pos:
            print_pos(p)
        print(']')
    else:
        raise ValueError('invalid pos')
print_pos((0, 100))
print_pos((('right', 'bottom'), (0, 100)))
print_pos([
(0, 100),
(('right', 'bottom'), (0, 100))
])
Right now, I use isinstance to check for the different possibilities
for the type of pos but I find the code rather clumsy. Is there a
more convenient/elegant way to do that? In particular, is there a way
to reuse types I defined (pos_t, anchor_t, anchor_pos_t) in my type check?

You can use the typeguard library to check variable types at runtime.
This library is mainly for runtime type validation, rather than conditional type checking, so an extra is_type function needs to be defined to fit your needs.
Extra type casts are also unfortunately necessary to prevent type checker errors.
from typing import Any, List, Tuple, Union, cast
from typeguard import check_type
pos_t = Tuple[int, int]
anchor_t = Tuple[str, str]
anchor_pos_t = Tuple[anchor_t, pos_t]
def is_type(value: Any, expected_type: Any) -> bool:
    """
    Return whether the given value is of the expected type or not.

    Wraps typeguard's ``check_type``: a ``TypeError`` from the check is
    reported as ``False``, a clean return as ``True``.
    """
    # NOTE(review): the (argname, value, expected_type) call form and the
    # TypeError exception look like the typeguard 2.x API; newer releases
    # appear to use check_type(value, expected_type) and a different
    # exception class -- confirm against the installed version.
    try:
        check_type('<blank>', value, expected_type)
    except TypeError:
        return False
    return True
def print_pos(
    pos: Union[pos_t, anchor_pos_t, List[Union[pos_t, anchor_pos_t]]]
) -> None:
    """Print ``pos`` according to which of the declared type aliases it fits.

    Each branch checks ``pos`` against one alias with ``is_type`` and then
    ``cast``s it, so a static type checker sees the narrowed type.

    Raises:
        ValueError: If ``pos`` fits none of the three accepted types.
    """
    if is_type(pos, pos_t):
        pos = cast(pos_t, pos)
        print('xy =', pos)
    elif is_type(pos, anchor_pos_t):
        pos = cast(anchor_pos_t, pos)
        print('anchor =', pos[0])
        print('xy =', pos[1])
    elif is_type(pos, List[Union[pos_t, anchor_pos_t]]):
        pos = cast(List[Union[pos_t, anchor_pos_t]], pos)
        print('[')
        for p in pos:
            print_pos(p)
        print(']')
    else:
        raise ValueError('invalid pos')
print_pos((0, 100))
print_pos((('right', 'bottom'), (0, 100)))
print_pos([(0, 100), (('right', 'bottom'), (0, 100))])
This isn't the cleanest solution, but it works.
I would recommend using a more object-oriented approach with classes if possible, to eliminate the need for union types.

This does not yet work with any current python version, but python 3.10 (planned to be released October 2021) will have structural pattern matching.
This allows something like this, which might be slightly more readable:
from typing import List, Tuple, Union
pos_t = Tuple[int, int]
anchor_t = Tuple[str, str]
anchor_pos_t = Tuple[anchor_t, pos_t]
def print_pos(
pos: Union[
pos_t,
anchor_pos_t,
List[Union[pos_t, anchor_pos_t]]
]
) -> None:
match pos:
# need to match this more specific case first, as tuple((x, y)) matches this as well
case tuple(((a, b), (x, y))):
print('anchor =', (a, b))
print('xy =', (x, y))
case tuple((x, y)):
print('xy =', (x, y))
case list(_):
print('[')
for p in pos:
print_pos(p)
print(']')
case _:
raise ValueError('invalid pos')
print_pos((0, 100))
print_pos((('right', 'bottom'), (0, 100)))
print_pos([
(0, 100),
(('right', 'bottom'), (0, 100))
])
This already works with the 3.10.0a7 pre-release, although the mypy support is not there yet.
Structural pattern matching is in a sense similar to sequence unpacking like
(a, b), (x, y) = pos
but more powerful.
The three PEPs describing structural pattern matching:
PEP 634 -- Structural Pattern Matching: Specification
PEP 635 -- Structural Pattern Matching: Motivation and Rationale
PEP 636 -- Structural Pattern Matching: Tutorial

Related

switch case substitute for inside a python 3.9 class [duplicate]

This question's answers are a community effort. Edit existing answers to improve this post. It is not currently accepting new answers or interactions.
I want to write a function in Python that returns different fixed values based on the value of an input index.
In other languages I would use a switch or case statement, but Python does not appear to have a switch statement. What are the recommended Python solutions in this scenario?
Python 3.10 (2021) introduced the match-case statement which provides a first-class implementation of a "switch" for Python. For example:
def f(x):
match x:
case 'a':
return 1
case 'b':
return 2
case _:
return 0 # 0 is the default case if x is not found
The match-case statement is considerably more powerful than this simple example.
The original answer below was written in 2008, before match-case was available:
You could use a dictionary:
def f(x):
    """Look up x in a fixed mapping; an unknown key raises KeyError."""
    return {
        'a': 1,
        'b': 2,
    }[x]
If you'd like defaults, you could use the dictionary get(key[, default]) function:
def f(x):
    """Look up x in a fixed mapping, falling back to 9 for unknown keys."""
    return {
        'a': 1,
        'b': 2
    }.get(x, 9)    # 9 is returned as the default if x is not found
I've always liked doing it this way
result = {
'a': lambda x: x * 5,
'b': lambda x: x + 7,
'c': lambda x: x - 2
}[value](x)
From here
In addition to the dictionary methods (which I really like, BTW), you can also use if-elif-else to obtain the switch/case/default functionality:
if x == 'a':
# Do the thing
elif x == 'b':
# Do the other thing
if x in 'bc':
# Fall-through by not using elif, but now the default case includes case 'a'!
elif x in 'xyz':
# Do yet another thing
else:
# Do the default
This of course is not identical to switch/case - you cannot have fall-through as easily as leaving off the break statement, but you can have a more complicated test. Its formatting is nicer than a series of nested ifs, even though functionally that's what it is closer to.
Python >= 3.10
Wow, Python 3.10+ now has a match/case syntax which is like switch/case and more!
PEP 634 -- Structural Pattern Matching
Selected features of match/case
1 - Match values:
Matching values is similar to a simple switch/case in another language:
match something:
case 1 | 2 | 3:
# Match 1-3.
case _:
# Anything else.
#
# Match will throw an error if this is omitted
# and it doesn't match any of the other patterns.
2 - Match structural patterns:
match something:
case str() | bytes():
# Match a string like object.
case [str(), int()]:
# Match a `str` and an `int` sequence
# (`list` or a `tuple` but not a `set` or an iterator).
case [_, _]:
# Match a sequence of 2 variables.
# To prevent a common mistake, sequence patterns don’t match strings.
case {"bandwidth": 100, "latency": 300}:
# Match this dict. Extra keys are ignored.
3 - Capture variables
Parse an object; saving it as variables:
match something:
case [name, count]
# Match a sequence of any two objects and parse them into the two variables.
case [x, y, *rest]:
# Match a sequence of two or more objects,
# binding object #3 and on into the rest variable.
case bytes() | str() as text:
# Match any string like object and save it to the text variable.
Capture variables can be useful when parsing data (such as JSON or HTML) that may come in one of a number of different patterns.
Capture variables is a feature. But it also means that you need to use dotted constants (ex: COLOR.RED) only. Otherwise, the constant will be treated as a capture variable and overwritten.
More sample usage:
match something:
case 0 | 1 | 2:
# Matches 0, 1 or 2 (value).
print("Small number")
case [] | [_]:
# Matches an empty or single value sequence (structure).
# Matches lists and tuples but not sets.
print("A short sequence")
case str() | bytes():
# Something of `str` or `bytes` type (data type).
print("Something string-like")
case _:
# Anything not matched by the above.
print("Something else")
Python <= 3.9
My favorite Python recipe for switch/case was:
choices = {'a': 1, 'b': 2}
result = choices.get(key, 'default')
Short and simple for simple scenarios.
Compare to 11+ lines of C code:
// C Language version of a simple 'switch/case'.
switch( key )
{
case 'a' :
result = 1;
break;
case 'b' :
result = 2;
break;
default :
result = -1;
}
You can even assign multiple variables by using tuples:
choices = {'a': (1, 2, 3), 'b': (4, 5, 6)}
(result1, result2, result3) = choices.get(key, ('default1', 'default2', 'default3'))
class switch(object):
    """Record the value being switched on as a class attribute.

    ``switch(value)`` stores ``value`` on the class and evaluates to True
    (``__new__`` returns True rather than an instance), so it can be used
    directly as a ``while`` condition.
    """
    value = None

    def __new__(class_, value):
        class_.value = value
        return True


def case(*args):
    """Return True if any argument equals the value last given to switch."""
    return any(arg == switch.value for arg in args)
Usage:
# Each ``if`` without a ``break`` falls through to the tests below it;
# the final unconditional ``break`` ends the one-shot ``while`` loop.
while switch(n):
    if case(0):
        print("You typed zero.")
        break
    if case(1, 4, 9):
        print("n is a perfect square.")
        break
    if case(2):
        print("n is an even number.")
    if case(2, 3, 5, 7):
        print("n is a prime number.")
        break
    if case(6, 8):
        print("n is an even number.")
        break
    print("Only single-digit numbers are allowed.")
    break
Tests:
n = 2
#Result:
#n is an even number.
#n is a prime number.
n = 11
#Result:
#Only single-digit numbers are allowed.
My favorite one is a really nice recipe. It's the closest one I've seen to actual switch case statements, especially in features.
class switch(object):
    """One-shot iterable whose single item is a ``case`` predicate.

    Intended use is ``for case in switch(value):`` followed by a series of
    ``if case(...):`` suites; ``break`` leaves the switch.
    """

    def __init__(self, value):
        self.value = value
        self.fall = False

    def __iter__(self):
        """Return the match method once, then stop"""
        yield self.match
        # Falling off the end finishes the generator. An explicit
        # ``raise StopIteration`` here becomes RuntimeError under PEP 479.

    def match(self, *args):
        """Indicate whether or not to enter a case suite"""
        if self.fall or not args:
            return True
        elif self.value in args:  # changed for v1.5, see below
            self.fall = True
            return True
        else:
            return False
Here's an example:
# The following example is pretty much the exact use-case of a dictionary,
# but is included for its simplicity. Note that you can include statements
# in each suite.
v = 'ten'
for case in switch(v):
    if case('one'):
        print(1)
        break
    if case('two'):
        print(2)
        break
    if case('ten'):
        print(10)
        break
    if case('eleven'):
        print(11)
        break
    if case():  # default, could also just omit condition or 'if True'
        print("something else!")
        # No need to break here, it'll stop anyway

# break is used here to look as much like the real thing as possible, but
# elif is generally just as good and more concise.
# Empty suites are considered syntax errors, so intentional fall-throughs
# should contain 'pass'
c = 'z'
for case in switch(c):
    if case('a'): pass  # only necessary if the rest of the suite is empty
    if case('b'): pass
    # ...
    if case('y'): pass
    if case('z'):
        print("c is lowercase!")
        break
    if case('A'): pass
    # ...
    if case('Z'):
        print("c is uppercase!")
        break
    if case():  # default
        print("I dunno what c was!")

# As suggested by Pierre Quentel, you can even expand upon the
# functionality of the classic 'case' statement by matching multiple
# cases in a single shot. This greatly benefits operations such as the
# uppercase/lowercase example above:
import string
c = 'A'
for case in switch(c):
    # string.lowercase / string.uppercase are Python 2 only; the ascii_*
    # constants exist in both Python 2 and Python 3.
    if case(*string.ascii_lowercase):  # note the * for unpacking as arguments
        print("c is lowercase!")
        break
    if case(*string.ascii_uppercase):
        print("c is uppercase!")
        break
    if case('!', '?', '.'):  # normal argument passing style also applies
        print("c is a sentence terminator!")
        break
    if case():  # default
        print("I dunno what c was!")
Some of the comments indicated that a context manager solution using with foo as case rather than for case in foo might be cleaner, and for large switch statements the linear rather than quadratic behavior might be a nice touch. Part of the value in this answer with a for loop is the ability to have breaks and fallthrough, and if we're willing to play with our choice of keywords a little bit we can get that in a context manager too:
class Switch:
    """Context-manager switch driven by ``while case(...):`` suites.

    A suite that ends without ``break`` falls through into the next one;
    a ``break`` ends the switch, so every later ``case`` call is False.
    """

    def __init__(self, value):
        self.value = value
        self._entered = False   # True once some case has matched
        self._broken = False    # True once a suite has been left via break
        self._prev = None       # values of the suite currently running

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        return False  # Allows a traceback to occur

    def __call__(self, *values):
        if self._broken:
            return False

        if not self._entered:
            if values and self.value not in values:
                return False
            self._entered, self._prev = True, values
            return True

        if self._prev is None:
            # The previous suite ended without break: fall through.
            self._prev = values
            return True

        if self._prev != values:
            # A different case is asked while a suite was still active,
            # so that suite was left with break.
            self._broken = True
            return False

        if self._prev == values:
            # Second evaluation of the same ``while``: the suite ran to
            # its end, so stop looping and allow fall-through.
            self._prev = None
            return False

    @property
    def default(self):
        # Must be a property: ``while case.default:`` tests the result of
        # the call, not the always-truthy bound method.
        return self()
Here's an example:
# Prints 'bar' then 'baz'.
with Switch(2) as case:
while case(0):
print('foo')
while case(1, 2, 3):
print('bar')
while case(4, 5):
print('baz')
break
while case.default:
print('default')
break
class Switch:
    """Minimal context-manager switch: ``case(*values)`` is a membership test."""

    def __init__(self, value):
        self.value = value

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        return False  # Allows a traceback to occur

    def __call__(self, *values):
        return self.value in values
from datetime import datetime
with Switch(datetime.today().weekday()) as case:
if case(0):
# Basic usage of switch
print("I hate mondays so much.")
# Note there is no break needed here
elif case(1,2):
# This switch also supports multiple conditions (in one line)
print("When is the weekend going to be here?")
elif case(3,4):
print("The weekend is near.")
else:
# Default would occur here
print("Let's go have fun!") # Didn't use case for example purposes
There's a pattern that I learned from Twisted Python code.
class SMTP:
    """Dispatch a command token to the matching ``do_<COMMAND>`` method."""

    def lookupMethod(self, command):
        """Return the bound ``do_`` handler for ``command``, or None if absent."""
        return getattr(self, 'do_' + command.upper(), None)

    def do_HELO(self, rest):
        return 'Howdy ' + rest

    def do_QUIT(self, rest):
        return 'Bye'
SMTP().lookupMethod('HELO')('foo.bar.com') # => 'Howdy foo.bar.com'
SMTP().lookupMethod('QUIT')('') # => 'Bye'
You can use it any time you need to dispatch on a token and execute extended piece of code. In a state machine you would have state_ methods, and dispatch on self.state. This switch can be cleanly extended by inheriting from base class and defining your own do_ methods. Often times you won't even have do_ methods in the base class.
Edit: how exactly is that used
In case of SMTP you will receive HELO from the wire. The relevant code (from twisted/mail/smtp.py, modified for our case) looks like this
class SMTP:
    # ...
    def do_UNKNOWN(self, rest):
        """Fallback handler used when no ``do_`` method matches the command."""
        raise NotImplementedError('received unknown command')

    def state_COMMAND(self, line):
        """Split ``line`` into command and argument and run the handler.

        Raises:
            SyntaxError: If ``line`` is empty or only whitespace.
        """
        line = line.strip()
        parts = line.split(None, 1)
        if parts:
            method = self.lookupMethod(parts[0]) or self.do_UNKNOWN
            if len(parts) == 2:
                return method(parts[1])
            else:
                return method('')
        else:
            raise SyntaxError('bad syntax')
SMTP().state_COMMAND(' HELO foo.bar.com ') # => Howdy foo.bar.com
You'll receive ' HELO foo.bar.com ' (or you might get 'QUIT' or 'RCPT TO: foo'). This is tokenized into parts as ['HELO', 'foo.bar.com']. The actual method lookup name is taken from parts[0].
(The original method is also called state_COMMAND, because it uses the same pattern to implement a state machine, i.e. getattr(self, 'state_' + self.mode))
I'm just going to drop my two cents in here. The reason there isn't a case/switch statement in Python is because Python follows the principle of "there's only one right way to do something". So obviously you could come up with various ways of recreating switch/case functionality, but the Pythonic way of accomplishing this is the if/elif construct. I.e.,
if something:
return "first thing"
elif somethingelse:
return "second thing"
elif yetanotherthing:
return "third thing"
else:
return "default thing"
I just felt PEP 20 (the Zen of Python) deserved a nod here. One of the beautiful things about Python is its simplicity and elegance. That is largely derived from the principles laid out in PEP 20, including "There should be one -- and preferably only one -- obvious way to do it."
Let's say you don't want to just return a value, but want to use methods that change something on an object. Using the approach stated here would be:
result = {
'a': obj.increment(x),
'b': obj.decrement(x)
}.get(value, obj.default(x))
Here Python evaluates all methods in the dictionary.
So even if your value is 'a', the object will get incremented and decremented by x.
Solution:
func, args = {
'a' : (obj.increment, (x,)),
'b' : (obj.decrement, (x,)),
}.get(value, (obj.default, (x,)))
result = func(*args)
So you get a list containing a function and its arguments. This way, only the function pointer and the argument list get returned, not evaluated. 'result' then evaluates the returned function call.
Solution to run functions:
result = {
'case1': foo1,
'case2': foo2,
'case3': foo3,
}.get(option)(parameters_optional)
where foo1(), foo2() and foo3() are functions
Example 1 (with parameters):
option = number['type']
result = {
'number': value_of_int, # result = value_of_int(number['value'])
'text': value_of_text, # result = value_of_text(number['value'])
'binary': value_of_bin, # result = value_of_bin(number['value'])
}.get(option)(value['value'])
Example 2 (no parameters):
option = number['type']
result = {
'number': func_for_number, # result = func_for_number()
'text': func_for_text, # result = func_for_text()
'binary': func_for_bin, # result = func_for_bin()
}.get(option)()
Example 3 (only values):
option = number['type']
result = {
'number': lambda: 10, # result = 10
'text': lambda: 'ten', # result = 'ten'
'binary': lambda: 0b101111, # result = 47
}.get(option)()
If you have a complicated case block you can consider using a function dictionary lookup table...
If you haven't done this before it's a good idea to step into your debugger and view exactly how the dictionary looks up each function.
NOTE: Do not use "()" inside the case/dictionary lookup or it will call each of your functions as the dictionary / case block is created. Remember this because you only want to call each function once using a hash style lookup.
def first_case():
    print("first")


def second_case():
    print("second")


def third_case():
    print("third")


# Map each key to the function object itself; calling here would run
# every function while the dictionary is being built.
mycase = {
    'first': first_case,    # do not use ()
    'second': second_case,  # do not use ()
    'third': third_case     # do not use ()
}
myfunc = mycase['first']
myfunc()
If you're searching extra-statement, as "switch", I built a Python module that extends Python. It's called ESPY as "Enhanced Structure for Python" and it's available for both Python 2.x and Python 3.x.
For example, in this case, a switch statement could be performed by the following code:
macro switch(arg1):
while True:
cont=False
val=%arg1%
socket case(arg2):
if val==%arg2% or cont:
cont=True
socket
socket else:
socket
break
That can be used like this:
a=3
switch(a):
case(0):
print("Zero")
case(1):
print("Smaller than 2"):
break
else:
print ("greater than 1")
So ESPY translates it into Python as:
a=3
while True:
cont=False
if a==0 or cont:
cont=True
print ("Zero")
if a==1 or cont:
cont=True
print ("Smaller than 2")
break
print ("greater than 1")
break
Most of the answers here are pretty old, and especially the accepted ones, so it seems worth updating.
First, the official Python FAQ covers this, and recommends the elif chain for simple cases and the dict for larger or more complex cases. It also suggests a set of visit_ methods (a style used by many server frameworks) for some cases:
def dispatch(self, value):
    """Call the ``visit_<value>`` method of ``self``.

    Raises:
        AttributeError: If ``self`` has no matching ``visit_`` method.
    """
    method_name = 'visit_' + str(value)
    method = getattr(self, method_name)
    method()
The FAQ also mentions PEP 275, which was written to get an official once-and-for-all decision on adding C-style switch statements. But that PEP was actually deferred to Python 3, and it was only officially rejected as a separate proposal, PEP 3103. The answer was, of course, no—but the two PEPs have links to additional information if you're interested in the reasons or the history.
One thing that came up multiple times (and can be seen in PEP 275, even though it was cut out as an actual recommendation) is that if you're really bothered by having 8 lines of code to handle 4 cases, vs. the 6 lines you'd have in C or Bash, you can always write this:
if x == 1: print('first')
elif x == 2: print('second')
elif x == 3: print('third')
else: print('did not place')
This isn't exactly encouraged by PEP 8, but it's readable and not too unidiomatic.
Over the more than a decade since PEP 3103 was rejected, the issue of C-style case statements, or even the slightly more powerful version in Go, has been considered dead; whenever anyone brings it up on python-ideas or -dev, they're referred to the old decision.
However, the idea of full ML-style pattern matching arises every few years, especially since languages like Swift and Rust have adopted it. The problem is that it's hard to get much use out of pattern matching without algebraic data types. While Guido has been sympathetic to the idea, nobody's come up with a proposal that fits into Python very well. (You can read my 2014 strawman for an example.) This could change with dataclass in 3.7 and some sporadic proposals for a more powerful enum to handle sum types, or with various proposals for different kinds of statement-local bindings (like PEP 3150, or the set of proposals currently being discussed on -ideas). But so far, it hasn't.
There are also occasionally proposals for Perl 6-style matching, which is basically a mishmash of everything from elif to regex to single-dispatch type-switching.
Expanding on the "dict as switch" idea. If you want to use a default value for your switch:
def f(x):
    """Look up x in a fixed mapping, returning 'default' for unknown keys."""
    try:
        return {
            'a': 1,
            'b': 2,
        }[x]
    except KeyError:
        return 'default'
I found that a common switch structure:
switch ...parameter...
case p1: v1; break;
case p2: v2; break;
default: v3;
can be expressed in Python as follows:
(lambda x: v1 if p1(x) else v2 if p2(x) else v3)
or formatted in a clearer way:
(lambda x:
v1 if p1(x) else
v2 if p2(x) else
v3)
Instead of being a statement, the Python version is an expression, which evaluates to a value.
The solutions I use:
A combination of 2 of the solutions posted here, which is relatively easy to read and supports defaults.
result = {
'a': lambda x: x * 5,
'b': lambda x: x + 7,
'c': lambda x: x - 2
}.get(whatToUse, lambda x: x - 22)(value)
where
.get('c', lambda x: x - 22)(23)
looks up "lambda x: x - 2" in the dict and uses it with x=23
.get('xxx', lambda x: x - 22)(44)
doesn't find it in the dict and uses the default "lambda x: x - 22" with x=44.
You can use a dispatched dict:
#!/usr/bin/env python
def case1():
print("This is case 1")
def case2():
print("This is case 2")
def case3():
print("This is case 3")
token_dict = {
"case1" : case1,
"case2" : case2,
"case3" : case3,
}
def main():
cases = ("case1", "case3", "case2", "case1")
for case in cases:
token_dict[case]()
if __name__ == '__main__':
main()
Output:
This is case 1
This is case 3
This is case 2
This is case 1
I didn't find the simple answer I was looking for anywhere on Google search. But I figured it out anyway. It's really quite simple. Decided to post it, and maybe prevent a few less scratches on someone else's head. The key is simply "in" and tuples. Here is the switch statement behavior with fall-through, including RANDOM fall-through.
l = ['Dog', 'Cat', 'Bird', 'Bigfoot',
'Dragonfly', 'Snake', 'Bat', 'Loch Ness Monster']
for x in l:
if x in ('Dog', 'Cat'):
x += " has four legs"
elif x in ('Bat', 'Bird', 'Dragonfly'):
x += " has wings."
elif x in ('Snake',):
x += " has a forked tongue."
else:
x += " is a big mystery by default."
print(x)
print()
for x in range(10):
if x in (0, 1):
x = "Values 0 and 1 caught here."
elif x in (2,):
x = "Value 2 caught here."
elif x in (3, 7, 8):
x = "Values 3, 7, 8 caught here."
elif x in (4, 6):
x = "Values 4 and 6 caught here"
else:
x = "Values 5 and 9 caught in default."
print(x)
Provides:
Dog has four legs
Cat has four legs
Bird has wings.
Bigfoot is a big mystery by default.
Dragonfly has wings.
Snake has a forked tongue.
Bat has wings.
Loch Ness Monster is a big mystery by default.
Values 0 and 1 caught here.
Values 0 and 1 caught here.
Value 2 caught here.
Values 3, 7, 8 caught here.
Values 4 and 6 caught here
Values 5 and 9 caught in default.
Values 4 and 6 caught here
Values 3, 7, 8 caught here.
Values 3, 7, 8 caught here.
Values 5 and 9 caught in default.
# simple case alternative
some_value = 5.0
# this while loop block simulates a case block
# case
while True:
# case 1
if some_value > 5:
print ('Greater than five')
break
# case 2
if some_value == 5:
print ('Equal to five')
break
# else case 3
print ( 'Must be less than 5')
break
I was quite confused after reading the accepted answer, but this cleared it all up:
def numbers_to_strings(argument):
    """Return the English word for 0, 1 or 2, or "nothing" otherwise."""
    switcher = {
        0: "zero",
        1: "one",
        2: "two",
    }
    return switcher.get(argument, "nothing")
This code is analogous to:
function(argument){
switch(argument) {
case 0:
return "zero";
case 1:
return "one";
case 2:
return "two";
default:
return "nothing";
}
}
Check the Source for more about dictionary mapping to functions.
def f(x):
    """Return the value for the letter x, or 'Not Found' if x isn't a key."""
    dictionary = {'a': 1, 'b': 2, 'c': 3}
    return dictionary.get(x, 'Not Found')
I liked Mark Bies's answer
Since the x variable must be used twice, I modified the lambda functions to be parameterless.
I have to run with results[value](value)
In [2]: result = {
...: 'a': lambda x: 'A',
...: 'b': lambda x: 'B',
...: 'c': lambda x: 'C'
...: }
...: result['a']('a')
...:
Out[2]: 'A'
In [3]: result = {
...: 'a': lambda : 'A',
...: 'b': lambda : 'B',
...: 'c': lambda : 'C',
...: None: lambda : 'Nothing else matters'
...: }
...: result['a']()
...:
Out[3]: 'A'
Edit: I noticed that I can use None as a key in dictionaries, so this would emulate switch; case else.
def f(x):
    """Return 1 for 'a', 2 for 'b', 'c' or 'd', and 0 otherwise."""
    # Membership is tested against a tuple: ``x in 'bcd'`` is a substring
    # test, so '' and 'bc' would match and a non-string x would raise.
    return 1 if x == 'a' else\
           2 if x in ('b', 'c', 'd') else\
           0   # default
Short and easy to read, has a default value and supports expressions in both conditions and return values.
However, it is less efficient than the solution with a dictionary. For example, Python has to scan through all the conditions before returning the default value.
Simple, not tested; each condition is evaluated independently: there is no fall-through, but all cases are evaluated (although the expression to switch on is only evaluated once), unless there is a break statement. For example,
for case in [expression]:
if case == 1:
print(end='Was 1. ')
if case == 2:
print(end='Was 2. ')
break
if case in (1, 2):
print(end='Was 1 or 2. ')
print(end='Was something. ')
prints Was 1. Was 1 or 2. Was something. (Dammit! Why can't I have trailing whitespace in inline code blocks?) if expression evaluates to 1, Was 2. if expression evaluates to 2, or Was something. if expression evaluates to something else.
There have been a lot of answers so far that have said, "we don't have a switch in Python, do it this way". However, I would like to point out that the switch statement itself is an easily-abused construct that can and should be avoided in most cases because they promote lazy programming. Case in point:
def ToUpper(lcChar):
if (lcChar == 'a' or lcChar == 'A'):
return 'A'
elif (lcChar == 'b' or lcChar == 'B'):
return 'B'
...
elif (lcChar == 'z' or lcChar == 'Z'):
return 'Z'
else:
return None # or something
Now, you could do this with a switch-statement (if Python offered one) but you'd be wasting your time because there are methods that do this just fine. Or maybe, you have something less obvious:
def ConvertToReason(code):
if (code == 200):
return 'Okay'
elif (code == 400):
return 'Bad Request'
elif (code == 404):
return 'Not Found'
else:
return None
However, this sort of operation can and should be handled with a dictionary because it will be faster, less complex, less prone to error and more compact.
And the vast majority of "use cases" for switch statements will fall into one of these two cases; there's just very little reason to use one if you've thought about your problem thoroughly.
So, rather than asking "how do I switch in Python?", perhaps we should ask, "why do I want to switch in Python?" because that's often the more interesting question and will often expose flaws in the design of whatever you're building.
Now, that isn't to say that switches should never be used either. State machines, lexers, parsers and automata all use them to some degree and, in general, when you start from a symmetrical input and go to an asymmetrical output they can be useful; you just need to make sure that you don't use the switch as a hammer because you see a bunch of nails in your code.
A solution I tend to use which also makes use of dictionaries is:
def decision_time(key, *args, **kwargs):
    """Run the action registered for ``key`` and return its result.

    Keys 1, 2 and 3 select ``action1`` to ``action3``; any other key runs
    ``default``.
    """
    def action1():
        """This function is a closure - and has access to all the arguments"""
        pass

    def action2():
        """This function is a closure - and has access to all the arguments"""
        pass

    def action3():
        """This function is a closure - and has access to all the arguments"""
        pass

    def default():
        """Fallback action; also a closure over the outer arguments."""
        pass

    return {1: action1, 2: action2, 3: action3}.get(key, default)()
This has the advantage that it doesn't try to evaluate the functions every time, and you just have to ensure that the outer function gets all the information that the inner functions need.
Defining:
def switch1(value, options):
    """Call ``options[value]`` if ``value`` is a key; otherwise do nothing."""
    if value in options:
        options[value]()
allows you to use a fairly straightforward syntax, with the cases bundled into a map:
def sample1(x):
local = 'betty'
switch1(x, {
'a': lambda: print("hello"),
'b': lambda: (
print("goodbye," + local),
print("!")),
})
I kept trying to redefine switch in a way that would let me get rid of the "lambda:", but gave up. Tweaking the definition:
def switch(value, *maps):
    """Merge ``maps`` and call the entry for ``value``.

    Later maps override earlier ones on duplicate keys. If ``value`` is not
    a key, the entry stored under ``None`` is called when present;
    otherwise nothing happens.
    """
    options = {}
    for m in maps:
        options.update(m)
    if value in options:
        options[value]()
    elif None in options:
        options[None]()
Allowed me to map multiple cases to the same code, and to supply a default option:
def sample(x):
switch(x, {
_: lambda: print("other")
for _ in 'cdef'
}, {
'a': lambda: print("hello"),
'b': lambda: (
print("goodbye,"),
print("!")),
None: lambda: print("I dunno")
})
Each replicated case has to be in its own dictionary; switch() consolidates the dictionaries before looking up the value. It's still uglier than I'd like, but it has the basic efficiency of using a hashed lookup on the expression, rather than a loop through all the keys.
Expanding on Greg Hewgill's answer - We can encapsulate the dictionary-solution using a decorator:
def case(callable):
    """switch-case decorator

    Replaces the decorated function with a class whose instances store the
    call arguments and run the function later through ``do_call()``.
    """
    class case_class(object):
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

        def do_call(self):
            return callable(*self.args, **self.kwargs)

    return case_class


def switch(key, cases, default=None):
    """switch-statement

    Run the deferred call stored under ``key`` in ``cases`` and return its
    result. For a missing key, run ``default`` if one was given, else
    return None.
    """
    # Only the lookup sits inside the try: a KeyError raised by the case
    # body itself must not be mistaken for a missing key. The former
    # ``finally: return`` swallowed every exception, including the one
    # caused by the ``case[key]`` typo, so None was always returned.
    try:
        selected = cases[key]
    except KeyError:
        if not default:
            return None
        selected = default
    return selected.do_call()
This can then be used with the @case-decorator
@case
def case_1(arg1):
    print('case_1: ', arg1)


@case
def case_2(arg1, arg2):
    print('case_2')
    return arg1, arg2


@case
def default_case(arg1, arg2, arg3):
    print('default_case: ', arg1, arg2, arg3)


# Calling a decorated name only records the arguments; switch() runs
# the selected entry.
ret = switch(somearg, {
    1: case_1('somestring'),
    2: case_2(13, 42)
}, default_case(123, 'astring', 3.14))

print(ret)
The good news is that this has already been done in the NeoPySwitch module. Simply install it using pip:
pip install NeoPySwitch

Hypothesis strategy to generate multiple kwargs

It is natural to write my test in terms of 3 separate numpy arrays, but the first dimension of each numpy array must be of equal length. As a hack, I can simply ask for a larger numpy array
@given(
    arrays=arrays(
        dtype=float,
        shape=tuples(
            integers(3, 3),
            array_shapes(max_dims=1).map(lambda t: t[0]),
            array_shapes(max_dims=1).map(lambda t: t[0]),
        ),
        elements=floats(width=16, allow_nan=False, allow_infinity=False),
    ),
)
def test(arrays: np.ndarray):
    # One array with a leading dimension of 3 is generated and split into
    # the three arrays the test actually wants.
    a, b, c = arrays[0], arrays[1], arrays[2]
    ...
but this obscures what I'm really trying to generate, and makes it impossible to have separate strategies for the elements of each of the arrays. Is there any way to generate these arrays while maintaining the constraint on the size of the first dimension? I imagine I would want something like
#given(
(a, b, c) = batched_arrays(
n_arrays=3,
shared_sizes=array_sizes(max_dims=1),
unshared_sizes=arrays_sizes(),
dtypes=[float, int, float],
elements=[floats(), integers(0), floats(0, 1)])
)
def test(a: np.ndarray, b:np.ndarray, c:np.ndarray):
assert a.shape[0] == b.shape[0] and a.shape[0] == c.shape[0]
...
Sorry to answer my own question. It turns out you can get this with shared
@given(
    # Both arguments use a shared() strategy with the same key.
    a=arrays(float, shared(array_shapes(max_dims=1), key="dim1")),
    b=arrays(float, shared(array_shapes(max_dims=1), key="dim1")),
)
def test_shared(a, b):
    assert a.shape[0] == b.shape[0]

What's the underlying implementation for most_common method of Counter?

I found a pyi file which has the following def
def most_common(self, n: Optional[int] = ...) -> List[Tuple[_T, int]]: ...
How could this happen? List is not defined, and no implementation?
Just highlight some valuable suggestions here for followers:
List is imported from the typing module; it's not the same thing as list. The .pyi file doesn't need to import it because stub files are never executed; they just have to be syntactically valid Python
If you use from __future__ import annotations, you won't have to import typing to use List et al. in function annotations in .py files, either, since function annotations will be treated as string literals. (Starting in Python 4, that will be the default behavior. See PEP 563 for details.)
You are looking at the pyi file which is used solely for annotations. It is never executed by the Python interpreter. You can learn more about pyi files by reading PEP484.
Using a debugger, put a breakpoint on the line where you call most_common and then step into the method.
Python 3.7 implementation.
...\Lib\collections\__init__.py:
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
# No limit requested: sort every (element, count) pair by its count
# (item index 1), highest count first.
if n is None:
return sorted(self.items(), key=_itemgetter(1), reverse=True)
# Otherwise let nlargest pick the n pairs with the highest counts.
return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
_heapq.nlargest (in ...\Lib\heapq.py) implementation:
def nlargest(n, iterable, key=None):
"""Find the n largest elements in a dataset.
Equivalent to: sorted(iterable, key=key, reverse=True)[:n]
"""
# Short-cut for n==1 is to use max()
if n == 1:
it = iter(iterable)
# Unique marker so an empty iterable can be told apart from any real value.
sentinel = object()
if key is None:
result = max(it, default=sentinel)
else:
result = max(it, default=sentinel, key=key)
return [] if result is sentinel else [result]
# When n>=size, it's faster to use sorted()
try:
size = len(iterable)
except (TypeError, AttributeError):
# The iterable has no usable length; fall through to the heap-based paths.
pass
else:
if n >= size:
return sorted(iterable, key=key, reverse=True)[:n]
# When key is none, use simpler decoration
if key is None:
it = iter(iterable)
# Seed the candidates with up to n leading elements, each paired with a
# decreasing index (0, -1, -2, ...).
result = [(elem, i) for i, elem in zip(range(0, -n, -1), it)]
if not result:
return result
heapify(result)
# result[0] is the heap root, i.e. the smallest candidate kept so far.
top = result[0][0]
# Indexes handed to later elements continue counting down from -n.
order = -n
_heapreplace = heapreplace
for elem in it:
# Only an element larger than the weakest candidate displaces it.
if top < elem:
_heapreplace(result, (elem, order))
top, _order = result[0]
order -= 1
# Candidates are returned largest first, with the decoration stripped.
result.sort(reverse=True)
return [elem for (elem, order) in result]
# General case, slowest method
it = iter(iterable)
# Same scheme as above, but each entry is decorated as (key value, index, element).
result = [(key(elem), i, elem) for i, elem in zip(range(0, -n, -1), it)]
if not result:
return result
heapify(result)
top = result[0][0]
order = -n
_heapreplace = heapreplace
for elem in it:
k = key(elem)
if top < k:
_heapreplace(result, (k, order, elem))
top, _order, _elem = result[0]
order -= 1
result.sort(reverse=True)
return [elem for (k, order, elem) in result]

Python 3.x - function args type-testing

I started learning Python 3.x some time ago and I wrote a very simple code which adds numbers or concatenates lists, tuples and dicts:
# Module-level flag that adder() sets to 'bad' when it sees mixed argument types.
X = 'sth'
# Print the sum (ints) or the concatenation/merge (lists, tuples, dicts) of the
# arguments, or a message when there are none or their types differ.
def adder(*vargs):
if (len(vargs) == 0):
print('No args given. Stopping...')
else:
L = list(enumerate(vargs))
# Compare the type of each argument with the type of its successor.
for i in range(len(L) - 1):
if (type(L[i][1]) != type(L[i + 1][1])):
# NOTE(review): X is never reset, so after one mismatching call every
# later call also takes the 'bad' branch below.
global X
X = 'bad'
break
if (X == 'bad'):
print('Args have different types. Stopping...')
else:
if type(L[0][1]) == int: #num
temp = 0
for i in range(len(L)):
temp += L[i][1]
print('Sum is equal to:', temp)
elif type(L[0][1]) == list: #list
A = []
for i in range(len(L)):
A += L[i][1]
print('List made is:', A)
elif type(L[0][1]) == tuple: #tuple
A = []
for i in range(len(L)):
A += list(L[i][1])
print('Tuple made is:', tuple(A))
elif type(L[0][1]) == dict: #dict
# NOTE(review): A is the caller's first dict itself, so update() below
# modifies that argument in place.
A = L[0][1]
for i in range(len(L)):
A.update(L[i][1])
print('Dict made is:', A)
adder(0, 1, 2, 3, 4, 5, 6, 7)
adder([1,2,3,4], [2,3], [5,3,2,1])
adder((1,2,3), (2,3,4), (2,))
adder(dict(a = 2, b = 433), dict(c = 22, d = 2737))
My main issue with this is the way I am getting out of the function when args have different types with the 'X' global. I thought a while about it, but I can't see easier way of doing this (I can't simply put the else under for, because the results will be printed a few times; probably I'm messing something up with the continue and break usage).
I'm sure I'm missing an easy way to do this, but I can't get it.
Thank you for any replies. If you have any advice about any other code piece here, I would be very grateful for additional help. I probably have a lot of bad non-Pythonian habits coming from earlier C++ coding.
Here are some changes I made that I think clean it up a bit and get rid of the need for the global variable.
def adder(*vargs):
    """Print the combination of arguments that all share one exact type.

    Integers are summed, lists and tuples are concatenated and dicts are
    merged. With no arguments nothing is printed and ``None`` is returned.

    Raises:
        ValueError: If the arguments are not all of the same type.
    """
    if not vargs:
        return None  # could raise ValueError

    kind = type(vargs[0])
    if any(type(value) != kind for value in vargs):
        raise ValueError('Args have different types.')

    if kind is int:
        print('Sum is equal to:', sum(vargs))
    elif kind is list:
        print('List made is:', [element for chunk in vargs for element in chunk])
    elif kind is tuple:
        flattened = [element for chunk in vargs for element in chunk]
        print('Tuple made is:', tuple(flattened))
    elif kind is dict:
        merged = {}
        for mapping in vargs:
            merged.update(mapping)
        print('Dict made is:', merged)
adder(0, 1, 2, 3, 4, 5, 6, 7)
adder([1,2,3,4], [2,3], [5,3,2,1])
adder((1,2,3), (2,3,4), (2,))
adder(dict(a = 2, b = 433), dict(c = 22, d = 2737))
I also made some other improvements that I think are a bit more 'pythonic'. For instance
for item in list:
print(item)
instead of
for i in range(len(list)):
print(list[i])
In a function like this, if there are illegal arguments you would commonly short-circuit and just raise a ValueError.
if bad_condition:
raise ValueError('Args have different types.')
Just for contrast, here is another version that feels more pythonic to me (reasonable people might disagree with me, which is OK by me).
The principal differences are that a) type clashes are left to the operator combining the arguments, b) no assumptions are made about the types of the arguments, and c) the result is returned instead of printed. This allows combining different types in the cases where that makes sense (e.g, combine({}, zip('abcde', range(5)))).
The only assumption is that the operator used to combine the arguments is either add or a member function of the first argument's type named update.
I prefer this solution because it does minimal type checking, and uses duck-typing to allow valid but unexpected use cases.
from functools import reduce
from operator import add
def combine(*args):
    """Fold all arguments into a fresh instance of the first argument's type.

    If that fresh instance has a truthy ``update`` attribute, every argument
    is merged into it with ``update``; otherwise the arguments are added onto
    it one after another. Returns ``None`` when called without arguments.
    """
    if not args:
        return None
    accumulator = type(args[0])()
    if getattr(accumulator, 'update', None):
        # Mapping/set style: merge in place and hand back the same object.
        for item in args:
            accumulator.update(item)
        return accumulator
    # Everything else is combined with ``+``, starting from the empty value.
    for item in args:
        accumulator = add(accumulator, item)
    return accumulator
print(combine(0, 1, 2, 3, 4, 5, 6, 7))
print(combine([1,2,3,4], [2,3], [5,3,2,1]))
print(combine((1,2,3), (2,3,4), (2,)))
print(combine(dict(a = 2, b = 433), dict(c = 22, d = 2737)))
print(combine({}, zip('abcde', range(5))))

How to convert mathematical strings into a solvable sum in python 3 [duplicate]

stringExp = "2^4"
intVal = int(stringExp) # Expected value: 16
This returns the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: '2^4'
I know that eval can work around this, but isn't there a better and - more importantly - safer method to evaluate a mathematical expression that is being stored in a string?
eval is evil
eval("__import__('os').remove('important file')") # arbitrary commands
eval("9**9**9**9**9**9**9**9", {'__builtins__': None}) # CPU, memory
Note: even if you set __builtins__ to None, it still might be possible to break out using introspection:
eval('(1).__class__.__bases__[0].__subclasses__()', {'__builtins__': None})
Evaluate arithmetic expression using ast
import ast
import operator as op
# supported operators
operators = {
    ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
    ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
    ast.USub: op.neg,
}


def eval_expr(expr):
    """Evaluate an arithmetic expression string without using ``eval``.

    >>> eval_expr('2^6')
    4
    >>> eval_expr('2**6')
    64
    >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    """
    return eval_(ast.parse(expr, mode='eval').body)


def eval_(node):
    """Recursively evaluate a parsed expression node.

    Only numeric literals and the operators listed in ``operators`` are
    supported.

    Raises:
        TypeError: For any other kind of node (names, calls, strings, ...).
        KeyError: For a binary or unary operator missing from ``operators``.
    """
    # ``ast.Constant`` replaces the deprecated ``ast.Num`` / ``node.n``; the
    # explicit type check keeps strings, booleans, None, etc. rejected.
    if isinstance(node, ast.Constant):  # <number>
        value = node.value
        if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
            raise TypeError(node)
        return value
    elif isinstance(node, ast.BinOp):  # <left> <operator> <right>
        return operators[type(node.op)](eval_(node.left), eval_(node.right))
    elif isinstance(node, ast.UnaryOp):  # <operator> <operand> e.g., -1
        return operators[type(node.op)](eval_(node.operand))
    else:
        raise TypeError(node)
You can easily limit allowed range for each operation or any intermediate result, e.g., to limit input arguments for a**b:
def power(a, b):
    """Return ``a ** b``, refusing operands whose magnitude exceeds 100.

    Raises:
        ValueError: With ``(a, b)`` as its argument when either operand is
            too large.
    """
    for operand in (a, b):
        if abs(operand) > 100:
            raise ValueError((a, b))
    return op.pow(a, b)
operators[ast.Pow] = power
Or to limit magnitude of intermediate results:
import functools
def limit(max_=None):
    """Return decorator that limits allowed returned values.

    The decorated function raises ``ValueError`` (carrying the offending
    result) when the magnitude of its result exceeds ``max_``. Results that
    do not support ``abs()`` are passed through unchecked, and with the
    default ``max_=None`` no limit is enforced at all.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ret = func(*args, **kwargs)
            if max_ is None:
                # No limit configured; comparing against None would raise.
                return ret
            try:
                mag = abs(ret)
            except TypeError:
                pass  # not applicable
            else:
                if mag > max_:
                    raise ValueError(ret)
            return ret
        return wrapper
    return decorator
eval_ = limit(max_=10**100)(eval_)
Example
>>> evil = "__import__('os').remove('important file')"
>>> eval_expr(evil) #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:
>>> eval_expr("9**9")
387420489
>>> eval_expr("9**9**9**9**9**9**9**9") #doctest:+IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
ValueError:
Pyparsing can be used to parse mathematical expressions. In particular, fourFn.py
shows how to parse basic arithmetic expressions. Below, I've rewrapped fourFn into a numeric parser class for easier reuse.
from __future__ import division
from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
ZeroOrMore, Forward, nums, alphas, oneOf)
import math
import operator
__author__ = 'Paul McGuire'
__version__ = '$Revision: 0.0 $'
__date__ = '$Date: 2009-03-20 $'
__source__ = '''http://pyparsing.wikispaces.com/file/view/fourFn.py
http://pyparsing.wikispaces.com/message/view/home/15549426
'''
__note__ = '''
All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
more easily in other places.
'''
class NumericStringParser(object):
    '''
    Parse and evaluate arithmetic expression strings.

    Most of this code comes from the fourFn.py pyparsing example. Parsing
    pushes operands and operators onto ``self.exprStack`` in postfix order;
    ``evaluateStack`` then reduces that stack to a number.
    '''

    def pushFirst(self, strg, loc, toks):
        """Parse action: push the first matched token onto the stack."""
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        """Parse action: record a unary minus when the atom starts with '-'."""
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def _installOperations(self):
        """Create the operator (``self.opn``) and function (``self.fn``) tables."""
        # map operator symbols to corresponding arithmetic operations
        epsilon = 1e-12
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "exp": math.exp,
                   "abs": abs,
                   "trunc": lambda a: int(a),
                   "round": round,
                   # ``cmp`` does not exist in Python 3; (a > 0) - (a < 0)
                   # yields the same -1 / 1 for values outside +-epsilon.
                   "sgn": lambda a: (a > 0) - (a < 0) if abs(a) > epsilon else 0}

    def __init__(self):
        """
        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        self.exprStack = []
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional(oneOf("- +")) +
                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + \
            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
        term = factor + \
            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
        expr << term + \
            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
        # addop_term = ( addop + term ).setParseAction( self.pushFirst )
        # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
        # expr << general_term
        self.bnf = expr
        self._installOperations()

    def evaluateStack(self, s):
        """Pop and evaluate the postfix expression held in list ``s``.

        ``s`` is consumed in the process. Identifiers that are neither a
        known function nor PI/E evaluate to 0.
        """
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        if op in self.opn:
            # The right operand was pushed last, so it comes off first.
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        elif op == "PI":
            return math.pi  # 3.1415926535
        elif op == "E":
            return math.e  # 2.718281828
        elif op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        elif op[0].isalpha():
            return 0
        else:
            return float(op)

    def eval(self, num_string, parseAll=True):
        """Parse ``num_string`` and return its numeric value."""
        self.exprStack = []
        # Parsing fills self.exprStack through the parse actions.
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so the parsed stack stays available afterwards.
        return self.evaluateStack(self.exprStack[:])
You can use it like this
nsp = NumericStringParser()
result = nsp.eval('2^4')
print(result)
# 16.0
result = nsp.eval('exp(2^4)')
print(result)
# 8886110.520507872
Some safer alternatives to eval() and sympy.sympify().evalf()*:
asteval
numexpr
*SymPy sympify is also unsafe according to the following warning from the documentation.
Warning: Note that this function uses eval, and thus shouldn’t be used on unsanitized input.
The reason eval and exec are so dangerous is that the default compile function will generate bytecode for any valid python expression, and the default eval or exec will execute any valid python bytecode. All the answers to date have focused on restricting the bytecode that can be generated (by sanitizing input) or building your own domain-specific-language using the AST.
Instead, you can easily create a simple eval function that is incapable of doing anything nefarious and can easily have runtime checks on memory or time used. Of course, if it is simple math, then there is a shortcut.
c = compile(stringExp, 'userinput', 'eval')
if c.co_code[0]==b'd' and c.co_code[3]==b'S':
return c.co_consts[ord(c.co_code[1])+ord(c.co_code[2])*256]
The way this works is simple, any constant mathematic expression is safely evaluated during compilation and stored as a constant. The code object returned by compile consists of d, which is the bytecode for LOAD_CONST, followed by the number of the constant to load (usually the last one in the list), followed by S, which is the bytecode for RETURN_VALUE. If this shortcut doesn't work, it means that the user input isn't a constant expression (contains a variable or function call or similar).
This also opens the door to some more sophisticated input formats. For example:
stringExp = "1 + cos(2)"
This requires actually evaluating the bytecode, which is still quite simple. Python bytecode is a stack oriented language, so everything is a simple matter of TOS=stack.pop(); op(TOS); stack.put(TOS) or similar. The key is to only implement the opcodes that are safe (loading/storing values, math operations, returning values) and not unsafe ones (attribute lookup). If you want the user to be able to call functions (the whole reason not to use the shortcut above), simple make your implementation of CALL_FUNCTION only allow functions in a 'safe' list.
from dis import opmap
from Queue import LifoQueue
from math import sin,cos
import operator
# Minimal whitelist-based bytecode interpreter. As written it targets the
# CPython 2 bytecode layout: 1-byte opcodes followed, where applicable, by a
# 2-byte little-endian argument.
globs = {'sin': sin, 'cos': cos}
safe = globs.values()   # the only callables CALL_FUNCTION may invoke
stack = LifoQueue()     # the interpreter's value stack


class BINARY(object):
    """Handler for an opcode that combines the two topmost stack values."""

    def __init__(self, operator):
        self.op = operator

    def __call__(self, context):
        # The right operand was pushed last, so it is popped first.
        right = stack.get()
        left = stack.get()
        stack.put(self.op(left, right))


class UNARY(object):
    """Handler for an opcode that transforms the topmost stack value."""

    def __init__(self, operator):
        self.op = operator

    def __call__(self, context):
        stack.put(self.op(stack.get()))


def CALL_FUNCTION(context, arg):
    """Call a whitelisted function with ``argc`` positional arguments."""
    argc = arg[0] + arg[1] * 256
    # Arguments come off the stack last-to-first; restore call order.
    args = [stack.get() for _ in range(argc)][::-1]
    func = stack.get()
    if func not in safe:
        raise TypeError("Function %r not allowed" % func)
    stack.put(func(*args))


def LOAD_CONST(context, arg):
    """Push the constant with the given index from the code object."""
    cons = arg[0] + arg[1] * 256
    stack.put(context['code'].co_consts[cons])


def LOAD_NAME(context, arg):
    """Push the value bound to a name, looking in locals before globals."""
    name_num = arg[0] + arg[1] * 256
    name = context['code'].co_names[name_num]
    if name in context['locals']:
        stack.put(context['locals'][name])
    else:
        stack.put(context['globals'][name])


def RETURN_VALUE(context):
    """Pop and return the topmost stack value."""
    return stack.get()


# Opcode -> handler. Any opcode missing here raises KeyError in VMeval,
# which is what keeps unsupported (unsafe) instructions from running.
opfuncs = {
    opmap['BINARY_ADD']: BINARY(operator.add),
    opmap['UNARY_INVERT']: UNARY(operator.invert),
    opmap['CALL_FUNCTION']: CALL_FUNCTION,
    opmap['LOAD_CONST']: LOAD_CONST,
    opmap['LOAD_NAME']: LOAD_NAME,
    opmap['RETURN_VALUE']: RETURN_VALUE,
}


def VMeval(c):
    """Interpret code object ``c`` using only the handlers in ``opfuncs``.

    Returns the first non-None value produced by a handler (in practice the
    result of RETURN_VALUE), or ``None`` if the bytecode ends without one.
    """
    from dis import HAVE_ARGUMENT

    context = dict(locals={}, globals=globs, code=c)
    bci = iter(c.co_code)
    for bytecode in bci:
        opcode = ord(bytecode)
        func = opfuncs[opcode]
        # Decide by opcode number whether an argument follows; inspecting
        # ``func.func_code`` fails for the BINARY/UNARY handler instances.
        if opcode < HAVE_ARGUMENT:
            ret = func(context)
        else:
            args = ord(next(bci)), ord(next(bci))
            ret = func(context, args)
        # ``is not None`` so that falsy results such as 0 are still returned.
        if ret is not None:
            return ret


def evaluate(expr):
    """Compile ``expr`` as an expression and run it through ``VMeval``."""
    return VMeval(compile(expr, 'userinput', 'eval'))
Obviously, the real version of this would be a bit longer (there are 119 opcodes, 24 of which are math related). Adding STORE_FAST and a couple others would allow for input like 'x=5;return x+x or similar, trivially easily. It can even be used to execute user-created functions, so long as the user created functions are themselves executed via VMeval (don't make them callable!!! or they could get used as a callback somewhere). Handling loops requires support for the goto bytecodes, which means changing from a for iterator to while and maintaining a pointer to the current instruction, but isn't too hard. For resistance to DOS, the main loop should check how much time has passed since the start of the calculation, and certain operators should deny input over some reasonable limit (BINARY_POWER being the most obvious).
While this approach is somewhat longer than a simple grammar parser for simple expressions (see above about just grabbing the compiled constant), it extends easily to more complicated input, and doesn't require dealing with grammar (compile take anything arbitrarily complicated and reduces it to a sequence of simple instructions).
Okay, so the problem with eval is that it can escape its sandbox too easily, even if you get rid of __builtins__. All the methods for escaping the sandbox come down to using getattr or object.__getattribute__ (via the . operator) to obtain a reference to some dangerous object via some allowed object (''.__class__.__bases__[0].__subclasses__ or similar). getattr is eliminated by setting __builtins__ to None. object.__getattribute__ is the difficult one, since it cannot simply be removed, both because object is immutable and because removing it would break everything. However, __getattribute__ is only accessible via the . operator, so purging that from your input is sufficient to ensure eval cannot escape its sandbox.
In processing formulas, the only valid use of a decimal is when it is preceded or followed by [0-9], so we just remove all other instances of ..
import re
inp = re.sub(r"\.(?![0-9])","", inp)
val = eval(inp, {'__builtins__':None})
Note that while Python normally treats 1 + 1. as 1 + 1.0, this will remove the trailing . and leave you with 1 + 1. You could add ")", " ", and EOF to the list of things allowed to follow ., but why bother?
You can use the ast module and write a NodeVisitor that verifies that the type of each node is part of a whitelist.
import ast, math
# Names an expression may use when this dict is passed to evaluate():
# the public contents of ``math`` plus a few numeric builtins.
locals = {key: value for (key, value) in vars(math).items() if key[0] != '_'}
locals.update({"abs": abs, "complex": complex, "min": min, "max": max, "pow": pow, "round": round})


class Visitor(ast.NodeVisitor):
    """Reject any syntax tree containing a node type outside ``whitelist``."""

    # ``ast.Constant`` replaces the deprecated ``ast.Num``; visit() below
    # restricts it to numeric values so the accepted inputs stay the same.
    whitelist = (ast.Module, ast.Expr, ast.Load, ast.Expression, ast.Add, ast.Sub, ast.UnaryOp, ast.Constant,
                 ast.BinOp, ast.Mult, ast.Div, ast.Pow, ast.BitOr, ast.BitAnd, ast.BitXor, ast.USub, ast.UAdd,
                 ast.FloorDiv, ast.Mod, ast.LShift, ast.RShift, ast.Invert, ast.Call, ast.Name)

    def visit(self, node):
        """Validate ``node`` and then recurse into its children.

        Raises:
            ValueError: If the node type is not whitelisted, or it is a
                constant that is not a plain number.
        """
        if not isinstance(node, self.whitelist):
            raise ValueError(node)
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError(node)
        return super().visit(node)


def evaluate(expr, locals=None):
    """Evaluate a whitelisted arithmetic expression string.

    Args:
        expr: The expression text. Newlines and ``#`` are rejected outright.
        locals: Mapping of names the expression may reference; none when
            omitted.

    Raises:
        ValueError: If the expression is rejected or fails to evaluate; the
            underlying error is attached as ``__cause__``.
    """
    if locals is None:
        locals = {}
    if any(elem in expr for elem in '\n#'):
        raise ValueError(expr)
    try:
        node = ast.parse(expr.strip(), mode='eval')
        Visitor().visit(node)
        # Validated above; builtins are removed so only ``locals`` is reachable.
        return eval(compile(node, "<string>", "eval"), {'__builtins__': None}, locals)
    except Exception as err:
        raise ValueError(expr) from err
Because it works via a whitelist rather than a blacklist, it is safe. The only functions and variables it can access are those you explicitly give it access to. I populated a dict with math-related functions so you can easily provide access to those if you want, but you have to explicitly use it.
If the string attempts to call functions that haven't been provided, or invoke any methods, an exception will be raised, and it will not be executed.
Because this uses Python's built in parser and evaluator, it also inherits Python's precedence and promotion rules as well.
>>> evaluate("7 + 9 * (2 << 2)")
79
>>> evaluate("6 // 2 + 0.0")
3.0
The above code has only been tested on Python 3.
If desired, you can add a timeout decorator on this function.
I think I would use eval(), but would first check to make sure the string is a valid mathematical expression, as opposed to something malicious. You could use a regex for the validation.
eval() also takes additional arguments which you can use to restrict the namespace it operates in for greater security.
[I know this is an old question, but it is worth pointing out new useful solutions as they pop up]
Since python3.6, this capability is now built into the language, coined "f-strings".
See: PEP 498 -- Literal String Interpolation
For example (note the f prefix):
f'{2**4}'
=> '16'
Based on Perkins' amazing approach, I've updated and improved his "shortcut" for simple algebraic expressions (no functions or variables). Now it works on Python 3.6+ and avoids some pitfalls:
import re
# Kept outside simple_eval() just for performance
# Bookkeeping instructions that carry no meaning for the expression itself.
_SIMPLE_EVAL_IGNORED_OPS = frozenset({'RESUME', 'CACHE', 'NOP'})
# Instructions that do nothing but produce a compile-time constant.
_SIMPLE_EVAL_CONST_OPS = frozenset({'LOAD_CONST', 'LOAD_SMALL_INT', 'RETURN_CONST'})


def simple_eval(expr):
    """Return the value of a constant expression without executing any code.

    The expression is only compiled. It is accepted when the compiler has
    already folded it into a single constant, i.e. the code object does
    nothing but load one constant and return it.

    Raises:
        ValueError: If ``expr`` is not valid syntax, does not start by loading
            a constant, or was not folded into a single constant.
    """
    # Local import: the instructions are inspected through ``dis`` instead of
    # matching raw ``co_code`` bytes, whose layout differs between CPython
    # versions (the previous version also used ``sys`` without importing it).
    import dis

    try:
        c = compile(expr, 'userinput', 'eval')
    except SyntaxError:
        raise ValueError(f"Malformed expression: {expr}")
    instructions = [
        ins for ins in dis.get_instructions(c)
        if ins.opname not in _SIMPLE_EVAL_IGNORED_OPS
    ]
    if not instructions or instructions[0].opname not in _SIMPLE_EVAL_CONST_OPS:
        raise ValueError(f"Not a simple algebraic expression: {expr}")
    first = instructions[0]
    if len(instructions) == 1:
        folded = first.opname == 'RETURN_CONST'
    else:
        folded = len(instructions) == 2 and instructions[1].opname == 'RETURN_VALUE'
    if not folded:
        raise ValueError(f"Expression not evaluated as constant: {expr}")
    return first.argval
Testing, using some of the examples in other answers:
for expr, res in (
('2^4', 6 ),
('2**4', 16 ),
('1 + 2*3**(4^5) / (6 + -7)', -5.0 ),
('7 + 9 * (2 << 2)', 79 ),
('6 // 2 + 0.0', 3.0 ),
('2+3', 5 ),
('6+4/2*2', 10.0 ),
('3+2.45/8', 3.30625),
('3**3*3/3+3', 30.0 ),
):
result = simple_eval(expr)
ok = (result == res and type(result) == type(res))
print("{} {} = {}".format("OK!" if ok else "FAIL!", expr, result))
OK! 2^4 = 6
OK! 2**4 = 16
OK! 1 + 2*3**(4^5) / (6 + -7) = -5.0
OK! 7 + 9 * (2 << 2) = 79
OK! 6 // 2 + 0.0 = 3.0
OK! 2+3 = 5
OK! 6+4/2*2 = 10.0
OK! 3+2.45/8 = 3.30625
OK! 3**3*3/3+3 = 30.0
Testing bad input:
for expr in (
'foo bar',
'print("hi")',
'2*x',
'lambda: 10',
'2**1234',
):
try:
result = simple_eval(expr)
except ValueError as e:
print(e)
continue
print("OK!") # will never happen
Malformed expression: foo bar
Not a simple algebraic expression: print("hi")
Expression not evaluated as constant: 2*x
Expression not evaluated as constant: lambda: 10
Expression not evaluated as constant: 2**1234
This is a massively late reply, but I think useful for future reference. Rather than write your own math parser (although the pyparsing example above is great) you could use SymPy. I don't have a lot of experience with it, but it contains a much more powerful math engine than anyone is likely to write for a specific application and the basic expression evaluation is very easy:
>>> import sympy
>>> x, y, z = sympy.symbols('x y z')
>>> sympy.sympify("x**3 + sin(y)").evalf(subs={x:1, y:-3})
0.858879991940133
Very cool indeed! A from sympy import * brings in a lot more function support, such as trig functions, special functions, etc., but I've avoided that here to show what's coming from where.
Use eval in a clean namespace:
>>> ns = {'__builtins__': None}
>>> eval('2 ** 4', ns)
16
The clean namespace should prevent injection. For instance:
>>> eval('__builtins__.__import__("os").system("echo got through")', ns)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<string>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__import__'
Otherwise you would get:
>>> eval('__builtins__.__import__("os").system("echo got through")')
got through
0
You might want to give access to the math module:
>>> import math
>>> ns = vars(math).copy()
>>> ns['__builtins__'] = None
>>> eval('cos(pi/3)', ns)
0.50000000000000011
Here's my solution to the problem without using eval. Works with Python2 and Python3. It doesn't work with negative numbers.
$ python -m pytest test.py
test.py
import unittest

from solution import Solutions


class SolutionsTestCase(unittest.TestCase):
    """Checks Solutions.evaluate against '<expression>=<expected>' strings."""

    def setUp(self):
        self.solutions = Solutions()

    def test_evaluate(self):
        expressions = [
            '2+3=5',
            '6+4/2*2=10',
            '3+2.45/8=3.30625',
            '3**3*3/3+3=30',
            '2^4=6'
        ]
        for expression in expressions:
            # The expected value is whatever follows the '=' sign.
            expected_text = expression.split('=')[1]
            if '.' in expected_text:
                expected = float(expected_text)
            else:
                expected = int(expected_text)
            # The full string, including '=<expected>', is passed to evaluate().
            self.assertEqual(
                expected,
                self.solutions.evaluate(expression)
            )
solution.py
class Solutions(object):
    """String-rewriting evaluator for simple arithmetic expressions."""

    def evaluate(self, exp):
        """Evaluate ``exp`` by repeatedly replacing one operation with its result.

        Spaces are ignored and a trailing ``=<value>`` part is stripped.
        Operators are handled in passes, in this order: ``^`` (xor), ``**``,
        ``/``, ``*``, ``+``, ``-``. The final text is returned as an int or
        float when it parses as one.
        """
        def coerce(text):
            # Text with a '.' becomes a float, other text an int; anything
            # that does not parse is handed back unchanged.
            try:
                return float(text) if '.' in text else int(text)
            except ValueError:
                return text

        def operands(item, op):
            # Split around ``op`` and, when that yields exactly two pieces,
            # trim each piece down to the operand adjacent to the operator.
            parts = item.split(op)
            if len(parts) == 2:
                for symbol in ['^', '*', '/', '+', '-']:
                    if symbol in parts[0]:
                        parts = [parts[0].split(symbol)[1], parts[1]]
                    if symbol in parts[1]:
                        parts = [parts[0], parts[1].split(symbol)[0]]
            return [coerce(part) for part in parts]

        exp = exp.replace(' ', '')
        if '=' in exp:
            expected = coerce(exp.split('=')[1])
            exp = exp.replace('=%s' % expected, '')

        passes = (
            ('^', lambda a, b: a ^ b),
            ('**', lambda a, b: a ** b),
            ('/', lambda a, b: a / b),
            ('*', lambda a, b: a * b),
            ('+', lambda a, b: a + b),
            ('-', lambda a, b: a - b),
        )
        for symbol, apply in passes:
            while symbol in exp:
                pair = operands(exp, symbol)
                outcome = apply(pair[0], pair[1])
                exp = exp.replace(str(pair[0]) + symbol + str(pair[1]), str(outcome))
        return coerce(exp)
Using lark parser library https://stackoverflow.com/posts/67491514/edit
from operator import add, sub, mul, truediv, neg, pow
from lark import Lark, Transformer, v_args
calc_grammar = f"""
?start: sum
?sum: product
| sum "+" product -> {add.__name__}
| sum "-" product -> {sub.__name__}
?product: power
| product "*" power -> {mul.__name__}
| product "/" power -> {truediv.__name__}
?power: atom
| power "^" atom -> {pow.__name__}
?atom: NUMBER -> number
| "-" atom -> {neg.__name__}
| "(" sum ")"
%import common.NUMBER
%import common.WS_INLINE
%ignore WS_INLINE
"""
@v_args(inline=True)
class CalculateTree(Transformer):
    """Transformer whose callbacks compute the value of each grammar rule.

    Every attribute is named after a rule alias used in ``calc_grammar``
    (``add``, ``sub``, ``neg``, ``mul``, ``truediv``, ``pow``, ``number``) and
    is bound to the callable that computes that rule.
    """
    add = add
    sub = sub
    neg = neg
    mul = mul
    truediv = truediv
    pow = pow
    number = float
calc_parser = Lark(calc_grammar, parser="lalr", transformer=CalculateTree())
calc = calc_parser.parse
def eval_expr(expression: str) -> float:
return calc(expression)
print(eval_expr("2^4"))
print(eval_expr("-1*2^4"))
print(eval_expr("-2^3 + 1"))
print(eval_expr("2**4")) # Error
I came here looking for a mathematic expression parser as well. Reading through some of the answers and looking up libraries, I came across py-expression which I am now using. It basically handles a lot of operators and formula constructs, but if you're missing something you can easily add new operators/functions to it.
The basic syntax is:
from py_expression.core import Exp
exp = Exp()
parsed_formula = exp.parse('a+4')
result = exp.eval(parsed_formula, {"a":2})
The only issue that I've had with it so far is that it doesn't come with built-in mathematical constants nor a mechanism to add them in. I just proposed a solution to that however: https://github.com/FlavioLionelRita/py-expression/issues/7

Resources