How to make my program faster - python-3.x

"""
ID: nihaowa2
LANG: PYTHON3
TASK: namenum
"""
import time
start = time.time()

# Letters printed on each phone-keypad digit used by the task.
translate = {
    2: ("A", "B", "C"),
    3: ("D", "E", "F"),
    4: ("G", "H", "I"),
    5: ("J", "K", "L"),
    6: ("M", "N", "O"),
    7: ("P", "R", "S"),
    8: ("T", "U", "V"),
    9: ("W", "X", "Y"),
}


def newlist(nl, num):
    """Return every string in *nl* extended by each letter of digit *num*.

    The result is grouped by letter: all of *nl* with the first letter
    appended, then all of *nl* with the second letter, and so on.
    """
    return [prefix + letter for letter in translate[num] for prefix in nl]


def main():
    """Translate the serial number into names and keep the accepted ones.

    Reads the accepted names from ``dict.txt`` and the serial number from
    ``namenum.in``; writes the matching names in sorted order, one per
    line, to ``namenum.out``, or ``NONE`` when nothing matches.
    """
    # A set makes each membership test O(1); with a list every `in` check
    # scanned the whole dictionary, which is what made the program slow.
    # Iterating the file also removes the hard-coded line count.
    with open('dict.txt', 'r') as gnd:
        goodnames = {line.strip() for line in gnd}

    with open('namenum.in', 'r') as fin:
        namenumber = fin.readline().split()[0]

    names = [""]
    for digit in namenumber:
        names = newlist(names, int(digit))

    ok = sorted(name for name in names if name in goodnames)

    with open('namenum.out', 'w') as fout:
        if not ok:
            fout.write("NONE\n")
        else:
            fout.writelines(name + "\n" for name in ok)


if __name__ == "__main__":
    main()
The code above is my attempt to solve this problem, but the grader says it is too slow. I need it to run in under 1 second. Could anyone please give me some suggestions?

It is faster to read in the whole file once and prepare a dictionary of the form {"number": [list of names]}. For simplicity I keep the input as a string and use the number, as a string, as the dict key: {str: [str, ...], ...}
You can then simply look up the cow's number in the dict, which is O(1):
# Letter -> keypad digit (as a string), generated from the keypad rows.
mapping = {
    letter: digit
    for digit, letters in zip(
        '23456789',
        ('ABC', 'DEF', 'GHI', 'JKL', 'MNO', 'PRS', 'TUV', 'WXY'),
    )
    for letter in letters
}
# Q and Z are part of names in the file, but not mapped to a number.
mapping['Q'] = mapping['Z'] = ''
# read file, one word per list entry, cleaned
def readDict(fn="dict.txt"):
    """Return the lines of file *fn* with surrounding whitespace removed.

    One list entry is produced per line, in file order; blank lines come
    back as empty strings.
    """
    with open(fn) as f:
        # Iterate the file directly instead of materialising readlines()
        # under a name that shadows the builtin `all`. A single strip()
        # already removes the newline as well as other whitespace.
        return [line.strip() for line in f]
# a name as number
def getNameAsNum(name):
    """Translate *name* character by character via ``mapping`` and join the pieces."""
    digits = [mapping[letter] for letter in name]
    return ''.join(digits)
# dict of numbers as key and all names as list
def namesAsNums():
    """Build a dict from number string to the sorted list of names with that number.

    Names come from ``readDict()`` and are converted with
    ``getNameAsNum()``. Entries whose name or number is empty are skipped.
    """
    d = {}
    for name in readDict():
        asNum = getNameAsNum(name)
        if asNum and name:
            d.setdefault(asNum, []).append(name)
    # Sort each list once at the end rather than after every insert;
    # the resulting dict is the same.
    for same_number in d.values():
        same_number.sort()
    return d
# get the mapping of number to list of names
nan = namesAsNums()
# ask for the serial number to look up
cow = input("Which cow serial number?")
# keep at most the first 12 characters
cow = cow[:12]
# print every name stored under that number, or NONE when there is none
if cow not in nan:
    print ("NONE")
else:
    for n in nan[cow]:
        print(n)
Parsing the file takes 0.16 s on my laptop - the lookup in the dict is negligible.
If there were many names with the same number in the file, I would forgo the sort on every dict insert and instead make a second pass over the whole dict to sort each list once all names are in it - but for this file the runtime is the same.

Related

creating a list of lists from text file where the new list is based on a condition [duplicate]

This question already has answers here:
Sorting sub-lists into new sub-lists based on common first items
(4 answers)
Closed 2 years ago.
I have a text file that has lines in following order:
1 id:0 e1:"a" e2:"b"
0 id:0 e1:"4" e2:"c"
0 id:1 e1:"6" e2:"d"
2 id:2 e1:"8" e2:"f"
2 id:2 e1:"9" e2:"f"
2 id:2 e1:"d" e2:"k"
and I have to extract a list of lists containing elements (e1,e2) with id determining the index of the outer list and inner list following the order of the lines. So in the above case my output will be
[[("a","b"),("4","c")],[("6","d")],[("8","f"),("9","f"),("d","k")]]
The problem for me is that, to know where a new inner list begins, I need to check whether the id value has changed. Each id does not have a fixed number of elements. For example, id:0 has 2, id:1 has 1 and id:2 has 3. Is there an efficient way to check this condition on the next line while building the list?
You can use itertools.groupby() for the job:
import itertools
def split_by(
        items,
        key=None,
        processing=None,
        container=list):
    """Yield runs of consecutive *items* that share the same *key* value.

    Runs are found with ``itertools.groupby``. If *processing* is truthy
    it is applied lazily to every member of a run; if *container* is
    truthy the run is wrapped in it before being yielded, otherwise the
    underlying iterator is yielded as-is.
    """
    for _, members in itertools.groupby(items, key):
        run = map(processing, members) if processing else members
        yield container(run) if container else run
to be called as:
from operator import itemgetter
list(split_by(items, itemgetter(0), itemgetter(slice(1, None))))
The items can be easily generated from text above (assuming it is contained in the file data.txt):
def get_items(filename='data.txt'):
    """Yield one tuple of field values per non-blank line of *filename*.

    Each line is expected to look like ``1 id:0 e1:"a" e2:"b"``: double
    quotes are dropped, the first whitespace-separated token is skipped,
    and for every remaining ``name:value`` token only the value is kept.

    *filename* is now a parameter defaulting to ``data.txt``. Previously
    the function read an undefined global ``filename`` and raised
    NameError unless the caller happened to define one.
    """
    with open(filename, 'r') as file_obj:  # to read from `filename`
        for line in file_obj:
            if line.strip():
                vals = line.replace('"', '').split()
                yield tuple(val.split(':')[1] for val in vals[1:])
Finally, to test all the pieces (where open(filename, 'r') in get_items() is replaced by io.StringIO(text)):
import io
import itertools
from operator import itemgetter
text = """
1 id:0 e1:"a" e2:"b"
0 id:0 e1:"4" e2:"c"
0 id:1 e1:"6" e2:"d"
2 id:2 e1:"8" e2:"f"
2 id:2 e1:"9" e2:"f"
2 id:2 e1:"d" e2:"k"
""".strip()
print(list(split_by(get_items(), itemgetter(0), itemgetter(slice(1, None)))))
# [[('a', 'b'), ('4', 'c')], [('6', 'd')], [('8', 'f'), ('9', 'f'), ('d', 'k')]]
This efficiently iterates through the input without unnecessary memory allocation.
No other packages are required
Load and parse the file:
Beginning with a text file, formatted as shown in the question
# parse text file into dict
with open('test.txt', 'r') as f:
text = [line[2:].replace('"', '').strip().split() for line in f.readlines()] # clean each line and split it into a list
text = [[v.split(':') for v in t] for t in text] # split each value in the list into a list
d =[{v[0]: v[1] for v in t} for t in text] # convert liest to dicts
# text will appear as:
[[['id', '0'], ['e1', 'a'], ['e2', 'b']],
[['id', '0'], ['e1', '4'], ['e2', 'c']],
[['id', '1'], ['e1', '6'], ['e2', 'd']],
[['id', '2'], ['e1', '8'], ['e2', 'f']],
[['id', '2'], ['e1', '9'], ['e2', 'f']],
[['id', '2'], ['e1', 'd'], ['e2', 'k']]]
# d appears as:
[{'id': '0', 'e1': 'a', 'e2': 'b'},
{'id': '0', 'e1': '4', 'e2': 'c'},
{'id': '1', 'e1': '6', 'e2': 'd'},
{'id': '2', 'e1': '8', 'e2': 'f'},
{'id': '2', 'e1': '9', 'e2': 'f'},
{'id': '2', 'e1': 'd', 'e2': 'k'}]
Parse the list of dicts to expected output
Use .get to determine if a key exists, and return some specified value, None in this case, if the key is nonexistent.
dict.get defaults to None, so this method never raises a KeyError.
If None is a value in the dictionary, then change the default value returned by .get.
test.get(v[0], 'something here')
# Group the (e1, e2) tuples by id: the first value of each row dict is the
# key and the remaining values form the tuple.
test = dict()
for r in d:
    v = list(r.values())
    # Compare to None by identity; `== None` can be fooled by objects that
    # override equality.
    if test.get(v[0]) is None:
        test[v[0]] = [tuple(v[1:])]
    else:
        test[v[0]].append(tuple(v[1:]))
# test dict appears as:
{'0': [('a', 'b'), ('4', 'c')],
'1': [('6', 'd')],
'2': [('8', 'f'), ('9', 'f'), ('d', 'k')]}
# final output
final = list(test.values())
[[('a', 'b'), ('4', 'c')], [('6', 'd')], [('8', 'f'), ('9', 'f'), ('d', 'k')]]
Code Updated and reduced:
In this case, text is a list of lists, and there's no need to convert it to dict d, as above.
For each list t in text, index [0] is always the key, and index [1:] are the values.
with open('test.txt', 'r') as f:
text = [line[2:].replace('"', '').strip().split() for line in f.readlines()] # clean each line and split it into a list
text = [[v.split(':')[1] for v in t] for t in text] # list of list of only value at index 1
# text appears as:
[['0', 'a', 'b'],
['0', '4', 'c'],
['1', '6', 'd'],
['2', '8', 'f'],
['2', '9', 'f'],
['2', 'd', 'k']]
# Group rows by their first element; the rest of each row becomes a tuple.
test = dict()
for t in text:
    # Compare to None by identity; `== None` can be fooled by objects that
    # override equality.
    if test.get(t[0]) is None:
        test[t[0]] = [tuple(t[1:])]
    else:
        test[t[0]].append(tuple(t[1:]))
# Lists of tuples in the order their ids were first seen.
final = list(test.values())
Using defaultdict
Will save a few lines of code
Using text as a list of lists from above
from collections import defaultdict as dd
test = dd(list)
for t in text:
test[t[0]].append(tuple(t[1:]))
final = list(test.values())

Is a dictionary within a dictionary with any list values

I have a question regarding dictionaries and how to tell if a dictionary appears in another. In the example below, I wish to check if d1 appears in d2 which as shown below:
d1 = { 'a': '1', 'b': '2', 'c': '3' }
d2 = { 'a': '1', 'b': '2', 'c': '3', 'd': '4', 'e': '5' }
res = all(item in d2.items() for item in d1.items())
However, I have a case where I wish to be able to pass in a list of values and check that the values for a, b and any of the values of c appear in d2.
d1 = { 'a': '1', 'b': '2', 'c': ['3', '33', '333'] }
d2 = { 'a': '1', 'b': '2', 'c': '3', 'd': '4', 'e': '5' }
Any help would be appreciated.
If I understand correctly, in your 2nd example the result should be True, since '3' belongs to the list d1['c'] and d2['c'] == '3'. If I am right, then this might work:
d1 = {'a': '1', 'b': '2', 'c': ['3', '33', '333']}
d2 = {'a': '1', 'b': '2', 'c': '3', 'd': '4', 'e': '5'}
# Every key of d1 must appear in d2 with a matching value. A list value in
# d1 is a set of alternatives, any one of which may match; a plain value is
# treated as a single alternative.
res = all(
    any((key, option) in d2.items()
        for option in (value if isinstance(value, list) else [value]))
    for key, value in d1.items()
)
print(res)

Adding a string to an array adds all characters separately

Any suggestions on how to merge these better, so that the two-digit numbers do not get split?
Sorry for bad english.
def merge(strArr):
newList = []
for x in range(len(strArr)):
newList += strArr[x]
return newList
array_test = ["1, 3, 4, 7, 13", "1, 2, 4, 13, 15"]
print(merge(array_test))
output =['1', ',', ' ', '3', ',', ' ', '4', ',', ' ', '7', ',', ' ', '1', '3', '1', ',', ' ', '2', ',', ' ', '4', ',', ' ', '1', '3', ',', ' ', '1', '5']`
expected output= [1,2,3,4,7,13,1,2,4,13,15]
Using list comprehension:
merged_arr = [n for s in array_test for n in s.split(", ")]
print(merged_arr)
This prints:
['1', '3', '4', '7', '13', '1', '2', '4', '13', '15']
It merges this way because, for lists, += extends the list with every element of the right-hand iterable, and in this context your string is iterated as a sequence of characters:
[] += "Hello"
# Equivalent to
[] += ["H", "e", "l", "l", "o"]
If you want to join strings you can do:
out = "".join(array_test)
Your result becomes the way it is, because you take each inner string and add each character of it to your return-list without removing any spaces or commas.
You can change your code to:
def merge(strArr):
    """Split each string of *strArr* on ", " and return all pieces in one flat list."""
    # Splitting first keeps multi-character tokens such as "13" intact.
    return [token for inner in strArr for token in inner.split(", ")]
array_test =["1, 3, 4, 7, 13", "1, 2, 4, 13, 15"]
merged = merge(array_test)
as_int = list(map(int,merged))
print(merged)
print(as_int)
Output:
['1', '3', '4', '7', '13', '1', '2', '4', '13', '15']
[1, 3, 4, 7, 13, 1, 2, 4, 13, 15]
Without the as_int conversion you will still have strings in your list; you need to convert them into integers.

Assigning a new key to specific key-value pairs in a list of python dictionary

I have the following list of python dictionary:
x=[{'a': '1',
'b': '2',
'c': '3',
'd': '4',
'e': '5',
'f': '6',
'g': '7'}]
My desired output:
x=[{'g': '7', 'AA': {'a': '1', 'b': '2', 'c': '3', 'd': '4', 'e': '5', 'f': '6'}}]
I was able to assign a new key in the x list to specific key-value pairs, however, I'm appending it to the current list and getting the following output:
x=[{'a': '1',
'b': '2',
'c': '3',
'd': '4',
'e': '5',
'f': '6',
'AA':{'a':'1','b':'2','c':'3','d':'4','e':'5','f':'6'},
{'g':'7'}]
Eventually, I had to remove the keys a through f.
Current working solution:
for k in x:
k['AA'] = {u: k[u] for u in sorted(k.keys())[:6]}
rem_list = ['a', 'b', 'c', 'd', 'e', 'f']
for k in rem_list:
for v in x:
v.pop(k)
I need to simply assign a new key to the corresponding key-value pairs on the fly without looping through the list twice.
Why not just create a new dictionary? It's much safer, as it doesn't mutate the existing dict, which could cause other code using that dict to misbehave.
Try something like:
x=[{'a': '1',
'b': '2',
'c': '3',
'd': '4',
'e': '5',
'f': '6',
'g': '7'}]
new_dict = {'AA': {k: v for k, v in x[0].items() if k != 'g'}, 'g': x[0]['g']}

I want to find the highest number using import re .txt

I want to be able to just search for all digits in a text file and find the max number, using the re module. Where do I need to edit my code to accomplish this goal?
The random.txt looks something like the following:
Datq15UxkNwMN5zUQhd46J8WeF9RjAq214TlJiQ8EkZvmdOpmBOdd365mICKC67GGvqwbLqV2Gox3n3E5WC1Vq8C22lZ6sL3Ip24untQyw46g2219WlA07GP30PNvc8o3hCb2d283l68mh86RH6gDNbN7kIXmdO4a84hUz73905o3BlR71YCQF985JTz54FRoN32pM8N23YcYd7jv9Ys575UzaH9RZ7sosMdeqnTgnVt0bH99b2P5ilvJ33QaJ6G76VU8vPN
import re
with open('content.txt', 'r',) as f:
contents = f.read()
number = 0
pattern = re.compile(r'\d')
matches = pattern.finditer('content.txt')
for match in matches:
n = int(match)
if saved <= n:
number = int(match)
print(number)
the file just ran once and gave me the answer 0
Try this,
import re
with open('file1.txt', 'r') as f:
data = f.read()
list_of_numbers = re.findall(r'(?:[\d]+)',data)
list_of_numbers = map(int, list_of_numbers)
print(max(list_of_numbers))
Output:
73905 # max number
your list_of_numbers look like this
['15', '5', '46', '8', '9', '214', '8', '365', '67', '2', '3', '3', '5', '1',
'8', '22', '6', '3', '24', '46', '2219', '07', '30', '8', '3', '2', '283', '68',
'86', '6', '7', '4', '84', '73905', '3', '71', '985', '54', '32', '8', '23', '7',
'9', '575', '9', '7', '0', '99', '2', '5', '33', '6', '76', '8']
You are looking for matches within "content.txt" string so there is no match. Also a MatchObject can't be converted to int:
import re
with open('content.txt', 'r',) as f:
contents = f.read()
number = 0
pattern = re.compile(r'\d+')
matches = pattern.finditer(contents)
for match in matches:
n = int(match.group(0))
if number <= n:
number = int(match.group(0))
print(number)

Resources