I want to select n pairs of word positions (unique) in a file and swap them. My code is as following, but it never gives me n pairs I am looking for.
My logic is first to sample 2*n elements and create n pairs. Then iterate over the file contents (stored in list) and locate the correct positions and make the swap. It is not working. I have difficulty in understanding where it fails.
with open(rname) as rd:
lines = rd.readlines()
for i in range(len(lines)):
lines[i] = lines[i].strip()
sz = sum(len(x.split()) for x in lines)
chosen = random.sample(range(sz),6000)
result = pairwise(chosen)
for (a,b) in result:
flag1 = 0
flag2 = 0
l = 0
ind1 = 0
m = 0
ind2 = 0
j = 0
for y in range(len(lines)):
if ( (j+len(lines[y].split()) >= a) and (flag1 == 0)):
l = y
ind1 = a - j-1
flag1 = 1
if ( (j+len(lines[y].split()) >= b) and (flag2 == 0)):
m = y
ind2 = b - j-1
flag2 = 1
if ( (flag1 == 1) and (flag2 == 1)):
words1 = lines[l].split()
words2 = lines[m].split()
words1[ind1], words2[ind2] = 'swapped'+words2[ind2], 'swapped'+words1[ind1]
lines[l] = ' '.join(words1)
lines[m] = ' '.join(words2)
break
j += len(lines[y].split())
name ='n_file.txt'
with open(wname,'w') as wd:
for line in lines:
print(line, file=wd)
def pairwise(iterable):
a, b = tee(iterable)
next(b, None)
return zip(a, b)
For example, consider a file with content
i am here, where are you
it is none of your business
Let us say i choose 2 positions (2,8) (position is defined in terms of the word positions in the file and start with 0 for the first word in the file). My output will look like
i am swappednone where are you
it is swappedhere, of your business
Not a complete solution, but this might give you some ideas:
import random
def swap_pairs(items,n):
"""swaps n distinct pairs of objects in items"""
k = len(items)
indices = random.sample(range(k),2*n)
random.shuffle(indices)
for i in range(n):
s, t = indices[2*i], indices[2*i+1]
items[s], items[t] = items[t], items[s]
This mutates the list in place.
For example, if
words = ['i', 'am', 'here,', 'where', 'are', 'you', 'it', 'is', 'none', 'of', 'your', 'business']
then after
swap_pairs(words,3)
words might be
['you', 'am', 'it', 'none', 'are', 'i', 'here,', 'is', 'where', 'of', 'your', 'business']
Related
I have written two algorithms for creating unique mazes, one of them using depth-first-search (DFS) and the other using Kruskal's. The DFS algorithm performs as expected, however Kruskal's algorithm runs marginally slower than DFS and I do not know why.
I had written Kruskal's algorithm in Python.
I suspect the random.choice() function seems to be the underlying problem. The difference in runtime becomes noticeable when (r, c) > 30.
Here is the code for Kruskal's algorithm:
# Create a list of all possible edges
def create_edges(r, c):
edges = []
for y in range(r):
for x in range(c):
i = (y, x)
for d in ((0, 1), (0, -1), (1, 0), (-1, 0)):
p = tuple(map(sum, zip(d, i)))
py = p[0]
px = p[1]
if px in range(c) and py in range(r):
edges.append([i, p])
return edges
def kruskal(r, c, sz):
path = []
# Create a list of parent root nodes
roots = {(y, x) : [(y, x)] for y in range(r) for x in range(c)}
edges = create_edges(r, c)
while edges:
# Choose a random edge
edge = random.choice(edges)
parent = edge[0]
child = edge[1]
parent_set = get_set(roots, parent)
child_set = get_set(roots, child)
# Check if the parent / child are already in the same set
if parent_set == child_set:
rev_edge = edge.reverse()
if rev_edge in edges:
edges.remove(rev_edge)
edges.remove(edge)
continue
roots[parent_set] += roots[child_set]
roots.pop(child_set)
path.extend((parent, child))
rev_edge = edge.reverse()
if rev_edge in edges:
edges.remove(rev_edge)
edges.remove(edge)
return path
def get_set(roots, member):
s = None
for parent, children in roots.items():
if member in children:
s = parent
return s
def create_maze(t, r, c, sz):
maze = [['|_' for _ in range(c)] for _ in range(r)]
for cell in maze: cell.append('| ')
wd = {'DOWN' : ( 1, 0),
'UP' : (-1, 0),
'LEFT' : ( 0, -1),
'RIGHT': ( 0, 1)}
for n in range(len(t) - 1):
a = n
b = n + 1
p1 = t[a]
p2 = t[b]
ay, ax = p1[0], p1[1]
by, bx = p2[0], p2[1]
w = tuple(numpy.array(p2) - numpy.array(p1))
if w in wd.values():
k = list(wd.keys())[list(wd.values()).index(w)]
if k == 'DOWN': maze[ay][ax] = maze[ay][ax].replace('_', ' ')
if k == 'UP': maze[by][bx] = maze[by][bx].replace('_', ' ')
if k == 'LEFT': maze[ay][ax] = maze[ay][ax].replace('|', ' ')
if k == 'RIGHT': maze[by][bx] = maze[by][bx].replace('|', ' ')
return maze
def print_maze(maze, r, c, delay = 0):
s, l = min((r, c)), max((r, c))
a = 1 / (4 * r * c)
e = (1 / (s * l)) ** 2
delay = (a * 2.718 ** (-1 * e)) ** 0.5
time.sleep(delay)
print(' _' * c)
for iy in range(r):
for ix in range(c + 1):
print(maze[iy][ix], end = '')
print('')
print('')
def main():
r = 30
c = 30
sz = r * c
path = kruskal(r, c, sz)
maze = create_maze(path, r, c, sz)
print_maze(maze, r, c)
if __name__ == "__main__":
main()
I read about the error and try to cast map into list, but the error still appeared, I will show you the main file that contain the error.
def power(L, C, Erange):
assert len(L) == len(C), "The L and C must be corresponded to each other"
E = copy.deepcopy(Erange)
E[0] -= 1
power_table = dict()
for c in set(C): # for each type of class
first = [index for index, eachc in enumerate(C) if eachc == c]
rest = [index for index, eachc in enumerate(C) if eachc != c]
p_first = len(first) / len(L)
p_rest = len(rest) / len(L)
powerc = []
for u, v in zip(E[0:-1], E[1:]): # checking the range (u,v]
like_first = sum([1 for i in first if u < L[i] <= v]) / len(first) * p_first
like_rest = sum([1 for i in rest if u < L[i] <= v]) / len(rest) * p_rest
try:
powerc.append((like_first ** 2 / (like_first + like_rest)))
except ZeroDivisionError:
powerc.append(0)
power_table[c] = powerc
power = []
for l, c in zip(L, C):
for e_cursor in range(len(E)):
if E[e_cursor] >= l: break
power.append(round(power_table[c][e_cursor - 1], 2))
return power
def cliff_core(data, percentage, obj_as_binary, handled_obj=False):
if len(data) < 50:
logging.debug("no enough data to cliff. return the whole dataset")
return range(len(data))
classes = map(toolkit.str2num, zip(*data)[-1])
if not handled_obj:
if obj_as_binary:
classes = [1 if i > 0 else 0 for i in classes]
else:
classes = toolkit.apply_bin_range(classes)
data_power = list()
for col in zip(*data):
col = map(toolkit.str2num, col)
E = toolkit.binrange(col)
data_power.append(power(col, classes, E))
data_power = map(list, zip(*data_power)) # transposing the data power
row_sum = [sum(row) for row in data_power]
index = range(len(data))
zips = zip(data, classes, row_sum, index)
output = list()
for cls in set(classes):
matched = filter(lambda z: z[1] == cls, zips)
random.shuffle(matched)
matched = sorted(matched, key=lambda z: z[2], reverse=True)
if len(matched) < 5:
output.extend([m[3] for m in matched]) # all saved
continue
for i in range(int(len(matched) * percentage)):
output.append(matched[i][3])
return sorted(output)
def cliff(attribute_names,data_matrix,independent_attrs,objective_attr,objective_as_binary=False,
cliff_percentage=0.4):
ori_attrs, alldata = attribute_names, data_matrix # load the database
alldata_t = map(list, zip(*alldata))
valued_data_t = list()
for attr, col in zip(ori_attrs, alldata_t):
if attr in independent_attrs:
valued_data_t.append(col)
valued_data_t.append(alldata_t[attribute_names.index(objective_attr)])
alldata = map(list, zip(*valued_data_t))
alldata = map(lambda row: map(toolkit.str2num, row), alldata) # numbering the 2d table
after_cliff = cliff_core(alldata, cliff_percentage, objective_as_binary)
res = [data_matrix[i] for i in after_cliff]
return res
For example, I have 2 lists:
list1 = [6,6,6,6,6,6,6]
list2 = [0,2,4]
If there are the same indexes in the list1 and list2, I need to remove these indexes from the list1, because I should sum the unique indexes from the list1, for example:
a = [1,2,3,4,5]
b = [0,2,4]
x = [a.index(i) for i in a]
y = [b.index(j) for j in b]
for idx in y:
if idx in x:
x.remove(idx)
print(sum(x))
printed is >> 7
I tried this but did not work if there are the same values in list1
a = [6,6,6,6,6,6,6]
b = [0,2,4]
x = [a.index(i) for i in a]
y = [b.index(j) for j in b]
for idx in y:
if idx in x:
x.remove(idx)
printed is >> 0
Indexes and values are different. There will never be the same index twice in one list. You get their index by their value, however index(value) function gives you the first index which matches your value. Have a look at:
a, b, x = [1,2,3,4,5,6,7], [1,2,3], 0
c, d = len(a), len(b)
if d < c:
d, c = len(a), len(b)
for i in range(c, d):
x += i
print(x)
Your question is not very clear, so here are two answers:
If you want to sum the elements from the first list that do not appear in the second list, here is a way to do it:
a = [1,2,3,4,5]
b = [0,2,4]
# We create a set in order to have O(1) operations to check if an element is in b
b_set = set(b)
# We sum on the values of a that are not in b
res = sum(x for x in a if x not in b_set)
print(res)
>>> 9
If you want to sum the elements of the first list that do not have their rank/index in the second list, a way to do that could be:
a = [1,2,3,4,5]
b = [0,2,4]
# We create a set in order to have O(1) operations to check if an element is in b
b_set = set(b)
# We sum on the values of a that don't have their rank/index in b
res = sum(x for (i, x) in enumerate(a) if i not in b_set)
print(res)
>>> 6
I want to change the first /E after /V to X and the second /E to Y and the third /E to Z in text files.
/V may be followed by a word other than /E
The number of /E is from 1 to 3.
For example, the following string
AA/V+BB/E+CC/E+DD/E
should be changed as follows:
AA/V+X+Y+Z
This is my code and the results are correct. But I think it's inefficient.
s1 = '1/VV+12/ER+123/EED+1234/EC'
s2 = 'GG/K+AA/V+BB/E+FF/E+CC/E'
for s in [s1, s2]:
morphs = s.split('+')
lst = []
l = ['X','Y','Z']
i = 0
flag = False
while i < len(morphs):
lst.append(morphs[i])
if '/V' in morphs[i]:
for j in range(1, len(morphs)-i):
if '/E' in morphs[i+j]:
lst.append(l.pop(0))
flag = True
else:
lst.append(morphs[i+j])
i = i+j
flag = False
break
if flag:
break
i+=1
print('+'.join(lst))
Results:
1/VV+X+Y+Z
GG/K+AA/V+X+Y+Z
A simple way o do the same will be get the indices of \v and all the ,\e and then just check whether the indices of which e is more than that of V,
If it's so, then replace the E's accordingly as you said you can only have 1,2 or 3 E's in your text..
You can easily implement it in Python:
def format_string(s):
start_index = s.index('/V')
change_to = ['X', 'Y', 'Z']
for c in change_to:
if '/E' in s:
target_index = s.index('/E')
if target_index>start_index:
s = s[:target_index-2] + c + s[target_index + 2:]
return s
s1 = 'AA/V+BB/E+CC/E+DD/E'
s2 = 'AA/V+BB/E+FF/F'
expected1 = 'AA/V+X+Y+Z'
expected2 = 'AA/V+X+FF/F'
print format_string(s1)==expected1
print format_string(s2)==expected2
Essentially what I want the function to do is this:
Take an integer input and save it as n
Print a list of vectors with two entries (a,b), where
For example, when I input n = 443889, I should get an output of [(76,17),(38,73)], because the only two solutions to this problem are: , and
But with my code, when I give the input n=443889, I get the output [(76, 17), (75, 28), (74, 34), (73, 38), (72, 41)], even though some of these vectors doesn't give a solution to my equation.
def ramanujans(n):
lista = []
counter = 0
for a in range(1,n):
b = (n- (a**3))**(1/3)
result = a**3 + b**3
if isinstance(b,complex):
break
elif result == n:
b = int(round(b))
lista.insert(0,(a, b))
return (lista)
with a little different checking for complex results and a different check if result == n (integer comparison only) i seem to be getting the correct results:
def ramanujans(n):
res = []
for a in range(1, n):
s = n - a**3
if s < 0:
break
b = round(s**(1/3))
result = a**3 + b**3
if result == n:
res.append((a, b))
return res
with:
[(17, 76), (38, 73), (73, 38), (76, 17)]
as results for n=443889
you could stop the loop earlier; if a is around (n/2)**(1/3) you just get the results you already have with the a and b interchanged; this could then look like (did not carefully check the edge cases...):
from math import ceil
def ramanujans(n):
res = []
limit = ceil(((n/2)**(1/3)))
for a in range(1, limit+1):
s = n - a**3
b = round(s**(1/3))
result = a**3 + b**3
if result == n:
if a <= b: # this is to cover the edge cases...
res.append((a, b))
return res
print(ramanujans(n=443889)) # [(17, 76), (38, 73)]
print(ramanujans(n=2000)) # [(10, 10)]
print(ramanujans(n=1729)) # [(1, 12), (9, 10)]
and would only return 'half' the results.