python3, difflib SequenceMatcher - string

The following takes in two strings, compares them, and returns both their identical parts and their differences, separated by spaces (maintaining the length of the longest string).
The commented lines in the code are the 4 strings that should be returned.
from difflib import SequenceMatcher
def aligned_diff(a, b):
    """Split two strings into aligned "common" and "difference" views.

    The strings are compared with ``SequenceMatcher.get_opcodes()``. Every
    opcode contributes a column of the same width to all four outputs, so the
    results always have equal length:

    * in an ``equal`` region the common views carry the shared text and the
      difference views carry spaces;
    * in any other region the common views carry spaces and each difference
      view carries that string's own text, right-padded with spaces to the
      width of the longer side.

    Note that spaces already present in the inputs cannot be told apart from
    padding in the outputs.

    Args:
        a: First string.
        b: Second string.

    Returns:
        A tuple ``(common_a, common_b, diff_a, diff_b)`` of four strings.
        ``common_a`` and ``common_b`` are always identical.
    """
    common_a, common_b, diff_a, diff_b = [], [], [], []
    # autojunk=False: never treat frequent characters of long inputs as junk.
    matcher = SequenceMatcher(None, a, b, autojunk=False)
    for tag, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        part_a = a[a_start:a_end]
        part_b = b[b_start:b_end]
        width = max(len(part_a), len(part_b))
        padding = ' ' * width
        if tag == 'equal':
            common_a.append(part_a)
            common_b.append(part_b)
            diff_a.append(padding)
            diff_b.append(padding)
        else:
            common_a.append(padding)
            common_b.append(padding)
            diff_a.append(part_a.ljust(width))
            diff_b.append(part_b.ljust(width))
    return (''.join(common_a), ''.join(common_b),
            ''.join(diff_a), ''.join(diff_b))


t1 = 'betty: backstreetvboysareback"give.jpg"LAlarrygarryhannyhref="ang"_self'
t2 = 'bettyv: backstreetvboysareback"lifeislike"LAlarrygarryhannyhref="in.php"_self'

# t11 / t12: the text common to both strings, with spaces where they differ.
# o1 / o2:   what is specific to t1 / t2, with spaces where they agree.
# The segments are derived from however many opcodes the matcher reports, so
# this works for any pair of inputs instead of relying on a fixed number of
# matching blocks.
t11, t12, o1, o2 = aligned_diff(t1, t2)
The blocks are first arranged into an organised format, bla1 and bla2, where each difference is stored as a string with its start and end position, e.g. ['v', 33, 34], for each separate string. After this, I attempt to insert spaces to match the necessary length and separation, and this is where the code starts to break.
Please, could someone take a look?

I have worked through resolving this, and since no one has posted a response I will post my progress and solution. The following code is a work in progress: it worked well when dealing with variations that had a smaller offset, but began to break with larger differences, specifically in maintaining the spacing (offset) needed to match up the two strings.
from difflib import SequenceMatcher
import pdb
# Second attempt: collect the non-matching gaps that lie between the matching
# blocks reported by SequenceMatcher, then rebuild each input string with runs
# of '_' written where the gaps are.
# NOTE(review): indentation was lost in this copy, so the nesting of the
# statements below has to be reconstructed before the script can run.
t1 = 'betty: backstreetvboysareback"give.jpg"LAlarrygarryhannyhref="ang"_self'
t2 = 'betty: backstreetvboysareback"lol.jpg"LAlarrygarryhannyhref="ang"_self'
# Alternative inputs and the expected outputs noted by the author:
#t2 = 'bettyv: backstreetvboysareback"lifeislike"LAlarrygarryhannyhref="in.php"_selff'
#t2 = 'LA'
#t2 = 'c give.'
#t2 = 'give.'
#t1 = 'betty : backstreetvboysareback" i e "LAlarrygarryhannyhref=" n "_self'
#t2 = 'betty : backstreetvboysareback" i e "LAlarrygarryhannyhref=" n "_self'
#o1 = ' g v .jpg g '
#o2 = ' v l f islike i .php '
matcher = SequenceMatcher(None, t1, t2)
blocks = matcher.get_matching_blocks()
#print(len(blocks))
# bla1 / bla2 hold one five-element entry per gap, for t1 and t2 respectively.
bla1 = []
bla2 = []
#bla = (string), (first pos), (second pos), (pos1 + pos2), (pos + pos2 total positions added togeather)
# dnt records that a leading entry has already been appended, so the two
# checks on the first block cannot both add one.
dnt = False
for i in range(len(blocks)):
# Leading gap: the first matching block does not start at index 0.
# NOTE(review): the slices below run from blocks[i].a to blocks[i].b, i.e. from
# an index into t1 to an index into t2 - presumably t1[0:blocks[i].a] and
# t2[0:blocks[i].b] were intended; confirm.
if i == 0:
if blocks[i].a != 0 and dnt == False:
bla1.append([t1[blocks[i].a:blocks[i].b], 0, blocks[i].a, 0, 0])
bla2.append([t2[blocks[i].a:blocks[i].b], 0, blocks[i].b, 0, 0])
dnt = True
if blocks[i].b != 0 and dnt == False:
bla2.append([t2[blocks[i].a:blocks[i].b], 0, blocks[i].b, 0, 0])
bla1.append([t1[blocks[i].a:blocks[i].b], 0, blocks[i].a, 0, 0])
dnt = True
# Gap between the end of block i and the start of block i+1, in each string.
if i != len(blocks)-1:
print(blocks[i])
bla1.append([t1[blocks[i].a + blocks[i].size:blocks[i+1].a], blocks[i].a + blocks[i].size, blocks[i+1].a, 0, 0])
bla2.append([t2[blocks[i].b + blocks[i].size:blocks[i+1].b], blocks[i].b + blocks[i].size, blocks[i+1].b, 0, 0])
#pdb.set_trace()
# Fill entry[3] with the gap length (end - start) and entry[4] with the
# running total of gap lengths, first for bla1 and then for bla2.
ttl = 0
for i in range(len(bla1)):
cnt = bla1[i][2] - bla1[i][1]
if cnt != 0:
bla1[i][3] = cnt
ttl = ttl + cnt
bla1[i][4] = ttl
ttl = 0
for i in range(len(bla2)):
cnt = bla2[i][2] - bla2[i][1]
if cnt != 0:
bla2[i][3] = cnt
ttl = ttl + cnt
bla2[i][4] = ttl
print(bla1)
print(bla2)
# Rebuild t1 as tt1: matching text is copied and each gap is written as '_'
# repeated dif times, where dif is the larger of the two gap lengths at
# position i.
tt1 = ''
dif = 0
i = 0
while True:
if i == 0:
if bla1[i][3] >= bla2[i][3]: dif = bla1[i][3]
if bla1[i][3] < bla2[i][3]: dif = bla2[i][3]
tt1 += t1[:bla1[i][1]] + '_'*dif
if i <= len(bla1) -1:
if bla1[i][3] >= bla2[i][3]: dif = bla1[i][3]
if bla1[i][3] < bla2[i][3]: dif = bla2[i][3]
if len(bla1) != 1:
if i == 0: tt1 += t1[bla1[i][1] + bla1[i][3]:bla1[i+1][1]]
if i != 0 and i != len(bla1)-1: tt1 += '_'*dif + t1[bla1[i][1] + bla1[i][3]:bla1[i+1][1]]
if i == len(bla1)-1: tt1 += '_'*dif + t1[bla1[i][1] + bla1[i][3]:len(t1)]
i = i+1
print('t1 = ' + tt1)
else:
break
# Same reconstruction for t2, producing tt2.
tt2 = ''
i = 0
dif = 0
while True:
if i == 0:
if bla1[i][3] >= bla2[i][3]: dif = bla1[i][3]
if bla1[i][3] < bla2[i][3]: dif = bla2[i][3]
tt2 += t2[:bla2[i][1]] + '_'*dif
if i <= len(bla2) -1:
if bla1[i][3] >= bla2[i][3]: dif = bla1[i][3]
if bla1[i][3] < bla2[i][3]: dif = bla2[i][3]
if len(bla2) != 1:
if i == 0: tt2 += t2[bla2[i][1] + bla2[i][3]:bla2[i+1][1]]
# NOTE(review): the next condition tests len(bla1) while its neighbours test
# len(bla2); both lists get one entry per append pair above, so the lengths
# look equal - confirm.
if i != 0 and i != len(bla1)-1: tt2 += '_'*dif + t2[bla2[i][1] + bla2[i][3]:bla2[i+1][1]]
if i == len(bla2)-1: tt2 += '_'*dif + t2[bla2[i][1] + bla2[i][3]:len(t2)]
i = i+1
print('t2 = ' + tt2)
else:
break
print()
Solution:
Unfortunately I have been too busy to continue coding this and have resorted to sub-processing diffutils ... this is a wonderful alternative to a lot of painstaking coding!

Related

How to make dictionaries iterate faster

I wrote this code but it is very slow. Is there a way to make my code execute faster?
mirTable has 8789 rows and tgtTable has 9100 rows. It's taking 5 minutes to execute this code
# Compare the UPDATE_KEY of mirTable rows against tgtTable rows and, for rows
# flagged with m == 9, collect the row data, its key and a generated XML file
# name.
# NOTE(review): this is a fragment - mLen, tLen, row, curDate, f, p,
# xml_FileName, time and t0 are not defined in the lines shown, and the
# indentation was lost, so the nesting below is presumed, not certain.
mirTable = mirTable.to_dict()
tgtTable = tgtTable.to_dict()
# Presumably an outer loop over mirTable rows with an inner loop over tgtTable
# rows, i.e. mLen * tLen key comparisons.
for mn in range(mLen):
m = 0
for lk in range(tLen):
g = ""
h = ""
# NOTE(review): m is set to 9 when the keys DIFFER; with a nested scan this is
# true as soon as any target key differs - confirm the intended condition.
if mirTable['UPDATE_KEY'][mn] != tgtTable['UPDATE_KEY'][lk]:
m = 9
# g: first part of the file name, taken from the column named by
# row.INTERFACE_KEY, or from CONTACT_SRC_ID when that is None.
if row.INTERFACE_KEY == None:
g = str(mirTable['CONTACT_SRC_ID'][mn])
else:
g = row.INTERFACE_KEY
g = str(mirTable[g][mn])
# h: optional second part, prefixed with "_", from row.INTERFACE_KEY_2.
if row.INTERFACE_KEY_2 != None:
c = row.INTERFACE_KEY_2
h = "_" + str(mirTable[c][mn])
else:
h = None
# a: the resulting file name; h is only concatenated when INTERFACE_KEY_2 is set.
# NOTE(review): g, h and a read only mn and row, never lk, so they look
# loop-invariant with respect to the inner loop - confirm before hoisting.
a = ""
if row.INTERFACE_KEY_2 == None:
a = "drft_" + g + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"
else:
a = "drft_" + g + h + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"
if m == 9:
# Copy every column value of row mn into a dict and record key and file name.
f.append({i:mirTable[i][mn] for i in (mirTable.keys())})
p.append(mirTable['UPDATE_KEY'][mn])
xml_FileName.append(a)
# Elapsed time since t0 (set outside this excerpt).
t1 = time.time()
print(t1-t0)

Where should I put the count in this tim sort algorithm, to accurately compare runtime to other algorithms

I've written a Timsort sorting algorithm for a computer science class, and I would like to be able to compare its runtime to other similar algorithms, such as merge sort. However, I am not sure where I should put the count (i.e. count += 1) within the code to measure the runtime accurately. Any help would be much appreciated.
# Chunk size used by tim_sort: each RUN-sized slice is sorted with
# insertion_sort, and RUN is also the initial width of the merge passes.
RUN = 32
def insertion_sort(arr, left, right):
    """Sort ``arr[left:right + 1]`` in place with insertion sort.

    Args:
        arr: Mutable sequence of mutually comparable items.
        left: Index of the first element of the range to sort.
        right: Index of the last element of the range to sort (inclusive).

    Returns:
        None. ``arr`` is modified in place; elements outside the range are
        neither moved nor read.
    """
    for i in range(left + 1, right + 1):
        temp = arr[i]
        j = i - 1
        # Check the bound BEFORE indexing so arr[left - 1] (or arr[-1] when
        # left is 0) is never read or compared.
        while j >= left and arr[j] > temp:
            arr[j + 1] = arr[j]
            j -= 1
        # Drop the saved element into the hole left by the shifts.
        arr[j + 1] = temp
def merge(arr, left, right, count):
    """Merge two sorted lists into ``arr`` starting at index ``count``.

    Unlike a ``pop(0)``-based merge, this walks both inputs by index, so it
    runs in linear time and leaves ``left`` and ``right`` untouched. Ties are
    taken from ``left`` first, which keeps the merge stable.

    Args:
        arr: Destination list; ``len(left) + len(right)`` slots starting at
            ``count`` are overwritten.
        left: Sorted list of the first run.
        right: Sorted list of the second run.
        count: Index in ``arr`` where the merged output begins.

    Returns:
        None. ``arr`` is modified in place.
    """
    i = j = 0
    index = count
    n_left, n_right = len(left), len(right)
    while i < n_left and j < n_right:
        # "<=" (not "<") so equal items keep their left-before-right order.
        if left[i] <= right[j]:
            arr[index] = left[i]
            i += 1
        else:
            arr[index] = right[j]
            j += 1
        index += 1
    # At most one of the inputs still has items; copy them over as they are.
    remaining = left[i:] if i < n_left else right[j:]
    for value in remaining:
        arr[index] = value
        index += 1
def tim_sort(arr):
    """Sort ``arr`` in place and return it.

    Slices of ``RUN`` elements are first sorted with ``insertion_sort``;
    neighbouring sorted runs are then combined with ``merge``, doubling the
    run width after every pass until one run covers the whole list.

    Args:
        arr: List of mutually comparable items.

    Returns:
        The same list object, now sorted.
    """
    total = len(arr)

    # Pass 1: sort every RUN-sized slice on its own.
    for start in range(0, total, RUN):
        stop = min(start + RUN - 1, total - 1)
        insertion_sort(arr, start, stop)

    # Pass 2: merge adjacent runs of growing width.
    width = RUN
    while width < total:
        for lo in range(0, total, 2 * width):
            # Clamp both bounds to the list length; when no second run is
            # left, the right-hand slice is simply empty.
            mid = min(lo + width, total)
            hi = min(lo + 2 * width, total)
            merge(arr, arr[lo:mid], arr[mid:hi], lo)
        width *= 2
    return arr

Recursion in Python/Merge Sort/Call by Reference

My apologies, I'm new to Stack Overflow and Python. I've written code for Merge_Sort, but it isn't working: the array values are getting lost when returning from the recursive calls.
Coding Environment: Python3.x
OS: Linux ( Ubuntu 18.04)
Below is my code:
class sort:
    """Merge-sort helper that remembers a list and an index range."""

    def __init__(self, arr, m, n=None):
        """Store the list and its bounds.

        Python keeps only the last ``__init__`` defined in a class body, so
        the two ways of constructing the object are handled by one
        initializer: ``sort(arr, m, n)`` or ``sort(arr, n)``.

        Args:
            arr: The list to work on (stored as given, not copied).
            m: Lower bound; when ``n`` is omitted this argument is taken as
                ``n`` and the lower bound defaults to 0.
            n: Upper bound.
        """
        if n is None:
            m, n = 0, m
        self.arr = arr
        self.m = m
        self.n = n

    def Merge_sort(self, arr, first, last):
        """Return ``arr[first:last + 1]`` as a new, sorted list.

        The sorted halves come from the VALUES RETURNED by the recursive
        calls; ``arr`` itself is never modified, so re-reading slices of it
        after recursing would only yield the unsorted input again.

        Args:
            arr: List of mutually comparable items.
            first: Index of the first element to sort.
            last: Index of the last element to sort (inclusive).

        Returns:
            A new list with the elements of the range in ascending order. A
            range of zero or one element is returned as a plain slice.
        """
        # Base case: nothing to split.
        if first >= last:
            return arr[first:last + 1]

        mid = (first + last) // 2
        arrL = self.Merge_sort(arr, first, mid)
        arrR = self.Merge_sort(arr, mid + 1, last)
        print("Left Array: " + str(arrL))
        print("Right Array: " + str(arrR))

        arrN = []
        i = j = 0
        while i < len(arrL) and j < len(arrR):
            # "<=" takes ties from the left half first (stable).
            if arrL[i] <= arrR[j]:
                arrN.append(arrL[i])
                i += 1
            else:
                arrN.append(arrR[j])
                j += 1
        # One half is exhausted; the rest of the other is already sorted.
        arrN.extend(arrL[i:])
        arrN.extend(arrR[j:])
        print("Merged Array:" + str(arrN))
        return arrN
from Sort import sort
# Demo: run Merge_sort over the whole list.
arr = [7, 5, 4, 9, 3, 2, 0, 1, 6, 8]
sort4 = sort(arr, 0, len(arr))
# Merge_sort hands its result back as a return value and does not reorder the
# list passed in, so the result has to be captured rather than discarded.
sort4.arr = sort4.Merge_sort(arr.copy(), 0, len(arr) - 1)
print("Result: " + str(sort4.arr))
Input of the program: arr = [7, 5, 4 ,9, 3, 2 , 0, 1, 6, 8]
Output of the program: Left Array: [7, 5, 4, 9, 3] Right Array: [2, 0, 1, 6, 8]
Merged Array:[2, 0, 1, 6, 7, 5, 4, 8, 9, 3]
At the end of program it just seems to merge my original array.
Kindly suggest.
Just to notify you, the problem is resolved. I wasn't returning the values correctly. Below is my code just for the reference.
def Merge_sort(self,arr,first,last):
    """Return the elements ``arr[first..last]`` as a new sorted list.

    The range is split in the middle, both halves are sorted recursively and
    the two returned lists are merged. ``arr`` is only read, never modified.
    The left half, right half and merged result of every step are printed.

    Args:
        arr: List of mutually comparable items.
        first: Index of the first element of the range.
        last: Index of the last element of the range (inclusive).

    Returns:
        A list of ``last - first + 1`` slots holding the merged result, or the
        plain slice ``arr[first:last+1]`` when the range has fewer than two
        elements.
    """
    # A range of zero or one element is returned as it is.
    if not first < last:
        return arr[first:last+1]

    middle = int((first + last) / 2)
    left_half = self.Merge_sort(arr, first, middle)
    right_half = self.Merge_sort(arr, middle+1, last)
    print("Left Array: " + str(left_half))
    print("Right Array: " + str(right_half))

    # Fixed-size output buffer, filled from the front.
    merged = [0] * (last - first + 1)
    n_left = int(len(left_half))
    n_right = int(len(right_half))
    li = ri = out = 0
    while li < n_left and ri < n_right:
        if left_half[li] <= right_half[ri]:
            merged[out] = left_half[li]
            li += 1
        else:
            merged[out] = right_half[ri]
            ri += 1
        out += 1
    # Copy whatever is left of either half.
    for value in left_half[li:]:
        merged[out] = value
        out += 1
    for value in right_half[ri:]:
        merged[out] = value
        out += 1
    print("Merged Array:" + str(merged))
    return merged

Simple python loop doesn't work with many inner loops

I am struggling with a very simple loop that perfectly works if run "standalone", but doesn't work anymore if I use it as an outer loop for many other instructions (which also perfectly work if run standalone for only 1 iteration).
The simple outer loop is a
for i in range(0,somevalue):
do some inner instructions
Here is the full code, which perfectly works if I put a range of dimension 1, whilst it never ends if I put even a simple range of dimension 2:
import numpy as np
import numpy.ma as ma
import random
import matplotlib.pyplot as plt
# Randomly place "switch-on" cycles inside two allowed windows of a
# 1440-slot array until about func_time slots are used, then add the result
# to class_x and plot it.
# NOTE(review): indentation was lost in this copy; the nesting of the
# statements below has to be reconstructed before the script can run.
# `i = int` binds the type object itself; i is reassigned to 0 and then by the
# for loop further down.
i = int
x = np.zeros(1440)
class_x = np.zeros(1440)
# Two windows, as [start, stop) index pairs: 0..360 and 1200..1380.
w1 = np.array([0,6*60])
w2 = np.array([20*60,23*60])
# Slots inside the windows are marked with the value 0.001.
x[w1[0]:(w1[1])] = np.full(np.diff(w1),0.001)
x[w2[0]:(w2[1])] = np.full(np.diff(w2),0.001)
x_masked = np.zeros_like(ma.masked_not_equal(x,0.001))
# c is the lower bound used for a cycle length (see random.randint(c, ...)).
c = 10
# func_time is the total number of slots to fill per outer iteration.
func_time = 300
# NOTE(review): this binds the type `int`, not a number. max_free_spot is
# compared with c below before it receives a value at the end of the loop
# body; in Python 3 comparing a type with an int raises TypeError if that
# comparison is reached first - confirm and initialise with a number.
max_free_spot = int
i = 0
# NOTE(review): x and x_masked are created once above and are not re-created
# inside this loop, so slots filled in one iteration are still filled in the
# next - possibly why a range larger than 1 never finishes; confirm.
for i in range(0,1):
tot_time = 0
switch_count = 0
switch_ons = []
# NOTE(review): `while` and its condition are on separate lines in this copy,
# which is not valid Python; presumably `while tot_time <= func_time:`.
while
tot_time <= func_time:
# Pick a random start index from one of the two windows, at least c slots
# before the window's end.
switch_on = random.choice([random.randint(w1[0],(w1[1]-c)),random.randint(w2[0],(w2[1]-c))])
# Only continue when the chosen slot still holds the 0.001 marker.
if x[switch_on] == 0.001:
if switch_on in range(w1[0],w1[1]):
# Window 1: if some slot between switch_on and the window end no longer holds
# the marker, limit the cycle by the distance to the first such slot.
if np.any(x[switch_on:w1[1]]!=0.001):
next_switch = [switch_on + k[0] for k in np.where(x[switch_on:]!=0.001)]
if (next_switch[0] - switch_on) >= c and max_free_spot >= c:
upper_limit = min((next_switch[0]-switch_on),min(func_time,w1[1]-switch_on))
elif (next_switch[0] - switch_on) < c and max_free_spot >= c:
# Gap shorter than c while a free stretch of at least c exists: draw again.
continue
else: upper_limit = next_switch[0]-switch_on
else:
upper_limit = min(func_time,w1[1]-switch_on) #max random length of cycle
# Cycle length is random in [c, upper_limit], or exactly upper_limit when that
# is smaller than c.
if upper_limit >= c:
indexes = np.arange(switch_on,switch_on+(random.randint(c,upper_limit)))
else:
indexes = np.arange(switch_on,switch_on+upper_limit)
else:
# Window 2: same logic against w2.
# NOTE(review): the first condition here does not test max_free_spot, unlike
# the window-1 branch - confirm whether that difference is intended.
if np.any(x[switch_on:w2[1]]!=0.001):
next_switch = [switch_on + k[0] for k in np.where(x[switch_on:]!=0.001)]
if (next_switch[0] - switch_on) >= c:
upper_limit = min((next_switch[0]-switch_on),min(func_time,w2[1]-switch_on))
elif (next_switch[0] - switch_on) < c and max_free_spot >= c:
continue
else: upper_limit = next_switch[0]-switch_on
else:
upper_limit = min(func_time,w2[1]-switch_on)
if upper_limit >= c:
indexes = np.arange(switch_on,switch_on+(random.randint(c,upper_limit)))
else:
indexes = np.arange(switch_on,switch_on+upper_limit)
tot_time = tot_time + indexes.size
switch_ons.append(switch_on)
if tot_time > func_time:
# Overshoot: trim the cycle so the total equals func_time, write it and stop.
indexes_adj = indexes[:-(tot_time-func_time)]
coincidence = random.randint(1,5)
np.put(x_masked,indexes_adj,(2*coincidence),mode='clip')
np.put(x,indexes_adj,(2*coincidence))
x_masked = np.zeros_like(ma.masked_greater_equal(x_masked,0.001))
tot_time = (tot_time - indexes.size) + indexes_adj.size
switch_count = switch_count + 1
break
else:
# Cycle fits: write the value 2*coincidence into its slots.
coincidence = random.randint(1,5)
np.put(x_masked,indexes,(2*coincidence),mode='clip')
np.put(x,indexes,(2*coincidence))
x_masked = np.zeros_like(ma.masked_greater_equal(x_masked,0.001))
tot_time = tot_time
switch_count = switch_count + 1
# Lengths of the contiguous unmasked stretches of x_masked; the longest one
# becomes max_free_spot.
free_spots = []
for j in ma.notmasked_contiguous(x_masked):
free_spots.append(j.stop-j.start)
max_free_spot = max(free_spots)
class_x = class_x + x
plt.plot(class_x)
Any help is really really appreciated

How to create Complex Class in python which does all the functions related to Complex numbers like (add, subtract, multiply, divide etc)

Please help :/. How to create Complex Class in python which does all the functions related to Complex numbers like (add, subtract, multiply, divide etc). Also, other functions like power, norm etc.
Here you go!
from copy import *
from math import sqrt
class Complex:
    """Complex number with real part ``re`` and imaginary part ``im``.

    Supports ``+``, ``-``, ``*``, ``/``, ``==``, ``str()`` and ``norm()``.
    Defining ``__eq__`` without ``__hash__`` leaves instances unhashable,
    which suits an object whose attributes can be reassigned.
    """

    def __init__(self, re, im):
        """Store the real part ``re`` and the imaginary part ``im``."""
        self.re = re
        self.im = im

    def __repr__(self):
        """Return an unambiguous form such as ``Complex(2, 3)``."""
        return 'Complex(' + repr(self.re) + ', ' + repr(self.im) + ')'

    def __str__(self):
        """Return a compact algebraic form.

        Examples: ``0``, ``5``, ``i``, ``-i``, ``3i``, ``2+3i``, ``2-i``.
        A zero part is omitted and a coefficient of +/-1 on ``i`` is dropped.
        """
        real, imag = self.re, self.im
        if real == 0 and imag == 0:
            return '0'
        if imag == 0:
            return str(real)

        if imag == 1:
            imag_text = 'i'
        elif imag == -1:
            imag_text = '-i'
        else:
            imag_text = str(imag) + 'i'

        if real == 0:
            return imag_text
        # A negative imaginary part already carries its own '-' sign.
        joiner = '+' if imag > 0 else ''
        return str(real) + joiner + imag_text

    @staticmethod
    def _quotient(numerator, denominator):
        """Divide without losing precision.

        Returns an ``int`` whenever the exact quotient is a whole number and
        the true quotient otherwise, instead of truncating toward zero.
        Raises ``ZeroDivisionError`` when ``denominator`` is zero.
        """
        if (isinstance(numerator, int) and isinstance(denominator, int)
                and numerator % denominator == 0):
            return numerator // denominator
        result = numerator / denominator
        if isinstance(result, float) and result.is_integer():
            return int(result)
        return result

    def __add__(self, other):
        """Return ``self + other``."""
        return Complex(self.re + other.re, self.im + other.im)

    def __sub__(self, other):
        """Return ``self - other``."""
        return Complex(self.re - other.re, self.im - other.im)

    def __mul__(self, other):
        """Return ``self * other``: (a+bi)(c+di) = (ac-bd) + (ad+bc)i."""
        real = self.re * other.re - self.im * other.im
        imag = self.re * other.im + self.im * other.re
        return Complex(real, imag)

    def __truediv__(self, other):
        """Return ``self / other``.

        (a+bi)/(c+di) = ((ac+bd) + (bc-ad)i) / (c^2 + d^2). The parts are no
        longer truncated to integers, so (2+3i)/(4+5i) is 23/41 + 2/41 i
        rather than 0.

        Raises:
            ZeroDivisionError: If ``other`` is zero.
        """
        denom = other.re**2 + other.im**2
        real = self.re * other.re + self.im * other.im
        imag = self.im * other.re - self.re * other.im
        return Complex(self._quotient(real, denom), self._quotient(imag, denom))

    def __eq__(self, other):
        """Return True when both parts are equal.

        Objects without ``re``/``im`` attributes compare as not equal instead
        of raising ``AttributeError``.
        """
        try:
            return self.re == other.re and self.im == other.im
        except AttributeError:
            return NotImplemented

    def norm(self):
        """Return the modulus sqrt(re^2 + im^2).

        The result is an ``int`` when the modulus is a whole number (for
        example 5 for 3+4i) and a ``float`` otherwise; it is not truncated.
        """
        modulus = sqrt(self.re * self.re + self.im * self.im)
        return int(modulus) if modulus.is_integer() else modulus
def cpow(c, n):
    """Return ``c`` raised to the power ``n`` by repeated multiplication.

    Args:
        c: The Complex base.
        n: Number of multiplications to perform; for ``n <= 0`` the loop does
            not run and ``Complex(1, 0)`` is returned.

    Returns:
        A Complex holding the product of ``n`` copies of ``c``.
    """
    result = Complex(1, 0)
    for _ in range(n):
        result = result * c
    return result
# Demo, run only when the file is executed as a script. The guard has to
# compare the dunder name __name__ with '__main__'; the bare names `name` and
# 'main' raise NameError at import time.
if __name__ == '__main__':
    zero = Complex(0,0)
    one = Complex(1,0)
    iota = Complex(0,1)
    minus_one = Complex(-1, 0)
    minus_iota = Complex(0, -1)
    c1 = Complex(1,1)
    v = Complex(0,-1)
    x = Complex(2, 3)
    y = Complex(4, 5)
    z = x + y
    print(z)
    print(x-y)
    print(x*y)
    print(x/y)
    print(iota)
Hope this helps mate.

Resources