I have two files, students and grades, that have to be read and converted to dictionaries and finally printed as a PrettyTable:
Grades:
10103 SSW 567 A 98765
10103 SSW 564 A- 98764
10103 CS 501 B 98764
10115 SSW 567 A 98765
10115 SSW 564 B+ 98764
Students:
10103 Baldwin, C SFEN
10115 Wyatt, X SFEN
Below is the code to do that:
from collections import defaultdict
from prettytable import PrettyTable
import os
# Holds student and grade records read from tab-separated text files and
# renders them as a PrettyTable.
class University:
def __init__(self,path):
# NOTE(review): `path` is accepted but not stored on the instance in this
# listing, while gg() below reads self.path.
self.students=dict()
self.instructors=dict()
# Each key maps to a *list* of grade records (one dict per grades.txt line).
self.grades=defaultdict(list)
def gg(self):
# Every entry of the directory is opened; only students.txt and grades.txt
# are actually parsed.
for filename in os.listdir(self.path):
with open(os.path.join(self.path, filename), 'r') as f:
if filename=="students.txt":
for line in f:
# Expects exactly three tab-separated fields per line.
a,b,c = line.split("\t")
self.students[a] = {
"name": b, "major": c}
elif filename=="grades.txt":
for line in f:
# Expects exactly four tab-separated fields; note that this rebinds `f`,
# the name that also refers to the open file.
d,e,f,g = line.split("\t")
self.grades[d].append({
"course": e, "grade": f,"instructor":g})
pt: PrettyTable = PrettyTable(
field_names=[
'CWID',
'Name',
'cc',])
# Pair every student with the grade list stored under the same key.
for i,j in self.students.items():
for x,y in self.grades.items():
if i==x:
# y is the list of grade dicts for this key, so y["course"] indexes a list
# with a string and raises the TypeError quoted below.
pt.add_row([i,j["name"],y["course"]])
return pt
a = University("C://Users/Swayam/Documents/Downloads")
c = a.gg()
print(c)
The output is supposed to be:
10103 Baldwin,C SSW567,SSW564,CS501
10115 Wyatt, X SSW 567,SSW564
When I run the above code I get the below error:
TypeError: list indices must be integers or slices, not str
I am not supposed to use the variables in the add_row line and I know that, but what can I replace it with so I get the output.
The issue with your code is that y is a list:
for i,j in self.students.items():
for x,y in self.grades.items():
print(type(y))
>>> <class 'list'>
This is because of your code where you define grades:
self.grades=defaultdict(list)
So, you need some extra code to go through the list of dictionaries:
courses = ",".join(h["course"] for h in y)
And so, the final code becomes:
from collections import defaultdict
from prettytable import PrettyTable
import os
class University:
    """Load student and grade records from a directory and tabulate them.

    The directory is expected to contain tab-separated ``students.txt``
    (CWID, name, major) and ``grades.txt`` (CWID, course, grade, instructor)
    files. Any other directory entry is ignored.
    """

    def __init__(self, path):
        """Remember the data directory and create the empty record stores."""
        self.students = dict()
        self.instructors = dict()
        # CWID -> list of {"course", "grade", "instructor"} dicts.
        self.grades = defaultdict(list)
        self.path = path

    def _load(self):
        """Parse students.txt and grades.txt from ``self.path`` if present."""
        for filename in os.listdir(self.path):
            # Only open the two files we understand: opening every directory
            # entry fails on sub-directories and unreadable files.
            if filename not in ("students.txt", "grades.txt"):
                continue
            with open(os.path.join(self.path, filename), "r") as handle:
                for line in handle:
                    # Drop the line terminator so it does not end up inside
                    # the last field ("major" / "instructor").
                    line = line.rstrip("\n")
                    if not line:
                        continue
                    if filename == "students.txt":
                        cwid, name, major = line.split("\t")
                        self.students[cwid] = {"name": name, "major": major}
                    else:
                        cwid, course, grade, instructor = line.split("\t")
                        self.grades[cwid].append(
                            {
                                "course": course,
                                "grade": grade,
                                "instructor": instructor,
                            }
                        )

    def gg(self):
        """Read the data files and return a PrettyTable of CWID, name, courses.

        Students without any grade record are left out of the table.
        """
        self._load()
        pt: PrettyTable = PrettyTable(
            field_names=[
                "CWID",
                "Name",
                "cc",
            ]
        )
        for cwid, student in self.students.items():
            # .get() avoids creating empty defaultdict entries for students
            # that have no grades.
            records = self.grades.get(cwid)
            if not records:
                continue
            courses = ",".join(record["course"] for record in records)
            pt.add_row([cwid, student["name"], courses])
        return pt
a = University("C://Users/Swayam/Documents/Downloads")
c = a.gg()
print(c)
I get this output when running the above code:
+-------+------------+------------------------+
| CWID | Name | cc |
+-------+------------+------------------------+
| 10103 | Baldwin, C | SSW 567,SSW 564,CS 501 |
| 10115 | Wyatt, X | SSW 567,SSW 564 |
+-------+------------+------------------------+
Note however that I did have to change:
The split you were using to regex split since \t wasn't getting recognised for me
Added self.path = path to your __init__ method
Of course, the path too
Related
Alien Dictionary
Link to the online judge -> LINK
Given a sorted dictionary of an alien language having N words and k starting alphabets of standard dictionary. Find the order of characters in the alien language.
Note: Many orders may be possible for a particular test case, thus you may return any valid order and output will be 1 if the order of string returned by the function is correct else 0 denoting incorrect string returned.
Example 1:
Input:
N = 5, K = 4
dict = {"baa","abcd","abca","cab","cad"}
Output:
1
Explanation:
Here order of characters is
'b', 'd', 'a', 'c' Note that words are sorted
and in the given language "baa" comes before
"abcd", therefore 'b' is before 'a' in output.
Similarly we can find other orders.
My working code:
from collections import defaultdict
# Derives a character order from a sorted word list with a DFS-based
# topological sort.
class Solution:
def __init__(self):
# Adjacency list: vertList[u] holds the characters recorded as coming after u.
self.vertList = defaultdict(list)
def addEdge(self,u,v):
self.vertList[u].append(v)
# Post-order DFS: a vertex is appended to `stack` only after every
# unvisited successor has been processed.
def topologicalSortDFS(self,givenV,visited,stack):
visited.add(givenV)
# Indexing the defaultdict creates an empty entry for a vertex that has no
# outgoing edges yet.
for nbr in self.vertList[givenV]:
if nbr not in visited:
self.topologicalSortDFS(nbr,visited,stack)
stack.append(givenV)
# NOTE(review): N and K are not used anywhere in this method, so only
# characters that take part in at least one edge can appear in the result.
def findOrder(self,dict, N, K):
list1 = dict
# Compare each pair of adjacent words; the first position where they differ
# yields one edge word1[j] -> word2[j].
for i in range(len(list1)-1):
word1 = list1[i]
word2 = list1[i+1]
rangej = min(len(word1),len(word2))
for j in range(rangej):
if word1[j] != word2[j]:
u = word1[j]
v = word2[j]
self.addEdge(u,v)
break
stack = []
visited = set()
# Snapshot the keys first: the DFS indexes the defaultdict and may add keys
# while it runs.
vlist = [v for v in self.vertList]
for v in vlist:
if v not in visited:
self.topologicalSortDFS(v,visited,stack)
# Reversed post-order, joined with single spaces.
result = " ".join(stack[::-1])
return result
#{
# Driver Code Starts
#Initial Template for Python 3
class sort_by_order:
    """Sort words according to a custom ordering of symbols."""

    def __init__(self, s):
        """Rank every symbol of *s* by its position in *s*.

        A symbol that occurs more than once keeps its last position.
        """
        self.priority = {symbol: rank for rank, symbol in enumerate(s)}

    def transform(self, word):
        """Map *word* onto the standard alphabet so plain sorting applies.

        Raises KeyError for a symbol that has no rank.
        """
        base = ord('a')
        return ''.join(chr(base + self.priority[symbol]) for symbol in word)

    def sort_this_list(self, lst):
        """Sort *lst* in place by the custom ordering."""
        lst.sort(key=self.transform)
if __name__ == '__main__':
    # Judge driver: for each test case, read "N K" and the word list, ask
    # Solution for an order, and check that re-sorting the words with that
    # order reproduces the given list.
    test_cases = int(input())
    for _ in range(test_cases):
        header = input().strip().split()
        n, k = int(header[0]), int(header[1])
        alien_dict = input().strip().split()
        duplicate_dict = list(alien_dict)
        order = Solution().findOrder(alien_dict, n, k)
        sort_by_order(order).sort_this_list(duplicate_dict)
        print(1 if duplicate_dict == alien_dict else 0)
My problem:
The code runs fine for the test cases that are given in the example but fails for ["baa", "abcd", "abca", "cab", "cad"]
It throws the following error for this input:
Runtime Error:
Runtime ErrorTraceback (most recent call last):
File "/home/e2beefe97937f518a410813879a35789.py", line 73, in <module>
x.sort_this_list(duplicate_dict)
File "/home/e2beefe97937f518a410813879a35789.py", line 58, in sort_this_list
lst.sort(key = self.transform)
File "/home/e2beefe97937f518a410813879a35789.py", line 54, in transform
new_word += chr( ord('a') + self.priority[c] )
KeyError: 'f'
Running in some other IDE:
If I explicitly give this input using some other IDE then the output I'm getting is b d a c
Interesting problem. Your idea is correct: the constraints form a partially ordered set, so you can build a directed acyclic graph and find an ordered list of vertices using topological sort.
Your program fails because some letters may never be added to your vertList: a letter that takes part in no ordering constraint never becomes a vertex.
Spoiler: adding the following line somewhere in your code solves the issue
vlist = [chr(ord('a') + v) for v in range(K)]
A simple failing example
Consider the input
2 4
baa abd
This will determine the following vertList
{"b": ["a"]}
The only constraint is that b must come before a in this alphabet. Your code returns the alphabet b a; since the letter d is not present, the driver code will produce an error when trying to check your solution. In my opinion it should simply output 0 in this situation.
Initial question
I want to calculate the Levenshtein distance between multiple strings, one in a series, the other in a list. I tried my hands on map, zip, etc., but I only got the desired result using a for loop and apply. Is there a way to improve style and especially speed?
Here is what I tried; it does what it is supposed to do, but it is too slow for a large series.
import pandas as pd  # `pd` is used below but was never imported
import stringdist

strings = ['Hello', 'my', 'Friend', 'I', 'am']
s = pd.Series(data=strings, index=strings)
c = ['me', 'mine', 'Friend']

# Build the distance table one column at a time: column w holds the
# Levenshtein distance from every word in s to w.
df = pd.DataFrame()
for w in c:
    df[w] = s.apply(lambda x: stringdist.levenshtein(x, w))
## Result: ##
me mine Friend
Hello 4 5 6
my 1 3 6
Friend 5 4 0
I 2 4 6
am 2 4 6
Solution
Thanks to @Dames and @molybdenum42, I can provide the solution I used, directly beneath the question. For more insights, please check their great answers below.
from itertools import product

import numpy as np  # `np` and `pd` are used below but were never imported
import pandas as pd
import stringdist

strings = ['Hello', 'my', 'Friend', 'I', 'am']
s = pd.Series(data=strings, index=strings)
c = ['me', 'mine', 'Friend']

# All (word from s, word from c) pairs; the word from c varies fastest.
word_combinations = np.array(list(product(s.values, c)))
vectorized_levenshtein = np.vectorize(stringdist.levenshtein)
result = vectorized_levenshtein(word_combinations[:, 0],
                                word_combinations[:, 1])
# One row per word in s, one column per word in c.
result = result.reshape((len(s), len(c)))
df = pd.DataFrame(result, columns=c, index=s)
This results in the desired data frame.
Setup:
import stringdist
import pandas as pd
import numpy as np
import itertools
s = pd.Series(data=['Hello', 'my', 'Friend'],
index=['Hello', 'my', 'Friend'])
c = ['me', 'mine', 'Friend']
Options
option: an easy one-liner
# A dict of Series yields one labeled column per candidate word; a plain list
# of Series would instead produce one unlabeled row per word.
df = pd.DataFrame({w: s.apply(lambda x: stringdist.levenshtein(x, w)) for w in c})
option: np.fromfunction (thanks to @baccandr)
# np.fromfunction hands the callable whole index grids, so the function has to
# be vectorized to be applied element by element.
@np.vectorize
def lavdist(a, b):
    """Return the Levenshtein distance between c[a] and the b-th entry of s."""
    # .iloc makes the lookup positional; s is indexed by strings.
    return stringdist.levenshtein(c[a], s.iloc[b])


# The matrix is built with one row per word in c and one column per word in s,
# so label it that way first and transpose to get rows = s, columns = c.
df = pd.DataFrame(np.fromfunction(lavdist, (len(c), len(s)), dtype=int),
                  index=c, columns=s).T
option: see @molybdenum42
# All (word from s, word from c) pairs.
word_combinations = np.array(list(itertools.product(s.values, c)))
vectorized_levenshtein = np.vectorize(stringdist.levenshtein)
result = vectorized_levenshtein(word_combinations[:,0], word_combinations[:,1])
# One row per (word from s, word from c, distance) triple. Column 0 must be the
# word from s: using column 1 twice duplicates the index and makes
# .unstack() fail.
df = pd.DataFrame([word_combinations[:,0], word_combinations[:,1], result]).T
df = df.set_index([0,1])[2].unstack()
(the best) option: modified option 3
# All (word from s, word from c) pairs; the word from c varies fastest.
word_combinations = np.array(list(itertools.product(s.values, c)))
# Uses stringdist like the other options; the Setup section does not import
# `distance`.
vectorized_levenshtein = np.vectorize(stringdist.levenshtein)
result = vectorized_levenshtein(word_combinations[:,0], word_combinations[:,1])
# One row per word in s, one column per word in c.
result = result.reshape((len(s), len(c)))
df = pd.DataFrame(result, columns=c, index=s)
Performance testing:
import timeit
from Levenshtein import distance
import pandas as pd
import numpy as np
import itertools
s = pd.Series(data=['Hello', 'my', 'Friend'],
index=['Hello', 'my', 'Friend'])
c = ['me', 'mine', 'Friend']
test_code0 = """
df = pd.DataFrame()
for w in c:
df[w] = s.apply(lambda x: distance(x, w))
"""
test_code1 = """
df = pd.DataFrame({w:s.apply(lambda x: distance(x, w)) for w in c})
"""
# Benchmark snippet for the np.fromfunction variant. The decorator is required:
# np.fromfunction passes index arrays, which lavdist can only handle when it is
# vectorized.
test_code2 = """
@np.vectorize
def lavdist(a, b):
    return distance(c[a], s[b])
df = pd.DataFrame(np.fromfunction(lavdist, (len(c), len(s)), dtype = int),
                  columns=c, index=s)
"""
test_code3 = """
word_combinations = np.array(list(itertools.product(s.values, c)))
vectorized_levenshtein = np.vectorize(distance)
result = vectorized_levenshtein(word_combinations[:,0], word_combinations[:,1])
df = pd.DataFrame([word_combinations[:,1], word_combinations[:,1], result])
df = df.set_index([0,1])[2] #.unstack() produces error
"""
test_code4 = """
word_combinations = np.array(list(itertools.product(s.values, c)))
vectorized_levenshtein = np.vectorize(distance)
result = vectorized_levenshtein(word_combinations[:,0], word_combinations[:,1])
result = result.reshape((len(s), len(c)))
df = pd.DataFrame(result, columns=c, index=s)
"""
test_setup = "from __main__ import distance, s, c, pd, np, itertools"
print("test0", timeit.timeit(test_code0, number = 1000, setup = test_setup))
print("test1", timeit.timeit(test_code1, number = 1000, setup = test_setup))
print("test2", timeit.timeit(test_code2, number = 1000, setup = test_setup))
print("test3", timeit.timeit(test_code3, number = 1000, setup = test_setup))
print("test4", timeit.timeit(test_code4, number = 1000, setup = test_setup))
Results
# results
# test0 1.3671939949999796
# test1 0.5982696900009614
# test2 0.3246431229999871
# test3 2.0100400850005826
# test4 0.23796007100099814
Using itertools, you can at least get all the required combinations. Using a vectorized version of stringdist.levenshtein (made using numpy.vectorize()) you can then get your desired result without looping at all, though I haven't tested the performance of the vectorized levenshtein function.
The code could look something like this:
import stringdist
import numpy as np
import pandas as pd
import itertools
s = pd.Series(["Hello", "my","Friend"])
c = ['me', 'mine', 'Friend']
word_combinations = np.array(list(itertools.product(s.values, c)))
vectorized_levenshtein = np.vectorize(stringdist.levenshtein)
result = vectorized_levenshtein(word_combinations[:,0], word_combinations[:,1])
At this point you have the results in a numpy array, each corresponding to one of all the possible combinations of your two initial arrays. If you want to get it into the shape you have in your example, there's some pandas trickery to be done:
df = pd.DataFrame([word_combinations[:,0], word_combinations[:,1], result]).T
### initially looks like: ###
# 0 1 2
# 0 Hello me 4
# 1 Hello mine 5
# 2 Hello Friend 6
# 3 my me 1
# 4 my mine 3
# 5 my Friend 6
# 6 Friend me 5
# 7 Friend mine 4
# 8 Friend Friend 0
df = df.set_index([0,1])[2].unstack()
### Now looks like: ###
# Friend Hello my
# Friend 0 6 6
# me 5 4 1
# mine 4 5 3
Again, I haven't tested the performance of this method, so I recommend checking that out - it should be faster than iteration though.
EDIT:
User @Dames has a better suggestion for making the result all pretty-like:
# itertools.product(s.values, c) varies the word from c fastest, so the flat
# result holds len(s) consecutive runs of len(c) distances: rows follow s,
# columns follow c.
result = result.reshape(len(s), len(c))
df = pd.DataFrame(result, columns=c, index=s)
I have some code for linear programming using PuLP module and I want to print the output by sorting it in the ascending order. I tried using sorted() method but unfortunately it doesn't work in this case as the output is not the list. Also, the output (as written below) is the combination of some variable name and variable value which I want to print together separated by "=" sign.
import time
start_time = time.time()
from pulp import *
from openpyxl import load_workbook
import pandas as pd
from pulp import solvers
import xlrd
import os
# Accumulators for reported variable names/values; in the code shown only
# vName and vValue are ever appended to.
vName = list()
vValue = list()
wName = list()
wValue = list()
# NOTE(review): the Windows paths below are ordinary (non-raw) string literals
# containing backslashes, and none of the opened files is closed explicitly in
# the code shown.
PriceData=open('C:\Fourth Term # Dal\Project\Collaboration\Sensitivity analysis\GUI\Otherdata\pricedata.txt', 'r')
line1= PriceData.readlines()
# The first line of pricedata.txt names the workbook to load.
wb=load_workbook(filename="C:\Fourth Term # Dal\Project\Collaboration\Sensitivity analysis\GUI\Price Data\\"+ line1[0].strip())
ws=wb['Sheet1']
OC_Data=open('C:\Fourth Term # Dal\Project\Collaboration\Sensitivity analysis\GUI\Otherdata\OC_Data.txt', 'r')
line2= OC_Data.readlines()
# The first line of OC_Data.txt names the operating-characteristics file.
OC=open('C:\Fourth Term # Dal\Project\Collaboration\Sensitivity analysis\GUI\Operating Characteristics\\'+ line2[0].strip())
line3= OC.readlines()
# Scalars are parsed from fixed character offsets of specific input lines.
Horizon=int(line1[1][10:])
pieces=int(line3[4][29:])
# Index sets: T for periods 0..Horizon-1, L for pieces 0..pieces-1.
T=list(range(Horizon))
L=list(range(pieces))
p=[0 for j in range(Horizon)]
UB=[0 for l in range(pieces)]
LB=[0 for l in range(pieces)]
f=[0 for l in range(pieces)]
g=[0 for l in range(pieces)]
# p[j] is read from worksheet row 967, starting at column 2.
for j in range(Horizon):
p[j]=ws.cell(row=967, column=j+2).value
# Only indices 0..2 are filled below, so the code as written presumes at
# least three pieces.
UB[0]=float(line3[6][5:])
UB[1]=float(line3[10][5:])
UB[2]=float(line3[14][5:])
LB[0]=float(line3[5][5:])
LB[1]=float(line3[9][5:])
LB[2]=float(line3[13][5:])
# f and g are scaled by 30.5 after parsing.
f[0]=float(line3[7][6:])*30.5
f[1]=float(line3[11][6:])*30.5
f[2]=float(line3[15][6:])*30.5
g[0]=float(line3[8][6:])*30.5
g[1]=float(line3[12][6:])*30.5
g[2]=float(line3[16][6:])*30.5
Cap=float(line3[3][10:])
# The big-M constant used in the constraints equals the capacity.
M=Cap
prob= LpProblem("EL-NR", LpMaximize)
# Decision variables: per-period v, w (continuous), I (integer), z (binary),
# plus per-period/per-piece families lambda, mu, r, zz and xx.
v = LpVariable.matrix("v", (T),0, None, LpContinuous)
w = LpVariable.matrix("w", (T),0, None, LpContinuous)
I = LpVariable.matrix("I", (T),0, None, LpInteger)
z = LpVariable.matrix("z", (T), 0, 1, LpBinary)
lb= LpVariable.matrix("lambda", (T,L,L), 0, 1, LpContinuous)
mu= LpVariable.matrix("mu", (T,L,L), 0, 1, LpContinuous)
r= LpVariable.matrix("r", (T,L), 0 ,1, LpBinary)
zz= LpVariable.matrix("zz", (T,L,L),0, 1,LpBinary)
xx= LpVariable.matrix("xx", (T,L,L),0,1,LpBinary)
# Objective (maximised): sum over t of p[t] * (w[t] - v[t]).
prob+= (lpSum([p[t]*(-v[t]+w[t]) for t in T]))
# Per-period constraints: w is bounded by I and by M*z, v by M*(1-z), I by
# Cap, and the r[t][l] of a period sum to one.
for t in T:
prob+= w[t]<=I[t]
prob+= w[t]<=M*z[t]
prob+= v[t]<=M*(1-z[t])
prob+= I[t]<=Cap
prob+= lpSum([r[t][l] for l in L])==1
def constraint_1(t, L):
    """Return the sum of lb[t][l][k] * f[k] over all l, k in L with k >= l.

    Reads the module-level ``lb`` and ``f``.
    """
    total = 0
    for first in L:
        for second in L:
            if second < first:
                continue
            total += lb[t][first][second] * f[second]
    return total
def constraint_2(t, L):
    """Return the sum of mu[t][l][k] * g[k] over all l, k in L with k <= l.

    Reads the module-level ``mu`` and ``g``.
    """
    total = 0
    for first in L:
        for second in L:
            if second > first:
                continue
            total += mu[t][first][second] * g[second]
    return total
# Bound v[t] and w[t] by the piecewise sums built by constraint_1 / constraint_2.
for t in T:
prob+=v[t]<= constraint_1(t,L)
prob+=w[t]<= constraint_2(t,L)
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the loops and conditionals below cannot be recovered from the text alone.
for t in T:
for l in L[:pieces-1]:
for k in L:
if k==l:
prob+= lb[t][l][k]<=((UB[l]-I[t])/f[l])+M*(1-r[t][l])
for l in L:
for k in L:
if k>l:
prob+= lb[t][l][k]<=((UB[k]-UB[k-1])/f[k])+M*(1-r[t][l])
for l in L:
for k in L:
if k>=l:
prob+= lpSum([lb[t][l][k] for k in L])==r[t][l]
for l in L[1:]:
for k in L:
if k==l:
prob+=mu[t][l][k]<=((I[t]-LB[l])/g[l])+M*(1-r[t][l])
for l in L:
for k in L:
if k<l:
prob+=mu[t][l][k]<=((LB[k+1]-LB[k])/g[k])+M*(1-r[t][l])
if k<=l:
prob+= lpSum([mu[t][l][k] for k in L])==r[t][l]
if k>=l:
prob+=lb[t][l][k]<=zz[t][l][k]
if k<=l:
prob+=mu[t][l][k]<=xx[t][l][k]
for kr in L:
if k>=l:
if kr>k:
prob+=zz[t][l][k]>=zz[t][l][kr]
for kr in L:
if k<=l:
if kr<k:
prob+=xx[t][l][k]>=xx[t][l][kr]
prob+=lb[t][l][k]>=0
prob+=mu[t][l][k]>=0
# Links I in consecutive periods: I[t] = I[t-1] + v[t-1] - w[t-1].
if t>0:
b=t-1
prob+= I[t]==I[b]+v[b]-w[b]
# The value of I[0] is read from the operating-characteristics file.
prob+= I[0]==float(line3[1][25:])
prob.solve(GUROBI())
# Report every variable whose name starts with "v". This loop rebinds `v`,
# which until here named the list of v[t] variables.
# NOTE(review): the output listed after this script comes out as 0, 1, 10,
# 11, ..., 2, ..., which looks like string ordering of the variable names;
# confirm against the PuLP documentation for variables().
for v in prob.variables():
if v.name[:1]=="v":
a=v.name[1:]
vName.append(a)
b=v.varValue
vValue.append(b)
# name[2:] drops the first two characters of the name before printing.
print(v.name[2:]+'='+ str(b))
This is what I am getting:
0=0.0
1=-0.0
10=-0.0
11=0.0
12=0.0
13=0.0
14=0.0
15=274000.0
16=189500.0
17=183000.0
18=0.0
19=0.0
2=0.0
20=251140.00000618323
21=46707.99999497511
22=0.0
23=0.0
3=0.0
4=274000.0
5=189500.0
6=170500.0
7=183000.0
8=183000.0
9=0.0
And this is what I need:
0=0.0
1=-0.0
2=0.0
3=0.0
4=274000.0
5=189500.0
6=170500.0
7=183000.0
8=183000.0
9=0.0
10=-0.0
11=0.0
12=0.0
13=0.0
14=0.0
15=274000.0
16=189500.0
17=183000.0
18=0.0
19=0.0
20=251140.00000618323
21=46707.99999497511
22=0.0
23=0.0
I would appreciate if someone could answer this. TIA :)
Does this work for you....
Replace
for v in prob.variables():
if v.name[:1]=="v":
a=v.name[1:]
vName.append(a)
b=v.varValue
vValue.append(b)
print(v.name[2:]+'='+ str(b))
With:
for t in T:
print(str(t) + '=' + str(v[t].varValue))
Trying to figure out how to use a function to generate new self named variables inside a class.
I've played around with it in IDLE, and searched online docs. The solution is eluding me.
>>> import random
>>> abc = [(map(chr,range(ord('A'),ord('Z')+1)))+(map(chr,range(ord('a'),ord('z')+1)))]
>>> class Test():
def __init__(self, abc):
self.a = 0
self.abc = abc
def newSelf(self):
for i in range(2):
b = random.choice(abc)
c = random.choice(abc)
globals()['self.'+b+c] = 0
#or
locals()['self.'+b+c] = 0
print(b+c,0)
>>> example = Test(abc)
>>> example.a
0
>>> example.newSelf() #say it generates
An 0
ze 0
>>> example.An #calling new self variable of example object returns
Traceback (most recent call last):
File "<pyshell#221>", line 1, in <module>
example.An
AttributeError: 'Test' object has no attribute 'An'
# I'm hoping for...
>>> example.An
0
Using setattr:
You can use setattr to set the new attribute:
>>> class Test():
... def __init__(self, abc):
... self.a = 0
... self.abc = abc
... def newSelf(self):
... for i in range(2):
... b = random.choice(abc)
... c = random.choice(abc)
... setattr(self, b+c, 0)
... print(b+c,0)
And the attribute will again be available:
>>> example = Test(abc)
>>> example.newSelf()
zM 0
Ja 0
>>> example.zM
0
>>> example.Ja
0
Using exec:
You can use the exec function to execute a Python statement stored in a string. Since you are generating the variable name randomly, you can build the whole Python statement as a string and execute it using exec, like below:
>>> class Test():
... def __init__(self, abc):
... self.a = 0
... self.abc = abc
... def newSelf(self):
... for i in range(2):
... b = random.choice(abc)
... c = random.choice(abc)
... exec('self.'+b+c+' = 0')
... print(b+c,0)
Here, I have created the new attribute using exec('self.'+b+c+' = 0'). Now, after calling this method, the attribute will be available:
>>> example = Test(abc)
>>> example.newSelf()
Tw 0
Xt 0
>>> example.Tw
0
>>> example.Xt
0
The problem statement:
An unnamed tourist got lost in New York. All he has is a map of M
metro stations, which shows the coordinates of the stations and his
own coordinates, which he saw on the nearby pointer. The tourist is
not sure that each of the stations is open, therefore, just in case,
he is looking for the nearest N stations. The tourist moves
through New York City like every New Yorker (Distance of city
quarters). Help the tourist to find these stations.
Sample input
5 2
A 1 2
B 4.5 1.2
C 100500 100500
D 100501 100501
E 100502 100502
1 1
Sample output
A B
My code:
import scipy.spatial.distance as d
import math
# Finds the N nearest metro stations relative to the tourist.
def find_shortest_N(distance_list, name_list, number_of_stations):
    """Return the names of the ``number_of_stations`` closest stations.

    ``distance_list[i]`` is the distance to the station named ``name_list[i]``.
    Neither input list is modified. Stations at equal distance are returned in
    input order. If fewer stations exist than requested, all of them are
    returned, nearest first.
    """
    # sorted() is stable, so ties keep their input order, matching a
    # repeated "first minimum wins" selection.
    ranked = sorted(zip(distance_list, name_list), key=lambda pair: pair[0])
    wanted = max(number_of_stations, 0)
    return [name for _, name in ranked[:wanted]]
# Returns a list with the distances between the tourist and each station.
def calculate_nearest(list_of_coords, tourist_coords):
    """Return the city-block distance from ``tourist_coords`` to every station.

    The result has one float per entry of ``list_of_coords``, in the same
    order.
    """
    return [
        math.fabs(d.cityblock(station, tourist_coords))
        for station in list_of_coords
    ]
# First input line: "M N" = number of stations that follow and number of
# stations the tourist wants.
input_stations = [int(token) for token in input("Input a number of stations: ").split()]
station_M = input_stations[0]
stations_wanted_N = input_stations[1]

# Each station line is "<name> <coordinate> <coordinate> ..."; names and
# coordinates are collected in parallel lists.
station_names = []
station_coords = []
for _ in range(station_M):
    fields = input().split()
    station_names.append(fields[0])
    station_coords.append([float(value) for value in fields[1:]])

tourist_coordinates = [float(value) for value in input("Enter tourist position: ").split()]

distance_values = calculate_nearest(station_coords, tourist_coordinates)
result = find_shortest_N(distance_values, station_names, stations_wanted_N)
# Names are printed on one line, each followed by a single space.
for name in result:
    print(name, end=" ")
You could also, for example, directly use the cdist function:
import numpy as np
from scipy.spatial.distance import cdist

sample_input = '''
5 2
А 1 2
B 4.5 1.2
C 100500 100500
D 100501 100501
E 100502 100502
1 1
'''

# Parsing the input data:
sample_data = [line.split()
               for line in sample_input.strip().split('\n')]

tourist_coords = np.array(sample_data.pop(), dtype=float)  # takes the last line
nbr_stations, nbr_wanted = [int(n) for n in sample_data.pop(0)]  # takes the first line

stations_coords = np.array([line[1:] for line in sample_data], dtype=float)
stations_names = [line[0] for line in sample_data]

# Computing the distances:
tourist_coords = tourist_coords.reshape(1, 2)  # cdist needs two 2D arrays
# cdist returns an (M, 1) matrix here; flatten it so every station is paired
# with a plain scalar and the sort key compares numbers, not 1-element arrays.
distance = cdist(stations_coords, tourist_coords, metric='cityblock').ravel()

# Sorting the distances (sorted() is stable, so ties keep the input order):
sorted_distance = sorted(zip(stations_names, distance), key=lambda x: x[1])

# Result:
result = [name for name, dist in sorted_distance[:nbr_wanted]]
print(result)
Use scipy.spatial.KDTree
import numpy as np
from scipy.spatial import KDTree

# Reuses stations_coords, stations_names, tourist_coords (shape (1, 2)) and
# nbr_wanted from the cdist example.
subway_tree = KDTree(stations_coords)
# p=1 selects the Manhattan (city-block) metric.
dist, idx = subway_tree.query(tourist_coords, nbr_wanted, p=1)
# idx is an integer array (2D for a 2D query point), and a plain list cannot be
# indexed by an array, so flatten it and look the names up one by one. When
# more neighbours are requested than stations exist, the missing ones are
# reported with an out-of-range index and are skipped here.
nearest_stations = [stations_names[i] for i in np.ravel(idx)
                    if i < len(stations_names)]