#!/usr/bin/python3.4
import urllib.request
import os
import re
os.chdir('/home/whatever/')
a = open('Shopstxt.csv','r')
b = a.readlines()
a.close()
c = len(b)
d = list(zip(*(e.split(';') for e in b)))
shopname = []
shopaddress = []
shopcity = []
shopphone = []
shopwebsite = []
f = d[0]
g = d[1]
h = d[2]
i = d[3]
j = d[4]
e = -1
for n in range(0, 5):
e = e + 1
sn = f[n]
sn.title()
print(sn)
shopname.append(sn)
sa = g[n]
sa.title()
shopaddress.append(sa)
sc = h[n]
sc.title()
shopcity.append(sc)
Shopstxt.csv is all upper case letters and I want to convert them to title. I thought this would do it but it doesn't...it still leaves them all upper case. What am I doing wrong?
I also want to save the file back. Just wanting to check on a couple of things real quick like as well...time pressed.
When I combine the file back together, before writing it back to the drive do I have to add an '\n' at the end of each line or does it automatically include the '\n' when I write each line to the file?
Strings are immutable, so you need to asign the result of title():
sa = sa.title()
sc = sc.title()
Also, if you do this:
with open("bla.txt", "wt") as outfile:
outfile.write("stuff")
outfile.write("more stuff")
then this will not automatically add line endings.
A quick way to add line endings would be this:
textblobb = "\n".join(list_of_text_lines)
with open("bla.txt", "wt") as outfile:
outfile.write(textblobb)
As long as textblobb isn't inefficiently large and fits into memory, that should do the trick nicely.
Use the .title() method when defining your variables like I did in the code below. As others have mentioned, strings are immutable so save yourself a step and create the string you need in one line.
for n in range(0, 5):
e = e + 1
sn = f[n].title() ### Grab and modify the list index before assigning to your variable
print(sn)
shopname.append(sn)
sa = g[n].title() ###
shopaddress.append(sa)
sc = h[n].title() ###
shopcity.append(sc)
Related
I have written a function that is going to have up to 72 IF statements
and i was hoping to write code that will be much shorter, but have no idea where to start
The function reads the self.timeselect variable when a radio button is selected and the result is saved to a text file called missing_time.txt. If the result is equal to 1 then save "0000" to the file, if the result is 2 save then 0020 to the text file etc. This can be for 72 possible combinations.
Is there a smarter way to simplify the function ?
def buttonaction():
selectedchoice = ""
if self.timeselect.get() == 1:
selectedchoice = "0000"
orig_stdout = sys.stdout
f = open('missing_time.txt', 'w')
sys.stdout = f
print(selectedchoice)
f.close()
if self.timeselect.get() == 2:
selectedchoice = "0020"
orig_stdout = sys.stdout
f = open('missing_time.txt', 'w')
sys.stdout = f
print(selectedchoice)
f.close()
self.timeselect = tkinter.IntVar()
self.Radio_1 = tkinter.Radiobutton(text="0000",variable =
self.timeselect,indicator = 0 ,value=1)
self.Radio_1.place(x=50,y=200)
self.Radio_2 = tkinter.Radiobutton(text="0020",variable =
self.timeselect,indicator = 0 ,value=2)
self.Radio_2.place(x=90,y=200)
choice_map = {
1 : "0000",
2 : "0020"
}
def buttonaction():
selected = self.timeselect.get()
if 0 < selected < 73: # This works as intended in Python
selectedchoice = choice_map[selected]
# Do you intend to append to file instead of replacing it?
# See text below.
with open("missing_time.txt", 'w') as outfile:
outfile.write(selectedchoice + "\n")
print(selectedchoice)
Better yet, if there is a pattern that relates the value of self.timeselect.get() to the string that you write out, generate selectchoice directly from that pattern instead of using a dictionary to do the mapping.
Edit
I find it a bit odd that you are clearing the file "missing_time.txt" every time you call buttonaction. If your intention is to append to it, change the file mode accordingly.
Also, instead of opening and closing the file each time, you might just want to open it once and pass the handler to buttonaction or keep it as a global depending on how you use it.
Finally, if you do not intend to catch the KeyError from an invalid key, you can do what #Clifford suggests and use choice_map.get(selected, "some default value that does not have to be str").
All you need to do in this case is construct a string from the integer value self.timeselect.get().
selectedchoice = self.timeselect.get()
if 0 < selectedchoice < 73:
orig_stdout = sys.stdout
f = open('missing_time.txt', 'w')
sys.stdout = f
print( str(selectedchoice).zfill(4) ) # Convert choice to
# string with leading
# zeros to 4 charaters
f.close()
Further in the interests of simplification, redirecting stdout and restoring it is a cumbersome method of outputting to a file. Instead, you can write directly to the file:
with open('missing_time.txt', 'w') as f:
f.write(selectedchoice + "\n")
Note that because we use the with context manager here, f is automatically closed when we leave this context so there is no need to call f.close(). Ultimately you end up with:
selectedchoice = self.timeselect.get()
if 0 < selectedchoice < 73:
with open('missing_time.txt', 'w') as f:
f.write( str(selectedchoice).zfill(4) + "\n" )
Even if you did use the conditionals each one differs only in the first line, so only that part need be conditional and the remainder of the content performed after the conditionals. Moreover all conditionals are mutually exclusive so you can use else-if:
if self.timeselect.get() == 1:
selectedchoice = "0000"
elif self.timeselect.get() == 2:
selectedchoice = "0020"
...
if 0 < selectedchoice < 73:
with open('missing_time.txt', 'w') as f:
f.write(selectedchoice + "\n")
In circumstances where there is no direct arithmetic relationship between selectchoice and the required string, or the available choices are perhaps not contiguous, it is possible to implement a switch using a dictionary:
choiceToString = {
1: "0001",
2: "0002",
...
72: "0072",
}
selectedchoice = choiceToString.get( self.timeselect.get(), "Invalid Choice")
if selectedchoice != "Invalid Choice":
with open('missing_time.txt', 'w') as f:
f.write(selectedchoice + "\n")
Since there is no switch statement in Python, you can't really reduce the number of if statements. But I see 2 two way to optimize and reduce your code length.
First, you can use some
if condition:
elif condition:
instead of
if condition:
if condition:
since you can't have self.timeselect.get() evaluated to more than one int.
Secondly you can wrap all the code that doesn't vary in a function.
You can get rid of selectedchoice and put
orig_stdout = sys.stdout
f = open('missing_time.txt', 'w')
sys.stdout = f
print(selectedchoice)
f.close()
in a function writeToFile(selectedOption)
I'm assuming that the values are arbitrary and there's no defined pattern. I also see that the only thing that changes in your code is the selectedChoice variable. You can use a Dictionary in such cases. A dictionary's elements are key/value pairs so you can reference the key and get the value.
dictionary = {
1:"0000",
2:"0020",
3:"0300",
4:"4000"
}
def buttonAction():
selectedChoice = dictionary[self.timeselect.get()]
if 0<selectedChoice<=72:
f=open('missing_time.txt','w')
f.write(selectedChoice+" ")
f.close()
print(choice)
Actually My file contents are.
ttsighser66
dagadfgadgadgfadg
dafgad
fgadfgad
ttsighser63
sadfsadf
asfdas
My code
file=open("C:\\file.txt","r")
cont = []
for i in file:
dd = i.strip("\n")
cont.append(dd)
cc = ",".join(cont)
if "tt" in i:
cc = ",".join(cont[:-1])
print(cont[-1], cc)
cont = []
My code generate below Output:
ttsighser66
ttsighser63 dagadfgadgadgfadg,dafgad,fgadfgad
But I want output like below format
ttsighser66,dagadfgadgadgfadg,dafgad,fgadfgad
ttsighser63,sadfsadf,asfdas
file=open("file.txt","r")
cont = []
for i in file:
dd = i.strip("\n")
cont.append(dd)
#print('cc',cont)
if "ttsighser" in i and len(cont) != 1:
cc = ",".join(cont[:-1])
print(cc)
cont = []
cont.append(dd)
print(",".join(cont))
If you don't need to store any strings to a list and just need to print strings, you could try this instead.
with open("file.txt", "r") as f:
line_counter = 0
file_lines = f.readlines()
for i in file_lines:
dd = i.strip()
if "tt" in dd:
print("{0}{1}".format("\n" if line_counter > 0 else "", dd), end="")
else:
print(",{0}".format(dd), end="")
line_counter += 1
print("")
The reason why your code displays
ttsighser66
ttsighser63 dagadfgadgadgfadg,dafgad,fgadfgad
instead of
ttsighser66,dagadfgadgadgfadg,dafgad,fgadfgad
ttsighser63,sadfsadf,asfdas
is because when you first encounter 'ttsighser66', it is appended to cont. Then since 'ttsighser66' contains 'tt', we would proceed to the conditional branch.
In the conditional branch, we would be joining the first and second to the last string in cont in cc = ",".join(cont[:-1]). However, since we only have 'ttsighser66' in cont, cont[:-1] will give us [] (an empty list). Since cont[:-1] is empty, ",".join(cont[:-1]) will be empty as well. Thus, cc will be empty. Since cc is empty, print(cont[-1], cc) will give us ttsighser66.
In the second line, ttsighser63 dagadfgadgadgfadg,dafgad,fgadfgad gets displayed because cont contains more than one value already so it will also display the values before 'ttsighser63'.
The remaining strings are not displayed because, based from your code, it would need another string containing 'tt' before the strings in cc could be displayed.
Essentially, you require a pair of strings containing 'tt' to display the strings between the pairs.
Additonal remark: The line cc = ",".join(cont) in your code seems pretty useless since its scope is limited to the for loop only and its value is being replaced inside the conditional branch.
version 1 (all data in list of strings && 1 time print)
fp=open("file.txt", "r")
data = []
for line in fp:
if "tt" in line:
data.append(line.strip())
else:
data.append(data.pop() + "," + line.strip())
fp.close()
[print (data) for line in data]
Version 2 (all data in a single string && 1 time print)
fp=open("file.txt","r")
data = ""
for line in fp:
if "tt" in line:
data += "\n" + line.strip()
else:
data += ","+line.strip()
fp.close()
data = data[1:]
print (data)
I'm doing a text categorization experiment. For the feature extraction phase I'm trying to create a feature dictionary per document. For now, I have two features, Type token ratio and n-grams of the relative frequency of function words. When I print my instances, only the feature type token ratio is in the dictionary. This seems to be because an ill functioning get_pos(). It returns empty lists.
This is my code:
instances = []
labels = []
directory = "\\Users\OneDrive\Data"
for dname, dirs, files in os.walk(directory):
for fname in files:
fpath = os.path.join(dname, fname)
with open(fpath,'r') as f:
text = csv.reader(f, delimiter='\t')
vector = {}
#TTR
lemmas = get_lemmas(text)
unique_lem = set(lemmas)
TTR = str(len(unique_lem) / len(lemmas))
name = fname[:5]
vector['TTR'+ '+' + name] = TTR
#function word ngrams
pos = get_pos(text)
fw = []
regex = re.compile(
r'(LID)|(VNW)|(ADJ)|(TW)|(VZ)|(VG)|(BW)')
for tag in pos:
if regex.search(tag):
fw.append(tag)
for n in [1,2,3]:
grams = ngrams(fw, n)
fdist = FreqDist(grams)
total = sum(c for g,c in fdist.items())
for gram, count in fdist.items():
vector['fw'+str(n)+'+'+' '+ name.join(gram)] = count/total
instances.append(vector)
labels.append(fname[:1])
print(instances)
And this is an example of a Dutch input file:
This is the code from the get_pos function, which I call from another script:
def get_pos(text):
row4=[]
pos = []
for row in text:
if not row:
continue
else:
row4.append(row[4])
pos = [x.split('(')[0] for x in row4] # remove what's between the brackets
return pos
Can you help me find what's wrong with the get_pos function?
When you call get_lemmas(text), all contents of the file are consumed, so get_pos(text) has nothing left to iterate over. If you want to go through a file's content multiple times, you need to either f.seek(0) between the calls, or read the rows into a list in the beginning and iterate over the list when needed.
I can easily replace each every letter by doing them one after another but when i use loop it won't.
a = strrep(a,'b','z');
a = strrep(a,'a','e');
a = strrep(a,'c','f');
but for i = 1:size(a,2)
a = strrep(a,'b','z');
a = strrep(a,'a','e');
a = strrep(a,'c','f');
end
only change 'b' and 'a' not all. so lets say if we str 'abcdabc' then replace one after another gives right answer but loop shows the result as 'ezedeze'. so please help with this.
Here are two approaches:
With changem (from the Mapping Toolbox):
str = 'basic example string';
old = 'abcde';
new = 'fhdot';
str = changem(str, new, old);
With ismember:
str = 'basic example string';
old = 'abcde';
new = 'fhdot';
[ind1, ind2] = ismember(str, old);
new = 'fhdot';
str(ind1) = new(ind2(ind1));
So far, I have this:
def main():
bad_filename = True
l =[]
while bad_filename == True:
try:
filename = input("Enter the filename: ")
fp = open(filename, "r")
for f_line in fp:
a=(f_line)
b=(f_line.strip('\n'))
l.append(b)
print (l)
bad_filename = False
except IOError:
print("Error: The file was not found: ", filename)
main()
this is my program and when i print this what i get
['1,2,3,4,5']
['1,2,3,4,5', '6,7,8,9,0']
['1,2,3,4,5', '6,7,8,9,0', '1.10,2.20,3.30,0.10,0.30']
but instead i need to get
[1,2,3,4,5]
[6,7,8,9,0.00]
[1.10,2.20,3.3.0,0.10,0.30]
Each line of the file is a series on numbers separated by commas, but to python they are just characters. You need one more conversion step to get your string into a list. First split on commas to create a list of strings each of which is a number. Then use what is called "list comprehension" (or a for loop) to convert each string into a number:
b = f_line.strip('\n').split(',')
c = [float(v) for v in b]
l.append(c)
If you really want to reset the list each time through the loop (your desired output shows only the last line) then instead of appending, just assign the numerical list to l:
b = f_line.strip('\n').split(',')
l = [float(v) for v in b]
List comprehension is a shorthand way of saying:
l = []
for v in b:
l.append(float(v))
You don't need a or the extra parentheses around the assignment of a and b.