How can I complete this project? It is about sentiment classification - python-3.x

I was trying to write code for a sentiment classifier for a Twitter CSV file.
The code works, but on the Coursera platform it gets stuck and does not finish; it shows me the error below. This is for the Coursera Python Functions, Files and Dictionaries course.
# Sentiment-classifier script as posted in the question (its indentation was
# lost when the post was captured). It reads project_twitter_data.csv and
# writes one line of scores per tweet to resulting_data.csv.
projectTwitterDataFile = open("project_twitter_data.csv","r")
resultingDataFile = open("resulting_data.csv","w")
# NOTE(review): '#' is listed twice; the second entry has no additional effect.
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '#']
# lists of words to use
positive_words = []
with open("positive_words.txt") as pos_f:
for lin in pos_f:
# Lines that start with ';' and empty lines are skipped.
if lin[0] != ';' and lin[0] != '\n':
positive_words.append(lin.strip())
# Counts the words of the punctuation-stripped text that equal an entry of positive_words.
def get_pos(strSentences):
strSentences = strip_punctuation(strSentences)
listStrSentences= strSentences.split()
count=0
# NOTE(review): every word is compared with every entry of positive_words
# (nested loops), so the cost per tweet is words x lexicon size.
for word in listStrSentences:
for positiveWord in positive_words:
if word == positiveWord:
count+=1
return count
negative_words = []
with open("negative_words.txt") as pos_f:
for lin in pos_f:
if lin[0] != ';' and lin[0] != '\n':
negative_words.append(lin.strip())
# Same as get_pos, but counts against negative_words.
def get_neg(strSentences):
strSentences = strip_punctuation(strSentences)
listStrSentences = strSentences.split()
count=0
for word in listStrSentences:
for negativeWord in negative_words:
if word == negativeWord:
count+=1
# NOTE(review): debugging output; it is emitted at least once per call.
print(count)
return count
# Removes every character listed in punctuation_chars from strWord.
def strip_punctuation(strWord):
for charPunct in punctuation_chars:
strWord = strWord.replace(charPunct, "")
return strWord
# Writes the header and one score line per data row of projectTwitterDataFile.
def writeInDataFile(resultingDataFile):
# NOTE(review): the string literal on the next two lines is split across a
# line break, which is a syntax error for a single-quoted literal.
resultingDataFile.write("Number of Retweets, Number of Replies,
positive Score, Negative Score, Net Score")
resultingDataFile.write("\n")
linesPTDF = projectTwitterDataFile.readlines()
headerDontUsed= linesPTDF.pop(0)
for linesTD in linesPTDF:
listTD = linesTD.strip().split(',')
# NOTE(review): get_pos and get_neg are each evaluated twice per row here.
resultingDataFile.write("{}, {}, {}, {}, {}".format(listTD[1],
listTD[2], get_pos(listTD[0]), get_neg(listTD[0]),
(get_pos(listTD[0])- get_neg(listTD[0]))))
resultingDataFile.write("\n")
writeInDataFile(resultingDataFile)
projectTwitterDataFile.close()
# NOTE(review): the line below ends with an unmatched ')'.
resultingDataFile.close())
Error
TimeLimitError: Program exceeded run time limit. on line 37
Description
Your program is running too long. Most programs in this book should
run in less than 10 seconds easily. This probably indicates your
program is in an infinite loop.
To Fix
Add some print statements to figure out if your program is in an
infinte loop. If it is not you can increase the run time with
sys.setExecutionLimit(msecs)

It's not very clear to me what you're trying to do. Your code isn't very well-formatted, so besides the fact that your line indentation is off, it's not immediately clear where your functions end and where new commands begin. In fact, you have commands put in between your definitions/functions. The typical convention in Python is to put your callable functions at the top, then the flow of actual code at the bottom after your functions.
I take it that positive_words.txt and negative_words.txt are just two text files where each line is either a word (with punctuation marks such as apostrophes already stripped out), something starting with a semicolon, or a blank line? If so, you can probably just do something like this to extract the lists:
# Load the positive lexicon: one word per line, skipping ';' comment lines and
# blank lines. The file name matches the one used in the question.
with open("positive_words.txt") as f:
    # Iterating the file object reads line by line; no need for readlines().
    positive_words = [c.strip() for c in f if c[0] not in (';', '\n')]
Also, instead of opening files at module level and passing the open file objects into your functions, only to not use those files in any other function, maybe you should just pass in the name of the file and do all of the opening and closing from within the function.

# Characters removed by strip_punctuation().
# NOTE(review): '#' appears twice; the second entry was presumably meant to be
# a different character -- confirm against the assignment.
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '#']


def strip_punctuation(oldS):
    """Return ``oldS`` with every character in ``punctuation_chars`` removed.

    ``oldS`` is converted with ``str()`` first, so non-string values are
    accepted and the result is always a string.
    """
    text = str(oldS)
    for mark in punctuation_chars:
        text = text.replace(mark, '')
    return text
# Characters removed by strip_punctuation().
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '#']


def strip_punctuation(oldS):
    """Return ``oldS`` (converted to ``str``) without the punctuation characters."""
    text = str(oldS)
    for mark in punctuation_chars:
        text = text.replace(mark, '')
    return text


# list of positive words to use
positive_words = []
with open("positive_words.txt") as pos_f:
    for lin in pos_f:
        # Skip ';' comment lines and blank lines.
        if lin[0] != ';' and lin[0] != '\n':
            positive_words.append(lin.strip())

# Set copy of the lexicon: membership tests on a list scan the whole lexicon
# for every word, which is what makes large inputs hit the run-time limit.
_positive_lookup = frozenset(positive_words)


def get_pos(str):
    """Return how many words of the text ``str`` are in the positive lexicon.

    The text is stripped of punctuation and split on whitespace; each
    occurrence of a positive word counts once.
    """
    # The parameter keeps its original name (it shadows the builtin only
    # inside this function, where the builtin is not needed).
    words = strip_punctuation(str).split()
    return sum(1 for word in words if word in _positive_lookup)
# Characters removed by strip_punctuation().
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '#']


def strip_punctuation(oldS):
    """Return ``oldS`` (converted to ``str``) without the punctuation characters."""
    text = str(oldS)
    for mark in punctuation_chars:
        text = text.replace(mark, '')
    return text


# list of negative words to use
negative_words = []
with open("negative_words.txt") as neg_f:
    for lin in neg_f:
        # Skip ';' comment lines and blank lines.
        if lin[0] != ';' and lin[0] != '\n':
            negative_words.append(lin.strip())

# Set copy of the lexicon so each word lookup is O(1) instead of a list scan.
_negative_lookup = frozenset(negative_words)


def get_neg(str):
    """Return how many words of the text ``str`` are in the negative lexicon.

    The text is stripped of punctuation and split on whitespace; each
    occurrence of a negative word counts once.
    """
    # The parameter keeps its original name (it shadows the builtin only
    # inside this function, where the builtin is not needed).
    words = strip_punctuation(str).split()
    return sum(1 for word in words if word in _negative_lookup)
def run(file):
    """Score every data row of the CSV ``file`` and write ``resulting_data.csv``.

    The first line of ``file`` is treated as a header and skipped. In each
    remaining row the first comma-separated field is the text that is scored
    with ``get_pos`` / ``get_neg``; all following fields are converted to
    ``int`` and copied to the output, followed by the positive count, the
    negative count and their difference.

    Raises:
        ValueError: if a field after the first one is not an integer.
    """
    # Close the input as soon as it is read (the original never closed it).
    with open(file, 'r') as csvFile:
        rows = csvFile.readlines()[1:]  # drop the header row

    out_lines = ["Number of Retweets, Number of Replies, Positive Score, Negative Score, Net Score"]
    for row in rows:
        row = row.strip()
        if not row:
            # A blank line (e.g. at the end of the file) is not a tweet.
            continue
        fields = row.split(",")
        counts = [int(value) for value in fields[1:]]
        pos = get_pos(fields[0])
        neg = get_neg(fields[0])
        out_lines.append(','.join('%s' % value for value in counts + [pos, neg, pos - neg]))

    with open("resulting_data.csv", 'w') as outFile:
        outFile.write('\n'.join(out_lines))


if __name__ == '__main__':
    run('project_twitter_data.csv')

Related

How do I print the output in one line as opposed to it creating a new line?

I cannot find where I have gone wrong with this program. It simply takes a file and reverses the text in it, but all of the separate sentences print on a new line each, and I need them to print on the same line.
Here is my code for reference:
def read_file(filename):
    """Read ``filename`` and return its text split into sentences.

    Words are separated by whitespace (line breaks included) and joined with
    single spaces; a sentence ends at a word whose last character is ``.``,
    ``?`` or ``!``. Trailing words that are not followed by such a terminator
    are not returned.

    Returns:
        A list of sentence strings, or ``None`` if the file cannot be opened
        or decoded.
    """
    sentences = []
    try:
        with open(filename, 'r') as infile:
            pending = []  # words of the sentence currently being collected
            for line in infile:
                # Blank lines contribute no words, so they need no special case.
                for word in line.split():
                    pending.append(word)
                    if word[-1] in ('.', '?', '!'):
                        sentences.append(' '.join(pending))
                        pending = []
    except (OSError, UnicodeError):
        # Only I/O and decoding problems mean "unable to read"; anything else
        # is a programming error and should surface.
        return None
    return sentences
def reverse_line(sentence):
    """Return ``sentence`` with its word order reversed.

    The last character is treated as the terminating punctuation and stays at
    the end. All words are lower-cased, then the word that ends up first is
    title-cased, e.g. ``"Hello big world."`` -> ``"World big hello."``.

    An empty string is returned unchanged, and a sentence that consists only
    of its punctuation mark is returned as that mark.
    """
    if not sentence:
        return sentence
    punctuation = sentence[-1]
    words = sentence[:-1].lower().split()
    if not words:
        return punctuation
    # The original last word becomes the first word of the result.
    words[-1] = words[-1].title()
    return ' '.join(reversed(words)) + punctuation
# Entry point: asks for a file path, reads its sentences with read_file() and
# prints each sentence reversed by reverse_line().
def main():
filepath = input('File: ')
sentences = read_file(filepath)
# read_file() returns None when the file could not be read.
if sentences is None:
print('Unable to read data from file: {}'.format(filepath))
return
for sentence in sentences:
reverse_sentence = reverse_line(sentence)
# NOTE: print() ends its output with a newline by default, so every reversed
# sentence lands on its own line -- the behaviour asked about in the question.
print(reverse_sentence)
main()
You can use the end keyword argument:
print(reverse_sentence, end=' ')
The default value for the end is \n, printing a new-line character at the end.
https://docs.python.org/3.3/library/functions.html#print

How to split string based on commas (',') without considering commas inside brackets('(' and ')')?

I want to split a string that contains commas, using Python 3+. I don't want the string to be split on commas that are inside brackets.
For example:-
cstr = 'animal_tiger,(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",'')),1,3) AS INT))'
I want to split this into two string elements.
I tried splitting based on commas but it is taking inside commas as well.
# Attempts from the question; none of them produces the wanted two-element list.
import re
import csv
# NOTE(review): the StringIO module exists only in Python 2; on Python 3 the
# class is provided by the io module (from io import StringIO).
from StringIO import StringIO
# NOTE: the '' inside this literal ends and reopens the string, so Python
# concatenates two adjacent literals and the value contains no quote pair there.
cstr = 'animal_tiger,(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",'')),1,3) AS INT))'
# Attempt 1: split only at commas that are directly followed by a double quote.
b = re.split(r',(?=")', cstr)
print(b)
# Attempt 2: split at commas outside of quoted sections (quotes, not brackets).
c = re.split(''',(?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', cstr)
print(c)
# Attempt 3: csv reader; cstr contains no ';', so the row is a single field.
data = StringIO(cstr)
reader = csv.reader(data, delimiter=';')
for row in reader:
print(row)
# Attempt 4: manual scan that is meant to ignore commas inside brackets.
def split_with_commas_outside_of_quotes(string):
arr = []
start, flag = 0, False
for pos, x in enumerate(string):
# NOTE(review): a single character cannot equal both '(' and ')', so this
# condition is never true and flag never changes.
if x == '(' and x == ')':
flag= not(flag)
if flag == False and x == ',':
arr.append(string[start:pos])
start = pos+1
# NOTE(review): pos is the index of the last character here, so the slice
# below leaves that character out.
arr.append(string[start:pos])
return arr
print(split_with_commas_outside_of_quotes(cstr))
# NOTE(review): the first closing parenthesis ends the print() call, so
# .replace() is applied to print's return value (None) and fails.
print(cstr.replace('(','$')).replace(')','#').split(',')
Expected result is splitting of string into two different strings of list that is:-
outputlist - ['animal_tiger','(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",'')),1,3) AS INT))']
remember the length of the list is 2.
Here you go. Use this function:
def split_with_commas_outside_of_quotes(string):
    """Split ``string`` on the commas that are not enclosed in parentheses.

    Despite its name (kept so existing calls still work), the function tracks
    ``(`` / ``)`` nesting, not quotes: a comma separates items only while the
    nesting depth is zero.

    Returns:
        The list of items; an empty string gives an empty list, and a
        trailing top-level comma gives a final empty item.
    """
    arr = []
    depth = 0   # current parenthesis nesting level
    start = 0   # index where the current item begins
    for pos, char in enumerate(string):
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
        elif char == "," and depth == 0:
            # Slicing avoids building each item one character at a time.
            arr.append(string[start:pos])
            start = pos + 1
    if string:
        arr.append(string[start:])
    return arr


cstr = 'animal_tiger,(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",'')),1,3) AS INT))'
print(split_with_commas_outside_of_quotes(cstr))
Output:
['animal_tiger', '(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",)),1,3) AS INT))']
You can use split():
data = """animal_tiger,(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",'')),1,3) AS INT))"""
data.split(',', 1)
>>> ['animal_tiger',
'(CAST(SUBSTR(TRIM(replace(MAX(tigers_name),"Body Parts",\'\')),1,3) AS INT))']

Longest word in a string using python programming

Hello guys, I am still an amateur in Python and was hoping someone could help with this solution.
Write a function called longest which will take a string of space separated words and will return the longest one.
For example:
longest("This is Fabulous") => "Fabulous"
longest("F") => "F"
# Unit tests supplied with the exercise; they call a function named `longest`
# that takes a sentence string and is expected to return its longest word.
class Test(unittest.TestCase):
# Longest of several words.
def test_longest_word(self):
sentence = "This is Fabulous"
self.assertEqual('Fabulous', longest(sentence))
# A single word is its own longest word.
def test_one_word(self):
sentence = "This"
self.assertEqual("This", longest(sentence))
This is my solution so far;
# Returns the first word of maximal length in word_list ('' for an empty list).
def find_longest_word(word_list):
longest_word = ''
longest_size = 0
for word in word_list:
# NOTE: the if statement below has no ':' at its end; this is what triggers
# the SyntaxError reported in the question.
if (len(word) > longest_size)
longest_word = word
longest_size = len(word)
return longest_word
words = input('Please enter a few words')
word_list = words.split()
# NOTE(review): the returned word is neither stored nor printed.
find_longest_word(word_list)
Unfortunately, I am getting this error when I try to test the code:
"File "", line 6
if (len(word) > longest_size)
^
SyntaxError: invalid syntax
Any help please I will highly appreciate?
def find_longest_word(myText):
    """Return the longest whitespace-separated word in ``myText``.

    If several words share the maximal length, the first one is returned.
    An empty or all-whitespace string gives ``''``.
    """
    # split() without an argument treats any run of whitespace (spaces, tabs,
    # newlines) as one separator; default='' covers text without any word.
    return max(myText.split(), key=len, default='')


text = "This is Fabulous"
print(find_longest_word(text))  # Fabulous
EDIT: The solution above works if you want just one of the longest words rather than all of them. For example, if my text is "Hey ! How are you ?", it will return just "Hey". If you want it to return ["Hey", "How", "are", "you"],
better use this:
def find_longest_word(myText):
    """Return every word of ``myText`` that has the maximal length.

    Words are separated by any whitespace and are returned in their original
    order. Text without any word gives an empty list.
    """
    words = myText.split()
    # default=0 keeps max() from raising when there are no words at all.
    longest = max(map(len, words), default=0)
    return [word for word in words if len(word) == longest]


print(find_longest_word("Hey ! How are you ?"))  # ['Hey', 'How', 'are', 'you']
See also, this question
You are missing the : at the end of the if statement
Use the updated code below, I fixed your indentation issues too.
def find_longest_word(word_list):
    """Return the longest word in ``word_list``.

    If several words share the maximal length, the first one wins. An empty
    list gives ``''``.
    """
    longest_word = ''
    longest_size = 0
    for word in word_list:
        # Strictly greater, so an earlier word of equal length is kept.
        if len(word) > longest_size:
            longest_word = word
            longest_size = len(word)
    return longest_word
# Read a line of words and show the longest one.
words = input('Please enter a few words')
word_list = words.split()
# Print the result; calling the function alone would discard it.
print(find_longest_word(word_list))
Code sample is incorrect. I get the following message if I try to output:
Error on line 15: print(longest_word("chair", "couch", "table"))
TypeError: longest_word() takes 1 positional argument but 3 were given
So the code looks like this:
# Takes ONE argument, a list of words, and returns the first word of maximal length.
# Calling it as longest_word("chair", "couch", "table") passes three arguments,
# which is the TypeError quoted above; pass a single list instead.
def longest_word(word_list):
# NOTE(review): this local variable has the same name as the function itself.
longest_word = ''
longest_size = 0
for word in word_list:
if (len(word) > longest_size):
longest_word = word
longest_size = len(word)
return longest_word
# NOTE(review): input() accepts at most one argument (the prompt text).
words = input("chair", "couch", "table")
word_list = words.split()
# NOTE(review): the function defined in this snippet is named longest_word,
# not find_longest_word.
find_longest_word(word_list)
# longest word in a text
text = input("Enter your text")
# Create a list of strings by splitting the original string
split_txt = text.split(" ")
# create a dictionary as word:len(word) (repeated words collapse into one key)
text_dic = {i: len(i) for i in split_txt}
# length of the longest word
long_word = max(text_dic.values())
# print every distinct word that has that length
for k, v in text_dic.items():
    if long_word == v:
        print(k)

Simple Python. Not sure why my program is outputting this

I am making a program to take in a sentence, convert each word to pig latin, and then spit it back out as a sentence. I have no idea where I have messed up. I input a sentence and run it and it says
built-in method lower of str object at 0x03547D40
# Pig-latin translator as posted in the question.
s = input("Input an English sentence: ")
# Drops the last character of the input.
s = s[:-1]
# NOTE: lower is not called here (no parentheses), so `string` holds the
# method object itself; printing it shows "built-in method lower of str object".
string = s.lower
vStr = ("a","e","i","o","u")
# Returns the index of the first vowel in word, or -1 if there is none.
def findFirstVowel(word):
for index in range(len(word)):
if word[index] in vStr:
return index
return -1
# NOTE(review): declared without parameters but called with one argument
# below; it uses `vowel` and `word`, which are not defined in its scope, and
# it never returns `end`.
def translateWord():
if(vowel == -1) or (vowel == 0):
end = (word + "ay")
else:
end = (word[vowel:] + word[:vowel]+ "ay")
# NOTE(review): this function is never called in the snippet.
def pigLatinTranslator(string):
for word in string:
vowel = findFirstVowel(word)
translateWord(vowel)
return
print (string)
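You have used the lower method incorrectly.
You should call it like this: string = s.lower().
The parentheses change everything. Without them, s.lower is only a reference to the method object, which is why printing it shows "built-in method lower of str object".
A function or method must be followed by () to actually be called.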
Here is the corrected version of the code which should work:
s = input("Input an English sentence: \n").strip()
string = s.lower()  # lowercasing
vStr = ("a", "e", "i", "o", "u")


def findFirstVowel(word):
    """Return the index of the first vowel (one of ``vStr``) in ``word``.

    Returns ``-1`` when ``word`` contains no vowel.
    """
    # `letter` rather than `chr`, which would shadow the builtin.
    for idx, letter in enumerate(word):
        if letter in vStr:
            return idx
    return -1
def translateWord(vowel, word):
    """Return the pig-latin form of ``word``.

    ``vowel`` is the index of the first vowel in ``word`` (``-1`` if it has
    none). When the word starts with a vowel or has none, "ay" is appended;
    otherwise the leading consonants are moved to the end before "ay".
    """
    if vowel == -1 or vowel == 0:
        return word + "ay"
    return word[vowel:] + word[:vowel] + "ay"


def pigLatinTranslator(string):
    """Return ``string`` with every whitespace-separated word in pig latin."""
    # Iterate over words; iterating over the string itself yields characters.
    return " ".join(
        translateWord(findFirstVowel(word), word) for word in string.split()
    )


if __name__ == "__main__":
    # Print the translated sentence rather than the untranslated input.
    print(pigLatinTranslator(string))

Something wrong with call function

I don't know where it goes wrong. I get the correct result if I just call my valid_ISBN(isbn) function directly, but when I write to the file, every result becomes invalid. (Maybe something is wrong with the function call, but I don't know how to fix it.)
# Reads ISBNs from isbn.txt and appends "<line> valid" / "<line> invalid" to isbnOut.txt.
def main():
# Call and open the File
inFile = open("isbn.txt", "r")
for line in inFile:
# Remove dashes, spaces and the trailing newline before validating.
line_strip = line.replace("-", "").replace(" ", "").rstrip("\n")
isbn = line_strip # the function call
# Output file
# NOTE(review): `str` shadows the builtin and only ever holds the current line.
str = []
str.append(line)
outFile = open("isbnOut.txt", "a")
for i in str:
# NOTE(review): valid_ISBN() never returns the string "valid" -- the functions
# below print their verdict and return nothing -- so this comparison is always
# False and every line is written as invalid.
if valid_ISBN(isbn) == "valid":
outFile.write(i.strip() + " valid\n")
else:
outFile.write(i.strip() + " invalid\n")
inFile.close()
outFile.close()
# Checks length and digits, then delegates to partial_sums().
# When the pre-check fails it prints "invalid" and returns None.
def valid_ISBN(isbn):
if len(isbn) != 10 or (isbn[0:9].isdigit()) == False:
print("invalid")
else:
# partial_sums() has no return statement, so this returns None as well.
return partial_sums(isbn)
# Builds the running sums of the ISBN digits (a final 'x'/'X' counts as 10) and
# passes them to sum_of_s1(); the result of that call is not returned.
def partial_sums(s1):
lst1 =[]
sum1 = 0
for i in range(len(s1)):
if (i == (len(s1) -1)) and ((s1[i] == "x") or (s1[i] == "X")):
sum1 = sum1 + 10
else:
sum1 = sum1 + int(s1[i])
lst1.append(sum1)
#print(lst1)
sum_of_s1(lst1)
# Accumulates the running sums once more and passes the final total to
# checkISBN(); nothing is returned.
def sum_of_s1(s2):
lst2 = []
sum2 = 0
for i in s2:
sum2 += i
lst2.append(sum2)
#print(lst2)
checkISBN(lst2[-1])
# Prints "valid" when value is divisible by 11, otherwise "invalid"; returns None.
def checkISBN(value):
if value % 11 == 0:
print("valid")
else:
print("invalid")
main()
2 Test case for isbn text file (no new line):
019-923-3241
818-851-703X
In your main function:
Every time you read a line in from your input file you initialize str and fill it with just one value. You open your output file, do your validity checks for your one value in str, and finally write the one value to the the output file.
The next time you read the file you do the same stuff... so str isn't needed at all
Also using str as a variable name is bad form. In your console write in help(str) and you will see why.
Now to deal with your actual complaint:
Your problem is the fact that there is no new line.
when you say for line in some_open_file_handler:... what python does is populate line with everything up to the next newline character or the end of the file.
If your input file has no newlines separating the ISBNs, then the first value of line would be 019-923-3241 818-851-703X. Thus the line line_strip = line.replace("-", "").replace(" ", "").rstrip("\n") sets line_strip to 0199233241818851703X
This should fix it:
line.split() will yield ['019-923-3241','818-851-703X']
# Validate every whitespace-separated ISBN in isbn.txt and append the verdict
# for each one to isbnOut.txt. Both files are closed when the block ends.
with open("isbn.txt", "r") as inFile, open("isbnOut.txt", "a") as outFile:
    for line in inFile:
        # split() separates on any whitespace, so several ISBNs on one line
        # (and the trailing newline) are handled.
        for raw_isbn in line.split():
            isbn = raw_isbn.replace("-", "")
            # NOTE(review): this relies on valid_ISBN() returning the string
            # "valid"; the version posted in the question prints its verdict
            # and returns None, so it must return the verdict for this to work.
            if valid_ISBN(isbn) == "valid":
                outFile.write(raw_isbn + " valid\n")
            else:
                outFile.write(raw_isbn + " invalid\n")

Resources