Morphological analysis of words with MRJob and Pymorphy2 - python-3.x

Can anyone help with the MRJob and Pymorphy2? I am new to python and hadoop. I sort of understood how to perform text tokenisation, but I cannot understand how to morphologically disassemble the resulting tokens using Pymorphy2. Maybe I am doing something obvious wrong, but I do not understand.
This is my code:
from mrjob.job import MRJob
import re, pymorphy2
morph = pymorphy2.MorphAnalyzer()
WORD_RE = re.compile(r"[\w']+")
class MRMorphWord(MRJob):
def mapper(self, _, line):
for word in WORD_RE.findall(line):
yield (word.lower(), 1)
def reducer(self, _, word):
for i in word:
p = morph.parse(word)[0]
yield p
if __name__ == '__main__':
MRMorphWord.run()
Here is the error message:
parse
word_lower = word.lower()
AttributeError: 'generator' object has no attribute 'lower'

Related

Getting "TypeError: method takes 0 positional arguments but 1 was given" with "self" as argument

I am trying to store each line of a txt file in a list. Getting the error mentioned in the title even though my methods contain self as an argument. Here's my codes:
This is corpus.py
class Corpus():
def __init__(self, corpus_name):
self.corpus_name = corpus_name
def corpus_to_list(self):
corpus = open(self.corpus_name, "r")
my_corpus = corpus.split("\n")
corpus.close()
return my_corpus
and this is main.py
import corpus as corp
def main():
corpus = corp.Corpus("review.txt")
corpus_list = corpus.corpus_to_list()
print(corpus_list)
if __name__ == "__main__":
main()

mock `readlines()` in python unit tests

I am trying to mock a readlines() object in python unit tests. What I have so far is
class Sample:
def read_file(filename):
with open(filename, "r") as f:
lines = f.readlines()
I want to write a unit test for readlines() object.
So far, I have come up with the following.
TEST_DATA = "test\ntest2\n"
#mock.patch("builtins.open")
def test_open(mock_open):
mock_open.side_effect = [mock_open(read_data=TEST_DATA).return_value]
assert mock_open.side_effect == Sample.read_file()
My question here is, how do I assert the returned value of mock_open is the same as the returned value of the actual read_file function in the Sample class? This is where I am failing and not able to go any further. Any help on this is much appreciated! Thank you in advance!
In unittest.mock docs there is an example that may help you
Here is the docs example adapted to your code.
from unittest.mock import patch
class Sample:
def read_file(filename):
with open(filename, "r") as f:
lines = f.readlines()
return lines
TEST_DATA = "test\ntest2\n"
def test_open(mock_open):
with patch('__main__.open', mock_open(read_data=TEST_DATA)) as m:
s = Sample()
res = s.read_file('foo')
assert res == TEST_DATA

How to print all the highest value places

I cannot figure out how to get python to print all the highest values as it only prints the first one it encounters.
It takes standard input from a file that has on a few lines the following:
89 Michael Dunne (grade name)
I know I can use the zip function but I cannot figure out how only print the name from it
If I add "highstudents = sorted(zip(grade,name),reverse=True)" it sorts from high to low but I do not know how to filter the name out as it prints as "(89, 'Pepe')"
The code below is the following attempt so far.
import sys
def topgrade(x):
s = max(x)
return s
def main():
s = sys.argv[1]
grade=[]
name = []
try:
with open(s,'r') as studata:
for line in studata:
try:
line = line.strip()
grade.append(int(line[0:2]))
name.append(line[3::])
except ValueError:
print("Invalid mark",line[0:2],"encountered. Skipping.")
top = topgrade(grade)
a = grade.index(top)
print("Best students:",name[a])
print("Best mark:",top)
except FileNotFoundError:
print("File not found:",s)
if __name__ == '__main__':
main()
Rather than trying to keep the students and marks in 2 separate lists (with the risk that they get out of step) it is better to use a dictionary - where the key is the mark and the value is a list of the student(s) who obtained that mark.
Then it is a simple task of just printing out the highest key, and the associated list of students. I'm using defaultdict as an easier option than having to create or append to the list for each value.
from collections import defaultdict
import sys
def main():
s = sys.argv[1]
grades = defaultdict(list)
try:
with open(s,'r') as studata:
for line in studata:
try:
line = line.strip()
grades[int(line[0:2])].append(line[3::])
except ValueError:
print("Invalid mark",line[0:2],"encountered. Skipping.")
top_mark = max(grades.keys())
print("Best students:{}".format(','.join(grades[top_mark])))
print("Best mark: {}".format(top_mark))
except FileNotFoundError:
print("File not found:",s)
if __name__ == '__main__':
main()

How to extract the last names from text file using nltk in python

import re
import nltk
from nltk.corpus import stopwords
stop = stopwords.words('english')
from nltk.corpus import wordnet
inputfile = open('file.txt', 'r')
String= inputfile.read()
def last_name(resume_text):
tokenized_sentences = nltk.sent_tokenize(resume_text)
a_list=[]
for sentence in tokenized_sentences:
a_list=(sentence.split())
s1=a_list[1:]
sentence1=''.join(s1)
tokenized_sentences = nltk.sent_tokenize(sentence1)
for sentence in tokenized_sentences:
for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')):
if hasattr(chunk, 'label') and chunk.label() == 'PERSON':
chunk = chunk[0]
(name, tag) = chunk
if tag == 'NOUN':
return name
if __name__ == '__main__':
lastname= last_name(String)
print(lastname)
I want to extract the last name from a resume. It returns the first name correctly but the second name is wrong.
How can I solve this issue?

python3 OSError: [Errno -9999] Unanticipated host error with pyaudio in speech recognition AI

I am working on an AI, like Jarvis in python3. I am using the python speech_recognition module and pyaudio and everything else required acording to this page.
https://pypi.python.org/pypi/SpeechRecognition/
I have it on a raspberry pi now, before i was using my mac which was working fine. Now sometimes i get an error when running my Jarvis code on my Raspberry pi! Not always but frequetly enough to put a wrench in our progress. And not knowing when the error will come is a big problem and we need to get rid of it. Iḿ using a blue Snowball mic. Here is my code and my error if you could help, that would be great thanks!
Traceback (most recent call last):
File "/media/pi/TRAVELDRIVE/Jarvis(10.0).py", line 172, in <module>
with m as source: r.adjust_for_ambient_noise(source)
File "/usr/local/lib/python3.4/dist-packages/speech_recognition/__init__.py", line 140, in __enter__
input=True, # stream is an input stream
File "/usr/local/lib/python3.4/dist-packages/pyaudio.py", line 750, in open
stream = Stream(self, *args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/pyaudio.py", line 441, in __init__
self._stream = pa.open(**arguments)
OSError: [Errno -9999] Unanticipated host error
Jarvis.py
#JARVIS mark 10. python 3.5.1 version
#JUST.A.RATHER.VERY.INTELEGENT.SYSTEM.
##import speech_recognition
##import datetime
##import os
##import random
##import datetime
##import webbrowser
##import time
##import calendar
from difflib import SequenceMatcher
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tokenize import PunktSentenceTokenizer
import speech_recognition as sr
import sys
from time import sleep
import os
import random
r = sr.Recognizer()
m = sr.Microphone()
#Brain functions, vocab!
what_i_should_call_someone = [""]
Good_Things = ["love","sweet","nice","happy","fun","awesome","great"]
Bad_Things = ["death","kill","hurt","harm","discomfort","rape","pain","sad","depression","depressed","angry","mad","broken","raging","rage"]
# Words that you might says in the beginning of your input, for example: "um hey where are we!?!"
Slang_Words = ["um","uh","hm","eh"]
# Put all greetings in here
Static_Greetings = ["Hey","Hi","Hello"]
# Put your AIs Name and other names just in case.
Name = ["jarvis"]
posible_answer_key_words = ["becuase","yes","no"]
Chance_that_question_was_asked_1 = 0
Chance_that_question_was_asked_2 = 0
certainty_question_was_asked = 0
Me_statment_keywords = ["you","your","yours"]
You_statment_keywords = ["i","i'm","me"]
global certainty_person_is_talking_to_me
what_i_said = ("")
Just_asked_querstion = False
the_last_thing_i_said = ("")
the_last_thing_person_said = ("")
what_person_said = ("")
what_person_said_means = [""]
what_im_about_to_say = [""]
why_im_about_to_say_it = [""]
who_im_talking_to = [""]
how_i_feel = [""]
why_do_i_feel_the_way_i_do = [""]
what_i_am_thinking = ("")
# ways to describe the nouns last said
it_pronouns = ["it","they","she","he"]
# last person place or thing described spoken or descussed!
last_nouns = [""]
# Sample of random questions so Jarvis has somthing to index to know what a question is!
Sample_Questions = ["what is the weather like","where are we today","why did you do that","where is the dog","when are we going to leave","why do you hate me","what is the Answer to question 8",
"what is a dinosour","what do i do in an hour","why do we have to leave at 6.00", "When is the apointment","where did you go","why did you do that","how did he win","why won’t you help me",
"when did he find you","how do you get it","who does all the shipping","where do you buy stuff","why don’t you just find it in the target","why don't you buy stuff at target","where did you say it was",
"when did he grab the phone","what happened at seven am","did you take my phone","do you like me","do you know what happened yesterday","did it break when it dropped","does it hurt everyday",
"does the car break down often","can you drive me home","where did you find me"
"can it fly from here to target","could you find it for me"]
Sample_Greetings = ["hey","hello","hi","hey there","hi there","hello there","hey jarvis","hey dude"]
Question_Keyword_Answer = []
Int_Question_Keywords_In_Input = []
Possible_Question_Key_Words = ["whats","what","where","when","why","isn't","whats","who","should","would","could","can","do","does","can","can","did"]
Possible_Greeting_Key_Words = ["hey","hi","hello",Name]
# In this function: Analyze the user input find out if it's (Question, Answer, Command. Etc) and what is being: Asked, Commanded, ETC.
def Analyze():
def Analyze_For_Greeting():
def Greeting_Keyword_Check():
global Possible_Greeting_Key_Words
Int_Greeting_Keywords_In_Input = []
for words in what_person_said_l_wt:
if words in Possible_Greeting_Key_Words:
Int_Greeting_Keywords_In_Input.append(words)
Amount_Greeting_Keywords = (len(Int_Greeting_Keywords_In_Input))
if Amount_Greeting_Keywords > 0:
return True
def Greeting_Sentence_Match():
for Ran_Greeting in Sample_Greetings:
Greeting_Matcher = SequenceMatcher(None, Ran_Greeting, what_person_said_l).ratio()
if Greeting_Matcher > 0.5:
print (Greeting_Matcher)
print ("Similar to Greeting: "+Ran_Greeting)
return True
Greeting_Keyword_Check()
Greeting_Sentence_Match()
#In this function: determin if the input is a question or not.
def Analyze_For_Question():
# In this function: if there is atleast one question keyword in the user input then return true.
def Question_Keyword_Check():
global Possible_Question_Key_Words
Int_Question_Keywords_In_Input = []
for words in what_person_said_l_wt:
if words in Possible_Question_Key_Words:
Int_Question_Keywords_In_Input.append(words)
Amount_Question_keywords = (len(Int_Question_Keywords_In_Input))
if Amount_Question_keywords > 0:
return True
# In this function: if the users input is simular to other sample questions, return true.
def Question_Sentence_Match():
for Ran_Question in Sample_Questions:
Question_Matcher = SequenceMatcher(None, Ran_Question, what_person_said_l).ratio()
if Question_Matcher > 0.5:
print (Question_Matcher)
print ("Similar to Question: "+Ran_Question)
return True
# In this function: if the first word of the users input is a question keyword and there is a different question keyword in the input return true.
def Question_Verb_Noun_Check():
#if you say "hey jarvis" before somthing like a question or command it will still understand
try:
for word in what_person_said_l_wt:
if word in Static_Greetings or word in Name:
print (word)
Minus_Begin_Greet1 = what_person_said_l_wt.remove(word)
print (Minus_Begin_Greet1)
return True
except IndexError:
pass
Question_Keyword_Check()
Question_Sentence_Match()
Question_Verb_Noun_Check()
if Question_Keyword_Check()==True and Question_Sentence_Match()==True and Question_Verb_Noun_Check()==True:
return True
else:
return False
# All the funtions in Analyze
Analyze_For_Greeting()
Analyze_For_Question()
Conversation=True
Conversation_Started=False
while Conversation==True:
try:
if Conversation_Started==False:
#Greeting()
Conversation_Started=True
with m as source: r.adjust_for_ambient_noise(source)
print(format(r.energy_threshold))
print("Say something!") # just here for now and testing porposes so we know whats happening
with m as source: audio = r.listen(source)
print("Got it! Now to recognize it...")
try:
# recognize speech using Google Speech Recognition
value = r.recognize_google(audio)
# we need some special handling here to correctly print unicode characters to standard output
if str is bytes: # this version of Python uses bytes for strings (Python 2)
print(u"You said {}".format(value).encode("utf-8"))
else: # this version of Python uses unicode for strings (Python 3+)
print("You said {}".format(value))
what_person_said_l = value.lower()
what_person_said_l_wt = word_tokenize(what_person_said_l)
Analyze()
except sr.UnknownValueError:
print ("what was that?")
except sr.RequestError as e:
print("Uh oh! Sorry sir Couldn't request results from Google Speech Recognition service; {0}".format(e))
except KeyboardInterrupt:
pass

Resources