Python Tutorial Help NLP customer reviews - python-3.x

I'm fairly new to Python and am following a tutorial on creating a wordcloud based on a customer reviews file. The tutorial link is https://towardsdatascience.com/detecting-bad-customer-reviews-with-nlp-d8b36134dc7e
from wordcloud import WordCloud, STOPWORDS
import pandas as pd
# read data
reviews_df = pd.read_csv("Hotel_Reviews3.csv")
# append the positive and negative text reviews
reviews_df["review"] = reviews_df["Negative_Review"] + reviews_df["Positive_Review"]
# create the label
reviews_df["is_bad_review"] = reviews_df["Reviewer_Score"].apply(lambda x: 1 if x < 5 else 0)
# select only relevant columns
reviews_df = reviews_df[["review", "is_bad_review"]]
reviews_df.head()
Hotel_Reviews3.csv:
https://i.stack.imgur.com/8ZGxj.png
ERROR MESSAGE:
Traceback (most recent call last):
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\stecd\Desktop\WorldCloud\wordCloud.py", line 6, in <module>
reviews_df["review"] = reviews_df["Negative_Review"] + reviews_df["Positive_Review"]
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
>>>

From the error message, I'd guess that Hotel_Reviews3.csv does not have a column named exactly "Positive_Review". The column header in the file may be truncated or contain extra whitespace, so it does not match "Positive_Review". Printing reviews_df.columns right after reading the file will show the exact column names.

Related

Key Error 6 while trying to access a matrix/array

import pandas as pd
import numpy as np
df = pd.read_csv("ia-infect-dublin.csv", header = None)
df.columns = ['Person_ID', 'Contacted']
df = df.sort_values(by=['Person_ID', 'Contacted'])
unique = df['Person_ID'].unique()
unique = np.append(unique, ["Start"])
matrix = pd.DataFrame(0, columns=unique, index=unique, dtype=int)
l_group = df.groupby('Person_ID')
for name, group in l_group:
i = 0
for index, rows in group.iterrows():
if i ==0:
matrix.loc[['Start'], rows['Person_ID']] += 1
previous_state = rows['Person_ID']
i = 1
else:
matrix.loc[previous_state, rows['Person_ID']] += 1
print(matrix.head())
I am building a transition matrix: the code goes through the CSV file and counts how often there is a change from one person to another (A -> B -> C -> D), adding up the total counts. However, I receive an error:
Traceback (most recent call last):
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3621, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 6
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/vydang/Documents/FA22/BMI5007/Homeworks/hw10/testing", line 23, in <module>
matrix.loc[['Start'], rows['Person_ID']] += 1
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 961, in __getitem__
return self._getitem_tuple(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1140, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 867, in _getitem_lowerdim
section = self._getitem_axis(key, axis=i)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1202, in _getitem_axis
return self._get_label(key, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1153, in _get_label
return self.obj.xs(label, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py", line 3849, in xs
return self[key]
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py", line 3505, in __getitem__
indexer = self.columns.get_loc(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3623, in get_loc
raise KeyError(key) from err
KeyError: 6
I have tried to see where the error is by:
value = '6'
if value in matrix.index:
print(matrix.loc[value])
else:
print("Not in index")
And it does populate and I have also tried:
matrix['6']
matrix.loc['6']
and no error occurs. Are there any other possible reasons why this may be occurring?
I have tried to check if the index has the 'Start' and it did.

keyerror:0 though index starts from 0

This is my dataset.
I basically want the number of words in each list from the words column. This is my code:
for ind,word in enumerate(brand6['words']):
brand6['len']=len(brand6[ind][word])
However, I am getting an error like this:
Traceback (most recent call last):
File "C:\Users\darshini.nagaraj\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\darshini.nagaraj\Desktop\example.py", line 28, in <module>
print((brand6[ind][word]))
File "C:\Users\darshini.nagaraj\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\darshini.nagaraj\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in
pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 0
I have been working on this for 3 days. Any help will be much appreciated!!
Thanks well in advance!!
try this
word_count=[]
for i in range(brand6.shape[0]):
word_count.append(len(brand6['words'][i]))
brand6['word_count']=word_count
The last line will create a new column named 'word_count' in your dataset.

Error in running Python code for PLSR modelling

I am trying to develop a model using PLSR (Partial Least Squares Regression) in Python 3, using the code provided at https://github.com/pgbrodrick/ensemblePLSR. Sample data is also provided.
When I try to run the code, it gives me an error:
>>> python3 ensemble_plsr.py example_settings.txt
I am using Python (3.7.3), python modules scikit-learn (0.20.2) and pandas (0.23.3).
/usr/lib/python3/dist-packages/sklearn/externals/joblib.py:1: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
import imp
n bad bands 57
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/pandas/core/indexes/base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: -1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ensemble_plsr.py", line 173, in <module>
df.pop(col)
File "/usr/lib/python3/dist-packages/pandas/core/generic.py", line 760, in pop
result = self[item]
File "/usr/lib/python3/dist-packages/pandas/core/frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "/usr/lib/python3/dist-packages/pandas/core/frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "/usr/lib/python3/dist-packages/pandas/core/generic.py", line 2491, in _get_item_cache
values = self._data.get(item)
File "/usr/lib/python3/dist-packages/pandas/core/internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "/usr/lib/python3/dist-packages/pandas/core/indexes/base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: -1
In short, you're attempting to remove columns for which there is no reference at row 173 of ensemble_plsr.py, hence the KeyError during execution. What's happening under the hood is that when Python attempts to execute the pop method on the DataFrame for the unspecified/non-existent column, it raises that error. There are different ways to resolve this but this solution will resolve the error you are seeing:
Replace rows 172 & 173 in ensemble_plsr.py with the following:
for col in sf.get_setting('ignore columns'):
if col != 'nothing_here':
df.pop(col)
Replace row 16 in example_settings.txt with the following:
ignore columns(any other columns to remove) = nothing_here
Good news, you're done with this issue. Bad news, you're going to hit the next error down the line but you're on your way!

How do i fix errors in pandas lib while working in python?

I was trying this out: selecting the necessary data with pandas, using a data set from Quandl, when this showed up on execution.
import pandas as pd
import quandl
df = quandl.get('WIKI/GOOGL')
print(df.head())
df = df[['Adj. Open','Adj. High','Adj. Low','Adj. Close','Adj. Volume']]
print(df.head())
df['HL_PCT']=(df['Adj. High']- df['Adj. Close'])/df['Adj. close'] *100.0
df['PCT_change'] = (df['Adj .Close']-df['Adj. open'])/df['Adj. Open']*100.0
df = df[['Adj. Close','HL_PCT','PCT_change','Adj. Volume']]
So I got that as an error message.
Update:
I updated my code with the right capitalization,
and now I get this error:
Traceback (most recent call last):
File "D:\Program Files\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2656, in get_loc
return self._engine.get_loc(key)
File "pandas_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Adj .Close'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "sentdex2.py", line 8, in
df['PCT_change'] = (df['Adj .Close']-df['Adj. Open'])/df['Adj. Open'] *100.0
File "D:\Program Files\Python37\lib\site-packages\pandas\core\frame.py", line 2927, in getitem
indexer = self.columns.get_loc(key)
File "D:\Program Files\Python37\lib\site-packages\pandas\core\indexes\base.py", line 2658, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Adj .Close'
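Replace Adj. close with Adj. Close (note the capital C). The remaining KeyError: 'Adj .Close' comes from a misplaced space in df['Adj .Close']: the column is named 'Adj. Close', with the period directly after "Adj" and the space after the period.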

how to authenticate in jira with python

from jira.client import JIRA
jira_options={'server': 'https://abcjira.atlassian.net/login?dest- url=%2Fsecure%2FMyJiraHome.jspa'}
jira=JIRA(options=jira_options,basic_auth=('user','password'))
I want to do basic authentication in Jira with Python. I wrote the above code, but it gives me a traceback. Could anyone please tell me what the problem is here?
Traceback (most recent call last):
File "lab.py", line 5, in <module>
jira=JIRA(options=jira_options,basic_auth=('user','password'))
File "C:\Python34\lib\site-packages\jira\client.py", line 261, in __init__
si = self.server_info()
File "C:\Python34\lib\site-packages\jira\client.py", line 1619, in server_info
return self._get_json('serverInfo')
File "C:\Python34\lib\site-packages\jira\client.py", line 2040, in _get_json
raise e
File "C:\Python34\lib\site-packages\jira\client.py", line 2037, in _get_json
r_json = json_loads(r)
File "C:\Python34\lib\site-packages\jira\utils.py", line 81, in json_loads
return json.loads(r.text)
File "C:\Python34\lib\json\__init__.py", line 318, in loads
return _default_decoder.decode(s)
File "C:\Python34\lib\json\decoder.py", line 343, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python34\lib\json\decoder.py", line 361, in raw_decode
raise ValueError(errmsg("Expecting value", s, err.value)) from None
ValueError: Expecting value: line 4 column 1 (char 3)

Resources