Python code error related to lambda function - python-3.x

Python code error related to lambda function
Traceback (most recent call last):
File "C:/Users/abhisheksingh75/PycharmProjects/Practice_Machine_Learning/titanic_2.py", line 34, in <module>
else x['Fare'], axis=1)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 4877, in apply
ignore_failures=ignore_failures)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 4973, in _apply_standard
results[i] = func(v)
File "C:/Users/abhisheksingh75/PycharmProjects/Practice_Machine_Learning/titanic_2.py", line 33, in <lambda>
fare_means[x['Pclass']] if pd.isnull(x['Fare'])
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 2139, in __getitem__
return self._getitem_column(key)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 2146, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\generic.py", line 1842, in _get_item_cache
values = self._data.get(item)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\internals.py", line 3843, in get
loc = self.items.get_loc(item)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\indexes\base.py", line 2527, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 117, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 139, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1265, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1273, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: (3, 'occurred at index 152')
Process finished with exit code 1
-----------------------------------------------------------------code
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from sklearn import ensemble
# Titanic preprocessing script: load the data, drop unused columns, fill
# missing values and encode the categorical columns as integers.
# NOTE(review): the second read_csv rebinds df_titanic, so the frame loaded
# from train.csv is discarded and everything below runs on test.csv only.
df_titanic = pd.read_csv('C:/Users/abhisheksingh75/Downloads/train.csv')
df_titanic = pd.read_csv('C:/Users/abhisheksingh75/Downloads/test.csv')
# Drop columns that are not useful from a predictive perspective
df_titanic = df_titanic.drop(['Name', 'Ticket', 'Cabin'], axis=1)
# Fill null values in the Age column with the column mean
age_mean = df_titanic['Age'].mean()
df_titanic['Age'] = df_titanic['Age'].fillna(age_mean)
#print(df_titanic.isna().any())
# Fill null values in the Embarked column with the most frequent value.
# mode()[0] is the first modal value; the second [0] then takes the first
# character of it (assumes Embarked holds one-letter strings - TODO confirm).
Embarked_mode = (df_titanic['Embarked'].mode())[0][0]
df_titanic['Embarked'] = df_titanic['Embarked'].fillna(Embarked_mode)
#print(df_titanic.isna().any())
# Encode Sex and Embarked as integer columns, then drop the text originals.
df_titanic['Gender'] = df_titanic['Sex'].map({'female':0, 'male':1}).astype(int)
df_titanic['Port'] = df_titanic['Embarked'].map({'C':1, 'S':2, 'Q':3}).astype(int)
df_titanic = df_titanic.drop(['Sex', 'Embarked'], axis=1)
# Build a column order with the second column moved to the front.
# NOTE(review): `cols` is not applied to the frame anywhere in this snippet.
cols = df_titanic.columns.tolist()
cols = [cols[1]] + cols[0:1] + cols[2:]
# Mean fare per passenger class. pivot_table returns a DataFrame whose row
# index is Pclass and whose single column is 'Fare'.
fare_means = df_titanic.pivot_table('Fare', index='Pclass', aggfunc='mean')
print(fare_means)
# Replace each missing Fare with the mean fare of the row's Pclass.
# NOTE(review): fare_means[...] on a DataFrame is a *column* lookup, so
# fare_means[3] searches for a column named 3 and raises the KeyError shown in
# the traceback. A row lookup would be fare_means.loc[x['Pclass'], 'Fare'].
df_titanic['Fare'] = df_titanic[['Fare','Pclass']].apply(lambda x:
fare_means[x['Pclass']] if pd.isnull(x['Fare'])
else x['Fare'], axis=1)

# Fill each missing fare with the mean fare of the passenger's class.
# `fare_means` is a DataFrame indexed by Pclass with a single 'Fare' column,
# so the class has to be looked up as a row label: `.loc[row, 'Fare']` returns
# the scalar mean. (`.ix` performed this row lookup in old pandas versions but
# was removed in pandas 1.0.)
df_predict['Fare'] = df_predict[['Fare', 'Pclass']].apply(
    lambda x: fare_means.loc[x['Pclass'], 'Fare'] if pd.isnull(x['Fare'])
    else x['Fare'],
    axis=1)
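My original code did not work because `fare_means` is a DataFrame (the result of `pivot_table`), so `fare_means[x['Pclass']]` looks for a column named 3 and raises the KeyError. When I used `fare_means.ix[...]` instead, the class was looked up among the row labels and the element was found. (In current pandas `.ix` has been removed; `.loc` does the same label-based row lookup.)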

Related

KeyError: 'longitude' when reading from csv file [duplicate]

I have successfully read a CSV file using pandas, but when I try to print a particular column from the data frame I get a KeyError. Here is the code together with the error.
import pandas as pd
reviews_new = pd.read_csv("D:\\aviva.csv")
reviews_new['review']
**
reviews_new['review']
Traceback (most recent call last):
File "<ipython-input-43-ed485b439a1c>", line 1, in <module>
reviews_new['review']
File "C:\Users\30216\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.py", line 1997, in __getitem__
return self._getitem_column(key)
File "C:\Users\30216\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.py", line 2004, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\30216\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\generic.py", line 1350, in _get_item_cache
values = self._data.get(item)
File "C:\Users\30216\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\internals.py", line 3290, in get
loc = self.items.get_loc(item)
File "C:\Users\30216\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\indexes\base.py", line 1947, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\index.pyx", line 137, in pandas.index.IndexEngine.get_loc (pandas\index.c:4154)
File "pandas\index.pyx", line 159, in pandas.index.IndexEngine.get_loc (pandas\index.c:4018)
File "pandas\hashtable.pyx", line 675, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12368)
File "pandas\hashtable.pyx", line 683, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12322)
KeyError: 'review'
**
Can someone help me with this?
I think the first step is to check what the real column names are; converting them to a list makes stray whitespace or similar problems easier to see:
print (reviews_new.columns.tolist())
I think there can be two problems:
1. Whitespace in the column names (and maybe in the data as well).
One solution is to strip the whitespace from the column names:
reviews_new.columns = reviews_new.columns.str.strip()
Or pass the parameter skipinitialspace to read_csv:
reviews_new = pd.read_csv("D:\\aviva.csv", skipinitialspace=True)
2. A separator different from the default `,`.
The solution is to pass the parameter sep:
# Separator is a semicolon.
reviews_new = pd.read_csv("D:\\aviva.csv", sep=';')
# Separator is any run of whitespace. The regex is written as a raw string so
# that `\s` is not parsed as an (invalid) string escape sequence.
reviews_new = pd.read_csv("D:\\aviva.csv", sep=r'\s+')
# Older spelling of sep=r'\s+'; deprecated since pandas 2.2.
reviews_new = pd.read_csv("D:\\aviva.csv", delim_whitespace=True)
EDIT:
Your column names contain leading whitespace, so you need one of the solutions from point 1:
print (reviews_new.columns.tolist())
['Name', ' Date', ' review']
^ ^
import pandas as pd
df=pd.read_csv("file.txt", skipinitialspace=True)
df.head()
df['review']
# Build one key per row by concatenating the three identifier columns as text
# and hashing the combined string.
# NOTE(review): the built-in hash() of a str is salted per interpreter process
# unless PYTHONHASHSEED is fixed, so these values are not stable across runs.
combined_ids = (
    dfObj['DEAL_ID'].map(str)
    + dfObj['COST_CODE'].map(str)
    + dfObj['TRADE_ID'].map(str)
)
dfObj['Hash Key'] = combined_ids.apply(hash)
# Earlier row-by-row variant, kept for reference (it wrote to a 'hash' column):
# for index, row in dfObj.iterrows():
#     dfObj.loc[index, 'hash'] = hashlib.md5(str(row[['COST_CODE', 'TRADE_ID']].values)).hexdigest()
# The active code stores the key under 'Hash Key', so that is the column that
# exists and can be printed; 'hash' is only created by the disabled variant.
print(dfObj['Hash Key'])

Key Error 6 while trying to access a matrix/array

import pandas as pd
import numpy as np
# Load the contact list. The file has no header row, so name the two columns.
df = pd.read_csv("ia-infect-dublin.csv", header=None)
df.columns = ['Person_ID', 'Contacted']
df = df.sort_values(by=['Person_ID', 'Contacted'])

# Row/column labels of the transition matrix: every distinct Person_ID plus an
# artificial 'Start' state. np.append must find one dtype that fits both the
# IDs and the string "Start", so numeric IDs are converted to strings here:
# the labels end up as '6', not 6.
unique = df['Person_ID'].unique()
unique = np.append(unique, ["Start"])
matrix = pd.DataFrame(0, columns=unique, index=unique, dtype=int)

l_group = df.groupby('Person_ID')
for name, group in l_group:
    # Every row of the group carries this Person_ID. The matrix labels are
    # strings (see above), so look the person up by its string form; using the
    # raw integer is what raised "KeyError: 6".
    person = str(name)
    previous_state = 'Start'
    # NOTE(review): as in the original, this counts Start -> person once and
    # person -> person for every further row of the group; the 'Contacted'
    # column is never read. Confirm that this is the intended transition.
    for index, rows in group.iterrows():
        matrix.loc[previous_state, person] += 1
        previous_state = person
print(matrix.head())
I am building a transition matrix: the code goes through the CSV file and counts how often the state changes from one person to another (A -> B -> C -> D), adding up the totals. However, I receive an error:
Traceback (most recent call last):
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3621, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 6
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/vydang/Documents/FA22/BMI5007/Homeworks/hw10/testing", line 23, in <module>
matrix.loc[['Start'], rows['Person_ID']] += 1
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 961, in __getitem__
return self._getitem_tuple(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1140, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 867, in _getitem_lowerdim
section = self._getitem_axis(key, axis=i)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1202, in _getitem_axis
return self._get_label(key, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1153, in _get_label
return self.obj.xs(label, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py", line 3849, in xs
return self[key]
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py", line 3505, in __getitem__
indexer = self.columns.get_loc(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3623, in get_loc
raise KeyError(key) from err
KeyError: 6
I have tried to find where the error comes from with:
value = '6'
if value in matrix.index:
print(matrix.loc[value])
else:
print("Not in index")
This does print the row. I have also tried:
matrix['6']
matrix.loc['6']
and no error occurs. Are there any other possible reasons why this may be occurring?
I have also checked that the index contains 'Start', and it does.

TypeError (not '_NoValueType') after read_csv upon using drop() and group_by

After reading a CSV into pandas, I get a TypeError when I perform drop() and also when I perform group_by (in separate statements). Any ideas what could be the cause and how I can solve this?
Relevant code:
# Read every column as text. With sep=None the python engine detects the
# delimiter itself.
main_data = pd.read_csv(
    file_path,
    encoding='utf-8',
    sep=None,
    skipinitialspace=True,
    dtype='object',
    na_filter=False,
    engine='python',
)
# Names of the fields of this data file that are flagged as not to be imported.
field_list = DataField.objects.filter(datafile=file_id, to_import=False)
list_to_drop = [field.fieldname for field in field_list]
if list_to_drop:
    print("About to drop ", len(list_to_drop), "columns: ", list_to_drop)
    # drop(..., inplace=True) returns None, so assigning its result would
    # rebind main_data to None. Take the returned copy instead; columns that
    # are not present are skipped because of errors='ignore'.
    main_data = main_data.drop(list_to_drop, axis=1, errors='ignore')
Debugging data:
JSON of the data loaded through read_csv():
{"Timestamp":{"0":"2021-10-05T10:06:59.540","1":"2021-10-05T10:06:59.665","2":"2021-10-05T10:06:59.665","3":"2021-10-05T10:06:59.665","4":"2021-10-05T10:06:59.665","5":"2021-10-05T10:06:59.767","6":"2021-10-05T10:06:59.767","7":"2021-10-05T10:07:04.645","8":"2021-10-05T10:07:04.669","9":"2021-10-05T10:07:04.669"},"Box Serial":{"0":"10-0-13-181 ","1":"10-0-13-181 ","2":"10-0-13-181 ","3":"10-0-13-181 ","4":"10-0-13-181 ","5":"10-0-13-181","6":"10-0-13-181","7":"10-0-13-162 ","8":"10-0-13-162 ","9":"10-0-13-162 "},"Event type":{"0":"Box ","1":"Advertisement","2":"Payment Server","3":"AirFi Sync Socket","4":"Telemetry Service","5":"Initial Battery","6":"Initial Temperature","7":"Box ","8":"Advertisement","9":"Payment Server"},"Status":{"0":"Booting","1":"Starting","2":"Starting","3":"Connected","4":"Starting","5":"43%","6":"20C","7":"Booting","8":"Starting","9":"Starting"},"ID":{"0":"","1":"","2":"","3":"","4":"","5":null,"6":null,"7":"","8":"","9":""},"Unnamed: 5":{"0":null,"1":null,"2":null,"3":null,"4":null,"5":null,"6":null,"7":null,"8":null,"9":null}}
Stacktrace:
2022-01-06 01:43:55,473 ERROR usecases 7621 6208778240 TRYING TO DROP COLUMNS: ['Unnamed: 5']
Traceback (most recent call last):
File "/Users/user1/PycharmProjects/Project1/clean/usecases.py", line 114, in drop_non_import_columns
main_data = main_data.drop(list_to_drop, axis=1, inplace=True, errors='ignore')
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/util/_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 4906, in drop
return super().drop(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 4150, in drop
obj = obj._drop_axis(labels, axis, level=level, errors=errors)
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 4186, in _drop_axis
result = self.reindex({axis_name: new_axis})
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/util/_decorators.py", line 324, in wrapper
return func(*args, **kwargs)
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 4772, in reindex
return super().reindex(**kwargs)
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 4818, in reindex
return self._reindex_axes(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 4591, in _reindex_axes
frame = frame._reindex_columns(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 4636, in _reindex_columns
return self._reindex_with_indexers(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 4883, in _reindex_with_indexers
new_data = new_data.reindex_indexer(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/internals/managers.py", line 676, in reindex_indexer
new_blocks = self._slice_take_blocks_ax0(
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/pandas/core/internals/managers.py", line 802, in _slice_take_blocks_ax0
max_len = max(len(mgr_locs), taker.max() + 1)
File "/Users/user1/PycharmProjects/Project1/venv/lib/python3.8/site-packages/numpy/core/_methods.py", line 40, in _amax
return umr_maximum(a, axis, None, out, keepdims, initial, where)
TypeError: int() argument must be a string, a bytes-like object or a number, not '_NoValueType'

Python Tutorial Help NLP customer reviews

I'm fairly new to Python and am following a tutorial on creating a wordcloud based on a customer reviews file. The tutorial link is https://towardsdatascience.com/detecting-bad-customer-reviews-with-nlp-d8b36134dc7e
from wordcloud import WordCloud, STOPWORDS
import pandas as pd
# Load the hotel reviews export.
reviews_df = pd.read_csv("Hotel_Reviews3.csv")
# Join the two free-text fields into one: negative text first, positive after.
reviews_df["review"] = reviews_df["Negative_Review"].add(reviews_df["Positive_Review"])
# Label a review as bad (1) when its score is below 5, otherwise 0.
reviews_df["is_bad_review"] = reviews_df["Reviewer_Score"].apply(lambda score: int(score < 5))
# Keep only the combined text and its label.
reviews_df = reviews_df.loc[:, ["review", "is_bad_review"]]
reviews_df.head()
Hotel_Reviews3.csv:
https://i.stack.imgur.com/8ZGxj.png
ERROR MESSAGE:
Traceback (most recent call last):
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\stecd\Desktop\WorldCloud\wordCloud.py", line 6, in <module>
reviews_df["review"] = reviews_df["Negative_Review"] + reviews_df["Positive_Review"]
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
>>>
From the error message I'd guess that Hotel_Reviews3.csv does not have a column named exactly "Positive_Review". The column header in the file may be truncated or contain extra whitespace, so that it does not match "Positive_Review".

Receiving KeyError: 0 while updating pandas df using Data Nitro

I am updating a pandas DataFrame.
The script looks up a product. If the product is already in the data frame, it just updates its columns with the accumulated new values.
If the product is not there, it adds a new row to hold the values of the product.
Code
# Walk product_sales_price by index. For the entry that exact_match() pairs
# with sales_record[n-1], compute that product's sales and net profit and fold
# them into product_revenue_and_net_profit_df.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements below is not visible here. `n`, `counter`, `sales_record`,
# `exact_match` and the data frame are defined outside this snippet.
for m in range(0,len(product_sales_price)):
if exact_match(str(sales_record[n-1]),str(product_sales_price[m]))==True:
# The value at m+1 is used as the price of the entry at m (presumably the list
# alternates product name and price - TODO confirm). m+1 is past the end of
# the list when m is the last index.
total_product_daily_sales = counter * product_sales_price[m+1]
'''
print(total_product_daily_sales)
'''
# Net profit is taken as 10% of the sales total.
total_product_daily_net_profit = total_product_daily_sales *.1
# Debug output; these are the values listed under "Run Time" in the question.
print(counter)
print(product_sales_price[m+1])
print(total_product_daily_sales)
print(total_product_daily_net_profit)
print(m)
print(product_sales_price[m])
# Product already present in column 0: overwrite the matching rows with
# [column 0, column 1 + counter, column 2 + sales, column 3 + net profit].
# The right-hand side is a list of four boolean-masked selections; the
# traceback reports KeyError: 0 from this assignment (amazon_analysis.py
# line 91).
# NOTE(review): .ix was deprecated in pandas 0.20 and removed in pandas 1.0.
if (product_revenue_and_net_profit_df.ix[:,0] == product_sales_price[m]).any() == True :
product_revenue_and_net_profit_df.ix[:,:][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])] = [
product_revenue_and_net_profit_df.ix[:,0][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])],
product_revenue_and_net_profit_df.ix[:,1][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])]+counter,
product_revenue_and_net_profit_df.ix[:,2][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[
m])]+total_product_daily_sales,product_revenue_and_net_profit_df.ix[:,
3][(product_revenue_and_net_profit_df.ix[:,0] == product_sales_price[
m])]+total_product_daily_net_profit]
# Product not present yet: add a row labelled shape[0]+1 holding
# [name, counter, sales, net profit].
else:
product_revenue_and_net_profit_df.ix[(product_revenue_and_net_profit_df.shape[0]+1),:] = (
[product_sales_price[m],counter,total_product_daily_sales,
total_product_daily_net_profit]
)
Run Time
<sale_frequency time (in seconds):
1
423.44
423.44
42.344
0
Bushwacker Dodge Pocket Style Fender Flare Set of 4
Traceback (most recent call last):
File "32\scriptStarter.py", line 120, in <module>
File "C:\Python Projects\Amazon-Sales\amazon_analysis.py", line 162, in <module>
print (timeit.timeit(fn + "()", "from __main__ import "+fn, number=1))
File "C:\Users\onthego\Anaconda3\lib\timeit.py", line 219, in timeit
return Timer(stmt, setup, timer).timeit(number)
File "C:\Users\onthego\Anaconda3\lib\timeit.py", line 184, in timeit
timing = self.inner(it, self.timer)
File "<timeit-src>", line 6, in inner
File "C:\Python Projects\Amazon-Sales\amazon_analysis.py", line 91, in sale_frequency
m])]+total_product_daily_net_profit]
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2122, in __setitem__
self._setitem_array(key, value)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2142, in _setitem_array
self.ix._setitem_with_indexer(indexer, value)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 448, in _setitem_with_indexer
elif np.array(value).ndim == 2:
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\series.py", line 521, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\index.py", line 1595, in get_value
return self._engine.get_value(s, k)
File "pandas\index.pyx", line 100, in pandas.index.IndexEngine.get_value (pandas\index.c:3113)
File "pandas\index.pyx", line 108, in pandas.index.IndexEngine.get_value (pandas\index.c:2844)
File "pandas\index.pyx", line 154, in pandas.index.IndexEngine.get_loc (pandas\index.c:3704)
File "pandas\hashtable.pyx", line 375, in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:7224)
File "pandas\hashtable.pyx", line 381, in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:7162)
KeyError: 0
>>>
>>>
>>>

Resources