import pandas as pd
import numpy as np
df = pd.read_csv("ia-infect-dublin.csv", header = None)
df.columns = ['Person_ID', 'Contacted']
df = df.sort_values(by=['Person_ID', 'Contacted'])
unique = df['Person_ID'].unique()
unique = np.append(unique, ["Start"])
matrix = pd.DataFrame(0, columns=unique, index=unique, dtype=int)
l_group = df.groupby('Person_ID')
for name, group in l_group:
i = 0
for index, rows in group.iterrows():
if i ==0:
matrix.loc[['Start'], rows['Person_ID']] += 1
previous_state = rows['Person_ID']
i = 1
else:
matrix.loc[previous_state, rows['Person_ID']] += 1
print(matrix.head())
I am making a transition matrix of where it goes through the csv file and find the frequency of changes from one person to another. A -> B -> C -> D and adds the total count. However, I receive an error:
Traceback (most recent call last):
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3621, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 6
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/vydang/Documents/FA22/BMI5007/Homeworks/hw10/testing", line 23, in <module>
matrix.loc[['Start'], rows['Person_ID']] += 1
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 961, in __getitem__
return self._getitem_tuple(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1140, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 867, in _getitem_lowerdim
section = self._getitem_axis(key, axis=i)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1202, in _getitem_axis
return self._get_label(key, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py", line 1153, in _get_label
return self.obj.xs(label, axis=axis)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py", line 3849, in xs
return self[key]
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py", line 3505, in __getitem__
indexer = self.columns.get_loc(key)
File "/Users/vydang/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3623, in get_loc
raise KeyError(key) from err
KeyError: 6
I have tried to see where the error is by:
value = '6'
if value in matrix.index:
print(matrix.loc[value])
else:
print("Not in index")
And it does populate and I have also tried:
matrix['6']
matrix.loc['6']
and no error occurs. Is there any other possible reasons that this may be occuring?
I have tried to check if the index has the 'Start' and it did.
Related
I'm using Django 3 and Python 3.7. I have a model (MySql 8 backed table) that has integer primary keys. I have code that searches for such models like so
state = State.objects.get(pk=locality['state'])
The issue is if "locality['state']" contains an empty string, I get the below error
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/fields/__init__.py", line 1768, in get_prep_value
return int(value)
ValueError: invalid literal for int() with base 10: ''
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/davea/Documents/workspace/chicommons/maps/web/tests/test_serializers.py", line 132, in test_coop_create_with_incomplete_data
assert not serializer.is_valid(), serializer.errors
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 234, in is_valid
self._validated_data = self.run_validation(self.initial_data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 433, in run_validation
value = self.to_internal_value(data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 490, in to_internal_value
validated_value = field.run_validation(primitive_value)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/fields.py", line 565, in run_validation
value = self.to_internal_value(data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/relations.py", line 519, in to_internal_value
return [
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/relations.py", line 520, in <listcomp>
self.child_relation.to_internal_value(item)
File "/Users/davea/Documents/workspace/chicommons/maps/web/directory/serializers.py", line 26, in to_internal_value
state = State.objects.get(pk=locality['state'])
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/manager.py", line 82, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 404, in get
clone = self._chain() if self.query.combinator else self.filter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 904, in filter
return self._filter_or_exclude(False, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 923, in _filter_or_exclude
clone.query.add_q(Q(*args, **kwargs))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1337, in add_q
clause, _ = self._add_q(q_object, self.used_aliases)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1362, in _add_q
child_clause, needed_inner = self.build_filter(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1298, in build_filter
condition = self.build_lookup(lookups, col, value)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1155, in build_lookup
lookup = lookup_class(lhs, rhs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/lookups.py", line 22, in __init__
self.rhs = self.get_prep_lookup()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/lookups.py", line 72, in get_prep_lookup
return self.lhs.output_field.get_prep_value(self.rhs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/fields/__init__.py", line 1770, in get_prep_value
raise e.__class__(
ValueError: Field 'id' expected a number but got ''.
Is there a more "Django" way to search for an object without an error being thrown if the object doesn't exist? I could do this
state = None if str(type(locality['state'])) != "<class 'int'>" else State.objects.get(pk=locality['state'])
but this seems unnecessarily wordy and not how Django was intended to be used.
I would choose Ask forgiveness not permission strategy
try:
state = State.objects.get(pk=int(locality['state']))
except ValueError:
state = None
You could use a logical AND to validate the dict value before using it to look up the data.
state = locality['state'] and State.objects.get(pk=locality['state'])
I'm fairly new to Python and am following a tutorial on creating a wordcloud based on a customer reviews file. The tutorial link is https://towardsdatascience.com/detecting-bad-customer-reviews-with-nlp-d8b36134dc7e
from wordcloud import WordCloud, STOPWORDS
import pandas as pd
# read data
reviews_df = pd.read_csv("Hotel_Reviews3.csv")
# append the positive and negative text reviews
reviews_df["review"] = reviews_df["Negative_Review"] + reviews_df["Positive_Review"]
# create the label
reviews_df["is_bad_review"] = reviews_df["Reviewer_Score"].apply(lambda x: 1 if x < 5 else 0)
# select only relevant columns
reviews_df = reviews_df[["review", "is_bad_review"]]
reviews_df.head()
Hotel_Reviews3.csv:
https://i.stack.imgur.com/8ZGxj.png
ERROR MESSAGE:
Traceback (most recent call last):
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3078, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\stecd\Desktop\WorldCloud\wordCloud.py", line 6, in <module>
reviews_df["review"] = reviews_df["Negative_Review"] + reviews_df["Positive_Review"]
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2688, in __getitem__
return self._getitem_column(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 2695, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\generic.py", line 2489, in _get_item_cache
values = self._data.get(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\internals.py", line 4115, in get
loc = self.items.get_loc(item)
File "C:\Users\stecd\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Positive_Review'
>>>
From the error message i'd guess that Hotel_Reviews3.csv may not have a "Positive_Review" column. It could be that the corresponding table entry is truncated or has whitespaces so that it does not match "Positive_Review".
Python code error related to lambda function
Traceback (most recent call last):
File "C:/Users/abhisheksingh75/PycharmProjects/Practice_Machine_Learning/titanic_2.py", line 34, in <module>
else x['Fare'], axis=1)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 4877, in apply
ignore_failures=ignore_failures)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 4973, in _apply_standard
results[i] = func(v)
File "C:/Users/abhisheksingh75/PycharmProjects/Practice_Machine_Learning/titanic_2.py", line 33, in <lambda>
fare_means[x['Pclass']] if pd.isnull(x['Fare'])
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 2139, in __getitem__
return self._getitem_column(key)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\frame.py", line 2146, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\generic.py", line 1842, in _get_item_cache
values = self._data.get(item)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\internals.py", line 3843, in get
loc = self.items.get_loc(item)
File "C:\Users\abhisheksingh75\PycharmProjects\project_!\venv\lib\site-packages\pandas\core\indexes\base.py", line 2527, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 117, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 139, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1265, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1273, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: (3, 'occurred at index 152')
Process finished with exit code 1
-----------------------------------------------------------------code
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from sklearn import ensemble
df_titanic = pd.read_csv('C:/Users/abhisheksingh75/Downloads/train.csv')
df_titanic = pd.read_csv('C:/Users/abhisheksingh75/Downloads/test.csv')
#Drop column which are not useful from predictive prespective
df_titanic = df_titanic.drop(['Name', 'Ticket', 'Cabin'], axis=1)
#Fill null values in age column
age_mean = df_titanic['Age'].mean()
df_titanic['Age'] = df_titanic['Age'].fillna(age_mean)
#print(df_titanic.isna().any())
#fill null values in embarked column
Embarked_mode = (df_titanic['Embarked'].mode())[0][0]
df_titanic['Embarked'] = df_titanic['Embarked'].fillna(Embarked_mode)
#print(df_titanic.isna().any())
df_titanic['Gender'] = df_titanic['Sex'].map({'female':0, 'male':1}).astype(int)
df_titanic['Port'] = df_titanic['Embarked'].map({'C':1, 'S':2, 'Q':3}).astype(int)
df_titanic = df_titanic.drop(['Sex', 'Embarked'], axis=1)
cols = df_titanic.columns.tolist()
cols = [cols[1]] + cols[0:1] + cols[2:]
fare_means = df_titanic.pivot_table('Fare', index='Pclass', aggfunc='mean')
print(fare_means)
df_titanic['Fare'] = df_titanic[['Fare','Pclass']].apply(lambda x:
fare_means[x['Pclass']] if pd.isnull(x['Fare'])
else x['Fare'], axis=1)
df_predict['Fare'] = df_predict[['Fare', 'Pclass']].apply(lambda x:
fare_means**.ix[x**['Pclass']] if pd.isnull(x['Fare'])
else x['Fare'], axis=1)
I don't know why my code was not working before...but when i used fare_mean.ix()
function..it was able to locate element position
When importing a csv file I can't seem to set the index. I can't work out if am importing the file correctly, I am doing everything in the interpreter currently, here is what I have:
df = pd.read_csv('E:/test.vbo', sep='\t', encoding='iso-8859-1', skiprows=97)
print(df.head())
this give the following:
sats time lat long velocity heading height ...
0 [data]
1 008 144403.30 003067.21791 000031.98044 010.033 097.16 +00112.43 ...
2 008 144403.40 003067.21777 000031.98036 010.584 098.58 +00113.06 ...
3 008 144403.50 003067.21765 000031.98032 010.809 099.74 +00113.72 ...
4 008 144403.60 003067.21749 000031.98025 011.231 101.05 +00114.34 ...
5 008 144403.70 003067.21728 000031.98021 011.575 102.14 +00114.89 ...
Which is fine, however, this line:
print(df.set_index('time'))
give an error:
>>> print(df.set_index('time'))
Traceback (most recent call last):
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\indexes\base.py", line 1945, in get_loc
return self._engine.get_loc(key)
File "pandas\index.pyx", line 137, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4154)
File "pandas\index.pyx", line 159, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4018)
File "pandas\hashtable.pyx", line 675, in pandas.hashtable.PyObjectHashTable.g
et_item (pandas\hashtable.c:12368)
File "pandas\hashtable.pyx", line 683, in pandas.hashtable.PyObjectHashTable.g
et_item (pandas\hashtable.c:12322)
KeyError: 'time'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\core\frame.py", line 2837, in set_index
level = frame[col]._values
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\core\frame.py", line 1997, in __getitem__
return self._getitem_column(key)
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\core\frame.py", line 2004, in _getitem_column
return self._get_item_cache(key)
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\core\generic.py", line 1350, in _get_item_cache
values = self._data.get(item)
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\core\internals.py", line 3290, in get
loc = self.items.get_loc(item)
File "C:\Users\rob.kinsey\AppData\Local\Continuum\Anaconda3\lib\site-packages\
pandas\indexes\base.py", line 1947, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\index.pyx", line 137, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4154)
File "pandas\index.pyx", line 159, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4018)
File "pandas\hashtable.pyx", line 675, in pandas.hashtable.PyObjectHashTable.g
et_item (pandas\hashtable.c:12368)
File "pandas\hashtable.pyx", line 683, in pandas.hashtable.PyObjectHashTable.g
et_item (pandas\hashtable.c:12322)
KeyError: 'time'
>>>
What am I missing please?
>>> print(df.columns.tolist())
['sats time lat long velocity heading height vert-vel dgps racceleratorpedal ast
eeringwheel pbrake glateral glongitudinal awingpitch ngearengaged nengine nwheel
fr nwheelrr nwheelfl nwheelrl mengine rdrsavailabledisplayed pwaterpump toil tdc
dc tmotorstator phvac nephsmotor pboost taircharge tcoolant tclutchoil nephspump
demanded tcellmax vbattery paerooil taerooil tmcucoldplate awingpitchdemand tmcu
_igbtmax avifileindex avitime ']
Solved, here is the correct read_csv line:
>>> df = pd.read_csv('E:/vbox_data/P1GTR__20150922144312_0001.vbo', delim_whitespace=True, encoding='iso-8859-1', header=90)
I am updating a Pandas Data Frame.
The script looks up for a product. If the product is already in data frame, it just updates it columns with accumulated new values.
If the product is not there it creates a new set of rows to insert the values of the product.
Code
for m in range(0,len(product_sales_price)):
if exact_match(str(sales_record[n-1]),str(product_sales_price[m]))==True:
total_product_daily_sales = counter * product_sales_price[m+1]
'''
print(total_product_daily_sales)
'''
total_product_daily_net_profit = total_product_daily_sales *.1
print(counter)
print(product_sales_price[m+1])
print(total_product_daily_sales)
print(total_product_daily_net_profit)
print(m)
print(product_sales_price[m])
if (product_revenue_and_net_profit_df.ix[:,0] == product_sales_price[m]).any() == True :
product_revenue_and_net_profit_df.ix[:,:][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])] = [
product_revenue_and_net_profit_df.ix[:,0][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])],
product_revenue_and_net_profit_df.ix[:,1][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[m])]+counter,
product_revenue_and_net_profit_df.ix[:,2][(product_revenue_and_net_profit_df.ix[:,
0] == product_sales_price[
m])]+total_product_daily_sales,product_revenue_and_net_profit_df.ix[:,
3][(product_revenue_and_net_profit_df.ix[:,0] == product_sales_price[
m])]+total_product_daily_net_profit]
else:
product_revenue_and_net_profit_df.ix[(product_revenue_and_net_profit_df.shape[0]+1),:] = (
[product_sales_price[m],counter,total_product_daily_sales,
total_product_daily_net_profit]
)
Run Time
<sale_frequency time (in seconds):
1
423.44
423.44
42.344
0
Bushwacker Dodge Pocket Style Fender Flare Set of 4
Traceback (most recent call last):
File "32\scriptStarter.py", line 120, in <module>
File "C:\Python Projects\Amazon-Sales\amazon_analysis.py", line 162, in <module>
print (timeit.timeit(fn + "()", "from __main__ import "+fn, number=1))
File "C:\Users\onthego\Anaconda3\lib\timeit.py", line 219, in timeit
return Timer(stmt, setup, timer).timeit(number)
File "C:\Users\onthego\Anaconda3\lib\timeit.py", line 184, in timeit
timing = self.inner(it, self.timer)
File "<timeit-src>", line 6, in inner
File "C:\Python Projects\Amazon-Sales\amazon_analysis.py", line 91, in sale_frequency
m])]+total_product_daily_net_profit]
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2122, in __setitem__
self._setitem_array(key, value)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2142, in _setitem_array
self.ix._setitem_with_indexer(indexer, value)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 448, in _setitem_with_indexer
elif np.array(value).ndim == 2:
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\series.py", line 521, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\onthego\Anaconda3\lib\site-packages\pandas\core\index.py", line 1595, in get_value
return self._engine.get_value(s, k)
File "pandas\index.pyx", line 100, in pandas.index.IndexEngine.get_value (pandas\index.c:3113)
File "pandas\index.pyx", line 108, in pandas.index.IndexEngine.get_value (pandas\index.c:2844)
File "pandas\index.pyx", line 154, in pandas.index.IndexEngine.get_loc (pandas\index.c:3704)
File "pandas\hashtable.pyx", line 375, in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:7224)
File "pandas\hashtable.pyx", line 381, in pandas.hashtable.Int64HashTable.get_item (pandas\hashtable.c:7162)
KeyError: 0
>>>
>>>
>>>