Converting timeseries into datetime format in python - python-3.x

I have a column of dates called 'Activity_Period' in the format '200507' (meaning July 2005), and I want to convert it to a datetime in year-month ('%Y-%m') form in Python.
I tried to use datetime.strptime, but it reports that the input has to be a string and not a Series.
df.Activity_Period=datetime.strptime(df.Activity_Period, '%Y-%m')
The following is the error I get
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-40-ac32eb324a0b> in <module>
----> 1 df.Activity_Period=datetime.strptime(df.Activity_Period, '%Y-%m')
TypeError: strptime() argument 1 must be str, not Series

import datetime as dt
import pandas as pd
#simple example
timestamp = '200507'
result = dt.datetime.strptime(timestamp, '%Y%m')
print(result)
#Example using pandas series
series = pd.Series(['200507', '200508', '200509', '200510'])
series = pd.to_datetime(series, format='%Y%m')
print(series)
#for your DF
df['Activity_Period'] = pd.to_datetime(df['Activity_Period'], format='%Y%m')

Related

Changes csv row value

This is my code:
import pandas as pd
import re
# reading the csv file
patients = pd.read_csv("partial.csv")
# updating the column value/data
for patient in patients.iterrows():
cip=patient['VALOR_ID']
new_cip = re.sub('^(\w+|)',r'FIXED_REPLACED_STRING',cip)
patient['VALOR_ID'] = new_cip
# writing into the file
df.to_csv("partial-writer.csv", index=False)
print(df)
I'm getting this message:
Traceback (most recent call last):
File "/home/jeusdi/projects/workarea/salut/load-testing/load.py", line 28, in
cip=patient['VALOR_ID']
TypeError: tuple indices must be integers or slices, not str
EDIT
From the code above you might think I need to set the same fixed value on all rows.
In fact, I need to loop over the rows, generate a random string for each one, and set it on that row.
Code above would be:
for patient in patients.iterrows():
new_cip = generate_cip()
patient['VALOR_ID'] = new_cip
Use Series.str.replace. I am not sure about the | in the regex, though; maybe it should be removed:
df = pd.read_csv("partial.csv")
df['VALOR_ID'] = df['VALOR_ID'].str.replace('^(\w+|)',r'FIXED_REPLACED_STRING')
#if function return scalars
df['VALOR_ID'] = df['VALOR_ID'].apply(generate_cip)
df.to_csv("partial-writer.csv", index=False)

datetime index to datetime series not working in Pandas?

I am trying to convert a datetime index to a datetime series but I get an error:
ticks = pd.date_range(start = '2019-12-30', end = '2020-02-11', periods = 6)
ticks.to_datetime()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-79-f41607e594d5> in <module>
----> 1 ticks.to_datetime(dayfirst=False)
AttributeError: 'DatetimeIndex' object has no attribute 'to_datetime'
Why and what should I do?
Here ticks is already a DatetimeIndex, so no conversion is necessary:
ticks = pd.date_range(start = '2019-12-30', end = '2020-02-11', periods = 6)
If you want a Series:
s = ticks.to_series()
Or:
s = pd.Series(ticks)
If you want to convert to datetimes, that is possible with pandas.to_datetime:
pd.to_datetime(ticks)

AttributeError: 'DataFrame' object has no attribute 'NET_NAME'

python 3.7
The task: add a new column to the resulting DataFrame based on two conditions:
if the value in the NET_NAME column equals one of the values in the list and the value in the ECELL_TYPE column is LTE, then assign the value from the ENODEB_NAME column to the SHARING column.
import csv
import os
import pandas as pd
import datetime
import numpy as np
from time import gmtime, strftime
WCOUNT=strftime("%V", gmtime())
WCOUNT = int(WCOUNT)
WCOUNT_last = int(WCOUNT)-1
os.environ['NLS_LANG'] = 'Russian.AL32UTF8'
cell_file_list=pd.read_excel('cdt_config.xlsx',sheet_name ='cdt_config',index_col='para_name')
filial_name_list=pd.read_excel('FILIAL_NAME.xlsx')
gcell_file_name1=cell_file_list.para_value.loc['ucell_file_name']
ecell_file_name=cell_file_list.para_value.loc['ecell_file_name']
cols_simple=['RECDATE','REGION_PHOENIX_NAME','NET_NAME','CELL_NAME_IN_BSC','ENODEB_NAME','ECELL_TYPE','NRI_ADDRESS', 'NRI_BS_NUMBER','NRI_SITEID','STOPTIME', ]
cols_export=['GSM', 'UMTS', 'LTE', 'TOTAL', 'NWEEK', 'SHARING' ]
ecell_df=df = pd.read_csv(ecell_file_name, sep=",",encoding='cp1251',
dtype={'NRI_SITEID': str})
ecell_df=ecell_df.rename(columns={"RECDATE.DATE": "RECDATE"})
ecell_df=ecell_df.rename(columns={"ECELL_MNEMONIC": "CELL_NAME_IN_BSC"})
#replace ","
ecell_df.STOPTIME=pd.to_numeric(ecell_df.STOPTIME.replace(',', '', regex=True), errors='coerce')/3600
ecell_df=ecell_df[cols_simple]
#pivot ecell table
ecell_sum_df=pd.pivot_table(ecell_df,values='STOPTIME',index=['RECDATE','NRI_SITEID','REGION_PHOENIX_NAME','NET_NAME','ENODEB_NAME','ECELL_TYPE'],aggfunc='sum')
ecell_sum_df=ecell_sum_df.fillna(0)
#create a empty column with the same index as the pivot table.
ecell_export_df= pd.DataFrame(index=ecell_sum_df.index.copy())
ecell_export_df=ecell_export_df.assign(LTE=0)
ecell_export_df.LTE=ecell_sum_df.STOPTIME
ecell_export_df['SHARING'] = 0
ecell_export_df.SHARING.replace(ecell_export_df.NET_NAME in filial_name_list, ENODEB_NAME,inplace=True)
print(ecell_export_df)
#print (ecell_export_df)
del ecell_df
del ecell_sum_df
export_df=pd.concat([ecell_export_df],join='outer',axis=1)
export_df=export_df.fillna(0)
export_df['TOTAL'] = export_df.sum(axis=1)
export_df['NWEEK'] = WCOUNT_last
del ecell_export_df
#################################################
Below is the error message:
Traceback (most recent call last):
File "C:/Users/PycharmProjects/ReportCDT/CDT 4G_power pivot.py", line 43, in <module>
ecell_export_df.SHARING.replace(ecell_sum_df.NET_NAME in filial_name_list, ENODEB_NAME,inplace=True)
File "C:\Users\vavrumyantsev\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'NET_NAME'
Your traceback contains: DataFrame object has no attribute NET_NAME,
meaning actually that this DataFrame has no column of this name.
This message pertains to ecell_sum_df.NET_NAME (also contained in
the traceback), so let's look how you created this DataFrame (slightly
reformatted for readability):
ecell_sum_df=pd.pivot_table(ecell_df, values='STOPTIME',\
index=['RECDATE', 'NRI_SITEID', 'REGION_PHOENIX_NAME', 'NET_NAME',
'ENODEB_NAME', 'ECELL_TYPE'], aggfunc='sum')
Note that NET_NAME is a part of the index list, so in the DataFrame
created it is a part of the MultiIndex, not an "ordinary" column.
So Python is right to display this message.
Maybe you should move this level of the MultiIndex to a "normal" column, e.g. with ecell_sum_df.reset_index(level='NET_NAME')?

NameError: name 'Series' is not defined while using Jupyter Lab

I am new to Python. I am using Anaconda and trying to write some Python code in it. I have written two lines of code in which I am trying to create a Series from a dictionary.
Hi @ApurvG, there is no function called Series in native Python.
If your question is about pandas series you can do it like this:
import pandas as pd
dictionary={'apurb':400}
series = pd.Series(dictionary)
Jupyter:-
salary = {'John': 5000, 'Rob': 6000, 'Wills':7500, 'Ashu': 5500}
salary
se3 = Series(salary)
NameError Traceback (most recent call last)
C:\Users\ADMINI~1\AppData\Local\Temp/ipykernel_13716/1803553183.py in
----> 1 se3 = Series(salary)
NameError: name 'Series' is not defined
import pandas as pd
se4 = pd.Series(salary)
se4
John 5000
Rob 6000
Wills 7500
Ashu 5500
dtype: int64

Creating a big pandas Dataframe [duplicate]

This question already has answers here:
TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"
(5 answers)
Closed 5 years ago.
My code is retrieving historical data of 365 days back from today of 50 different stocks.
I want to store all those data in one dataframe to make it easier to analyse, here I want to filter all those data, date wise and calculate number of advancing/declining stocks at a given date.
My code:
import datetime
from datetime import date, timedelta
import pandas as pd
import nsepy as ns
#setting default dates
end_date = date.today()
start_date = end_date - timedelta(365)
#Deriving the names of 50 stocks in Nifty 50 Index
nifty_50 = pd.read_html('https://en.wikipedia.org/wiki/NIFTY_50')
nifty50_symbols = nifty_50[1][1]
for x in nifty50_symbols:
data = ns.get_history(symbol = x, start=start_date, end=end_date)
big_df = pd.concat(data)
Output:
Traceback (most recent call last):
File "F:\My\Getting data from NSE\advances.py", line 27, in <module>
big_df = pd.concat(data)
File "C:\Users\Abinash\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\core\reshape\concat.py", line 212, in concat
copy=copy)
File "C:\Users\Abinash\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\core\reshape\concat.py", line 227, in __init__
'"{name}"'.format(name=type(objs).__name__))
TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"
I am very new to python, I went through the tutorial of pandas and saw that pandas.concat was used to merge multiple dataframes into one. I might have understood that wrong.
The first argument to pd.concat has to be an iterable of pandas objects, for example a list of DataFrames, not a single DataFrame.
results = []
for x in nifty50_symbols:
data = ns.get_history(symbol = x, start=start_date, end=end_date)
results.append(data)
big_df = pd.concat(results)

Resources