Is there an alternative to pd.notna (pandas 0.24.2)? It is not working in pandas 0.20.1 - python-3.x

"Code was developed in pandas=0.24.2, and I need to make the code work in pandas=0.20.1. What is the alternative for pd.notna as it is not working in pandas version 0.20.1.
df.loc[pd.notna(df["column_name"])].query(....).drop(....)
I need an alternative to pd.notna to fit in this line of code to work in pandas=0.20.1
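A minimal sketch of one option, assuming the goal is just the boolean NaN filter: pd.notnull is the long-standing name that pd.notna aliases (pd.notna was only added in pandas 0.21.0), so it is available in 0.20.1 and behaves the same way.
import pandas as pd
# pd.notnull predates pd.notna and exists in pandas 0.20.1; the same filter
# can be written with it (or with the Series method .notnull()).
df = pd.DataFrame({"column_name": [1.0, None, 3.0]})
filtered = df.loc[pd.notnull(df["column_name"])]
print(filtered)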

import os
import subprocess
import pandas as pd
import sys
from io import StringIO  # Python 3; the Python 2 "from StringIO import StringIO" is not needed here
cmd = 'NSLOOKUP email.fullcontact.com'
df = pd.DataFrame()
a = subprocess.Popen(cmd, stdout=subprocess.PIPE)  # on POSIX, pass the command as a list or set shell=True
b = StringIO(a.communicate()[0].decode('utf-8'))   # wrap the captured stdout in an in-memory text buffer
df = pd.read_csv(b, sep=",")
column = list(df.columns)
name = list(df.iloc[1])[0].strip('Name:').strip()  # strip() removes any of the characters 'N','a','m','e',':' from both ends
name

Related

Having issues with sys.argv()

I'm new to Python programming and am trying to implement a script using argv(). Please find the code below for reference. I want to apply a filter where Offer_ID == 'O456' with the help of argv().
Code:
-----
import pandas as pd
import numpy as np
import string
import sys
data = pd.DataFrame({'Offer_ID':["O123","O456","O789"],
                     'Offer_Name':["Prem New Ste","Prem Exit STE","Online Acquisiton Offer"],
                     'Rule_List':["R1,R2,R4","R6,R2,R3","R10,R11,R12"]})
data.loc[data[sys.argv[1]] == sys.argv[2]] # The problem is here
print(data)
With the hardcoded statement print(data.loc[data['Offer_ID'] == 'O456']) I get the output I want,
but I want to accomplish the same thing with data.loc[data[sys.argv[1]] == sys.argv[2]].
Below is the command line argument which I'm using:
python argv_demo2.py Offer_ID O456
Kindly assist me with this.
I'm a little confused as to what the issue is, but is this what you're trying to do?
import pandas as pd
import numpy as np
import string
import sys
data = pd.DataFrame({'Offer_ID':["O123","O456","O789"],
                     'Offer_Name':["Prem New Ste","Prem Exit STE","Online Acquisiton Offer"],
                     'Rule_List':["R1,R2,R4","R6,R2,R3","R10,R11,R12"]})
select = data.loc[data[sys.argv[1]] == sys.argv[2]]  # assign the filtered rows to a variable
print(select)                                        # print the filtered result, not the whole frame
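As a side note, a minimal sketch with an argument-count check (the usage message is illustrative, not part of the original answer) avoids an IndexError when the script is run without both arguments:
import sys
import pandas as pd
# Guard against missing command-line arguments before indexing sys.argv.
if len(sys.argv) != 3:
    sys.exit("usage: python argv_demo2.py <column_name> <value>")
data = pd.DataFrame({'Offer_ID':["O123","O456","O789"],
                     'Offer_Name':["Prem New Ste","Prem Exit STE","Online Acquisiton Offer"],
                     'Rule_List':["R1,R2,R4","R6,R2,R3","R10,R11,R12"]})
print(data.loc[data[sys.argv[1]] == sys.argv[2]])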

Loading data files one by one into a database table in Python pandas

I have 6 files named Data_20190823101010, Data_20190823101112, Data_20190823101214, Data_20190823101310, Data_20190823101410, and Data_20190823101510.
These are daily files to be loaded into a SQL Server DB table.
Due to size and performance reasons they need to be loaded one by one:
the Python code must pick one file at a time, process it, and load it into the DB table.
How should I write the code?
Thanks in advance.
import glob
import os
import pandas as pd
import time
from datetime import datetime
import numpy as np
folder_name = 'Data_Folder'
file_type = 'csv'
file_titles = ['C1','C2','C3','C4','C5']
df = pd.concat([pd.read_csv(f, header=None, skiprows=1, names=file_titles, low_memory=False) for f in glob.glob(folder_name + "//*Data_*")])
You can import those CSV files into a dataframe one by one and use pandas' to_sql function to connect and upload the data to the MS SQL Server DB:
from sqlalchemy import create_engine
import urllib
import pyodbc
import pandas as pd
import glob
connection= urllib.parse.quote_plus("DRIVER={SQL Server Native Client 11.0};SERVER=Server_name;DATABASE=DB Name")
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(connection))
path = r'C:\file_path'  # local drive file path
all_csv_files = glob.glob(path + "/*.csv")
for filename in all_csv_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    # append each file's rows; the default if_exists='fail' would error once the table exists
    df.to_sql('Table_Name', schema='dbo', con=engine, if_exists='append', index=False)
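If memory is the concern, here is a minimal sketch that processes the files in timestamp order and streams each one in chunks (the chunk size is an illustrative choice, and the same engine as above is assumed):
import glob
import pandas as pd
# sorted() relies on the timestamp embedded in each file name, so files load
# in chronological order, one at a time, in bounded-memory chunks.
for filename in sorted(glob.glob(r'C:\file_path\Data_*.csv')):
    for chunk in pd.read_csv(filename, chunksize=50000):
        chunk.to_sql('Table_Name', schema='dbo', con=engine,
                     if_exists='append', index=False)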

How can I save feature extraction output (a tuple) from a .wav file to Excel?

I'm using the following code, but fbank_feat is a tuple and I can't save the extracted features to an Excel file (in line 28, the df assignment).
from pathlib import Path
from python_speech_features import fbank
import scipy.io.wavfile as wavfile
import numpy as np
import pandas as pd
path = Path('/home/narges/dataset/dataset-CV-16kHz-128kbps/train/').glob('**/*.wav')
wavs = [str(wavf) for wavf in path if wavf.is_file()]
wavs.sort()
print(wavs)
number_of_files=len(wavs)
spk_ID = [wavs[i].upper() for i in range(number_of_files)]
spk_sent = [wavs[i] for i in range(number_of_files)]
for i in range(number_of_files):
    (rate, sig) = wavfile.read(wavs[i])
    fbank_feat = fbank(sig, rate, winlen=0.06, winstep=0.01, nfilt=26, nfft=512, lowfreq=0, highfreq=rate/2, preemph=0.97, winfunc=np.hamming)
    print(fbank_feat)
df = pd.DataFrame('fbank_feat')
writer = pd.ExcelWriter('dataset.xlsx', engine='xlsxWriter')
df.to_excel(writer, 'feat1')
writer.save()
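No answer is preserved here, but a minimal sketch of one likely fix, assuming python_speech_features' fbank returns a (features, energy) tuple: unpack it and build the DataFrame from the 2-D feature array (the input file name below is illustrative).
from python_speech_features import fbank
import scipy.io.wavfile as wavfile
import numpy as np
import pandas as pd
rate, sig = wavfile.read('example.wav')  # hypothetical input file
# fbank returns a tuple (features, energy); unpack it instead of wrapping the tuple.
feat, energy = fbank(sig, rate, winlen=0.06, winstep=0.01, nfilt=26, nfft=512,
                     lowfreq=0, highfreq=rate/2, preemph=0.97, winfunc=np.hamming)
df = pd.DataFrame(feat)        # one row per frame, one column per filterbank channel
df['energy'] = energy          # keep the per-frame energy alongside the features
df.to_excel('dataset.xlsx', sheet_name='feat1', engine='xlsxwriter')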

Why do I get a KeyError when I extract data with specific keywords from a CSV file using Python?

I am trying to use the code below to get posts with specific keywords from my CSV file, but I keep getting KeyError: 'Tags1'.
import re
import string
import pandas as pd
import openpyxl
import glob
import csv
import os
import xlsxwriter
import numpy as np
keywords = {"agile","backlog"}
# all your keywords
df = pd.read_csv(r"C:\Users\ferr1982\Desktop\split1_out.csv",
                 error_bad_lines=False,  # sep="," is the default
                 encoding="utf-8")
output = pd.DataFrame(columns=df.columns)
for i in range(len(df.index)):
    # if (df.loc[df['Tags'].isin(keywords)]):
    if any(x in ((df['Tags1'][i]), (df['Tags2'][i]), (df['Tags3'][i]),
                 (df['Tags4'][i]), (df['Tags5'][i])) for x in keywords):
        output.loc[len(output)] = [df[j][i] for j in df.columns]
output.to_csv("new_data5.csv", index=False)
Okay, it turned out that there is a little space before the "Tags" columns in my CSV file!
It is working now after I added the space to the names in the code above.
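A minimal sketch of a more general fix, assuming the stray whitespace in the header is the only issue: strip the column names once after reading, so the df['Tags1'] lookups work without hardcoding the space.
import pandas as pd
df = pd.read_csv(r"C:\Users\ferr1982\Desktop\split1_out.csv", error_bad_lines=False)
# Normalize header whitespace so ' Tags1' becomes 'Tags1' before any column lookups.
df.columns = df.columns.str.strip()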

Python- iterating over multiple files to read into a data frame

Here is the working code:
from urllib.request import urlretrieve
import requests
import xlrd
import pandas as pd
# WORKING CODE
icd9_link = "https://www.cob.cms.hhs.gov/Section111/assets/section111/Section111ValidICD9-2017.xlsx"
icd9_map= pd.read_excel(icd9_link, sheet_name=0, header=0)
# NOT WORKING CODE
# Define a function which will name ICD_{n}_map_ and use the correct link
fx = icd"{0}"_map_= pd.read_excel(icd"{1}"_link, sheet_name=0, header=0)
y = [9, 10]
for x in y:
    fx.format(x, x)
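No answer is preserved here, but a minimal sketch of the usual approach: store the frames in a dict keyed by ICD version instead of generating variable names (the ICD-10 URL is an assumption patterned after the ICD-9 one).
import pandas as pd
# The ICD-10 URL is assumed to follow the ICD-9 naming pattern.
links = {
    9: "https://www.cob.cms.hhs.gov/Section111/assets/section111/Section111ValidICD9-2017.xlsx",
    10: "https://www.cob.cms.hhs.gov/Section111/assets/section111/Section111ValidICD10-2017.xlsx",
}
# A dict keyed by version replaces dynamically named variables like icd9_map / icd10_map.
icd_maps = {version: pd.read_excel(url, sheet_name=0, header=0)
            for version, url in links.items()}
# Access each table as icd_maps[9] or icd_maps[10].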
