Python - iterating over multiple files to read into a data frame (python-3.x)

Here is the working code:

from urllib.request import urlretrieve
import requests
import xlrd
import pandas as pd

# WORKING CODE
icd9_link = "https://www.cob.cms.hhs.gov/Section111/assets/section111/Section111ValidICD9-2017.xlsx"
icd9_map = pd.read_excel(icd9_link, sheet_name=0, header=0)

# NOT WORKING CODE
# Define a function which will name ICD_9_map_ and use the correct link
fx = icd"{0}"_map_ = pd.read_excel(icd"{1}"_link, sheet_name=0, header=0)

y = [9, 10]
for x in y:
    fx.format(x, x)
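String formatting cannot create new variable names on the fly, so fx.format(x, x) never calls pd.read_excel at all. A more workable pattern, shown here as a minimal sketch, is to format the link itself and keep the resulting frames in a dict keyed by version; only the ICD-9 URL appears in the question, so the idea that the ICD-10 workbook follows the same naming pattern is an assumption.

import pandas as pd

# Assumed URL pattern; only the ICD-9 link is confirmed above.
link_template = ("https://www.cob.cms.hhs.gov/Section111/assets/section111/"
                 "Section111ValidICD{0}-2017.xlsx")

icd_maps = {}
for version in (9, 10):
    link = link_template.format(version)
    icd_maps[version] = pd.read_excel(link, sheet_name=0, header=0)

# icd_maps[9] and icd_maps[10] now hold the two workbooks as DataFrames.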

Related

Can't see the items when I print the list > frozenset({'nan'})

I am trying to print the rules after using apriori. Instead of printing the actual items, my code always prints 'nan'.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori
store_data = pd.read_csv('C:\\Users\\\\datasets\\popular_words_for_apriori.csv', header=None)
store_data.head()
num_records=len(store_data)
records = []
for i in range(0, 99):
    records.append([str(store_data.values[i, j]) for j in range(0, 54)])
association_rules = apriori(records, min_support=0.0053, min_confidence=0.2, min_lift=1, min_length=3)
association_results = list(association_rules)
print(association_results)
and this is the output:
[RelationRecord(items=frozenset({'nan'}), support=1.0, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'nan'}), confidence=1.0, lift=1.0)]), RelationRecord(items=frozenset({'nan', 'algorithm'}), support=0.010101010101010102
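The 'nan' items usually come from calling str() on missing cells, which turns every NaN into the literal string 'nan' and lets it dominate the support counts. A minimal sketch of dropping those values before running apriori (the file path here is a placeholder, and the assumption that the blanks really are NaNs comes from the output above, not from the file itself):

import pandas as pd
from apyori import apriori

store_data = pd.read_csv('popular_words_for_apriori.csv', header=None)  # placeholder path

records = []
for _, row in store_data.iterrows():
    # Keep only real values; pd.notnull drops the NaN padding in short rows.
    items = [str(v) for v in row if pd.notnull(v)]
    if items:
        records.append(items)

association_results = list(apriori(records, min_support=0.0053,
                                   min_confidence=0.2, min_lift=1, min_length=3))
print(association_results)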

Import dataset from URL and convert text to CSV in Python 3

I am pretty new to Python (using Python 3) and am using pandas to import a dataset.
I need to import the dataset from this URL - https://newonlinecourses.science.psu.edu/stat501/sites/onlinecourses.science.psu.edu.stat501/files/data/leukemia_remission/index.txt
- and convert it to a CSV file, but I am getting some special characters in the converted CSV -> ��
I am downloading the txt file and converting it to CSV; is this the right approach?
Also, the converted CSV puts the entire text into one column.
from urllib.request import urlretrieve
import pandas as pd
from pandas import DataFrame
url = 'https://newonlinecourses.science.psu.edu/stat501/sites/onlinecourses.science.psu.edu.stat501/files/data/leukemia_remission/index.txt'
urlretrieve(url, 'index.txt')
df = pd.read_csv('index.txt', sep='/t', engine='python', lineterminator='\r\n')
csv_file = df.to_csv('index.csv', sep='\t', index=False, header=True)
print(csv_file)
After a successful import, I have to extract X as all columns except the first column and Y as the first column.
I'll appreciate all your help.
from urllib.request import urlretrieve
import pandas as pd
url = 'https://newonlinecourses.science.psu.edu/stat501/sites/onlinecourses.science.psu.edu.stat501/files/data/leukemia_remission/index.txt'
urlretrieve(url, 'index.txt')
df = pd.read_csv('index.txt', sep='\t',encoding='utf-16')
Y = df[['REMISS']]
X = df.drop(['REMISS'],axis=1)
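For reference, pandas can read the remote file directly, so the urlretrieve step is optional; the one-column result in the first snippet comes from sep='/t' (a forward slash instead of the tab escape '\t'). A minimal sketch, under the assumption taken from the second snippet that the file is UTF-16 encoded and tab separated:

import pandas as pd

url = ('https://newonlinecourses.science.psu.edu/stat501/sites/'
       'onlinecourses.science.psu.edu.stat501/files/data/leukemia_remission/index.txt')

# Read straight from the URL, then persist a plain CSV copy.
df = pd.read_csv(url, sep='\t', encoding='utf-16')
df.to_csv('index.csv', index=False)

Y = df[['REMISS']]                # first column as the target
X = df.drop(['REMISS'], axis=1)   # all remaining columns as features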

Is there any alternative for pd.notna (pandas 0.24.2)? It is not working in pandas 0.20.1

"Code was developed in pandas=0.24.2, and I need to make the code work in pandas=0.20.1. What is the alternative for pd.notna as it is not working in pandas version 0.20.1.
df.loc[pd.notna(df["column_name"])].query(....).drop(....)
I need an alternative to pd.notna to fit in this line of code to work in pandas=0.20.1
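pd.notna was only introduced in pandas 0.21.0 as an alias for pd.notnull, so on 0.20.1 the older spelling (or the Series method .notnull()) should behave identically. A minimal sketch with a hypothetical frame; the chained .query(....) and .drop(....) calls are left out because their arguments are not shown in the question:

import pandas as pd

# Hypothetical frame, just to illustrate the filter.
df = pd.DataFrame({"column_name": [1.0, None, 3.0]})

kept = df.loc[pd.notnull(df["column_name"])]   # works on pandas 0.20.1
kept = df.loc[df["column_name"].notnull()]     # equivalent Series-method form
# The same .query(...) / .drop(...) calls from the question can then be chained on.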
import os
import subprocess
import pandas as pd
import sys
from io import StringIO
cmd = 'NSLOOKUP email.fullcontact.com'
df = pd.DataFrame()
a = subprocess.Popen(cmd, stdout=subprocess.PIPE)
b = StringIO(a.communicate()[0].decode('utf-8'))
df = pd.read_csv(b, sep=",")
column = list(df.columns)
name = list(df.iloc[1])[0].strip('Name:').strip()
name

How can I save feature extraction as a tuple from a .wav file to Excel?

I'm using the following code, but my fbank_feat is a tuple and I can't save the feature extraction to an Excel file (in line 28 - df).
from pathlib import Path
from python_speech_features import fbank
import scipy.io.wavfile as wavfile
import numpy as np
import pandas as pd
path = Path('/home/narges/dataset/dataset-CV-16kHz-128kbps/train/').glob('**/*.wav')
wavs = [str(wavf) for wavf in path if wavf.is_file()]
wavs.sort()
print(wavs)
number_of_files=len(wavs)
spk_ID = [wavs[i].upper() for i in range(number_of_files)]
spk_sent = [wavs[i] for i in range(number_of_files)]
for i in range(number_of_files):
    (rate, sig) = wavfile.read(wavs[i])
    fbank_feat = fbank(sig, rate, winlen=0.06, winstep=0.01, nfilt=26, nfft=512, lowfreq=0, highfreq=rate/2, preemph=0.97, winfunc=np.hamming)
    print(fbank_feat)
    df = pd.DataFrame('fbank_feat')
    writer = pd.ExcelWriter('dataset.xlsx', engine='xlsxWriter')
    df.to_excel(writer, 'feat1')
    writer.save()
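python_speech_features.fbank returns a (features, energies) tuple, which is why building a DataFrame from fbank_feat fails; the frame has to come from one of the arrays, and the ExcelWriter engine name is all lowercase ('xlsxwriter'). A minimal sketch, one sheet per file, with a made-up wav list and sheet names:

import numpy as np
import pandas as pd
import scipy.io.wavfile as wavfile
from python_speech_features import fbank

wavs = ['example.wav']  # placeholder list of wav paths

with pd.ExcelWriter('dataset.xlsx', engine='xlsxwriter') as writer:
    for i, wav in enumerate(wavs):
        rate, sig = wavfile.read(wav)
        feat, energy = fbank(sig, rate, winlen=0.06, winstep=0.01, nfilt=26,
                             nfft=512, lowfreq=0, highfreq=rate / 2,
                             preemph=0.97, winfunc=np.hamming)
        # feat is a 2-D array (frames x filters), which converts cleanly to a DataFrame.
        pd.DataFrame(feat).to_excel(writer, sheet_name='feat{0}'.format(i + 1))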

Why do I get KeyError when I extract data with specific keywords from a CSV file using Python?

I am trying to use the code below to get posts with specific keywords from my CSV file, but I keep getting KeyError: "Tags1".
import re
import string
import pandas as pd
import openpyxl
import glob
import csv
import os
import xlsxwriter
import numpy as np
keywords = {"agile","backlog"}
# all your keywords
df = pd.read_csv(r"C:\Users\ferr1982\Desktop\split1_out.csv",
error_bad_lines=False)#, sep="," ,
encoding="utf-8")
output = pd.DataFrame(columns=df.columns)
for i in range(len(df.index)):
#if (df.loc[df['Tags'].isin(keywords)]):
if any(x in ((df['Tags1'][i]),(df['Tags2'][i]), (df['Tags3'][i] ),
(df['Tags4'][i]) , (df['Tags5'][i])) for x in keywords):
output.loc[len(output)] = [df[j][i] for j in df.columns]
output.to_csv("new_data5.csv", incdex=False)
Okay, it turned out that there is a little space before the "Tags" column names in my CSV file!
It is working now after I added the space to the names in the code above.
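Rather than hard-coding the stray space into the column names, the header can be normalised right after loading, which keeps lookups like df['Tags1'] working even if the file is later fixed (a small sketch using the same path as above):

import pandas as pd

df = pd.read_csv(r"C:\Users\ferr1982\Desktop\split1_out.csv", error_bad_lines=False)

# Strip stray whitespace such as the leading space in " Tags1",
# so df['Tags1'] no longer raises KeyError.
df.columns = df.columns.str.strip()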
