I need to read all the csv files on a specific folder and then apply a specific process (calculate some parametres) on each file and for each file I need to ceate an excel file in which I have to store the results.
For now I have been able to apply the calculation for each file manually, but I need to automate the process, which means the only input should be the folder's name instead of going through the folder and each csv file is considered as an input.
I have been advised to use Pandas for the matter but I couldn't figure out how.
My question is, is it even possible to do it with Python?
This is a part of my code :
main.py
from Dlt2Excel_Fct import *
from ModePrvPblc_Fct import *
from FilePaths import filename_csv, filename_asc, filepath
start = time.time()
dlt2excel()
ModePrvPblcGps()
duree = time.time()-start
print('duree', duree)
Dlt2Excel_Fct.py
import pandas as pd
import xlsxwriter
import sys
import os
from tkinter import filedialog
from tkinter import *
from FilePaths import filename_csv
def dlt2excel():
""" Enter the directory of the exported csv file"""
user_input=filename_csv
# user_input = input("Enter the path of your file: ")
assert os.path.exists(user_input), "I did not find the file at, "+str(user_input)
f = open(user_input,'r+')
print("We found your file!")
"""Organize the exported file """
inputFile = f
workbook = xlsxwriter.Workbook('output01.xlsx')
worksheet = workbook.add_worksheet()
exportFile = open('output01.xlsx', 'w')
workbook.close()
for line in inputFile:
new_line = line.replace(',', '\t')
exportFile.write(new_line)
f.close()
inputFile.close()
exportFile.close()
df = pd.read_table('output01.xlsx', error_bad_lines=False) # for '\t'
df.to_excel('output1.xlsx', 'Sheet1')
"""Count the number of duplicates """
data = pd.read_excel(r'output1.xlsx', header = 0)
data.count()
data['count'] = data.groupby(['Payload'])['Index'].transform('count')
data.to_excel('OutputDLT.xlsx', sheet_name='sheet1', index=False)
print("Conversion is done!\n")
ModePrvPblc_Fct.py
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
#from ExcelName import filepath
from FilePaths import filepath
filename =filepath
def ModePrvPblcGps():
file_name='OutputDLT.xlsx'
wb = openpyxl.load_workbook(file_name, read_only=False)
ws = wb.active
sheet = wb['sheet1']
ls = []
PsgPrv=0
PsgPblc=0
for row in ws.iter_rows():
for cell in row:
#print('Cell: [{}] is type({}): "{}"'.format(cell.coordinate, type(cell.value).__name__, cell.value))
if cell.value == 'SQLR: K<ATT_PRIVACY_MODE> V<1>':
PsgPrv+=1
if cell.value == 'SQLR: K<ATT_PRIVACY_MODE> V<0>':
PsgPblc+=1
print('Passage en mode public: ', PsgPblc)
print('Passage en mode privé: ', PsgPrv)
wb = load_workbook(filename)
ws = wb.worksheets[0]
parametres = (
['Passage en mode privé ', PsgPrv],
['Passage en mode public ', PsgPblc],
)
for row_ in (parametres):
ws.append(row_ )
wb.save(filename)
FilePaths.py
import tkinter as tk
from tkinter.simpledialog import askstring
from tkinter import filedialog
import os
import openpyxl
import warnings
warnings.filterwarnings("ignore")
root = tk.Tk()
folder_selected = filedialog.askdirectory()
print(folder_selected)
path=folder_selected + "/"
nom = askstring("Name", "Enter the name of the result file")
print(nom)
if nom == None:
nom= str(None)
else:
nom = nom +".xlsx"
if not os.path.exists(path):
os.makedirs(path)
filepath = path +nom
if not os.path.isfile(filepath):
wb = openpyxl.Workbook(filepath)
wb.save(filename = filepath)
root.file_name = filedialog.askopenfilename(initialdir = "/",title = "Select csv file",filetypes = (("csv files","*.csv"),("all files","*.*")))
filename_csv=root.file_name
print (filename_csv)
root.file_name1 = filedialog.askopenfilename(initialdir = "/",title = "Select trace file",filetypes = (("asc files","*.asc"),("all files","*.*")))
filename_asc=root.file_name1
print (filename_asc)
root.withdraw()
I've many folders that contain multiple csv file that's why I need to automate the process.
Related
I'm trying to make it so that the user chooses which function to run using if.
import os
import csv
import collections
import datetime
import pandas as pd
import time
import string
import re
import glob, os
folder_path = 'C:/ProgramData/WebPort/system/tags'
folder2_path = 'C:/ProgramData/WebPort/system'
search2_str = '"Prefix"'
print("Choices:\n 1 - Read from CSV\n 2 - Read from WPP")
x = input("Please enter your choice:\n")
x = int(x)
if x == 1:
csv_file_list = glob.glob(folder_path + '/*.csv')
with open("csv.txt", 'w') as wf:
for file in csv_file_list:
print(glob.glob(folder_path + '/*.csv'))
with open(file) as rf:
for line in rf:
if line.strip(): # if line is not empty
if not line.endswith("\n"):
line+="\n"
wf.write(line)
print('Reading from .csv')
elif x == 2:
for root, dirs, files in os.walk(folder2_path):
for file in files:
if file.endswith(".wpp"):
print(os.path.join(root, file))
with open(os.path.join(root, file), 'r') as fr, open ("wpp.txt",'a', encoding='utf-8') as fw:
for i,line in enumerate(fr):
if line.find(search2_str) != -1:
fw.write(line)
print('Reading from .wpp')
else:
print('wrong choice')
Getting Invalid syntax in line 34 using this.
I have the following code:
from selenium import webdriver
import sys
import time
import os
import pyautogui
import webbrowser
import openpyxl
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from openpyxl import Workbook
from openpyxl import load_workbook
chrome_path =r"C:\Users\Desktop\webdriver\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("url string")
def login(driver):
elem = driver.find_element_by_xpath("""//*[#id="usernameField"]""")
elem.send_keys("username")
elem2 = driver.find_element_by_xpath("""//*[#id="passwordField"]""")
elem2.send_keys("password")
driver.find_element_by_xpath("""//* [#id="loginForm"]/table/tbody/tr[4]/td[2]/input""").click()
driver.find_element_by_xpath("""//*[#id="nav"]/ul/li[2]/a""").click()
driver.find_element_by_xpath("""//*[#id="check1"]""").click()
def sendvalues(driver):
wb = load_workbook('prueba.xlsx')
coma = ","
ws = wb.active
buscar = driver.find_element_by_xpath(""" //*[#id="wrapper"]/form/div[2]/input[1]""")
rut = driver.find_element_by_xpath("""//*[#id="rut"]""")
dv = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[3]/td[2]/input""")
nombre = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[4]/td[2]/input""")
rutvalue = ws.cell(1,1).value
dvvalue = ws.cell(1,2).value
nombrevalue = ws.cell(1,3).value
rut.send_keys(rutvalue)
dv.send_keys(dvvalue)
nombre.send_keys(nombrevalue)
buscar.click()
table_elements = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[#class = 'grilla']")))
for table_element in table_elements:
for row in table_element.find_elements_by_xpath(".//tr"):
text_file = open("Output2.txt", "a")
text_file.write(str(rutvalue)+str(coma)+str(row.text)+'\n')
text_file.close()
clear(driver)
def clear(driver):
rut = driver.find_element_by_xpath("""//*[#id="rut"]""")
dv = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[3]/td[2]/input""")
nombre = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[4]/td[2]/input""")
rut.clear()
dv.clear()
nombre.clear()
login(driver)
sendvalues(driver)
The code extracts a table after sending an Excel file's single row values to a website. I have three columns rutvalue, dvvalue and nombrevalue in the .xlsx file and I need to send those values for each row to the specific input fields (rut value, dv value, name value) on the web page and get the table of results. I need to send the values of each Excel row to the input fields.
Is there a method to develop a cycle iterating each row and get the results?
You can create a class for the scraping code, and in another file, read the xlsx file and call the class in each iteration.
Look this example.
class ScrapingCode():
def __init__(self, rut, dv, name):
self.rut = rut
self.dv = dv
self.name = name
def run(self):
#for use the values here, use self.name_var
rut.send_keys(self.rut)
...code scraping...
** another file
from app.folder.file import ScrapingCode
# read csv
for row in rows:
scrapingCode = ScrapingCode(row[0], row[1], row[2])
scrapingCode.run()
i changed and improved the code with satisfactory results:
from selenium import webdriver
import sys
import time
import os
import pyautogui
import webbrowser
import openpyxl
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from openpyxl import Workbook
from openpyxl import load_workbook
chrome_path =r"C:\Users\Desktop\webdriver\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("myurl")
def login(driver):
elem = driver.find_element_by_xpath("""//*[#id="usernameField"]""")
elem.send_keys("myusername")
elem2 = driver.find_element_by_xpath("""//*[#id="passwordField"]""")
elem2.send_keys("mypassword")
driver.find_element_by_xpath("""//*[#id="loginForm"]/table/tbody/tr[4]/td[2]/input""").click()
driver.find_element_by_xpath("""//*[#id="nav"]/ul/li[2]/a""").click()
driver.find_element_by_xpath("""//*[#id="check1"]""").click()
def sendvalues(driver):
wb = load_workbook('prueba.xlsx')
ws = wb.active
buscar = driver.find_element_by_xpath(""" //*[#id="wrapper"]/form/div[2]/input[1]""")
rut = driver.find_element_by_xpath("""//*[#id="rut"]""")
dv = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[3]/td[2]/input""")
nombre = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[4]/td[2]/input""")
rutvalue= ws.cell(row=x, column=1).value
dvvalue= ws.cell(row=x, column=2).value
nombrevalue= ws.cell(row=x, column=3).value
rut.send_keys(rutvalue)
dv.send_keys(dvvalue)
nombre.send_keys(nombrevalue)
buscar.click()
table(driver)
def table(driver):
rut = driver.find_element_by_xpath("""//*[#id="rut"]""")
dv = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[3]/td[2]/input""")
nombre = driver.find_element_by_xpath("""//*[#id="wrapper"]/form/table[1]/tbody/tr[4]/td[2]/input""")
coma = ","
wb = load_workbook('prueba.xlsx')
ws = wb.active
rutvalue= ws.cell(row=x, column=1).value
table_elements = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[#class = 'grilla']")))
for table_element in table_elements:
for row in table_element.find_elements_by_xpath(".//tr"):
text_file = open("Output2.txt", "a")
text_file.write(str(rutvalue)+str(coma)+str(row.text)+'\n')
text_file.close()
rut.clear()
dv.clear()
nombre.clear()
login(driver)
for x in range(1,1000):
sendvalues(driver)
I defined the "for loop" at the end of the code and defined in the sendvalues function the factor x as seen in the following part:
rutvalue= ws.cell(row=x, column=1).value
dvvalue= ws.cell(row=x, column=2).value
nombrevalue= ws.cell(row=x, column=3).value
Thanks for the comments!
import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob
import TensorflowUtils as utils
DATA_URL = 'http:\\data.csail.mit.edu\\places\\ADEchallenge\\ADEChallengeData2016.zip'
#download and read dataset
def read_dataset(data_dir):
pickle_filename = "MITSceneParsing.pickle"
pickle_filepath = os.path.join(data_dir, pickle_filename)
if not os.path.exists(pickle_filepath):
utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
SceneParsing_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
result = create_image_lists(os.path.join(data_dir, SceneParsing_folder))
print ("Pickling ...")
with open(pickle_filepath, 'wb') as f:
pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
else:
print ("Found pickle file!")
with open(pickle_filepath, 'rb') as f:
result = pickle.load(f)
training_records = result['training']
validation_records = result['validation']
del result
return training_records, validation_records
train_records, valid_records = read_dataset('Data_zoo/MIT_SceneParsing')
print(len(train_records))
print(len(valid_records))
the result is:Found pickle file! 0 0
why the lens about train_records and valid_records are 0?
i don't know whree is wrong and how to correct it.
This code is right. The bug is in 'create_image_lists'.
Note this code in create_image_lists:
filename = os.path.splitext(f.split('/')[-1])[0]
This is no problem in Linux, but in windows, the separator is '\\', so you should modify this code to:
filename = os.path.splitext(f.split('\\')[-1])[0]
Then delete this file 'MITSceneParsing.pickle', and run read_dataset again.
How I can select a particular csv file from a folder which contains 'n' number of csv files and all csv files are of the same kind with 3 columns only the values are different. So, my aim is to select a single csv file by clicking the browser button and then the graph should get plotted with the selection I made.
Sample csv file points
z,x,y
23,0,0
23,0.05387,6.66634
23,0.11799,13.787
23,0.19989,22.9338
23,0.3072,35.0772
23,0.56904,63.648
23,0.84889,91.5284
23,1.22228,123.65
23,1.72457,156.606
23,1.95494,167.717
23,2.25261,178.844
23,2.59162,186.982
23,2.91377,190.805
23,3.23132,190.89
120,0,0
120,0.08749,5.44471
120,0.16471,9.48296
120,0.31905,16.8751
120,0.82326,37.8111
120,1.45144,56.0784
120,2.24965,72.0364
120,3.01642,82.2629
120,3.82591,89.1323
120,4.91071,94.4476
120,6.15553,97.6881
120,7.45795,99.0951
120,8.31468,98.7398
160,0,0
160,0.1142,5.59709
160,0.24587,10.7453
160,0.77917,27.9152
160,1.50412,42.5702
160,2.4017,53.905
160,3.49796,62.7076
160,4.77411,69.3479
160,6.24681,74.2705
160,7.93673,77.5658
160,9.78794,79.1005
160,10.1071,78.9901
I have written a code but with my code I always need to change the 'filename'
in the 'filename.csv' and then need to select from the browser according to filename. I want to make it easy without changing the filename any csv file can be selected.
from tkinter import *
import csv
import os
import tkinter as tk
import sys
from tkinter import filedialog
import pandas as pd
import matplotlib.pyplot as plt
import math
#from sympy import *
from tkinter import ttk
class Application(Frame):
def __init__(self, master = None):
Frame.__init__(self,master)
self.grid()
self.createWidgets()
def createWidgets(self):
top = self.winfo_toplevel()
self.menuBar = Menu(top)
top["menu"] = self.menuBar
self.subMenu = Menu(self.menuBar)
self.menuBar.add_cascade(label = "File", menu = self.subMenu)
self.subMenu.add_command( label = "Read Data",command = self.readCSV)
def readCSV(self):
x=[]
y=[]
z=[]
self.filename = filedialog.askopenfilename()
df=pd.read_csv('GF30.csv', error_bad_lines=False)
read = csv.reader(df, delimiter = ",")
fig = plt.figure()
data_list = []
ax= fig.add_subplot(111)
buttons = next(read)
df.set_index('x', inplace=True)
df.groupby('z')['y'].plot(legend=True,ax=ax)
leg = ax.legend(bbox_to_anchor = [1.1, 0.7], fancybox=True, shadow=True)
leg.get_frame().set_alpha(0.4)
plt.show()
print
for btn in buttons:
new_btn = Button(self, text="btn", command = self.btnClick)
new_btn.pack()
self.root.mainloop()
def btnClick(self):
root.destroy()
I want to write a python script from which I can execute multiple sql queries and the output of that query is saved in excel.
Suppose I have 4 sql query i.e Script1, Script2, Script3 & Script4 and I want to save the generated excel workbook in E:\Test, In that workbook sheet1 contains Script1 output, sheet2 contains Script2 output, sheet3 contains Script3 output, and so on. I have written a query but its working for only one script.
By using this script I am able to generate excel sheet with Test name, but How I run the remaining script so that their output will show in other sheet of same workbook
Please Help
import psycopg2
import sys
import pprint
import pandas as pd
import os
import openpyxl.cell
COMMASPACE = ', '
def main():
conn_string = "dbname='abc' user='qwerty' host='pqr' password='******' port='1234'"
script1 = """
select * From something1
"""
script2 = """
select * From something2
"""
script3 = """
select * From something3
"""
script4 = """
select * From something4
"""
pprint.pprint ('Making connection to the Database...')
con1 = psycopg2.connect(conn_string)
cur = con1.cursor()
pprint.pprint ('Execution Start')
cur.execute(script)
if not cur.rowcount:
pprint.pprint ('Oops! Error Occured')
else:
columns = [desc[0] for desc in cur.description]
data = cur.fetchall()
df = pd.DataFrame(list(data), columns=columns)
df.columns = map(str.upper, df.columns)
writer = pd.ExcelWriter('E:\\Test.xlsx')
df.to_excel(writer, sheet_name='Sheet1')
def hide_column(ws, column_id):
if isinstance(column_id, int):
assert column_id >= 1, "Column numbers must be 1 or greater"
column_id = openpyxl.cell.get_column_letter(column_id)
column_dimension = ws.column_dimensions[column_id]
column_dimension.hidden = True
writer.save()
print ("END of extraction")
if __name__ == "__main__":
main()
try using pandas read_sql with Sql Alchemy.
from openpyxl import load_workbook
from sqlalchemy import create_engine
import pandas as pd
# Parameters for SQL Alchemy
ServerName = "your_Server_Name"
Database = "Your_Database"
Driver = "Your_Driver"
# Create the connection
engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database + "?" + Driver)
# reading in the dataframes
df1 = pd.read_sql_query("select * from somewhere", engine)
df2 = pd.read_sql_query("select * from somewhere_else", engine)
# Using openpyxl to write to excel sheets
file = 'Your_file_path_Here'
book = load_workbook(file)
writer = pd.ExcelWriter(file, engine='openpyxl')
writer.book = book
# now start writing them to sheets
df1.to_excel(writer, index=None, sheet_name='SQL1')
df1.to_excel(writer, index=None, sheet_name='SQL2')