Save CSV for every functioncall with another name - python-3.x

at the moment I am able to create one CSV file with all the content I get at once.
Now I would like to create a list where I have different names in it.
How can I produce for every functioncall a different CSV file name? I thought about looping a list but I just want a +1 iteration at each call. I thought about saving my state somehow and use it in next functioncall. Everytime I initialize my variable with 0 and so I don't get 1. I think I could do it with Python Function Parameter calls but I have no idea how to use it. Can someone give me a little tip or example? If there are better ideas (maybe my idea is totally bullshit), how to solve this, just help please.
The comments in the code shall represent my imagination.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from tenable.sc import SecurityCenter as SC
import os.path
import sys
import getpass
import csv
SC_HOST = '...'
def parse_entry(entry):
split_after_path = ''
ip = entry.get('ip', None)
pluginText = entry.get('pluginText', None)
if 'Path : ' in pluginText:
for line in pluginText.splitlines(0):
if 'Path : ' in line:
split_after_path_in_plugintext = line.split("Path : ",1)[1]
# place = ['place1', 'place2', 'place3', 'place4', 'place5']
# i = 0
# i = i+1
file_exists = os.path.isfile('testfile_path.csv')
# file_exists = os.path.isfile('testfile_path_'+place[i]+'.csv')
data = open('testfile_path.csv', 'a')
# data = open('testfile_path_'+place[i]+'.csv', 'a')
with data as csvfile:
header = ['IP Address', 'Path']
writer = csv.DictWriter(csvfile, lineterminator='\n', quoting=csv.QUOTE_NONNUMERIC, fieldnames=header)
if not file_exists:
writer.writeheader()
writer.writerow({'IP Address': ip, 'Path': split_after_path})
data.close()
def main():
sc_user = input('[<<] username: ')
sc_pass = getpass.getpass('[<<] password: ')
sc = SC(SC_HOST)
sc.login(sc_user, sc_pass)
# Query API for data
# asset = [12,13,14,25,29]
# i = 0
# assetid = asset[i]
# vuln = sc.analysis.vulns(('pluginID', '=', '25072')('asset','=','assetid'))
# i = i+1
vuln = sc.analysis.vulns(('pluginID', '=', '25072'),('asset','=','11'))
for entry in vuln:
parse_entry(entry)
sc.logout()
return 0
if __name__ == '__main__':
sys.exit(main())

The simplest and most obvious solution is to pass the full file path to your parse_entry function, ie:
def parse_entry(entry, filepath):
# ...
if 'Path : ' in pluginText:
for line in pluginText.splitlines(0):
if 'Path : ' in line:
# ...
file_exists = os.path.isfile(filepath)
with open(filepath, 'a') as csvfile:
# ...
Then in main() use enumerate() to build sequential filenames:
def main():
# ...
for i, entry in enumerate(vuln):
path = "'testfile_path{}.csv".format(i)
parse_entry(entry, path)

You can use a function attribute to keep track of the number of times the function has been called.
def parse_entry(entry):
parse_entry.i += 1
# outside the function you have to initialize the attribute
parse_entry.i = 0
Or you can look at other ways to initialize the function attribute in this post.
Alternatively, you can use glob to get the current number of files.
from glob import glob
i = len(glob('testfile_path_*.csv'))

Related

Method reads properly but the written text file only has 1 line. Is \n not working?

The goal is to extract specific data from a text file under a folder
then write that data into another file under different folder
The extraction part works, save to variables and can even print them
The rises when you try to write them to a file
The file is empty
Need to write in this format
{self.title};;;{self.author};;;{self.release_date};;;
{self.last_update_date};;;{self.language};;;{self.producer};;;{self.book_path}
# This class includes all the operations related to a book
class Operation:
"""
Need to include these class variables
book_title_list (List of all books titles such as “[title1, title2, title3, …]”)
book_info_dict = “{title1: obj1, title2:obj2, title3:obj3…….}”)
"""
book_folder_path = './data/books_data/'
book_info_path = './data/result_data/books.txt'
def extract_book_info(self):
directory_files = os.listdir(self.book_folder_path) # Stores the .txt files under books_data folder
try:
for i in directory_files:
with open(f'{self.book_folder_path}/{i}', 'r', encoding='utf8') as f:
f_contents = f.readlines()
f_line_free = list(map(lambda x: x.strip(), f_contents))
f_lists = f_line_free[10:22] # Slicing only the required elements of the list
"""
Extracting only the necessary part and storing them
under proper variables
"""
title = f_lists[0]
author = f_lists[2]
release_date = f_lists[4]
last_update_date = f_lists[5]
language = f_lists[7]
producer = f_lists[11]
"""
Extracting the desired values
"""
title_data = title[7:]
author_data = author[8:]
release_date_data = release_date[14:]
last_update_date_data = last_update_date[24:-1]
language_data = language[10:]
producer_data = producer[13:]
print(title_data)
with open(self.book_info_path, 'w', encoding="utf8") as wf:
wf.write(f'{title_data};;;{author_data};;;{release_date_data};;;'
f'{last_update_date_data};;;{language_data};;;{producer_data};;;{self.book_info_path}\n')
return True
except FileNotFoundError:
return False
except Exception:
return False

Attempt to populate list with multiple entries in a for loop only returns a single entry

I have a csv file with URLs I'd like to extract data from, but my script currently only manages to get the last entry to append. This is the script:
import os
import glob
import time
from urllib.request import urlopen
import pandas as pd
import xml.etree.ElementTree as ET
count=0
files=glob.glob('./extract/isbnlist/Reihe*_isbn-dnb2.csv',recursive=True) #searches all files in folder
print(files)
for file in files:
if count==0:
csvfile = pd.read_csv(file, sep='\t', encoding='utf-8')
for row in csvfile['URL']:
print('row: ' + row)
with urlopen(str(row)) as response:
doc = ET.parse(response)
root = doc.getroot()
namespaces = { # Manually extracted from the XML file, but there could be code written to automatically do that.
"zs": "http://www.loc.gov/zing/srw/",
"": "http://www.loc.gov/MARC21/slim",
}
datafield_nodes_path = "./zs:records/zs:record/zs:recordData/record/datafield" # XPath
datafield_attribute_filters = [ #which fields to extract
{
"tag": "100", #author
"ind1": "1",
"ind2": " ",
}]
#datafield_attribute_filters = [] # Decomment this line to clear filters (and process each datafield node)
aut = []
for datafield_node in root.iterfind(datafield_nodes_path, namespaces=namespaces):
if datafield_attribute_filters:
skip_node = True
for attr_dict in datafield_attribute_filters:
for k, v in attr_dict.items():
if datafield_node.get(k) != v:
break
else:
skip_node = False
break
if skip_node:
continue
for subfield_node in datafield_node.iterfind("./subfield[#code='a']", namespaces=namespaces):
aut.append(subfield_node.text) #this gets the author name and title
print(aut)
count+=1
and this is the csv file:
URL
0 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783960382850&recordSchema=MARC21-xml
1 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783963622106&recordSchema=MARC21-xml
2 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D-&recordSchema=MARC21-xml
3 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783806241280&recordSchema=MARC21-xml
4 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783890296005&recordSchema=MARC21-xml
5 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783110699111&recordSchema=MARC21-xml
6 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783110698930&recordSchema=MARC21-xml
7 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783110699104&recordSchema=MARC21-xml
8 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783963621093&recordSchema=MARC21-xml
9 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9783451716034&recordSchema=MARC21-xml
10 http://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query=ISBN%3D9788791953514&recordSchema=MARC21-xml
When I execute the script, the output is:
['Schmidt, Horst']
but I need the other results as well. How can I do that?
Any help is appreciated.
EDIT: link to the full csv file on Pastebin the filename is: Reihe-21A51.csv_extract.csv_isbn-dnb2.csv
As #Tranbi pointed out, I had to move the aut=[] outside of the loop
it's now
for file in files:
if count==0: #to only go through the first file, instead of all files in the folder
csvfile = pd.read_csv(file, sep='\t', encoding='utf-8')
aut = []
instead of
aut = []
for datafield_node in root.iterfind(datafield_nodes_path, namespaces=namespaces):

How to save data using openpyxl with Python3 in a Excel file? Is there any way to take a bit of time to do that?

The procedure is :
Reading data from a txt file by line;
Then send the data to a serial port;
Save the data to a Excel file in a row with number/date/time
I have tried two methods, and both of them are worked.
But the is some problem:
The more data, the more time is taken when save the new data to the Excel file. For example, when write the data to the 10000 row, it's almost need 2 seconds (When I using append() to append a new row in the Excel file);
I want to send and save the data all the time, untill I stop the script in the Pycharm by click the STOP button. Once I did that, the Excel saved before is broken and CAN NOT open it again.
I want to solve these problem. Can anyone have any suggestions? Thank you so much!
Here is the code:
import serial
import serial.tools.list_ports
import time
import datetime
import sys
import os
import openpyxl
from openpyxl.styles import Font
from openpyxl.styles import PatternFill
from openpyxl.styles import Alignment
# The path to palce the Excel file
def creatfilepath(path):
# path -- The user input a path
if os.path.isdir(path):
datapath = path
print('\nM1:The path is OK, and the Excel file will be saved in {}'.format(datapath))
else:
if os.makedirs(path):
datapath = path
print('\nM2:The path does not exist, and scrip will creat the path {} to save the Excel'.format(datapath))
else:
datapath = os.getcwd()
print('\nM3:Creating the path is failed, and the Excel will be saved in same path {} where the script is.'.format(datapath))
return datapath
# Creating the Excel to save the data
def creatdatafile(filename, path, data_title):
# filename -- Excel filename
# path -- The path to save the Excel
# data_title -- Excel's title, which is a list and the default value is ['Number', 'Date', 'Time', 'Command', 'Type', 'Comment']
# Creating a Workbook
serial_data_1902_wb = openpyxl.Workbook()
# Accessing the active sheet
serial_data_1902_sheet = serial_data_1902_wb.active
# sheet'title
serial_data_1902_sheetname = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
serial_data_1902_sheet.title = serial_data_1902_sheetname
# Excel's filename
serial_data_1902_filename = filename + '_' + \
datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.xlsx'
# The absolute path
ab_path = path + "\\" + serial_data_1902_filename
serial_data_1902_wb.save(ab_path)
# Loading the Excel
active_data_file = openpyxl.load_workbook(ab_path)
# Accessing the sheet
active_datasheet = active_data_file[serial_data_1902_sheetname]
# Setting the title
row_title = data_title
active_datasheet.append(row_title)
# Freezing the 1st row
active_datasheet.freeze_panes = 'A2'
# Accessing the title's scope
title_scope = active_datasheet.dimensions
# Arial/12/Bold/
title_font_obj = Font(name='Arial', size=10, bold=True,
italic=False, strike=False, color='000000')
# Fill type
title_fill_obj = PatternFill(fill_type='solid', fgColor='7EC0EE')
# Alignment
title_align_obj = Alignment(
horizontal='center', vertical='center', wrap_text=False)
# Formating the 1st row
for rowOfCellObjects in active_datasheet[title_scope]:
for cellObject in rowOfCellObjects:
cellObject.font = title_font_obj
cellObject.fill = title_fill_obj
cellObject.alignment = title_align_obj
active_data_file.save(ab_path)
# Return the absolute path
if os.path.exists(ab_path):
print('\nM4:The Excel {} have created, and the path is {}'.format(
serial_data_1902_filename, path))
return ab_path
else:
print('\nE0:Creating Excel is failed, please check!')
sys.exit(0)
def isavailable(port):
port_list = list(serial.tools.list_ports.comports()
)
if len(port_list) <= 0:
print('\nE1:There is not serial port on the host!')
sys.exit(0)
else:
port_name_list = []
for port_objet in port_list:
port_name = port_objet[0]
port_name_list.append(port_name.lower())
print('\nM5:The serial ports is:{}'.format(port_name_list))
if port.lower() in port_name_list:
print('\nM6:')
else:
print('\nE2:')
sys.exit(0)
# Creating a vitural serial port and open it
def createport(port, baudrate=115200):
isavailable(port)
try:
serialport = serial.Serial(port, baudrate, timeout=1, write_timeout=1)
except ValueError:
sys.exit('\nE3:')
except (OSError, serial.SerialException):
sys.exit('\nE4:')
if serialport.is_open and port == serialport.portstr:
print('\nM7:')
serialport.flushInput()
else:
sys.exit('\nE5:')
return serialport
def savecommand(num, command_list, excelabpath, excelworkbook, excelsheet):
# num -- The number of command
# command_list -- A list which contain the data
# excelabpath -- The absolute path of Excel
# excelworkbook -- The Excel which save data
# excelsheet -- the sheet which save the data
try:
i = 0
for row in excelsheet.iter_rows(min_row=num+1, max_col=5, max_row=num+1):
for cell in row:
cell.value = command_list[i]
i += 1
if i < len(command_list):
continue
else:
break
excelworkbook.save(excelabpath)
except TypeError:
print('\nE10:')
def sendcommand(abpath, workbook, sheet, commandfile, port, intertime=0.5, commamd_num=1):
#
if not port.is_open:
try:
port.open()
except:
print('\nE6:')
else:
pass
try:
with open(commandfile, 'r', encoding='utf-8') as file_object:
for line in file_object:
if len(line) > 1 and (not line.startswith('#')):
command_hex = bytes.fromhex(
line.strip())
try:
print(commamd_num, " ", datetime.datetime.now(),
" ", line)
port.write(command_hex)
# The list contain the number/date/time/command
data_list = [commamd_num, datetime.datetime.now().strftime(
'%Y/%m/%d'), datetime.datetime.now().strftime('%H:%M:%S.%f'), line.strip()]
savecommand(commamd_num, data_list, abpath, workbook, sheet)
time.sleep(intertime)
commamd_num += 1
except serial.SerialTimeoutException:
print("\nE7:")
time.sleep(1)
port.write(command_hex)
# The list contain the number/date/time/command
data_list = [commamd_num, datetime.datetime.now().strftime(
'%Y/%m/%d'), datetime.datetime.now().strftime('%H:%M:%S.%f'), line.strip()]
savecommand (commamd_num, data_list, abpath, workbook, sheet)
time.sleep(intertime)
commamd_num += 1
else:
continue
except FileNotFoundError:
print('\nE8:')
sys.exit(0)
except PermissionError:
print('\nE9:')
sys.exit(0)
port.close()
if not port.is_open:
print('\nM9:')
return commamd_num
# ~~~~~~~~~~~~~~ The parameters of serial port ~~~~~~~~~~~~~~~~~~
# port number
comNumber = 2
# port name
comPort = 'com{}'.format(comNumber)
# baudRate
baudRate = 115200
# ~~~~~~~~~~~~~~ Other parameters ~~~~~~~~~~~~~~~~~~
# The path of command file
commandPath = r'.\1902.txt'
# The Excel path
savePath = r'D:\1902Code'
# The name of Excel
excelName = 'SerialData1902'
# The title
excelTitle = ['Number', 'Date', 'Time', 'Command', 'Type', 'Comment']
# The time between two command was sent
interTime = 0.01
#
isForever = 1
# ~~~~~~~~~~~~~~~~~~~~~ Begin ~~~~~~~~~~~~~~~~~~~
# Creating the path which save the Excel
excel_path = creatfilepath(savePath)
# Creating the Excel
excel_ab_path = creatdatafile(excelName, excel_path, excelTitle)
# Loading the Excel
excel_workbook = openpyxl.load_workbook(excel_ab_path)
# Accessing the sheet
excel_sheet = excel_workbook.active
# Creating the serial port
serial_port = createport(comPort, baudRate)
# The number of sending command and start with 1
command_num = 1
if isForever:
print('\nM10:')
while isForever:
command_num = sendcommand(excel_ab_path, excel_workbook, excel_sheet,
commandPath, serial_port, interTime, command_num)
elif isForever == 0:
print('\nM11:')
command_num = sendcommand(excel_ab_path, excel_workbook, excel_sheet,
commandPath, serial_port, interTime, command_num)
else:
print('\nE11:')

Using pandas pd.Excel File with user input for folder path and filename

I'm using pd.ExcelFile as below to open and parse a file, but currently only with the actual folder path and filename in one string.
wb = pd.ExcelFile(folder_path+filename)
I want to put this into a function, that asks the user to give a path and filename and deals with invalid input. I started something like the below, but it doesn't seem like the error is being generated inside the function anyway, and i'm not sure how to say 'while wb isn't a valid thing' to continue to prompt for a filepath until we get a valid one?
def Load_Parse():
folder_path = input('\nEnter the path to the qry_T spreadsheet here (include slashes at the start and at the end): ')
filename = input('\nEnter the name of the spreadsheet to be used here: ')
sheetname = input('\nEnter the sheet containing the data here, including the extension (e.g. "qry_Trajectory 2019.xlsx": ')
try:
wb = pd.ExcelFile(folder_path+filename)
except FileNotFoundError:
Any ideas?
I'll then parse the file using a similar method i hope:
df = wb.parse('filename')
using Pathlib, os and pandas and a few functions.
one of the key functions you'll need is the while True which keeps executing a block of code until it's true and you initiate a break
feel free to edit to your own spec.
Modules
from pathlib import Path
import os
import pandas as pd
from xlrd import XLRDError
In Action
df = load_parser()
out:
#Hello Umar.Hussain please enter a valid target directory
#C:\Users\UmarH\Files
#1 excels_0
#2 excels_1
#Choose a number between 1 and 2
1
#Your Choice is excels_0.xlsx
#Choose a Sheet - Lists all sheets
'Sheet1'
# returns dataframe
Main Function
def load_parser():
user = os.getlogin()
print(f"Hello {user} please enter a valid target directory")
cmd = input('')
p = file_tester(cmd,file_type='path')
print("Please select a number from the following file")
target_file = create_excel_dict(p)
target_df = enumerate_sheets(target_file)
return target_df
Helper Functions
def file_tester(string_path, file_type="path"):
path = Path(string_path)
while True:
if path.is_dir():
break
else:
cmd = input(f"Please Enter a Valid {file_type}")
path = Path(cmd)
return path
def create_excel_dict(target_path):
xlsx_dict = {i: x for i, x in enumerate(target_path.glob('*.xlsx'), 1)}
for k,v in xlsx_dict.items():
print(k,v.stem)
rng = [i for i in xlsx_dict.keys()]
file_choice = input(f'Choose a number between {rng[0]} and {rng[-1]}')
while True:
try:
file_choice = int(file_choice)
print(f"Your Choice is {xlsx_dict[file_choice]}")
break
except KeyError:
file_choice = input(f'Choose a number between {rng[0]} and {rng[-1]}')
return xlsx_dict[file_choice]
def enumerate_sheets(target_file):
xl = pd.ExcelFile(target_file)
for sheet in xl.sheet_names:
print(sheet)
target_sheet = input("Please Type Your sheet name")
while True:
try:
df = pd.read_excel(xl,sheet_name=target_sheet)
break
except XLRDError:
target_sheet = input("Please enter a sheet from above.")
return df

How to merge two lists at a delimited token in python3

I am a CS major at the University of Alabama, we have a project in our python class and I am stuck...probably for some stupid reason, but I cant seem to find the answer.
here is the link to the project, as it would be a pain to try and explain on here.
http://beastie.cs.ua.edu/cs150/projects/project1.html
here is my code:
import sys
from scanner import scan
def clInput():
#Gets command line input
log1 = sys.argv[1]
log2 = sys.argv[2]
name = sys.argv[3]
if len(sys.argv) != 4:
print('Incorrect number of arguments, should be 3')
sys.exit(1)
return log1,log2,name
def openFiles(log1,log2):
#Opens sys.argv[1]&[2] for reading
f1 = open(log1, 'r')
f2 = open(log2, 'r')
return f1, f2
def merge(log1,log2):
#Merges parsed logs into list without '---'
log1Parse = [[]]
log2Parse = [[]]
log1Count = 0
log2Count = 0
for i in log1:
if i != ['---']:
log1Parse[log1Count].append(i)
else:
log1Count += 1
log1Parse.append([])
for i in log2:
if i != ['---']:
log2Parse[log2Count].append(i)
else:
log2Count += 1
log2Parse.append([])
return(log1Parse[0] + log2Parse[0] + log1Parse[1] + log2Parse[1])
def searchMerge(name,merged):
#Searches Merged list for sys.argv[3]
for i in range(len(merged)):
if (merged[i][1] == name):
print(merged[i][0],merged[i][1]," ".join(merged[i][2:]))
def main():
log1,log2,name = clInput()
f1,f2 = openFiles(log1,log2)
#Sets the contents of the two scanned files to variables
tokens1 = scan(f1)
tokens2 = scan(f2)
#Call to merge and search
merged = merge(tokens1,tokens2)
searchMerge(name,merged)
main()
ok. so heres the problem. We are to merge two lists together into a sorted master list, delimited at the ---'s
my two log files match the ones posted on the website i linked to above. This code works, however if there are more than two instances of the ---'s in each list, it will not jump to the next list to get the other tokens, and so forth. I have it working for two with the merge function. at the end of that function i return
return(log1Parse[0] + log2Parse[0] + log1Parse[1] + log2Parse[1])
but this only works for two instances of ---. Is there anyway i can change my return to look at all of the indexes instead of having to manually put in [0],[1],[2], etc.? I need it to delimit and merge for an arbitrary amount. Please help!!
p.s. disregard the noobness...im a novice, we all gotta start somewhere
p.p.s. - the from scanner import scan is a scanner i wrote to take in all of the tokens in a given list
so.py:
import sys
def main():
# check and load command line arguments
# your code
if len(sys.argv) != 4:
print('Incorrect number of arguments, should be 3')
sys.exit(1)
# open files using file io
# your code
f1 = open(log1, 'r')
f2 = open(log2, 'r')
# list comprehension to process and filter log files
l1 = [ x.strip().split(" ",2) for x in f1.readlines() if x.strip() != "---" ]
l2 = [ x.strip().split(" ",2) for x in f2.readlines() if x.strip() != "---" ]
f1.close()
f2.close()
sorted_merged_lists = sorted(l1 + l2)
results = [ x for x in sorted_merged_lists if x[1] == name ]
for result in results:
print result
main()
CLI:
$ python so.py log1.txt log2.txt Matt
['12:06:12', 'Matt', 'Logged In']
['13:30:07', 'Matt', 'Opened Terminal']
['15:02:00', 'Matt', 'Opened Evolution']
['15:31:16', 'Matt', 'Logged Out']
docs:
http://docs.python.org/release/3.0.1/tutorial/datastructures.html#list-comprehensions
http://docs.python.org/release/3.0.1/library/stdtypes.html?highlight=strip#str.strip
http://docs.python.org/release/3.0.1/library/stdtypes.html?highlight=split#str.split
http://docs.python.org/release/3.0.1/library/functions.html?highlight=sorted#sorted

Resources