How to write regular expression results to CSV? - python-3.x

I can see the results that I'm looking for in the Command Prompt, but I'm having trouble saving them to a CSV. The saved file is the location of the text and not the actual text.
import re, requests, csv
data = requests.get('http://www.spk-wc.usace.army.mil/fcgi-bin/getplottext.py?archive=true&plot=isbqr&length=wy&interval=d&wy=1995').text
pattern = re.compile(r'\d*\.00')
matches = pattern.finditer(data)
with open('1995.csv', 'w') as new_file:
csv_writer = csv.writer(new_file)
for success in matches:
export = [matches]
csv_writer.writerow(export)

Related

Writing data to csv file from table scrape

I am having trouble figuring out how to write this file to csv. I am parsing data from a table, and can print it just fine, but when I try to write to a csv file i get the error "TypeError: write() argument must be str, not list". I'm not sure how to make my data points into a string.
Code:
from bs4 import BeautifulSoup
import urllib.request
import csv
html = urllib.request.urlopen("https://markets.wsj.com/").read().decode('utf8')
soup = BeautifulSoup(html, 'html.parser') # parse your html
filename = "products.csv"
f = open(filename, "w")
t = soup.find('table', {'summary': 'Major Stock Indexes'}) # finds tag table with attribute summary equals to 'Major Stock Indexes'
tr = t.find_all('tr') # get all table rows from selected table
row_lis = [i.find_all('td') if i.find_all('td') else i.find_all('th') for i in tr if i.text.strip()] # construct list of data
f.write([','.join(x.text.strip() for x in i) for i in row_lis])
Any suggestions?
w.write() takes only a string as an argument, but your passing it a list of lists of strings.
csv.writerows() will write lists to a csv file.
Change your file handle f to be :
f = csv.writer(open(filename,'wb'))
and use it by replacing the last line with:
f.writerows([[x.text.strip() for x in i] for i in row_lis])
will produce a csv with contents:

Convert multiple .txt files into single .csv file (python)

I need to convert a folder with around 4,000 .txt files into a single .csv with two columns:
(1) Column 1: 'File Name' (as specified in the original folder);
(2) Column 2: 'Content' (which should contain all text present in the corresponding .txt file).
Here you can see some of the files I am working with.
The most similar question to mine here is this one (Combine a folder of text files into a CSV with each content in a cell) but I could not implement any of the solutions presented there.
The last one I tried was the Python code proposed in the aforementioned question by Nathaniel Verhaaren but I got the exact same error as the question's author (even after implementing some suggestions):
import os
import csv
dirpath = 'path_of_directory'
output = 'output_file.csv'
with open(output, 'w') as outfile:
csvout = csv.writer(outfile)
csvout.writerow(['FileName', 'Content'])
files = os.listdir(dirpath)
for filename in files:
with open(dirpath + '/' + filename) as afile:
csvout.writerow([filename, afile.read()])
afile.close()
outfile.close()
Other questions which seemed similar to mine (for example, Python: Parsing Multiple .txt Files into a Single .csv File?, Merging multiple .txt files into a csv, and Converting 1000 text files into a single csv file) do not solve this exact problem I presented (and I could not adapt the solutions presented to my case).
I had a similar requirement and so I wrote the following class
import os
import pathlib
import glob
import csv
from collections import defaultdict
class FileCsvExport:
"""Generate a CSV file containing the name and contents of all files found"""
def __init__(self, directory: str, output: str, header = None, file_mask = None, walk_sub_dirs = True, remove_file_extension = True):
self.directory = directory
self.output = output
self.header = header
self.pattern = '**/*' if walk_sub_dirs else '*'
if isinstance(file_mask, str):
self.pattern = self.pattern + file_mask
self.remove_file_extension = remove_file_extension
self.rows = 0
def export(self) -> bool:
"""Return True if the CSV was created"""
return self.__make(self.__generate_dict())
def __generate_dict(self) -> defaultdict:
"""Finds all files recursively based on the specified parameters and returns a defaultdict"""
csv_data = defaultdict(list)
for file_path in glob.glob(os.path.join(self.directory, self.pattern), recursive = True):
path = pathlib.Path(file_path)
if not path.is_file():
continue
content = self.__get_content(path)
name = path.stem if self.remove_file_extension else path.name
csv_data[name].append(content)
return csv_data
#staticmethod
def __get_content(file_path: str) -> str:
with open(file_path) as file_object:
return file_object.read()
def __make(self, csv_data: defaultdict) -> bool:
"""
Takes a defaultdict of {k, [v]} where k is the file name and v is a list of file contents.
Writes out these values to a CSV and returns True when complete.
"""
with open(self.output, 'w', newline = '') as csv_file:
writer = csv.writer(csv_file, quoting = csv.QUOTE_ALL)
if isinstance(self.header, list):
writer.writerow(self.header)
for key, values in csv_data.items():
for duplicate in values:
writer.writerow([key, duplicate])
self.rows = self.rows + 1
return True
Which can be used like so
...
myFiles = r'path/to/files/'
outputFile = r'path/to/output.csv'
exporter = FileCsvExport(directory = myFiles, output = outputFile, header = ['File Name', 'Content'], file_mask = '.txt')
if exporter.export():
print(f"Export complete. Total rows: {exporter.rows}.")
In my example directory, this returns
Export complete. Total rows: 6.
Note: rows does not count the header if present
This generated the following CSV file:
"File Name","Content"
"Test1","This is from Test1"
"Test2","This is from Test2"
"Test3","This is from Test3"
"Test4","This is from Test4"
"Test5","This is from Test5"
"Test5","This is in a sub-directory"
Optional parameters:
header: Takes a list of strings that will be written as the first line in the CSV. Default None.
file_mask: Takes a string that can be used to specify the file type; for example, .txt will cause it to only match .txt files. Default None.
walk_sub_dirs: If set to False, it will not search in sub-directories. Default True.
remove_file_extension: If set to False, it will cause the file name to be written with the file extension included; for example, File.txt instead of just File. Default True.

JSON to CSV with UTF8 encoding in Python

I'm working with analysis of feelings and after having got twitter data with twython and saving them in a txt file in json format, I need to write them in CSV format. I can do this but special characters are not written, for example "Inclusão" is written "Inclus \ xc3 \ xa3o"
here is the code:
import json
from csv import writer
with open('data.txt') as data_file:
data = json.load(data_file)
tweets = data['statuses']
#variables
times = [tweet['created_at'] for tweet in tweets]
users = [tweet['user']['name'] for tweet in tweets]
texts = [tweet['text'] for tweet in tweets]
#output file
out = open('tweets_file.csv', 'w')
print(out, 'created,user,text')
rows = zip(times,users,texts)
csv = writer(out)
for row in rows:
values = [value.encode('utf8') for value in row]
csv.writerow(values)
out.close()
i already solved the problem guys, thank you! the problem is that my text has already been encoded and i was trying to do this again.

How to convert a tab delimited text file to a csv file in Python

I have the following problem:
I want to convert a tab delimited text file to a csv file. The text file is the SentiWS dictionary which I want to use for a sentiment analysis ( https://github.com/MechLabEngineering/Tatort-Analyzer-ME/tree/master/SentiWS_v1.8c ).
The code I used to do this is the following:
txt_file = r"SentiWS_v1.8c_Positive.txt"
csv_file = r"NewProcessedDoc.csv"
in_txt = csv.reader(open(txt_file, "r"), delimiter = '\t')
out_csv = csv.writer(open(csv_file, 'w'))
out_csv.writerows(in_txt)
This code writes everything in one row but I need the data to be in three rows as normally intended from the file itself. There is also a blank line under each data and I don´t know why.
I want the data to be in this form:
Row1 Row2 Row3
Word Data Words
Word Data Words
instead of
Row1
Word,Data,Words
Word,Data,Words
Can anyone help me?
import pandas
It will convert tab delimiter text file into dataframe
dataframe = pandas.read_csv("SentiWS_v1.8c_Positive.txt",delimiter="\t")
Write dataframe into CSV
dataframe.to_csv("NewProcessedDoc.csv", encoding='utf-8', index=False)
Try this:
import csv
txt_file = r"SentiWS_v1.8c_Positive.txt"
csv_file = r"NewProcessedDoc.csv"
with open(txt_file, "r") as in_text:
in_reader = csv.reader(in_text, delimiter = '\t')
with open(csv_file, "w") as out_csv:
out_writer = csv.writer(out_csv, newline='')
for row in in_reader:
out_writer.writerow(row)
There is also a blank line under each data and I don´t know why.
You're probably using a file created or edited in a Windows-based text editor. According to the Python 3 csv module docs:
If newline='' is not specified, newlines embedded inside quoted fields will not be interpreted correctly, and on platforms that use \r\n linendings on write an extra \r will be added. It should always be safe to specify newline='', since the csv module does its own (universal) newline handling.

How to cut a line in python?

2331,0,13:30:08,25.35,22.05,23.8,23.9,23.5,23.7,5455,350,23.65,132,23.6,268,23.55,235,23.5,625,23.45,459,23.7,83,23.75,360,23.8,291,23.85,186,23.9,331,0,1,25,1000,733580089,name,,,
I got a line like this and how could I cut it? I only need the first 9 variable like this:
2331,0,13:30:08,25.35,22.05,23.8,23.9,23.5,23.7,5455
the original data i save as txt.file, and could I rewrite the original one and save?
Use either csv or just to straight file io with string split function
For example:
import csv
with open('some.txt', 'rb') as f:
reader = csv.reader(f)
for row in reader:
print row[:9]
or if everything is on a single line and you don't want to use a csv interface
with open('some.txt', 'r') as f:
line = f.read()
print line.split(str=",")[:9]
If you have a file called "content.txt".
f = open("content.txt","r")
contentFile = f.read();
output = contentFile.split(",")[:9]
output = ",".join(output)
f.close()
f = open("content.txt","wb")
f.write(output)
If all your values are stored in an Array, you can slice like this:
arrayB = arrayA[:9]
To get your values into an array you could split your String at every ","
arrayA = inputString.split(str=",")

Resources