Is it possible to write to an excel sheet(any type) from a bash script ?
What I am looking for is something along these lines :
sed -e :a -e '$!N; s/\n/ /; ta' file.c > #( first coloumn ,second row of the spread sheet )
echo "$cdvar" > #( second coloumn ,third row of the spread sheet )
Thank you for your replies and suggestion .
You could write excel by bash, perl, python, .. I think that each program language has its solutions.
bash
You could use join or awk, and I think that there are other solutions.
join
If you want join to files with same column, look these posts: Bash join command and join in bash like in SAS
awk
You could write a csv, but you could rename into xls and then with excel, gnumeric, or other programs, it is recognized like xls.
ls -R -ltr / | head -50 | awk '{if ($5 >0) print $5,$9}' OFS="," > sample.xls
when you modify xls with excel, gnumeric, or other programs, and save in xls,
you could not read by bash. So that #Geekasaur recommended perl or python solutions.
perl
You could write xls in perl, follow a sample:
#!/usr/bin/perl
use Spreadsheet::WriteExcel;
my $workbook = Spreadsheet::WriteExcel->new("test.xls");
my $worksheet = $workbook->add_worksheet();
open(FH,"<file") or die "Cannot open file: $!\n";
my ($x,$y) = (0,0);
while (<FH>){
chomp;
#list = split /\s+/,$_;
foreach my $c (#list){
$worksheet->write($x, $y++, $c);
}
$x++;$y=0;
}
close(FH);
$workbook->close();
And then you could modify xls with Spreadsheet::ParseExcel package: look How can I modify an existing Excel workbook with Perl? and reading and writing sample [Editor's note: This link is broken and has been reported to IBM]
python
You could write real xls in python, follow a sample:
#!/usr/local/bin/python
# Tool to convert CSV files (with configurable delimiter and text wrap
# character) to Excel spreadsheets.
import string
import sys
import getopt
import re
import os
import os.path
import csv
from pyExcelerator import *
def usage():
""" Display the usage """
print "Usage:" + sys.argv[0] + " [OPTIONS] csvfile"
print "OPTIONS:"
print "--title|-t: If set, the first line is the title line"
print "--lines|-l n: Split output into files of n lines or less each"
print "--sep|-s c [def:,] : The character to use for field delimiter"
print "--output|o : output file name/pattern"
print "--help|h : print this information"
sys.exit(2)
def openExcelSheet(outputFileName):
""" Opens a reference to an Excel WorkBook and Worksheet objects """
workbook = Workbook()
worksheet = workbook.add_sheet("Sheet 1")
return workbook, worksheet
def writeExcelHeader(worksheet, titleCols):
""" Write the header line into the worksheet """
cno = 0
for titleCol in titleCols:
worksheet.write(0, cno, titleCol)
cno = cno + 1
def writeExcelRow(worksheet, lno, columns):
""" Write a non-header row into the worksheet """
cno = 0
for column in columns:
worksheet.write(lno, cno, column)
cno = cno + 1
def closeExcelSheet(workbook, outputFileName):
""" Saves the in-memory WorkBook object into the specified file """
workbook.save(outputFileName)
def getDefaultOutputFileName(inputFileName):
""" Returns the name of the default output file based on the value
of the input file. The default output file is always created in
the current working directory. This can be overriden using the
-o or --output option to explicitly specify an output file """
baseName = os.path.basename(inputFileName)
rootName = os.path.splitext(baseName)[0]
return string.join([rootName, "xls"], '.')
def renameOutputFile(outputFileName, fno):
""" Renames the output file name by appending the current file number
to it """
dirName, baseName = os.path.split(outputFileName)
rootName, extName = os.path.splitext(baseName)
backupFileBaseName = string.join([string.join([rootName, str(fno)], '-'), extName], '')
backupFileName = os.path.join(dirName, backupFileBaseName)
try:
os.rename(outputFileName, backupFileName)
except OSError:
print "Error renaming output file:", outputFileName, "to", backupFileName, "...aborting"
sys.exit(-1)
def validateOpts(opts):
""" Returns option values specified, or the default if none """
titlePresent = False
linesPerFile = -1
outputFileName = ""
sepChar = ","
for option, argval in opts:
if (option in ("-t", "--title")):
titlePresent = True
if (option in ("-l", "--lines")):
linesPerFile = int(argval)
if (option in ("-s", "--sep")):
sepChar = argval
if (option in ("-o", "--output")):
outputFileName = argval
if (option in ("-h", "--help")):
usage()
return titlePresent, linesPerFile, sepChar, outputFileName
def main():
""" This is how we are called """
try:
opts,args = getopt.getopt(sys.argv[1:], "tl:s:o:h", ["title", "lines=", "sep=", "output=", "help"])
except getopt.GetoptError:
usage()
if (len(args) != 1):
usage()
inputFileName = args[0]
try:
inputFile = open(inputFileName, 'r')
except IOError:
print "File not found:", inputFileName, "...aborting"
sys.exit(-1)
titlePresent, linesPerFile, sepChar, outputFileName = validateOpts(opts)
if (outputFileName == ""):
outputFileName = getDefaultOutputFileName(inputFileName)
workbook, worksheet = openExcelSheet(outputFileName)
fno = 0
lno = 0
titleCols = []
reader = csv.reader(inputFile, delimiter=sepChar)
for line in reader:
if (lno == 0 and titlePresent):
if (len(titleCols) == 0):
titleCols = line
writeExcelHeader(worksheet, titleCols)
else:
writeExcelRow(worksheet, lno, line)
lno = lno + 1
if (linesPerFile != -1 and lno >= linesPerFile):
closeExcelSheet(workbook, outputFileName)
renameOutputFile(outputFileName, fno)
fno = fno + 1
lno = 0
workbook, worksheet = openExcelSheet(outputFileName)
inputFile.close()
closeExcelSheet(workbook, outputFileName)
if (fno > 0):
renameOutputFile(outputFileName, fno)
if __name__ == "__main__":
main()
And then you could also convert to csv with this sourceforge project.
And if you could convert to csv, you could rewrite xls.. modifing the script.
You can easily do this by first creating a R script (xsltocsv), and then calling it from your Bash file.
The R script would look something like:
#!/usr/bin/Rscript
suppressMessages(library("gdata"))
suppressMessages(library("argparse"))
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
parser <- ArgumentParser(
description = "A script to convert a given xsl file to a csv one"
)
parser$add_argument(
'-rn',
'--print-row-names',
action = 'store_true',
help = 'outputs row names in the output csv file'
)
parser$add_argument(
'-cn',
'--print-column-names',
action = 'store_true',
help = 'outputs column names in the output csv file'
)
parser$add_argument(
'-s',
'--separator',
metavar='separator',
type='character',
default=';',
action = 'store',
help = 'outputs column names in the output csv file'
)
parser$add_argument(
"xsl",
metavar = "xsl-file",
action = "store",
help = "xsl input file"
)
parser$add_argument(
"csv",
metavar = "csv-file",
action = "store",
help = "csv output file"
)
args <- parser$parse_args(commandArgs(TRUE))
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
vals <- read.xls(args$xsl)
write.table(n, file=args$csv, quote = FALSE,
col.names=args$print_column_names,
row.names=args$print_row_names, sep=args$separator)
Let us say that you put this into your system path after making the file executable (chmod +x xsltocsv). Then, invoke this script passing the associated parameters, and you are good to go ;)
Related
I have multiple directories inside which there are multiple files.
In directory1 files have the name format:
1.2.826.0.1.3680043.2.133.1.3.49.1.124.27456-3-1-10jd0au.dcm
1.2.826.0.1.3680043.2.133.1.3.49.1.124.27456-3-2-10jd0av.dcm
....
1.2.826.0.1.3680043.2.133.1.3.49.1.124.27456-3-10-17v7m18.dcm
In directory2:
1.2.826.0.1.3680043.2.133.1.3.49.1.46.34440-4-1-r3hu3u.dcm
1.2.826.0.1.3680043.2.133.1.3.49.1.46.34440-4-2-r3hu3v.dcm
....
and so on.
How can I rename these as just 1.dcm, 2.dcm,.....in each directory?
My attempt is as follows:
for dpath, dnames, fnames in os.walk(dir_path):
for dname in dnames:
directory = os.path.join(dir_path,dname)
for filename in os.listdir(directory):
old_name = os.path.join(directory,filename)
new = filename[filename.find("-"):]
new_name = os.path.join(directory, new)
os.rename(old_name, new_name)
But this only yields:
-3-1-10jd0au.dcm
-3-10-17v7m18.dcm
You could write a function that uses a regex to extract the parts of the filename that you need, for example:
import re
def ExtractNumber(filename):
parts = re.search(r".*-.*-(.*)-.*(\..*)", filename)
return parts.group(1) + parts.group(2)
print(ExtractNumber("1.2.826.0.1.3680043.2.133.1.3.49.1.124.27456-3-1-10jd0au.dcm"))
print(ExtractNumber("1.2.826.0.1.3680043.2.133.1.3.49.1.124.27456-3-10-17v7m18.dcm"))
Outputs:
1.dcm
10.dcm
Below is the sample data in input file. I need to process this file and turn it into a csv file. With some help, I was able to convert it to csv file. However not fully converted to csv since I am not able to handle \n, junk line(2nd line) and blank line(4th line). Also, i need help to filter transaction_type i.e., avoid "rewrite" transaction_type
{"transaction_type": "new", "policynum": 4994949}
44uu094u4
{"transaction_type": "renewal", "policynum": 3848848,"reason": "Impressed with \n the Service"}
{"transaction_type": "cancel", "policynum": 49494949, "cancel_table":[{"cancel_cd": "AU"}, {"cancel_cd": "AA"}]}
{"transaction_type": "rewrite", "policynum": 5634549}
Below is the code
import ast
import csv
with open('test_policy', 'r') as in_f, open('test_policy.csv', 'w') as out_f:
data = in_f.readlines()
writer = csv.DictWriter(
out_f,
fieldnames=[
'transaction_type', 'policynum', 'cancel_cd','reason'],lineterminator='\n',
extrasaction='ignore')
writer.writeheader()
for row in data:
dict_row = ast.literal_eval(row)
if 'cancel_table' in dict_row:
cancel_table = dict_row['cancel_table']
cancel_cd= []
for cancel_row in cancel_table:
cancel_cd.append(cancel_row['cancel_cd'])
dict_row['cancel_cd'] = ','.join(cancel_cd)
writer.writerow(dict_row)
Below is my output not considering the junk line,blank line and transaction type "rewrite".
transaction_type,policynum,cancel_cd,reason
new,4994949,,
renewal,3848848,,"Impressed with
the Service"
cancel,49494949,"AU,AA",
Expected output
transaction_type,policynum,cancel_cd,reason
new,4994949,,
renewal,3848848,,"Impressed with the Service"
cancel,49494949,"AU,AA",
Hmm I try to fix them but I do not know how CSV file work, but my small knoll age will suggest you to run this code before to convert the file.
txt = {"transaction_type": "renewal",
"policynum": 3848848,
"reason": "Impressed with \n the Service"}
newTxt = {}
for i,j in txt.items():
# local var (temporar)
lastX = ""
correctJ = ""
# check if in J is ascii white space "\n" and get it out
if "\n" in f"b'{j}'":
j = j.replace("\n", "")
# for grammar purpose check if
# J have at least one space
if " " in str(j):
# if yes check it closer (one by one)
for x in ([j[y:y+1] for y in range(0, len(j), 1)]):
# if 2 spaces are consecutive pass the last one
if x == " " and lastX == " ":
pass
# if not update correctJ with new values
else:
correctJ += x
# remember what was the last value checked
lastX = x
# at the end make J to be the correctJ (just in case J has not grammar errors)
j = correctJ
# add the corrections to a new dictionary
newTxt[i]=j
# show the resoult
print(f"txt = {txt}\nnewTxt = {newTxt}")
Termina:
txt = {'transaction_type': 'renewal', 'policynum': 3848848, 'reason': 'Impressed with \n the Service'}
newTxt = {'transaction_type': 'renewal', 'policynum': 3848848, 'reason': 'Impressed with the Service'}
Process finished with exit code 0
Here is my code with reads the input from a config file and moving files to another directory based on a condition and logs the information to a log file
import shutil
import configparser
import logging.handlers
import os
#Reading the input configuration
config = configparser.ConfigParser()
config.read("config_input.ini")
src_filepath = (config.get("Configuration Inputs","src_filepath"))
dst_filepath = (config.get("Configuration Inputs","dst_filepath"))
log_file_name = (config.get("Configuration Inputs","log_file_name"))
file_limit = int((config.get("Configuration Inputs","file_limit")))
if not os.path.exists (dst_filepath):
os.makedirs(dst_filepath)
onlyfiles_in_dst = next ( os.walk ( dst_filepath ) ) [ 2 ]
file_count_indst = len ( onlyfiles_in_dst )
onlyfiles_in_src = next ( os.walk ( src_filepath ) ) [ 2 ]
file_count_insrc = len ( onlyfiles_in_src )
def sorted_ls(src_filepath):
mtime = lambda f: os.stat(os.path.join(src_filepath, f)).st_mtime
return list(sorted(os.listdir(src_filepath), key=mtime))
move_list = sorted_ls(src_filepath)
#print (move_list)
if file_count_indst < file_limit:
for mfile in move_list:
shutil.move(src_filepath + '\\' + mfile, dst_filepath)
**#Logging everything**
logger = logging.getLogger()
logging.basicConfig(filename=log_file_name, format='%(asctime)s %(message)s', filemode='a')
logger.setLevel(logging.INFO)
logger.info('Number of files moved from source ' + str(len(move_list)))
But the problem is I want to move only the 1000 files from source to destination.
Something like
"ls -lrt| head ls -lrt | head -n 1000"
which I can not do iy as I am running this script on Windows platform.
Please suggest a proper way to do it.
Also please suggest how can I put it under a user defined class and may be can use in some other program.
Can't a simple counter be the solution?
if file_count_indst < file_limit:
count=0;
for mfile in move_list:
shutil.move(src_filepath + '\\' + mfile, dst_filepath)
count = count +1
if count==1000:
break
I am looking for a simple procedure to generate FST (finite state transducer) from cmudict-0.7b or cmudict-0.7b.dict, which will be used with phonetisaurus.
I tried following set of commands (phonetisaurus Aligner, Google NGramLibrary and phonetisaurus arpa2wfst) and able to generate FST but it didn't work. I am not sure where I did a mistake or miss any step. I guess very first command ie phonetisaurus-align, is not correct.
phonetisaurus-align --input=cmudict.dict --ofile=cmudict/cmudict.corpus --seq1_del=false
ngramsymbols < cmudict/cmudict.corpus > cmudict/cmudict.syms
/usr/local/bin/farcompilestrings --symbols=cmudict/cmudict.syms --keep_symbols=1 cmudict/cmudict.corpus > cmudict/cmudict.far
ngramcount --order=8 cmudict/cmudict.far > cmudict/cmudict.cnts
ngrammake --v=2 --bins=3 --method=kneser_ney cmudict/cmudict.cnts > cmudict/cmudict.mod
ngramprint --ARPA cmudict/cmudict.mod > cmudict/cmudict.arpa
phonetisaurus-arpa2wfst-omega --lm=cmudict/cmudict.arpa > cmudict/cmudict.fst
I tried fst with phonetisaurus-g2p as follows:
phonetisaurus-g2p --model=cmudict/cmudict.fst --nbest=3 --input=HELLO --words
But it didn't return anything....
Appreciate any help on this matter.
It is very important to keep dictionary in the right format. Phonetisaurus is very sensitive about that, it requires word and phonemes to be tab separated, spaces would not work then. It also does not allow pronunciation variant numbers CMUSphinx uses like (2) or (3). You need to cleanup dictionary with simple python script for example before feeding it into phonetisaurus. Here is the one I use:
#!/usr/bin/python
import sys
if len(sys.argv) != 3:
print "Split the list on train and test sets"
print
print "Usage: traintest.py file split_count"
exit()
infile = open(sys.argv[1], "r")
outtrain = open(sys.argv[1] + ".train", "w")
outtest = open(sys.argv[1] + ".test", "w")
cnt = 0
split_count = int(sys.argv[2])
for line in infile:
items = line.split()
if items[0][-1] == ')':
items[0] = items[0][:-3]
if items[0].find("_") > 0:
continue
line = items[0] + '\t' + " ".join(items[1:]) + '\n'
if cnt % split_count == 3:
outtest.write(line)
else:
outtrain.write(line)
cnt = cnt + 1
Hi I have recently made this script to rename files I scan for work with a prefix and a date. It works pretty well however it would be great if it could make a directory in the current directory with the same name as the first file then move all the scanned files there. E.g. First file is renamed to 'Scanned As At 22-03-2012 0' then a directory called 'Scanned As At 22-03-2012 0' (Path being M:\Claire\Scanned As At 22-03-2012 0) is made and that file is placed in there.
I'm having a hard time figuring out the best way to do this. Thanks in advance!
import os
import datetime
#target = input( 'Enter full directory path: ')
#prefix = input( 'Enter prefix: ')
target = 'M://Claire//'
prefix = 'Scanned As At '
os.chdir(target)
allfiles = os.listdir(target)
count = 0
for filename in allfiles:
t = os.path.getmtime(filename)
v = datetime.datetime.fromtimestamp(t)
x = v.strftime( ' %d-%m-%Y')
os.rename(filename, prefix + x + " "+str(count) +".pdf")
count +=1
Not quite clear about your requirement. If not rename the file, only put it under the directory, then you can use the following codes (only the for-loop of your example):
for filename in allfiles:
if not os.isfile(filename): continue
t = os.path.getmtime(filename)
v = datetime.datetime.fromtimestamp(t)
x = v.strftime( ' %d-%m-%Y')
dirname = prefix + x + " " + str(count)
target = os.path.join(dirname, filename)
os.renames(filename, target)
count +=1
You can check help(os.renames).