Converting .wav audio files to .h5 (hdf) files using SciPy and PyTables - python-3.x

I need to convert .wav audio files to the .h5 or .npz format, as these are the formats supported for training speech translation systems with FBK-Fairseq-ST (https://github.com/mattiadg/FBK-Fairseq-ST).
The following script is meant to run from terminal as python script.py /path/file.wav and write a new hdf file storing the information of the .wav file in the same folder.
from scipy.io import wavfile
import tables
import numpy
import sys
#read data from wav
#fs, data = wavfile.read('/home/vittoria/Documents/corpus-test/01.wav')
fs, data = wavfile.read(sys.argv[1])
#ouput
folder=sys.argv[1][:-6]
name= sys.argv[1][-6:-3]+"h5"
#save_to acoular h5 format
acoularh5 = tables.open_file(folder+name, mode = "w", title = name)
acoularh5.create_earray('/','time_data', atom=None, title='', filters=None, \
expectedrows=100000, chunkshape=[256,64], \
byteorder=None, createparents=False, obj=data)
acoularh5.set_node_attr('/time_data','sample_freq', fs)
acoularh5.close()
However, it raises a value error: ValueError: the shape ((0,)) and chunkshape ((256, 64)) ranks must be equal.
input from terminal:
python 2hf.py 01_83.wav (python script.py relative-file-path)
Traceback error, please notice that in "environments/hdf/lib/python3.6/" "hdf" is the root folder of the virtual environment. "/tables/" is the folder for the package tables 3.6.1 (https://pypi.org/project/tables/) installed via the pip command in the virtual environment.
Traceback (most recent call last):
File "2hf.py", line 18, in <module>
byteorder=None, createparents=False, obj=data)
File "/home/giuseppe/environments/hdf/lib/python3.6/site-packages/tables/file.py", line 1384, in create_earray
track_times=track_times)
File "/home/giuseppe/environments/hdf/lib/python3.6/site-packages/tables/earray.py", line 160, in __init__
track_times)
File "/home/giuseppe/environments/hdf/lib/python3.6/site-packages/tables/carray.py", line 212, in __init__
(shape, chunkshape))
ValueError: the shape ((0,)) and chunkshape ((256, 64)) ranks must be equal.
Closing remaining open files:01_83.h5...done

I had the same error and solved it by changing the script this way
from scipy.io import wavfile
import tables
import numpy
import sys
import os

# Read the sample rate (fs) and the samples (data) from the .wav file whose
# path is given as the first command-line argument.
wav_path = sys.argv[1]
fs, data = wavfile.read(wav_path)

# Output: same folder and base name as the input, with the extension replaced
# by ".h5". splitext handles file names and extensions of any length, so the
# HDF5 title below is the real output file name rather than a fixed-width
# slice of the path.
h5_path = os.path.splitext(wav_path)[0] + ".h5"
name = os.path.basename(h5_path)

# Save to acoular h5 format. The context manager closes the file even if
# writing the array or the attribute raises.
with tables.open_file(h5_path, mode="w", title=name) as acoularh5:
    # No chunkshape is passed: a fixed two-dimensional chunkshape such as
    # [256, 64] has a different rank than the array built from `data` here
    # and raises "ranks must be equal". PyTables picks a suitable chunkshape
    # on its own when none is given.
    acoularh5.create_earray('/', 'time_data', expectedrows=100000, obj=data)
    # Store the sampling frequency next to the samples.
    acoularh5.set_node_attr('/time_data', 'sample_freq', fs)
I basically just removed this part , chunkshape=[256,64] :-)
Hope this helped.

Related

[Geopandas error]fiona.errors.DriverError: '/vsimem/3563f91543824520abdaa032ab1a68da' not recognized as a supported file format

I wanted to read the .shp files by the file_uploader of streamlit.
Get the list of shp files from the file_uploader of streamlit.
Read the shp files using the geopandas.
Here's my code.
st.session_state.data_01 = st.file_uploader('Please choose a file.', accept_multiple_files=True, key='0').
df = []
for d in st.session_state.data_01:
df.append(gpd.read_file(d),encoding='utf-8')
And I got the error such like:
File "/Users/icuh/Desktop/Eun/Web/life.py", line 17, in run_life
df.append(gpd.read_file(d),encoding='utf-8')
File "/Users/icuh/opt/anaconda3/envs/impacts_02/lib/python3.8/site-packages/geopandas/io/file.py", line 253, in _read_file
return _read_file_fiona(
File "/Users/icuh/opt/anaconda3/envs/impacts_02/lib/python3.8/site-packages/geopandas/io/file.py", line 294, in _read_file_fiona
with reader(path_or_bytes, **kwargs) as features:
File "/Users/icuh/opt/anaconda3/envs/impacts_02/lib/python3.8/site-packages/fiona/collection.py", line 555, in __init__
super(BytesCollection, self).__init__(self.virtual_file, vsi=filetype, **kwds)
File "/Users/icuh/opt/anaconda3/envs/impacts_02/lib/python3.8/site-packages/fiona/collection.py", line 162, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 540, in fiona.ogrext.Session.start
File "fiona/_shim.pyx", line 90, in fiona._shim.gdal_open_vector
fiona.errors.DriverError: '/vsimem/3563f91543824520abdaa032ab1a68da' not recognized as a supported file format.
Versions I use
python 3.8.6
geopandas 0.11.1
fiona 1.8.21
shapely 1.8.4
This is not a streamlit issue as such.
I have simulated the error you reported with the geopandas sample shapefile:
- It fails when the shapefile has no .shp extension — the same error you reported.
- Trying again with the .shp extension gives a different error: the partner files are missing ('.shx', '.prj', ...).
- Trying again with all the files in the same directory as the .shp succeeds.
Your upload capability needs to take into account that a shape file is a set of files (not a single file). Either ensure they are all uploaded into same directory. Alternatively zip them up and upload zip file. read_file() supports zip files.
import geopandas as gpd
import fiona
from pathlib import Path
import tempfile
import shutil
# Demonstrates three attempts at reading the sample shapefile from a temporary
# directory: without an extension, as a lone .shp file, and with all of its
# partner files present.
with tempfile.TemporaryDirectory() as tmpdirname:
    fn = Path(gpd.datasets.get_path("naturalearth_lowres"))

    # 1) Copy the .shp under a name with no extension: the driver cannot
    #    recognise the format.
    tmp_file = Path(tmpdirname).joinpath(fn.stem)
    shutil.copy(fn, tmp_file)
    print(tmp_file)  # temp file with no extension...
    try:
        gpd.read_file(tmp_file)
    except fiona.errors.DriverError as e:
        print(e)
    tmp_file.unlink()

    # 2) now just the shape file: correct extension, but partner files missing
    tmp_file = Path(tmpdirname).joinpath(fn.name)
    shutil.copy(fn, tmp_file)
    try:
        gpd.read_file(tmp_file)
    except fiona.errors.DriverError as e:
        print(e)

    # 3) now all the files that make up an ESRI shapefile
    for fn_ in fn.parent.glob("*"):
        print(fn_.name)
        shutil.copy(fn_, tmpdirname)
    gpd.read_file(tmp_file)
    # no exception :-)
output
/var/folders/3q/trbn3hyn0y91jwvh6gfn7ln40000gn/T/tmpa8x28emo/naturalearth_lowres
'/var/folders/3q/trbn3hyn0y91jwvh6gfn7ln40000gn/T/tmpa8x28emo/naturalearth_lowres' not recognized as a supported file format.
Unable to open /var/folders/3q/trbn3hyn0y91jwvh6gfn7ln40000gn/T/tmpa8x28emo/naturalearth_lowres.shx or /var/folders/3q/trbn3hyn0y91jwvh6gfn7ln40000gn/T/tmpa8x28emo/naturalearth_lowres.SHX. Set SHAPE_RESTORE_SHX config option to YES to restore or create it.
naturalearth_lowres.shx
naturalearth_lowres.cpg
naturalearth_lowres.shp
naturalearth_lowres.dbf
naturalearth_lowres.prj

Python: copy file tree to a text file

I'm trying to create a text file with a tree of all files / dirs from a place that I choose using os.chdir(). My approach is to print the tree and to save all prints to the text file. The problem is that it doesn't copy the printed tree and the file is blank.
What am I doing wrong?
And is there a way to write this kind of data to the file without actually printing it?
My code:
import os
import sys
f = open("tree.txt", "w")
os.chdir("c:\\Users\Daniel\Desktop")
sys.stdout = f
os.system("tree /f")
f.close()
Edit
I was able to get the file tree from the clipboard after executing the command; however, it gives me an error when it tries to write to the txt file.
code:
import os
import tkinter
with open("tree.txt", "w") as f:
os.system("tree /f |clip")
root = tkinter.Tk()
tree = root.clipboard_get()
print(tree)
f.write(tree)
error:
Traceback (most recent call last):
File "c:\Users\Daniel\Desktop\Tick\code_test\files.py", line 9, in <module>
f.write(tree)
File "C:\Users\Daniel\AppData\Local\Programs\Python\Python38-32\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u2502' in position 80: character maps to <undefined>
solution
So I found the problem: I needed to use the codecs module to be able to write Unicode to the text file. Now it works very well.
code:
import os
import tkinter
import codecs
# codecs.open encodes everything written to the file as UTF-8, so the
# box-drawing characters in the tree listing (e.g. '\u2502') can be written.
with codecs.open("tree.txt", "w", "utf8") as f:
    os.chdir("c:\\Users")
    # "tree /f" lists the directory tree including files; piping it to "clip"
    # places the listing on the clipboard instead of the console.
    os.system("tree /f |clip")
    # A Tk root is needed only to read the clipboard contents back as text.
    root = tkinter.Tk()
    tree = root.clipboard_get()
    f.write(tree)
Method check_output from subprocess module can help you to catch program output:
import subprocess
f = open("tree.txt", "wb")
tree_output = subprocess.check_output('tree /f', shell=True, cwd=r'c:\Users\Daniel\Desktop')
f.write(tree_output)
f.close()
Or with context manager:
import subprocess
# check_output returns the command's stdout as bytes, hence binary mode "wb".
# cwd runs the command in the target folder without changing this process's
# own working directory.
with open("tree.txt", "wb") as f:
    f.write(subprocess.check_output('tree /f', shell=True, cwd=r'c:\Users\Daniel\Desktop'))
Option wb is required because check_output returns bytes not a str. If you want to process output like a string - call tree_output.decode() first.

Specify where to save image taken with webcam Python

So I have a Python application that accesses the built-in webcam on a laptop and takes a picture. But I'm having difficulty specifying the storage location for the picture (in this case on the desktop). The code I have so far is:
import cv2
import time
import getpass
import os
getUser = getpass.getuser()
save = 'C:/Users/' + getUser + "/Desktop"
camera_port = 0
camera = cv2.VideoCapture(camera_port)
time.sleep(0.1)
return_value, image = camera.read()
os.path.join(cv2.imwrite(save, "user.png", image))
del camera
But when I run it I get the following error:
Traceback (most recent call last):
File "C:/Users/RedCode/PycharmProjects/MyApps/WebcamPic.py", line 13, in <module>
os.path.join(cv2.imwrite(save, "user.png", image))
TypeError: img is not a numpy array, neither a scalar
How can I specify where to store the image when it is taken?
This line here is where you have a problem.
os.path.join(cv2.imwrite(save, "user.png", image))
You want to do this
cv2.imwrite(os.path.join(save, "user.png"), image)
imwrite expects two arguments the file name and the image to be saved.
The call to os.path.join is building your saved file path.

Writing pdf with pypdf2 gives error

I'm trying to write a simple script to merge two PDFs but have run into an issue when trying to save the output to disk. My code is
from PyPDF2 import PdfFileWriter, PdfFileReader
import tkinter as tk
from tkinter import filedialog
### Prompt the user for the 2 files to use via GUI ###
root = tk.Tk()
root.update()
file_path1 = tk.filedialog.askopenfilename(
filetypes=[("PDF files", "*.pdf")],
)
file_path2 = tk.filedialog.askopenfilename(
filetypes=[("PDF files", "*.pdf")],
)
###Function to combine PDFs###
output = PdfFileWriter()
def append_pdf_2_output(file_handler):
for page in range(file_handler.numPages):
output.addPage(file_handler.getPage(page))
#Actually combine the 2 PDFs###
append_pdf_2_output(PdfFileReader(open(file_path1, "rb")))
append_pdf_2_output(PdfFileReader(open(file_path2, "rb")))
###Prompt the user for the file save###
output_name = tk.filedialog.asksaveasfile(
defaultextension='pdf')
###Write the output to disk###
output.write(output_name)
output.close
The problem is that I get an error of
UserWarning: File to write to is not in binary mode. It may not be written to correctly. [pdf.py:453]
Traceback (most recent call last):
File "Combine2Pdfs.py", line 44, in <module>
output.write(output_name)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/PyPDF2/pdf.py", line 487, in write
stream.write(self.header + b("\n"))
TypeError: write() argument must be str, not bytes
Where have I gone wrong?
I got it by adding mode = 'wb' to tk.filedialog.asksaveasfile. Now it's
output_name = tk.filedialog.asksaveasfile(
mode = 'wb',
defaultextension='pdf')
output.write(output_name)
Try to use tk.filedialog.asksaveasfilename instead of tk.filedialog.asksaveasfile. You just want the filename, not the file handler itself.
###Prompt the user for the file save###
output_name = tk.filedialog.asksaveasfilename(defaultextension='pdf')

Python Error (ValueError: _getfullpathname: embedded null character)

I don't know how to fix this, please help. I have tried everything mentioned in the post "Error on import matplotlib.pyplot (on Anaconda3 for Windows 10 Home 64-bit PC)" but had no luck. I'm a newbie to Python and am self-learning, so specific details would be greatly appreciated.
Console:
Traceback (most recent call last):
from matplotlib import pyplot
File "C:\Users\...\lib\site-packages\matplotlib\pyplot.py", line 29, in <module>
import matplotlib.colorbar
File "C:\Users\...\lib\site-packages\matplotlib\colorbar.py", line 34, in <module>
import matplotlib.collections as collections
File "C:\Users\...\lib\site-packages\matplotlib\collections.py", line 27, in <module>
import matplotlib.backend_bases as backend_bases
File "C:\Users\...\lib\site-packages\matplotlib\backend_bases.py", line 62, in <module>
import matplotlib.textpath as textpath
File "C:\Users\...\lib\site-packages\matplotlib\textpath.py", line 15, in <module>
import matplotlib.font_manager as font_manager
File "C:\Users\...\lib\site-packages\matplotlib\font_manager.py", line 1421, in <module>
_rebuild()
File "C:\Users\...\lib\site-packages\matplotlib\font_manager.py", line 1406, in _rebuild
fontManager = FontManager()
File "C:\Users\...\lib\site-packages\matplotlib\font_manager.py", line 1044, in __init__
self.ttffiles = findSystemFonts(paths) + findSystemFonts()
File "C:\Users\...\lib\site-packages\matplotlib\font_manager.py", line 313, in findSystemFonts
for f in win32InstalledFonts(fontdir):
File "C:\Users\...\lib\site-packages\matplotlib\font_manager.py", line 231, in win32InstalledFonts
direc = os.path.abspath(direc).lower()
File "C:\Users\...\lib\ntpath.py", line 535, in abspath
path = _getfullpathname(path)
ValueError: _getfullpathname: embedded null character
Python:
#importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
# The 1:2 slice (rather than plain 1) keeps x two-dimensional, which is the
# shape the scikit-learn estimators below are fitted on.
x = dataset.iloc[:,1:2].values
y = dataset.iloc[:,2].values
#Linear Regression
from sklearn.linear_model import LinearRegression
reg_lin = LinearRegression()
reg_lin = reg_lin.fit(x,y)
#Polynomial Linear Regression
from sklearn.preprocessing import PolynomialFeatures
reg_poly = PolynomialFeatures(degree = 3)
# Expand x into polynomial terms once; the expanded matrix is reused for both
# fitting and plotting, so the transformer is never refitted.
x_poly = reg_poly.fit_transform(x)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(x_poly,y)
#Visualizing Linear Regression results
plt.scatter(x,y,color = 'red')
plt.plot(x,reg_lin.predict(x), color = 'blue')
plt.title('Truth vs. Bluff (Linear Reg)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
#Visualizing Polynomial Regression results
plt.scatter(x,y,color = 'red')
plt.plot(x,lin_reg_2.predict(x_poly), color = 'blue')
# Title names the polynomial model so the two figures can be told apart.
plt.title('Truth vs. Bluff (Polynomial Reg)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
Find this line in font_manager.py:
direc = os.path.abspath(direc).lower()
change it into:
direc = direc.split('\0', 1)[0]
and save to apply in your file.
I don't think you applied the patch in Error on import matplotlib.pyplot (on Anaconda3 for Windows 10 Home 64-bit PC) correctly: if you had, there shouldn't be a mention of direc = os.path.abspath(direc).lower() in your error stack, since the patch removed it.
To be clear, here is the entire win32InstalledFonts() method in C:\Anaconda\envs\py35\Lib\site-packages\matplotlib\font_manager.py (or wherever Anaconda is installed) after the patch is applied, with matplotlib 2.0.0:
def win32InstalledFonts(directory=None, fontext='ttf'):
    """
    Search for fonts in the specified font directory, or use the
    system directories if none given. A list of TrueType font
    filenames are returned by default, or AFM fonts if *fontext* ==
    'afm'.

    Returns the list of font paths read from the first registry key in
    MSFontDirectories that can be opened, or None if none can be opened.
    """
    from six.moves import winreg
    if directory is None:
        directory = win32FontDirectory()
    fontext = get_fontext_synonyms(fontext)
    key, items = None, {}
    for fontdir in MSFontDirectories:
        try:
            local = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, fontdir)
        except OSError:
            # This registry key cannot be opened; try the next candidate.
            continue
        if not local:
            return list_fonts(directory, fontext)
        try:
            # QueryInfoKey(...)[1] is the number of values stored under the key.
            for j in range(winreg.QueryInfoKey(local)[1]):
                try:
                    # Patch fixing [Error on import matplotlib.pyplot (on Anaconda3 for Windows 10 Home 64-bit PC)](https://stackoverflow.com/a/34007642/395857)
                    # Code before the patch:
                    #     key, direc, any = winreg.EnumValue( local, j)
                    #     if not is_string_like(direc):
                    #         continue
                    #     if not os.path.dirname(direc):
                    #         direc = os.path.join(directory, direc)
                    #     direc = os.path.abspath(direc).lower()
                    key, direc, _type = winreg.EnumValue(local, j)
                    if not is_string_like(direc):
                        continue
                    if not os.path.dirname(direc):
                        direc = os.path.join(directory, direc)
                    # Keep only the text before the first NUL instead of
                    # calling os.path.abspath(), which raises
                    # "ValueError: _getfullpathname: embedded null character"
                    # on registry values containing '\0'.
                    direc = direc.split('\0', 1)[0]
                    if os.path.splitext(direc)[1][1:] in fontext:
                        items[direc] = 1
                except EnvironmentError:
                    continue
                except WindowsError:
                    continue
                except MemoryError:
                    continue
            return list(six.iterkeys(items))
        finally:
            # Always release the registry handle, including on early return.
            winreg.CloseKey(local)
    return None
The actual cause of the issue appears to be in os.path.abspath(). A better solution might be to edit <python dir>\Lib\ntpath.py as detailed in Error on import matplotlib.pyplot (on Anaconda3 for Windows 10 Home 64-bit PC)
Basically, add a ValueError: exception handler to the Windows version of the abspath() function. This is lower down on the call stack and could save you from encountering this issue in other places.

Resources