how to convert audio data to fourier on jupyter notebook? - audio

I want to convert mp3 files using courier's transform and export as spectrogram.Then I need to save as PNG file containing all frequencies of my mp3. How can I do that by using jupyter notebook?

Most of the following comes from: http://myinspirationinformation.com/uncategorized/audio-signals-in-python/
The mp3 sample comes from the BBC bird song site.
I ran this in Jupyter notebook using Python 3.6 running under Linux Mint.
from IPython.display import Audio, display
import matplotlib.pyplot as plt
from numpy import fft
import numpy as np
import pydub
from scipy.fftpack import fft
from scipy.io import wavfile
import scipy
import urllib
AUDIO_URL='http://downloads.bbc.co.uk/rmhttp/radio4/science/Birdsong-Blackbird.mp3'
temp_folder = '/home/bill/data/tmp/'
urllib.request.urlretrieve(AUDIO_URL, temp_folder+'file.mp3')
#read mp3 file
mp3 = pydub.AudioSegment.from_mp3(temp_folder+"file.mp3")
#convert to wav
mp3.export(temp_folder+"file.wav", format="wav")
#read wav file
freq, audio_data = scipy.io.wavfile.read(temp_folder+"file.wav")
length = audio_data.shape[0]/freq
channels = audio_data.shape[1]
print('freq: {} length: {} channels: {}'.format(freq, length, channels))
#if stereo grab both channels
channel1 = audio_data[:,0] #left
channel2 = audio_data[:,1] #right
#create a time variable in seconds
time = np.arange(0, float(audio_data.shape[0]), 1) / freq
#plot amplitude (or loudness) over time
plt.figure(1)
plt.subplot(211)
plt.plot(time, channel1, linewidth=0.01, alpha=0.7, color='#ff7f00')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.subplot(212)
plt.plot(time, channel2, linewidth=0.01, alpha=0.7, color='#ff7f00')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.show()
fourier=fft(channel1)
n = len(channel1)
fourier = fourier[0:int(n/2)]
# scale by the number of points so that the magnitude does not depend on the length
fourier = fourier / float(n)
#calculate the frequency at each point in Hz
freq_array = np.arange(0, (n/2), 1.0) * (freq*1.0/n);
plt.plot(freq_array/1000, 10*np.log10(fourier), color='#ff7f00', linewidth=0.02)
plt.xlabel('frequency in kHz')
plt.ylabel('power in dB')
plt.savefig(temp_folder+'spectrogram.png')

Related

Python Display NC File Variable Description as Plot Title

I need to use the "description" as my chart or plot title and I cannot find a way to do this in my internet searches. The output from the .nc file variable that has the "description" that I need looks like this:
<class 'netCDF4._netCDF4.Variable'>
float64 M(lat, on)
_FillValue: nan
long_name: Wind Speed at 100m
description: Anomaly for June 2021 vs the previous 30 years
unlimited dimensions:
current shape = (2920, 7200)
My code looks like this:
# -*- coding: utf-8 -*-
"""
#author: U321103
"""
from sys import exit
import netCDF4 as nc4
from netCDF4 import Dataset
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap, cm
import datetime
from datetime import datetime
import pandas as pd
import xarray as xr
import bottleneck as bn
import cartopy.crs as ccrs
from mpl_toolkits.basemap import Basemap
import os
os.environ["PROJ_LIB"] = 'C:\\Users\\Yury\\anaconda3\\Library\\share'
# -----------------------------------------------------------------------------------------------------------
#
# -----------------------------------------------------------------------------------------------------------
#%matplotlib inline
#The easiest way to read the data is:
path = "//porfiler03/gtdshare/VORTEX/ANOMALY_FILES/anomaly.M.2021.06.vs30y/world.nc"
# Open the NetCDF file
fh = Dataset(path)
#read variables in fh
for var in fh.variables.values():
print(var)
# Get the 100m wind speed
wind100 = fh['M'][:]
#wind100_units = fh['M'].units
# Get the latitude and longitude points
lats = fh.variables['lat'][:]
lons = fh.variables['lon'][:]
# Get some parameters for the Stereographic Projection
lon_0 = lons.mean()
lat_0 = lats.mean()
#m = Basemap(width=25000000,height=12000000,
# resolution='l',projection='lcc',\
# lat_ts=50,lat_0=lat_0,lon_0=lon_0)
m = Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=60,\
llcrnrlon=-180,urcrnrlon=180,lat_ts=20,resolution='c')
# help on coordinates: https://matplotlib.org/basemap/users/merc.html
fh.close()
# Because our lon and lat variables are 1D,
# use meshgrid to create 2D arrays
# Not necessary if coordinates are already in 2D arrays.
lon, lat = np.meshgrid(lons, lats)
xi, yi = m(lon, lat)
# Plot Data
cs = m.pcolor(xi,yi,np.squeeze(wind100))
# Add Grid Lines
m.drawparallels(np.arange(-80., 81., 40.), labels=[1,0,0,0], fontsize=10)
m.drawmeridians(np.arange(-180., 181., 40.), labels=[0,0,0,1], fontsize=10)
# Add Coastlines, States, and Country Boundaries
m.drawcoastlines()
m.drawstates()
m.drawcountries()
# Add Colorbar
cbar = m.colorbar(cs, location='bottom', pad="10%")
#cbar.set_label(wind100_units)
# Add Title
plt.title(' ')
plt.show()
exit()
So, what I need exactly is "Anomaly for June 2021 vs the previous 30 years" to add to the plot below in the line with plt.title() - thank you!
You should add this line of code wind100_description = fh['M'].description somewhere before fh.close(). Then simply do plt.title(wind100_description) instead of plt.title(' '). Also, it's a good practice to remove the imports you don't need, of which you have quite a few :)

.png to .h5 file conversion code is running very slow. Please suggest how can I run it in GPU

I am using below code for conversion of .png images into a single .h5 file. This code is working fine, but conversion is very slow. I have 40GB data of size 224 x 224 pixels which need to be converted into .h5. Please tell me why my code is running very slow. If I want to run the code in GPU, what are the changes required in my code. Please suggest.
import cv2
import datetime as dt
import h5py
import matplotlib.pyplot as plt
import matplotlib.pylab as plb
import numpy as np
import os
import pandas as pd
from glob import glob
start = dt.datetime.now()
PATH = os.path.abspath(os.path.join('/home/sd/Downloads/', 'he'))
SOURCE_IMAGES = os.path.join(PATH, "patch", "training_data")
images = glob(os.path.join(SOURCE_IMAGES, "*.png"))
images.sort()
NUM_IMAGES = len(images)
HEIGHT = 224
WIDTH = 224
CHANNELS = 3
SHAPE = (HEIGHT, WIDTH, CHANNELS)
#Now we will write the h5 file
train_shape = (len(images), HEIGHT, WIDTH, CHANNELS)
hf=h5py.File('data.h5', 'w')
hf.create_dataset("train_img", shape=train_shape, maxshape=train_shape, compression='gzip', compression_opts=9)
for i, img in enumerate(images):
s=dt.datetime.now()
img=cv2.imread(images[i])
img= cv2.resize(img, (WIDTH,HEIGHT), interpolation=cv2.INTER_CUBIC)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
hf["train_img"][i, ...] = img[None]
e=dt.datetime.now()
hf.close()
Just change this line hf.create_dataset("train_img", shape=train_shape, maxshape=train_shape, compression='gzip', compression_opts=9) into hf.create_dataset("train_img", shape=train_shape, maxshape=train_shape, np.int8)

Feature Extraction using MFCC

I want to know, how to extract the audio (x.wav) signal, feature extraction using MFCC? I know the steps of the audio feature extraction using MFCC. I want to know the fine coding in Python using the Django framework
This is the most important step in building a speech recognizer because after converting the speech signal into the frequency domain, we must convert it into the usable form of the feature vector.
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
frequency_sampling, audio_signal =
wavfile.read("/home/user/Downloads/OSR_us_000_0010_8k.wav")
audio_signal = audio_signal[:15000]
features_mfcc = mfcc(audio_signal, frequency_sampling)
print('\nMFCC:\nNumber of windows =', features_mfcc.shape[0])
print('Length of each feature =', features_mfcc.shape[1])
features_mfcc = features_mfcc.T
plt.matshow(features_mfcc)
plt.title('MFCC')
filterbank_features = logfbank(audio_signal, frequency_sampling)
print('\nFilter bank:\nNumber of windows =', filterbank_features.shape[0])
print('Length of each feature =', filterbank_features.shape[1])
filterbank_features = filterbank_features.T
plt.matshow(filterbank_features)
plt.title('Filter bank')
plt.show()
or you may use this code to extract the feature
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
def extract_features(audio,rate):
"""extract 20 dim mfcc features from an audio, performs CMS and combines
delta to make it 40 dim feature vector"""
mfcc_feature = mfcc.mfcc(audio,rate, 0.025, 0.01,20,nfft = 1200, appendEnergy = True)
mfcc_feature = preprocessing.scale(mfcc_feature)
delta = calculate_delta(mfcc_feature)
combined = np.hstack((mfcc_feature,delta))
return combined
you can use following code to extract an audio file MFCC features using librosa package(it is easy to install and work):
import librosa
import librosa.display
audio_path = 'my_audio_file.wav'
x, sr = librosa.load(audio_path)
mfccs = librosa.feature.mfcc(x, sr=sr,n_mfcc=40)
print(mfccs.shape)
also you can Display the MFCCs using following code:
librosa.display.specshow(mfccs, sr=sr, x_axis='time')

Store the Spectrogram as Image in Python

I want to store the STFT spectrogram of the audio as image. The code below shows a spectrogram to me as output, but when saved as image I get a different image.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
audio_name = '---.au'
hop_length = 512
window_size = 1024
import librosa
y, sr = librosa.load(audio_name)
window = np.hanning(window_size)
out = librosa.core.spectrum.stft(y, n_fft = window_size, hop_length = hop_length,
window=window)
out = 2 * np.abs(out) / np.sum(window)
import librosa.display
librosa.display.specshow(librosa.amplitude_to_db(out,ref=np.max),
y_axis='log', x_axis='time')
from PIL import Image
img = Image.fromarray(out)
if img.mode != 'RGBA':
img = img.convert('RGBA')
img.save('output.png')
But when I save it the output file is a black image.
I want to save the exact image of the spectogrm.
If you want exactly what librosa.display.spectrogram() will show, then use matplotlib to save the plot to a file:
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
import pandas as pd
import librosa
filename = librosa.util.example_audio_file()
y, sr = librosa.load(filename)
y = y[:100000] # shorten audio a bit for speed
window_size = 1024
window = np.hanning(window_size)
stft = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=512, window=window)
out = 2 * np.abs(stft) / np.sum(window)
# For plotting headlessly
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
fig = plt.Figure()
canvas = FigureCanvas(fig)
ax = fig.add_subplot(111)
p = librosa.display.specshow(librosa.amplitude_to_db(out, ref=np.max), ax=ax, y_axis='log', x_axis='time')
fig.savefig('spec.png')
spec.png:
If the desired is to get just the data in the spectrogram, stored as an image, then see this answer.

Matplotlib.animation.FuncAnimation using pcolormesh

Python 3.5, windows 10 Pro.
I'm trying to continuously plot an 8x8 array of pixels (for the sake of the question I'll just use random data, but in the real thing I'm reading from a serial port).
I can do it using a while loop, but I need to switch over to matplotlib.animation.FuncAnimation and I can't get it to work. I've tried looking at the help files and tried to follow examples from matplotlib.org here, but I've not been able to follow it.
Can someone help me figure out how to continuously plot an 8x8 array of pixels using FuncAnimation and pcolormesh? Here is what I've got so far:
import scipy as sp
import matplotlib.pyplot as plt
from matplotlib import animation
plt.close('all')
y = sp.rand(64).reshape([8,8])
def do_something():
y = sp.rand(64).reshape([8,8])
fig_plot.set_data(y)
return fig_plot,
fig1 = plt.figure(1,facecolor = 'w')
plt.clf()
fig_plot = plt.pcolormesh(y)
fig_ani = animation.FuncAnimation(fig1,do_something)
plt.show()
If you want to see the while loop code, just so you know exactly what I'm trying to reproduce, see below.
import scipy as sp
import matplotlib.pyplot as plt
plt.figure(1)
plt.clf()
while True:
y = sp.rand(64).reshape([8,8])
plt.pcolormesh(y)
plt.show()
plt.pause(.000001)
I was able to find a solution using imshow instead of pcolormesh. In case anyone else is struggling with the same issues I had, I've posted the working code below.
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib.animation as animation
Hz = sp.rand(64).reshape([8,8]) # initalize with random data
fig = plt.figure(1,facecolor='w')
ax = plt.axes()
im = ax.imshow(Hz)
im.set_data(sp.zeros(Hz.shape))
def update_data(n):
Hz = sp.rand(64).reshape([8,8]) # More random data
im.set_data(Hz)
return
ani = animation.FuncAnimation(fig, update_data, interval = 10, blit = False, repeat = False)
fig.show()

Resources