How can we use the microphone in Google Colab? - audio

OSError Traceback (most recent call last)
<ipython-input-21-4159a88154c9> in <module>()
7 response = google_images_download.googleimagesdownload()
8 r = sr.Recognizer()
----> 9 with sr.Microphone() as source:
10 print("Say something!")
11 audio = r.listen(source)
/usr/local/lib/python3.6/dist-packages/speech_recognition/__init__.py in __init__(self, device_index, sample_rate, chunk_size)
84 assert 0 <= device_index < count, "Device index out of range ({} devices available; device index should be between 0 and {} inclusive)".format(count, count - 1)
85 if sample_rate is None: # automatically set the sample rate to the hardware's default sample rate if not specified
---> 86 device_info = audio.get_device_info_by_index(device_index) if device_index is not None else audio.get_default_input_device_info()
87 assert isinstance(device_info.get("defaultSampleRate"), (float, int)) and device_info["defaultSampleRate"] > 0, "Invalid device info returned from PyAudio: {}".format(device_info)
88 sample_rate = int(device_info["defaultSampleRate"])

Here's an example that shows how to access a user's camera and microphone:
https://colab.research.google.com/notebooks/snippets/advanced_outputs.ipynb#scrollTo=2viqYx97hPMi
The snippet in your question attempts to access a microphone from Python. That won't work, because there's no microphone attached to the virtual machine that executes Python code in Colab.
Instead, you want to access the microphone of the computer running the web browser, capture the data there, and pass it back to the virtual machine for processing in Python.
That's what's shown in the snippet linked above.

Here is a simple snippet:
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");
my_btn.appendChild(t);
//my_p.appendChild(my_btn);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);
var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;
var handleSuccess = function(stream) {
gumStream = stream;
var options = {
//bitsPerSecond: 8000, //chrome seems to ignore, always 48k
mimeType : 'audio/webm;codecs=opus'
//mimeType : 'audio/webm;codecs=pcm'
};
//recorder = new MediaRecorder(stream, options);
recorder = new MediaRecorder(stream);
recorder.ondataavailable = function(e) {
var url = URL.createObjectURL(e.data);
var preview = document.createElement('audio');
preview.controls = true;
preview.src = url;
document.body.appendChild(preview);
reader = new FileReader();
reader.readAsDataURL(e.data);
reader.onloadend = function() {
base64data = reader.result;
//console.log("Inside FileReader:" + base64data);
}
};
recorder.start();
};
recordButton.innerText = "Recording... press to stop";
navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);
function toggleRecording() {
if (recorder && recorder.state == "recording") {
recorder.stop();
gumStream.getAudioTracks()[0].stop();
recordButton.innerText = "Saving the recording... pls wait!"
}
}
// https://stackoverflow.com/a/951057
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
var data = new Promise(resolve=>{
//recordButton.addEventListener("click", toggleRecording);
recordButton.onclick = ()=>{
toggleRecording()
sleep(2000).then(() => {
// wait 2000ms for the data to be available...
// ideally this should use something like await...
//console.log("Inside data:" + base64data)
resolve(base64data.toString())
});
}
});
</script>
"""
def get_audio():
    display(HTML(AUDIO_HTML))
    data = eval_js("data")
    binary = b64decode(data.split(',')[1])

    process = (ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='wav')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
    )
    output, err = process.communicate(input=binary)

    riff_chunk_size = len(output) - 8
    # Break up the chunk size into four bytes, held in b.
    q = riff_chunk_size
    b = []
    for i in range(4):
        q, r = divmod(q, 256)
        b.append(r)

    # Replace bytes 4:8 in proc.stdout with the actual size of the RIFF chunk.
    riff = output[:4] + bytes(b) + output[8:]

    sr, audio = wav_read(io.BytesIO(riff))

    return audio, sr
Then run this:
audio, sr = get_audio()
You might need to install this package first:
!pip install ffmpeg-python
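If the goal is the speech recognition from the original traceback, one way to connect the pieces (a minimal sketch, not from the original answer; it assumes the get_audio() helper above and that the free recognize_google backend is acceptable for testing) is to write the captured samples to a wav file and hand that to speech_recognition via sr.AudioFile instead of sr.Microphone:

import speech_recognition as sr_lib   # imported under a different name so it doesn't clash with the sr sample-rate variable above
from scipy.io.wavfile import write as wav_write

audio, rate = get_audio()                 # record in the browser, get samples + sample rate back in Python
wav_write('recording.wav', rate, audio)   # save a real PCM wav file on the Colab VM

r = sr_lib.Recognizer()
with sr_lib.AudioFile('recording.wav') as source:
    captured = r.record(source)           # read the whole file into an AudioData object
print(r.recognize_google(captured))       # send it to the Web Speech API for a quick test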

Related

How to record ambient music with quality in Google Colab

I want to record music tracks using a microphone in a Colab notebook.
I have tested several methods, but they work fine for speech recording, not music tracks.
The following code saves a .wav file,
import IPython.display as ipd
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
from socket import socket
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.srcElement.result)
reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.start()
await sleep(time)
recorder.onstop = async ()=>{
blob = new Blob(chunks)
text = await b2text(blob)
resolve(text)
}
recorder.stop()
})
"""
def recordAudio(sec=5):
    display(Javascript(RECORD))
    s = output.eval_js('record(%d)' % (sec*1000))
    b = b64decode(s.split(',')[1])
    with open('audio.wav', 'wb') as f:
        f.write(b)
    return 'audio.wav'  # or webm ?

ipd.display(ipd.Audio('audio.wav'))
But the quality is too poor to process afterwards and get good results, for example when classifying the music genre.
I have tried scripts based on the sounddevice and PyAudio libraries, but they do not work in Google Colab: these libraries access the local machine's audio hardware, which is not possible from the Colab VM.
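One thing worth checking, though I can't verify it for this exact setup: MediaRecorder normally hands back compressed webm/opus data, so writing the blob straight to audio.wav only renames it rather than producing PCM, and the browser's default audio bitrate tends to hurt music far more than speech. A possible workaround, in the spirit of the ffmpeg-python snippet from the first answer (the function name and parameters here are my own), is to decode the blob explicitly and ask ffmpeg for a sample rate and channel count suited to music:

import ffmpeg                      # pip install ffmpeg-python
from base64 import b64decode

def save_recording(data_url, out_path='music.wav', rate=44100, channels=2):
    """Decode the base64 data URL returned by the recording snippet into a real PCM wav file."""
    binary = b64decode(data_url.split(',')[1])
    (
        ffmpeg
        .input('pipe:0')                          # compressed webm/opus from the browser
        .output(out_path, ar=rate, ac=channels)   # -ar / -ac on the ffmpeg command line
        .overwrite_output()
        .run(input=binary, capture_stdout=True, capture_stderr=True)
    )
    return out_path

Here data_url is the string s returned by output.eval_js('record(...)') above. This does not raise the quality of what the browser captured; it only avoids a second lossy step and the mislabelled container.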

How do we use variables outside of process.stdout.on(...)?

I'm receiving a variable from a Python script in Node.js.
I read this variable with:
subprocess.stdout.on('data', (data) => {
    dataString += data.toString();
})
but I can't use dataString outside of subprocess.stdout.on; it's just empty.
I need the data received from Python so I can use it elsewhere.
Is there any solution for this?
Node.js file:
const path = require('path')
const {spawn} = require('child_process')
const fs = require('fs')
var img = "./img/facture1.jpg"
const data64 = fs.readFileSync(img, 'base64')
const fetch = require("node-fetch")
var dataString = '';
var r = spawn('python', [
"-u",
path.join(__dirname, 'my_script.py'),
img,
]);
r.stdin.write(data64);
r.stdin.end();
function runScript(){
return r
}
const subprocess = runScript()
subprocess.stdout.on('data', (data) => {
dataString += data.toString();
})
subprocess.stdout.on('end', () =>{
dataStr = dataString;
console.log("\n i'm from python",dataString);
});
Python file:
#!/usr/bin/python
import io
import os
import sys, json
import base64
from typing import IO
from PIL import Image
import cv2
import numpy as np
import PIL.Image
import pytesseract
from pytesseract import image_to_string
def read_in():
    lines = sys.stdin.readlines()
    return lines

def stringToRGB(base64_string):
    imgdata = base64.b64decode(str(base64_string))
    image = Image.open(io.BytesIO(imgdata))
    return cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)

def search_string_in_file(file_name, string_to_search1, string_to_search2):
    line_number = 0
    list_of_results = []
    with open(file_name, 'r') as read_obj:
        for line in read_obj:
            line_number += 1
            if string_to_search1 in line:
                list_of_results.append(line.rstrip())
            if string_to_search2 in line:
                list_of_results.append(line.rstrip())
    return list_of_results

def main():
    lines = read_in()
    window_name = 'Image'
    image = stringToRGB(lines)
    imS = cv2.resize(image, (960, 700))
    pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
    image_to_text = pytesseract.image_to_string(image, lang='eng')
    name = sys.argv[1]
    name = name[2:-4]
    f = open('%s.txt' % name, "w")
    f.write(image_to_text)
    f.close()
    matched_lines = search_string_in_file('%s.txt' % name, 'Total', 'A PAYER')
    for elem in matched_lines:
        elem = elem[14:18]
        print("\n", elem)

if __name__ == "__main__":
    main()

Move a downloaded file from a Python backend (REST API) to /storage/emulated/0/Download in Flutter

Basically, I'll provide an input field in the mobile app where users will paste a YouTube URL; the app sends the URL to the backend REST API, and the REST API downloads the video. The video should end up in /storage/emulated/0/Download on the mobile device's storage.
The problem is that the video is being downloaded on the backend, i.e. on my local machine, instead of to the mobile storage location.
REST API code for downloading the video:
from flask import Flask, request, jsonify
from pytube import YouTube
import os

app = Flask(__name__)

@app.route('/Youtube', methods=["POST"])
def youtube():
    youtube_link_request = request.get_json(force=False, silent=False, cache=True)
    print(youtube_link_request)
    youtube_link = youtube_link_request['link']
    downloads = youtube_link_request['path']
    print(downloads)
    yt = YouTube(youtube_link).streams.first().download()
    ## YouTube(youtube_link).streams.first().download() will start downloading the video
    views = YouTube(youtube_link).views
    thumbnail = YouTube(youtube_link).thumbnail_url
    title = YouTube(youtube_link).title
    # streams_data = yt.streams.first().download()
    # streams = []
    # for stream in streams_data:
    #     stream_info = stream
    #     streams.append(stream_info.type)
    return jsonify({"Data": "successful completed",
                    "views": views,
                    "thumbnail": thumbnail,
                    "title": title,
                    }), 200

if __name__ == "__main__":
    app.run(debug=True)
Part of the Flutter code:
RaisedButton(
onPressed: () async {
final PermissionHandler _permissionHandler =
PermissionHandler();
var result = await _permissionHandler
.requestPermissions([PermissionGroup.storage]);
// var dir = await
if (_formkey.currentState.validate()) {
if (result[PermissionGroup.storage] ==
PermissionStatus.granted) {
var dirpath =
await ExtStorage.getExternalStoragePublicDirectory(
ExtStorage.DIRECTORY_DOWNLOADS);
final UrlModel = await passInfo(urlTextController.text);
// _loading ? LinearProgressIndicator() : _urlModel;
setState(() {
_loading = !_loading;
_urlModel = UrlModel;
views = _urlModel.views;
title = _urlModel.title;
thumbnail = _urlModel.thumbnail;
});
print(dirpath);
}
// print(title);
Fluttertoast.showToast(
msg: "${_urlModel.title}",
toastLength: Toast.LENGTH_SHORT,
gravity: ToastGravity.CENTER,
timeInSecForIosWeb: 1);
} else {
Fluttertoast.showToast(
msg: "Error",
toastLength: Toast.LENGTH_SHORT,
gravity: ToastGravity.CENTER,
timeInSecForIosWeb: 1);
}
},
child: Text('Submit'),
),
It looks logical to me. You need to run a web server like Nginx and copy the downloaded files into the web server's root directory. You then have a URL for each file, something like server_ip:server_port/name_of_the_file_downloaded.mkv. Provide that URL to the Flutter app, and the user can download the file to /storage/emulated/0/Download.
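If you would rather keep everything inside the Flask backend instead of running a separate Nginx, a rough sketch of the same idea (the /Youtube/file route, the downloads folder and the use of send_file are my additions, not part of the question's code): save the video to a known path on the server, then stream it back so the app, not the server, ends up holding the file and can write it to /storage/emulated/0/Download.

from flask import send_file

# Hypothetical companion route to the /Youtube handler above.
@app.route('/Youtube/file', methods=["POST"])
def youtube_file():
    link = request.get_json()['link']
    # Stream.download() returns the path of the file it saved on the server
    path = YouTube(link).streams.first().download(output_path="downloads")
    # Send the bytes back in the response so the client can save them locally
    return send_file(path, as_attachment=True)

On the Flutter side the app would then perform an HTTP request to this endpoint and write the response body into the directory obtained from ExtStorage.getExternalStoragePublicDirectory.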

Sending a batch request to the Azure Cognitive API for text OCR

I am calling the Azure Cognitive Services API for OCR text recognition, and I am passing 10 images at the same time (the code below only accepts one image at a time, so that means 10 independent requests in parallel). This is not efficient from a processing point of view, as I need extra modules such as Celery and multiprocessing.
So, is there a way to send all 10 images in a single request, get the output at once, and then do the post-processing?
import time
from io import BytesIO

import cv2
import requests
from PIL import Image as PILImage
from PIL import Image

file_list = []
headers = {
    "Ocp-Apim-Subscription-Key": "<API-KEY>",
    'Content-Type': 'application/octet-stream'}

p = "symbol_sample.jpg"
print(p, "p")

def recognise_text(p):
    p = cv2.imread(p)
    cropped_image = PILImage.fromarray(p)
    buffer = BytesIO()
    cropped_image.save(buffer, format="JPEG")
    image_bytes = buffer.getvalue()
    try:
        response = requests.post(
            "https://centralindia.api.cognitive.microsoft.com/vision/v2.0/recognizeText?mode=Printed",
            headers=headers,
            data=image_bytes
        )
        header_link = str(response.headers['Operation-Location'])
        while True:
            headers_get = {
                "Ocp-Apim-Subscription-Key": "<API-KEY>",
                'Content-Type': 'application/json'
            }
            result = requests.get(
                url=header_link,
                headers=headers_get
            )
            response_r = result.json()
            if response_r["status"] == "Succeeded":
                return response_r
            else:
                time.sleep(4)
    except Exception as e:
        print(e)
        return ""

image1 = "symbol_sample.jpg"
o = recognise_text(image1)
print(o)
Any help would be really appreciated.
I guess you are looking for Batch Read File
public class BatchReadFileSample
{
public static async Task RunAsync(string endpoint, string key)
{
ComputerVisionClient computerVision = new ComputerVisionClient(new ApiKeyServiceClientCredentials(key))
{
Endpoint = endpoint
};
const int numberOfCharsInOperationId = 36;
string localImagePath = @"Images\handwritten_text.jpg"; // See this repo's readme.md for info on how to get these images. Alternatively, you can just set the path to any appropriate image on your machine.
string remoteImageUrl = "https://github.com/Azure-Samples/cognitive-services-sample-data-files/raw/master/ComputerVision/Images/printed_text.jpg";
Console.WriteLine("Text being batch read ...");
await BatchReadFileFromStreamAsync(computerVision, localImagePath, numberOfCharsInOperationId);
await BatchReadFileFromUrlAsync(computerVision, remoteImageUrl, numberOfCharsInOperationId);
}
// Read text from a remote image
private static async Task BatchReadFileFromUrlAsync(ComputerVisionClient computerVision, string imageUrl, int numberOfCharsInOperationId)
{
if (!Uri.IsWellFormedUriString(imageUrl, UriKind.Absolute))
{
Console.WriteLine("\nInvalid remote image url:\n{0} \n", imageUrl);
return;
}
// Start the async process to read the text
BatchReadFileHeaders textHeaders = await computerVision.BatchReadFileAsync(imageUrl);
await GetTextAsync(computerVision, textHeaders.OperationLocation, numberOfCharsInOperationId);
}
// Recognize text from a local image
private static async Task BatchReadFileFromStreamAsync(ComputerVisionClient computerVision, string imagePath, int numberOfCharsInOperationId)
{
if (!File.Exists(imagePath))
{
Console.WriteLine("\nUnable to open or read local image path:\n{0} \n", imagePath);
return;
}
using (Stream imageStream = File.OpenRead(imagePath))
{
// Start the async process to recognize the text
BatchReadFileInStreamHeaders textHeaders = await computerVision.BatchReadFileInStreamAsync(imageStream);
await GetTextAsync(computerVision, textHeaders.OperationLocation, numberOfCharsInOperationId);
}
}
Here is the Full Code
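As far as I know, the Read / Batch Read File operations still accept a single image (or a single multi-page PDF/TIFF) per request, so there is no true multi-image batching in this API version. If the goal is simply to avoid Celery and multiprocessing, a lighter option is to issue the ten requests concurrently from one process with the standard library; a minimal sketch reusing the recognise_text function from the question (the file names are placeholders):

from concurrent.futures import ThreadPoolExecutor

image_paths = ["symbol_sample_%d.jpg" % i for i in range(10)]  # placeholder file names

# The work is I/O bound (an HTTP POST plus polling), so threads overlap the waiting time.
with ThreadPoolExecutor(max_workers=10) as pool:
    results = list(pool.map(recognise_text, image_paths))

for path, result in zip(image_paths, results):
    print(path, result)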

Python3 paho-mqtt speed vs nodejs mqtt

I have conducted some speed tests for MQTT in Python3 and Node.js, using QoS level 0, and have found Node.js to be remarkably faster than the Python3 implementation.
How can this be?
I'm open to using either framework as a bridge on the server side to handle data from multiple clients. However, I'm losing confidence that I should be using Python3 for anything on the server.
The code snippets I am running:
Python3:
import paho.mqtt.client as mqtt
import logging
import time
import threading
import json
import sys

class MqttAdaptor(threading.Thread):

    def __init__(self, topic, type=None):
        threading.Thread.__init__(self)
        self.topic = topic
        self.client = None
        self.type = type

    def run(self):
        self.client = mqtt.Client(self.type)
        self.client.on_connect = self.on_connect
        self.client.on_disconnect = self.on_disconnect
        if self.type is not None:
            self.client.connect("localhost", 1883, 60)
            self.client.on_message = self.on_message
            self.client.loop_forever()
        else:
            self.client.connect_async("localhost", 1883, 60)
            self.client.loop_start()

    # The callback for when the client receives a CONNACK response from the server.
    def on_connect(self, client, userdata, flags, rc):
        self.client.subscribe(self.topic)

    def on_disconnect(self, client, userdata, rc):
        if rc != 0:
            print("Unexpected disconnection from local MQTT broker")

    # The callback for when a PUBLISH message is received from the server.
    def on_message(self, client, userdata, msg):
        jsonMsg = ""
        try:
            jsonMsg = json.loads(msg.payload)
            if jsonMsg['rssi'] is not None:
                jsonMsg['rssi'] = round(jsonMsg['rssi'] * 3.3 * 100000) / 10000
        except:
            pass
        print(json.dumps(jsonMsg))

    def publish(self, topic, payload, qos=0, retain=False):
        self.client.publish(topic, payload, qos, retain)

    def close(self):
        if self.client is not None:
            self.client.loop_stop()
            self.client.disconnect()

if __name__ == "__main__":
    topic = '/test/+/input/+'
    subber = MqttAdaptor(topic, 'sub')
    subber.start()

    topic = None
    test = MqttAdaptor(topic)
    test.run()
    print("start")
    while True:
        data = sys.stdin.readline()
        if not len(data):
            print("BREAK")
            break
        msg = data.split('\t')
        topic = msg[0]
        test.publish(topic, msg[1], 0)
    print("done")
    sys.exit(0)
Node.js:
"use strict";
const fs = require('fs');
const readline = require('readline');
const mqtt = require('mqtt');
const mqttClient = mqtt.connect();
mqttClient.on('connect', () => {
console.error('==== MQTT connected ====');
mqttClient.subscribe('/test/+/input/+');
});
mqttClient.on('close', () => {
console.error('==== MQTT closed ====');
});
mqttClient.on('error', (error) => {
console.error('==== MQTT error ' + error + ' ====');
});
mqttClient.on('offline', () => {
console.error('==== MQTT offline ====');
});
mqttClient.on('reconnect', () => {
console.error('==== MQTT reconnect ====');
});
mqttClient.on('message', (topic, message) => {
const topicSegments = topic.split('/');
topicSegments[topicSegments.length - 2] = 'done';
topic = topicSegments.join('/');
try {
//The message might not always be valid JSON
const json = JSON.parse(message);
//If rssi is null/undefined in input, it should be left untouched
if (json.rssi !== undefined && json.rssi !== null) {
//Multiply by 3 and limit the number of digits after comma to four
json.rssi = Math.round(json.rssi * 3.3 * 10000) / 10000;
}
console.log(topic + "\t" + JSON.stringify(json));
} catch (ex) {
console.error('Error: ' + ex.message);
}
});
const rl = readline.createInterface({
input: process.stdin,
terminal: false,
});
rl.on('line', (line) => {
const lineSegments = line.split("\t");
if (lineSegments.length >= 2) {
const topic = lineSegments[0];
const message = lineSegments[1];
mqttClient.publish(topic, message);
}
});
rl.on('error', () => {
console.error('==== STDIN error ====');
process.exit(0);
});
rl.on('pause', () => {
console.error('==== STDIN paused ====');
process.exit(0);
});
rl.on('close', () => {
console.error('==== STDIN closed ====');
process.exit(0);
});
Both scripts are run on the command line, connecting to the same broker.
They are run through a shell pipeline (Node):
time cat test-performance.txt | pv -l -L 20k -q | nodejs index.js | pv -l | wc -l
and (Python):
time cat test-performance.txt | pv -l -L 20k -q | python3 mqttTestThread.py | pv -l | wc -l
The test file contains around 2 GB of text in this format:
/test/meny/input/test {"sensor":"A1","data1":"176","time":1534512473545}
As shown in the pipelines, I count the number of lines processed while the scripts run. For a small test the Python3 script has a throughput of roughly 3k messages/sec, while Node has a throughput of roughly 20k messages/sec.
This is a big difference. Does anyone have an idea why? And/or how to get Python to run with comparable throughput?
There are multiple reasons why Node is faster than Python for this task. The main reason: Python itself is slow; only libraries implemented in C, such as numpy or pandas, are reasonably fast, and mostly for numeric work.
The second reason is, as Nhosko mentioned in a comment, that Node is asynchronous by default and therefore faster at I/O-bound tasks.
A potential third reason could be that MQTT here carries JSON data. JSON stands for JavaScript Object Notation and maps natively onto JavaScript objects.
I wouldn't recommend using Python for this task. Python is great for machine learning and data science; for server-side, I/O-bound tasks you may want to consider Node or Go.
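Before switching languages it may also be worth measuring where the Python version actually spends its time; the per-message work in on_message (JSON decode, re-encode, print) can easily dominate rather than paho itself. A minimal sketch with the standard-library profiler, wrapping the MqttAdaptor.on_message handler from the question (the wrapper itself is my addition):

import cProfile
import pstats

profiler = cProfile.Profile()
original_on_message = MqttAdaptor.on_message

def profiled_on_message(self, client, userdata, msg):
    # Only profile the message handler, not the network loop.
    profiler.enable()
    original_on_message(self, client, userdata, msg)
    profiler.disable()

MqttAdaptor.on_message = profiled_on_message

# ... run the same piped test as before, then inspect the hot spots:
pstats.Stats(profiler).sort_stats('cumulative').print_stats(15)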
