Does OpenAL-Soft have an upper limit on the number of sources? - audio

I'm using OpenAL-Soft for a project, and right now I'm trying to decide whether I need to implement OpenAL source pooling.
Source pooling is somewhat cumbersome (I need to write code to "allocate" sources, as well as somehow decide when they can be "freed"), but necessary if the number of sources that can be generated by OpenAL is limited.
Since OpenAL-Soft is a software implementation of the OpenAL API, I wonder if the number of sources it can generate is actually limited by the underlying hardware. Theoretically, since all mixing is done in software, there might be no need to actually use one hardware channel per source.
However, I'm not sure about it. How should I proceed?

It appears that OpenAL-Soft indeed does have an upper limit on the number of sources, which can be defined in a config file. The default seems to be 256. It makes sense to limit the number of sources because of the associated CPU and memory costs. Looks like I'll end up implementing a source pool after all.

I just took a peek at its header ... did not see anything pop out.
Here is working code which synthesizes then renders audio buffer data ... you could play with seeing if it accommodates your necessary number of sources
// gcc -o openal_play_wed openal_play_wed.c -lopenal -lm
#include <stdio.h>
#include <stdlib.h> // gives malloc
#include <math.h>
#ifdef __APPLE__
#include <OpenAL/al.h>
#include <OpenAL/alc.h>
#elif __linux
#include <AL/al.h>
#include <AL/alc.h>
#endif
ALCdevice * openal_output_device;
ALCcontext * openal_output_context;
ALuint internal_buffer;
ALuint streaming_source[1];
int al_check_error(const char * given_label) {
ALenum al_error;
al_error = alGetError();
if(AL_NO_ERROR != al_error) {
printf("ERROR - %s (%s)\n", alGetString(al_error), given_label);
return al_error;
}
return 0;
}
void MM_init_al() {
const char * defname = alcGetString(NULL, ALC_DEFAULT_DEVICE_SPECIFIER);
openal_output_device = alcOpenDevice(defname);
openal_output_context = alcCreateContext(openal_output_device, NULL);
alcMakeContextCurrent(openal_output_context);
// setup buffer and source
alGenBuffers(1, & internal_buffer);
al_check_error("failed call to alGenBuffers");
}
void MM_exit_al() {
ALenum errorCode = 0;
// Stop the sources
alSourceStopv(1, & streaming_source[0]); // streaming_source
int ii;
for (ii = 0; ii < 1; ++ii) {
alSourcei(streaming_source[ii], AL_BUFFER, 0);
}
// Clean-up
alDeleteSources(1, &streaming_source[0]);
alDeleteBuffers(16, &streaming_source[0]);
errorCode = alGetError();
alcMakeContextCurrent(NULL);
errorCode = alGetError();
alcDestroyContext(openal_output_context);
alcCloseDevice(openal_output_device);
}
void MM_render_one_buffer() {
/* Fill buffer with Sine-Wave */
// float freq = 440.f;
float freq = 100.f;
float incr_freq = 0.1f;
int seconds = 4;
// unsigned sample_rate = 22050;
unsigned sample_rate = 44100;
double my_pi = 3.14159;
size_t buf_size = seconds * sample_rate;
short * samples = malloc(sizeof(short) * buf_size);
printf("\nhere is freq %f\n", freq);
int i=0;
for(; i<buf_size; ++i) {
samples[i] = 32760 * sin( (2.f * my_pi * freq)/sample_rate * i );
freq += incr_freq;
// incr_freq += incr_freq;
// freq *= factor_freq;
if (100.0 > freq || freq > 5000.0) {
incr_freq *= -1.0f;
}
}
/* upload buffer to OpenAL */
alBufferData( internal_buffer, AL_FORMAT_MONO16, samples, buf_size, sample_rate);
al_check_error("populating alBufferData");
free(samples);
/* Set-up sound source and play buffer */
// ALuint src = 0;
// alGenSources(1, &src);
// alSourcei(src, AL_BUFFER, internal_buffer);
alGenSources(1, & streaming_source[0]);
alSourcei(streaming_source[0], AL_BUFFER, internal_buffer);
// alSourcePlay(src);
alSourcePlay(streaming_source[0]);
// ---------------------
ALenum current_playing_state;
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
while (AL_PLAYING == current_playing_state) {
printf("still playing ... so sleep\n");
sleep(1); // should use a thread sleep NOT sleep() for a more responsive finish
alGetSourcei(streaming_source[0], AL_SOURCE_STATE, & current_playing_state);
al_check_error("alGetSourcei AL_SOURCE_STATE");
}
printf("end of playing\n");
/* Dealloc OpenAL */
MM_exit_al();
} // MM_render_one_buffer
int main() {
MM_init_al();
MM_render_one_buffer();
}

Related

Capture image with V4L2 on jetson TX2

To explain my process, find below a diagram:
I am working on computed tomography scanner. I use jetson TX2 for image acquisition and pre-processing.
From the jetson, I control the turn table and the camera. The camera is the FSM-IMX304m. I need to access the raw pointer. For that, I need to control the camera using V4L2 (we advise not use libargus to access raw pointer, because it is store in the ISP and the ISP compress data .. Can you confirm it ?). My first problem is about the documentation about v4l2, I didn't find a clear documentation for the C++ API .. I need to control:
exposure time;
gain;
function to clear the buffer.
I found a sample on internet, see how V4L2 works :
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/v4l2-common.h>
#include <linux/v4l2-controls.h>
#include <linux/videodev2.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>
#include <fstream>
#include <string>
using namespace std;
int main() {
// 1. Open the device
int fd; // A file descriptor to the video device
fd = open("/dev/video0",O_RDWR);
if(fd < 0){
perror("Failed to open device, OPEN");
return 1;
}
// 2. Ask the device if it can capture frames
v4l2_capability capability;
if(ioctl(fd, VIDIOC_QUERYCAP, &capability) < 0){
// something went wrong... exit
perror("Failed to get device capabilities, VIDIOC_QUERYCAP");
return 1;
}
// 3. Set Image format
v4l2_format imageFormat;
imageFormat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
imageFormat.fmt.pix.width = 1024;
imageFormat.fmt.pix.height = 1024;
imageFormat.fmt.pix.pixelformat = V4L2_PIX_FMT_MJPEG;
imageFormat.fmt.pix.field = V4L2_FIELD_NONE;
// tell the device you are using this format
if(ioctl(fd, VIDIOC_S_FMT, &imageFormat) < 0){
perror("Device could not set format, VIDIOC_S_FMT");
return 1;
}
// 4. Request Buffers from the device
v4l2_requestbuffers requestBuffer = {0};
requestBuffer.count = 1; // one request buffer
requestBuffer.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; // request a buffer which we can use for capturing frames
requestBuffer.memory = V4L2_MEMORY_MMAP;
if(ioctl(fd, VIDIOC_REQBUFS, &requestBuffer) < 0){
perror("Could not request buffer from device, VIDIOC_REQBUFS");
return 1;
}
// 5. Query the buffer to get raw data ie. ask for the you requested buffer
// and allocate memory for it
v4l2_buffer queryBuffer = {0};
queryBuffer.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
queryBuffer.memory = V4L2_MEMORY_MMAP;
queryBuffer.index = 0;
if(ioctl(fd, VIDIOC_QUERYBUF, &queryBuffer) < 0){
perror("Device did not return the buffer information, VIDIOC_QUERYBUF");
return 1;
}
// use a pointer to point to the newly created buffer
// mmap() will map the memory address of the device to
// an address in memory
char* buffer = (char*)mmap(NULL, queryBuffer.length, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, queryBuffer.m.offset);
memset(buffer, 0, queryBuffer.length);
// 6. Get a frame
// Create a new buffer type so the device knows which buffer we are talking about
v4l2_buffer bufferinfo;
memset(&bufferinfo, 0, sizeof(bufferinfo));
bufferinfo.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
bufferinfo.memory = V4L2_MEMORY_MMAP;
bufferinfo.index = 0;
// Activate streaming
int type = bufferinfo.type;
if(ioctl(fd, VIDIOC_STREAMON, &type) < 0){
perror("Could not start streaming, VIDIOC_STREAMON");
return 1;
}
/***************************** Begin looping here *********************/
// Queue the buffer
if(ioctl(fd, VIDIOC_QBUF, &bufferinfo) < 0){
perror("Could not queue buffer, VIDIOC_QBUF");
return 1;
}
// Dequeue the buffer
if(ioctl(fd, VIDIOC_DQBUF, &bufferinfo) < 0){
perror("Could not dequeue the buffer, VIDIOC_DQBUF");
return 1;
}
// Frames get written after dequeuing the buffer
cout << "Buffer has: " << (double)bufferinfo.bytesused / 1024
<< " KBytes of data" << endl;
// Write the data out to file
ofstream outFile;
outFile.open("webcam_output.jpeg", ios::binary| ios::app);
int bufPos = 0, outFileMemBlockSize = 0; // the position in the buffer and the amount to copy from
// the buffer
int remainingBufferSize = bufferinfo.bytesused; // the remaining buffer size, is decremented by
// memBlockSize amount on each loop so we do not overwrite the buffer
char* outFileMemBlock = NULL; // a pointer to a new memory block
int itr = 0; // counts thenumber of iterations
while(remainingBufferSize > 0) {
bufPos += outFileMemBlockSize; // increment the buffer pointer on each loop
// initialise bufPos before outFileMemBlockSize so we can start
// at the beginning of the buffer
outFileMemBlockSize = 1024; // set the output block size to a preferable size. 1024 :)
outFileMemBlock = new char[sizeof(char) * outFileMemBlockSize];
// copy 1024 bytes of data starting from buffer+bufPos
memcpy(outFileMemBlock, buffer+bufPos, outFileMemBlockSize);
outFile.write(outFileMemBlock,outFileMemBlockSize);
// calculate the amount of memory left to read
// if the memory block size is greater than the remaining
// amount of data we have to copy
if(outFileMemBlockSize > remainingBufferSize)
outFileMemBlockSize = remainingBufferSize;
// subtract the amount of data we have to copy
// from the remaining buffer size
remainingBufferSize -= outFileMemBlockSize;
// display the remaining buffer size
cout << itr++ << " Remaining bytes: "<< remainingBufferSize << endl;
delete outFileMemBlock;
}
// Close the file
outFile.close();
/******************************** end looping here **********************/
// end streaming
if(ioctl(fd, VIDIOC_STREAMOFF, &type) < 0){
perror("Could not end streaming, VIDIOC_STREAMOFF");
return 1;
}
close(fd);
return 0;
}
On the jetson, the code compile perfectly, but I can't run the code. It is blocked at this step :
// Dequeue the buffer
if(ioctl(fd, VIDIOC_DQBUF, &bufferinfo) < 0){
perror("Could not dequeue the buffer, VIDIOC_DQBUF");
return 1;
}
It is like the code is blocked in an endless loop. I have tested the code on my personal computer which runs Ubuntu 18.04, and the sample works well.
I do not have this sensor, but I assume that:
Your pixel format is incorrectly set imageFormat.fmt.pix.pixelformat = V4L2_PIX_FMT_MJPEG;
Most likely there should be 12-bit raw data from this sensor V4L2_PIX_FMT_Y12 or one of these options (Mono8/10/12/16, Bayer8/10/12/16, RGB8, YUV422, YUV411).
You can view the available formats in the Linux kernel here https://elixir.bootlin.com/linux/v4.9.237/source/include/uapi/linux/videodev2.h#L499
Check the documentation for your sensor.
Since Nvidia developers have extended the v4l2 subsystem, you need to use the following controls to adjust exposure and gain: TEGRA_CAMERA_CID_EXPOSURE, TEGRA_CAMERA_CID_GAIN. See file tegra-v4l2-camera.h
And also check the sensor controls:
v4l2-ctl --list-ctrls
....
gain 0x009a2009 (int64): min = 0 max = 480 step = 1 default = 0 value = 0 flags = slider
exposure 0x009a200a (int64): min = 28 max = 1000000 step = 1 default = 27879 value = 28 flags = slider
.....
Also examples of receiving raw data from the camera can be seen in examples from Nvidia
https://docs.nvidia.com/jetson/l4t-multimedia/mmapi_build.html

How to increase speed of large for loops

Right now i'm trying to run very large for loops for some task, nearly about 8e+12 iterations. I tried using c++11 threading, but it do not seems to be working that fast as required. I am using system with 8 gb ram, i5 cpu and intel graphics 4000 card. If i use openmp would it be better or i have to use nvidia gpu and use cuda for this task? My code is as below:
#include <ros/ros.h>
// PCL specific includes
#include <sensor_msgs/PointCloud2.h>
#include <pcl_conversions/pcl_conversions.h>
#include <pcl/point_types.h>
#include <pcl/filters/voxel_grid.h>
#include <visualization_msgs/Marker.h>
#include <rosbag/bag.h>
#include <std_msgs/Int32.h>
#include <rosbag/view.h>
#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH
#include <fstream>
#include <pcl/point_cloud.h>
#include <pcl/octree/octree_pointcloud_changedetector.h>
#include <pcl/io/pcd_io.h>
#include <iostream>
#include <vector>
#include <ctime>
#include <thread>
ros::Publisher marker_publisher;
int frame_index = 0;
using namespace std;
int x[200000];
void thread_function(pcl::PointCloud<pcl::PointXYZRGB>::ConstPtr cloudB,vector<int> v,int p0) {
for(size_t p1=0;p1<v.size() && ros::ok();++p1) {
int p0p1 = sqrt( pow(cloudB->points[v[p1]].x-cloudB->points[v[p0]].x,2)
+pow(cloudB->points[v[p1]].y-cloudB->points[v[p0]].y,2)
+pow(cloudB->points[v[p1]].z-cloudB->points[v[p0]].z,2) ) * 1000;
if(p0p1>10) {
for(size_t p2=0;p2<v.size() && ros::ok();++p2) {
int p0p2 = sqrt( pow(cloudB->points[v[p2]].x-cloudB->points[v[p0]].x,2)
+pow(cloudB->points[v[p2]].y-cloudB->points[v[p0]].y,2)
+pow(cloudB->points[v[p2]].z-cloudB->points[v[p0]].z,2) ) * 1000;
int p1p2 = sqrt( pow(cloudB->points[v[p2]].x-cloudB->points[v[p1]].x,2)
+pow(cloudB->points[v[p2]].y-cloudB->points[v[p1]].y,2)
+pow(cloudB->points[v[p2]].z-cloudB->points[v[p1]].z,2) ) * 1000;
if(p0p2>10 && p1p2>10) {
}
}
}
}
x[p0] = 3;
cout<<"ended thread="<<p0<<endl;
}
void cloud_cb (const sensor_msgs::PointCloud2ConstPtr& input)
{
frame_index++;
pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloudB (new pcl::PointCloud<pcl::PointXYZRGB> );
pcl::fromROSMsg(*input,*cloudB);
// Initializing Marker parameters which will be used in rviz
vector<visualization_msgs::Marker> line_list, marker, text_view_facing;
line_list.resize(4); marker.resize(4); text_view_facing.resize(4);
for(int i=0;i<line_list.size();i++) {
marker[i].header.frame_id = line_list[i].header.frame_id = text_view_facing[i].header.frame_id = "/X3/base_link";
marker[i].header.stamp = line_list[i].header.stamp = text_view_facing[i].header.stamp =ros::Time();
marker[i].ns = line_list[i].ns = text_view_facing[i].ns ="lines";
marker[i].action = line_list[i].action = text_view_facing[i].action = visualization_msgs::Marker::ADD;
marker[i].pose.orientation.w = line_list[i].pose.orientation.w = text_view_facing[i].pose.orientation.w = 1;
marker[i].id = i+4;
line_list[i].id = i;
marker[i].type = visualization_msgs::Marker::POINTS;
line_list[i].type = visualization_msgs::Marker::LINE_LIST;
line_list[i].color.r = 1; line_list[i].color.g = 1; line_list[i].color. b = 1; line_list[i].color.a = 1;
marker[i].scale.x = 0.003;
marker[i].scale.y = 0.003;
marker[i].scale.z = 0.003;
text_view_facing[i].id = i+8;
text_view_facing[i].type = visualization_msgs::Marker::TEXT_VIEW_FACING;
text_view_facing[i].color.b = 1; text_view_facing[i].color.a = 1.0; text_view_facing[i].color.g = 1.0; text_view_facing[i].color.r = 1.0;
text_view_facing[i].scale.z = 0.015;
}
marker[3].scale.x = 0.05;
marker[3].scale.y = 0.05;
marker[3].scale.z = 0.05;
if(frame_index==10) // Saving the point cloud for only one time to find moved object in it
{
pcl::io::savePCDFileASCII ("test_pcd.pcd", *cloudB);
}
if(frame_index>10) // Reading above point cloud file after saving for once to compare it with newly arriving point clouds
{
pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud (new pcl::PointCloud<pcl::PointXYZRGB>);
if (pcl::io::loadPCDFile<pcl::PointXYZRGB> ("test_pcd.pcd", *cloud) == -1) //* load the file
{
PCL_ERROR ("Couldn't read file test_pcd.pcd \n");
}
else {
srand ((unsigned int) time (NULL));
// Octree resolution - side length of octree voxels
double resolution = 0.1;
// Instantiate octree-based point cloud change detection class
pcl::octree::OctreePointCloudChangeDetector<pcl::PointXYZRGB> octree (resolution);
// Add points from cloudA to octree
octree.setInputCloud (cloud);
octree.addPointsFromInputCloud ();
// Switch octree buffers: This resets octree but keeps previous tree structure in memory.
octree.switchBuffers ();
// Add points from cloudB to octree
octree.setInputCloud (cloudB);
octree.addPointsFromInputCloud ();
std::vector<int> newPointIdxVector;
// Get vector of point indices from octree voxels which did not exist in previous buffer
octree.getPointIndicesFromNewVoxels (newPointIdxVector);
geometry_msgs::Point p; std_msgs::ColorRGBA c;
for (size_t i = 0; i < newPointIdxVector.size (); ++i)
{
p.x = cloudB->points[newPointIdxVector[i]].x;
p.y = cloudB->points[newPointIdxVector[i]].y;
p.z = cloudB->points[newPointIdxVector[i]].z;
c.r = cloudB->points[newPointIdxVector[i]].r/255.0;
c.g = cloudB->points[newPointIdxVector[i]].g/255.0;
c.b = cloudB->points[newPointIdxVector[i]].b/255.0;
c.a = 1;
//cout<<newPointIdxVector.size()<<"\t"<<p.x<<"\t"<<p.y<<"\t"<<p.z<<endl;
if(!isnan(p.x) && !isnan(p.y) && !isnan(p.z)) {
marker[3].points.push_back(p);
marker[3].colors.push_back(c);
}
}
marker_publisher.publish(marker[3]);
pcl::PointCloud<pcl::PointXYZRGB> P;
thread t[newPointIdxVector.size()];
for(int p0=0;p0<newPointIdxVector.size();++p0) { // For each voxel in moved object
t[p0] = thread(thread_function,cloudB,newPointIdxVector,p0);
}
for(int p0=0;p0<newPointIdxVector.size();++p0) { // For each voxel in moved object
t[p0].join();
cout<<"joined"<<"\t"<<p0<<"\t"<<x[p0]<<endl;
}
}
}
}
int main (int argc, char** argv)
{
ros::init (argc, argv, "training");
ros::NodeHandle nh;
ros::Subscriber sub = nh.subscribe<sensor_msgs::PointCloud2> ("input", 1, cloud_cb);
marker_publisher = nh.advertise<visualization_msgs::Marker> ("visualization_marker",1);
// Spin
ros::spin ();
}
This task is really important for my algorithm to complete. I need a suggestion how to make this loops run very fast.
In above code the thread_function is the main function where i'm putting the for loops currentely. Is their any way to increase its performance in above code?
OpenMP is the easiest to implement and try. Just add a couple of lines at your CMakeLists.txt, an include and the famous #pragma omp parallel for line just before your for loop.
Threading itself is not necessarily a guarantee for speed. If your process is mostly linear, there is nothing to be done in parallel. In your case, it looks like you have a loop and each iteration might be able to be done independently in parallel, but because each loop is so small and mostly simple mathematical operations, the overhead for making each item its own thread might not save you much (if any) time. The algorithm itself might need an overhaul (i.e. doing this an entirely different way), but threading could potentially solve your issue if your loop is huge and you can break it into, say, 4 chunks and parallel process the 4 chunks (i.e. one thread does items 0-100, another 101-200, etc). Just be aware that one process might finish before another and if some other process is relying on the completion of the whole set of data, then you'll need to ensure that you're done with all 4 threads before continuing. And if you do any kind of manipulation of the data (i.e. shifting elements, adding, removing) in the parallel processes, then you could end up screwing up a parallel thread. Hope that helps!

sndio sio_onmove not calling back.

I'm trying to write a fullduplex test that copies audio in to audio out. sio_onmove does not get called. I have no idea why. Here's my code so far:
#include <stdio.h>
#include <stdlib.h>
#include <sndio.h>
unsigned char buf[0xffff];
struct sio_hdl *hdl;
void cb(void *arg, int delta) {
int l;
printf("call %d\n", delta);
for(;;) {
l = sio_read(hdl, buf, delta);
if(l==0) break;
sio_write(hdl, buf, l);
}
}
int main(void) {
int m, i;
struct sio_par par;
struct sio_cap cap;
hdl = sio_open("rsnd/0", SIO_PLAY | SIO_REC , 1);
sio_getcap(hdl, &cap);
sio_initpar( &par);
par.bits = cap.enc[0].bits;
par.bps = cap.enc[0].bps;
par.sig = cap.enc[0].sig;
par.le = cap.enc[0].le;
par.msb = cap.enc[0].msb;
par.rchan=cap.rchan[0];
par.pchan=cap.pchan[0];
par.rate =cap.rate[0];
par.appbufsz = 1024;
sio_setpar(hdl, &par);
sio_onmove(hdl, cb, NULL);
sio_start(hdl);
for(;;)
sleep(1);
}
I'm initializing rsnd/0 for recording and play back. The parameters I'm initializing from a getcap call. I'm then setting cb as the callback for onmove. I then start audio. From there I loop forever doing nothing
The sio_onmove() call-back is called either from sio_revents() if non-blocking i/o is used or from blocking sio_read() or sio_write().
As above program calls sleep(1) instead, the call-back is never called.
AFAIU, to do the full-duplex test, you could use blocking i/o (set to 0 last argument of the sio_open() function) and do the following steps:
call sio_initpar() to initialize a sio_par structure, as you do
set your preferred parameters in the sio_par structure
call sio_setpar() to submit them to the device. devices exposed through the server (ex. "snd/0") will accept any parameters, while raw devices (ex. "rsnd/0") pick something close to whatever the hardware supports.
call sio_getpar() to get the parameters the device accepted, this is needed to get the device buffer size
possibly check if they are usable by your program
call sio_start()
prime the play buffer by writing par.bufsz samples with sio_write(). This corresponds to: par.bufsz * par.pchan * par.bps bytes.
At this stage, device starts and you could do the main-loop as with the following pseudo-code:
unsigned char *data;
size_t n, todo, blksz;
blksz = par.round * par.rchan * par.bps;
for (;;) {
/* read one block */
data = buf;
todo = blksz;
while (todo > 0) {
n = sio_read(hdl, data, todo);
if (n == 0)
errx(1, "failed");
todo -= n;
data += n;
}
/* write one block */
n = sio_write(hdl, buf, blksz);
if (n != blksz)
errx(1, "failed");
}
The sio_onmove() call-back is not needed for pure audio programs. It's only useful to synchronize non-audio events (ex video, midi messages) to the audio stream.

Zero copy in using vmsplice/splice in Linux

I am trying to get zero copy semantics working in linux using
vmsplice()/splice() but I don't see any performance improvement. This
is on linux 3.10, tried on 3.0.0 and 2.6.32. The following code tries
to do file writes, I have tried network socket writes() also, couldn't
see any improvement.
Can somebody tell what am I doing wrong ?
Has anyone gotten improvement using vmsplice()/splice() in production ?
#include <assert.h>
#include <fcntl.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <vector>
const char *filename = "Test-File";
const int block_size = 4 * 1024;
const int file_size = 4 * 1024 * 1024;
using namespace std;
int pipes[2];
vector<char *> file_data;
static int NowUsecs() {
struct timeval tv;
const int err = gettimeofday(&tv, NULL);
assert(err >= 0);
return tv.tv_sec * 1000000LL + tv.tv_usec;
}
void CreateData() {
for (int xx = 0; xx < file_size / block_size; ++xx) {
// The data buffer to fill.
char *data = NULL;
assert(posix_memalign(reinterpret_cast<void **>(&data), 4096, block_size) == 0);
file_data.emplace_back(data);
}
}
int SpliceWrite(int fd, char *buf, int buf_len) {
int len = buf_len;
struct iovec iov;
iov.iov_base = buf;
iov.iov_len = len;
while (len) {
int ret = vmsplice(pipes[1], &iov, 1, SPLICE_F_GIFT);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
if (len) {
auto ptr = static_cast<char *>(iov.iov_base);
ptr += ret;
iov.iov_base = ptr;
iov.iov_len -= ret;
}
}
len = buf_len;
while (len) {
int ret = splice(pipes[0], NULL, fd, NULL, len, SPLICE_F_MOVE);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
}
return 1;
}
int WriteToFile(const char *filename, bool use_splice) {
// Open and write to the file.
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
int fd = open(filename, O_CREAT | O_RDWR, mode);
assert(fd >= 0);
const int start = NowUsecs();
for (int xx = 0; xx < file_size / block_size; ++xx) {
if (use_splice) {
SpliceWrite(fd, file_data[xx], block_size);
} else {
assert(write(fd, file_data[xx], block_size) == block_size);
}
}
const int time = NowUsecs() - start;
// Close file.
assert(close(fd) == 0);
return time;
}
void ValidateData() {
// Open and read from file.
const int fd = open(filename, O_RDWR);
assert(fd >= 0);
char *read_buf = (char *)malloc(block_size);
for (int xx = 0; xx < file_size / block_size; ++xx) {
assert(read(fd, read_buf, block_size) == block_size);
assert(memcmp(read_buf, file_data[xx], block_size) == 0);
}
// Close file.
assert(close(fd) == 0);
assert(unlink(filename) == 0);
}
int main(int argc, char **argv) {
auto res = pipe(pipes);
assert(res == 0);
CreateData();
const int without_splice = WriteToFile(filename, false /* use splice */);
ValidateData();
const int with_splice = WriteToFile(filename, true /* use splice */);
ValidateData();
cout << "TIME WITH SPLICE: " << with_splice << endl;
cout << "TIME WITHOUT SPLICE: " << without_splice << endl;
return 0;
}
I did a proof-of-concept some years ago where I got as 4x speedup using an optimized, specially tailored, vmsplice() code. This was measured against a generic socket/write() based solution. This blog post from natsys-lab echoes my findings. But I believe you need to have the exact right use case to get near this number.
So what are you doing wrong? Primarily I think you are measuring the wrong thing. When writing directly to a file you have 1 system call, which is write(). And you are not actually copying data (except to the kernel). When you have a buffer with data that you want to write to disk, it's not gonna get faster than that.
In you vmsplice/splice setup you are still copying you data into the kernel, but you have a total of 2 system calls vmsplice()+splice() to get it to disk. The speed being identical to write() is probably just a testament to Linux system call speed :-)
A more "fair" setup would be to write one program that read() from stdin and write() the same data to stdout. Write an identical program that simply splice() stdin into a file (or point stdout to a file when you run it). Although this setup might be too simple to really show anything.
Aside: an (undocumented?) feature of vmsplice() is that you can also use to to read data from a pipe. I used this in my old POC. It was basically just an IPC layer based on the idea of passing memory pages around using vmsplice().
Note: NowUsecs() probably overflows the int

PIC18F String to Bluetooth to Tera Term

I am trying to output a string from the PIC's USART and have it display on Tera Term. I am using the:
PIC18F4331
Sparkfun Bluesmirf RN-42
MPLAB v8.85
Tera Term
I've been working at this code for a couple of days and I am not seeing a single response. A couple of things that I think may be causing the issue is the baud rate and/or not having an interrupt routine. But is there a need for an interrupt if I am only attempting to transmit? Please, can someone guide me? Also, when using printf, I am seeing a response through the bluetooth but in strange symbolic form. For example, รพรพรพ.
The code is a modification of one found online.
// Libraries
#include <p18f4331.h>
#include <stdio.h>
// Configuations
#pragma config OSC = XT
#pragma config WDTEN = OFF
#pragma config PWRTEN = OFF
#pragma config FCMEN = OFF
#pragma config IESO = OFF
#pragma config BOREN = ON
#pragma config BORV = 27
#pragma config WDPS = 128
#pragma config T1OSCMX = ON
#pragma config PWMPIN = ON
#pragma config MCLRE = ON
#pragma config LVP = OFF
#pragma config STVREN = OFF
#pragma config PWM4MX = RD5
// Definitions
#define _XTAL_FREQ 4000000
#define BAUDRATE 9600
void EUSART(void)
{
TRISC = 0b10000000;
SPBRG = 25;
TXSTAbits.CSRC = 0; // Baud Rate Generated Externally
TXSTAbits.TX9 = 0; // 8-Bit Transmission
TXSTAbits.TXEN = 1; // Transmit Enabled
TXSTAbits.SYNC = 0; // Asynchronous Mode
TXSTAbits.BRGH = 1; // High Baud Rate
TXSTAbits.TRMT = 0; // Transmit Shift Register When TSR Is Full
RCSTAbits.SPEN = 1; // Serial Port Enabled
RCSTAbits.RX9 = 0; // 8-Bit Reception
RCSTAbits.CREN = 1; // Enables Receive
}
void SendByteSerially(unsigned char Byte) // Writes a character to the serial port
{
while(!PIR1bits.TXIF) ; // wait for previous transmission to finish
TXREG = Byte;
}
unsigned char ReceiveByteSerially(void) // Reads a character from the serial port
{
while(!PIR1bits.RCIF) continue; // Wait for transmission to receive
return RCREG;
}
void SendStringSerially(const rom unsigned char* st)
{
while(*st) SendByteSerially(*st++);
}
void delayMS(unsigned int x)
{
unsigned char y;
for(;x > 0; x--) for(y=0; y< 82;y++);
}
void main(void)
{
unsigned char SerialData;
EUSART();
SendStringSerially("Hello World");
while(1)
{
SerialData = ReceiveByteSerially();
SendByteSerially(SerialData);
delayMS(1000);
}
}
You are using a PIC18, be sure that BRG16 equals 0 since you're using BRGH
BAUDCTL.BRG16 = 0;
because SPBRGH16 is the higher byte of SPBRG, and that may change the baudrate value of your USART.
Plus, be sure you're in the PIR1 bank. In MPLAB, that would be
banksel PIR1; //Not sure if there's an ending coma
My two functions to transmit via UART when properly initialized ( In MikroC ) :
void vTx232 (UC ucSend)
{
STATUS.RP0 = PIR1; //Sure we're in PIR1
while (PIR1.TXIF == 0);//While last TX not done
TXREG = ucSend; //Input param into TXREG
}
void vTxString(UC *ucpString)
{
while (*ucpString!= 0x00) //While string not at its end
{
vTx232(*ucpString); //Send string character
ucpString++; //Increm string pointer
}
}

Resources