Slow OpenGL context access on Ubuntu 12.04 - linux

I have an application which accesses an OpenGL context. I run it on two OSs:
1. Kubuntu 13.04
2. Ubuntu 12.04
I am experiencing the following issue: on OS 1 it takes around 60 ms to set up the context, while on OS 2 it takes 10 times longer. OpenGL API calls in general also seem slower on OS 2. The contexts are offscreen. Both OSs use Nvidia GPUs with driver version 319. Currently I have no clue what could cause this. My question is: what are possible sources of such overhead? X11 setup? Or maybe something at the OS level?
Another difference is that OS 1 uses an Nvidia GTX 680 while OS 2 uses an Nvidia GRID K1 card. Also, OS 2 resides on a server, and the latency tests are run locally on that machine.
UPDATE:
This is the part which causes most of the overhead:
#include <stdio.h>
#include <stdlib.h>
#include <GL/glx.h>

typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);
typedef Bool (*glXMakeContextCurrentARBProc)(Display*, GLXDrawable, GLXDrawable, GLXContext);
static glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0;
static glXMakeContextCurrentARBProc glXMakeContextCurrentARB = 0;

int main(int argc, const char* argv[]){
    static int visual_attribs[] = {
        None
    };
    int context_attribs[] = {
        GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
        GLX_CONTEXT_MINOR_VERSION_ARB, 0,
        None
    };
    int fbcount = 0;
    GLXFBConfig* fbc = NULL;
    GLXContext ctx;
    GLXPbuffer pbuf;

    /* open display (the original listing called XOpenDisplay twice, leaking one connection) */
    Display* dpy = XOpenDisplay(0);
    if ( !dpy ){
        fprintf(stderr, "Failed to open display\n");
        exit(1);
    }

    /* get framebuffer configs, any is usable (might want to add proper attribs) */
    if ( !(fbc = glXChooseFBConfig(dpy, DefaultScreen(dpy), visual_attribs, &fbcount)) ){
        fprintf(stderr, "Failed to get FBConfig\n");
        exit(1);
    }

    /* get the required extensions */
    glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)glXGetProcAddressARB( (const GLubyte *) "glXCreateContextAttribsARB");
    glXMakeContextCurrentARB = (glXMakeContextCurrentARBProc)glXGetProcAddressARB( (const GLubyte *) "glXMakeContextCurrent");
    if ( !(glXCreateContextAttribsARB && glXMakeContextCurrentARB) ){
        fprintf(stderr, "missing support for GLX_ARB_create_context\n");
        XFree(fbc);
        exit(1);
    }

    /* create a context using glXCreateContextAttribsARB */
    if ( !(ctx = glXCreateContextAttribsARB(dpy, fbc[0], 0, True, context_attribs)) ){
        fprintf(stderr, "Failed to create opengl context\n");
        XFree(fbc);
        exit(1);
    }

    /* create temporary pbuffer */
    int pbuffer_attribs[] = {
        GLX_PBUFFER_WIDTH, 800,
        GLX_PBUFFER_HEIGHT, 600,
        None
    };
    pbuf = glXCreatePbuffer(dpy, fbc[0], pbuffer_attribs);

    XFree(fbc);
    XSync(dpy, False);

    /* try to make it the current context */
    if ( !glXMakeContextCurrent(dpy, pbuf, pbuf, ctx) ){
        /* some drivers do not support a context without a default framebuffer,
         * so fall back on using the default window.
         */
        if ( !glXMakeContextCurrent(dpy, DefaultRootWindow(dpy), DefaultRootWindow(dpy), ctx) ){
            fprintf(stderr, "failed to make current\n");
            exit(1);
        }
    }

    /* try it out */
    printf("vendor: %s\n", (const char*)glGetString(GL_VENDOR));
    return 0;
}
Specifically, the line:
pbuf = glXCreatePbuffer(dpy, fbc[0], pbuffer_attribs);
where the dummy pbuffer is created is the slowest. While the rest of the calls take 2-4 ms on average, this call takes 40 ms on OS 1. On OS 2 (the slow one), the pbuffer creation takes 700 ms! I hope my problem is clearer now.
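For reference, a minimal sketch of how such per-call latency can be measured with clock_gettime; the elapsed_ms helper is illustrative and not part of the original code, and the usage lines assume the dpy, fbc, pbuf and pbuffer_attribs variables from the listing above:
#include <stdio.h>
#include <time.h>

/* milliseconds between two CLOCK_MONOTONIC timestamps */
static double elapsed_ms(struct timespec t0, struct timespec t1){
    return (t1.tv_sec - t0.tv_sec) * 1000.0 + (t1.tv_nsec - t0.tv_nsec) / 1.0e6;
}

/* usage around the suspect call: */
struct timespec t0, t1;
clock_gettime(CLOCK_MONOTONIC, &t0);
pbuf = glXCreatePbuffer(dpy, fbc[0], pbuffer_attribs);
clock_gettime(CLOCK_MONOTONIC, &t1);
printf("glXCreatePbuffer took %.1f ms\n", elapsed_ms(t0, t1));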

Are you absolutely sure "OS 2" has correctly set up drivers and isn't falling back on software (Mesa) OpenGL rendering? What framerate does glxgears report on each system?
I note Ubuntu 12.04 was released in April 2012, while I believe Nvidia's "GRID" tech wasn't even announced until GTC in May 2012, and I think cards didn't turn up until 2013 (see the relevant Nvidia press releases). It therefore seems very unlikely that the Nvidia drivers supplied with Ubuntu 12.04 support the GRID card (unless you've made some effort to upgrade using more recent driver releases from Nvidia?).
You may be able to check the list of supported hardware in /usr/share/doc/nvidia-glx/README.txt.gz, Appendix A "Supported NVIDIA GPU Products" (at least that's where this useful information lives on my Debian machines).

Related

Multiple threads in CMSIS RTOS - STM32 Nucleo L053R8

I am developing with CMSIS RTOS on an STM32 Nucleo L053R8 kit and have an issue related to multiple tasks.
I create 4 tasks (task_1, task_2, task_3, task_4), however only 3 tasks run.
This is part of my code:
#include "main.h"
#include "stm32l0xx_hal.h"
#include "cmsis_os.h"
osMutexId stdio_mutex;
osMutexDef(stdio_mutex);
int main(void){
.....
stdio_mutex = osMutexCreate(osMutex(stdio_mutex));
osThreadDef(defaultTask_1, StartDefaultTask_1, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_1), NULL);
osThreadDef(defaultTask_2, StartDefaultTask_2, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_2), NULL);
osThreadDef(defaultTask_3, StartDefaultTask_3, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_3), NULL);
osThreadDef(defaultTask_4, StartDefaultTask_4, osPriorityNormal, 0, 600);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_4), NULL);
}
void StartDefaultTask_1(void const * argument){
    for(;;){
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}

void StartDefaultTask_2(void const * argument){
    for(;;){
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}

void StartDefaultTask_3(void const * argument){
    for(;;){
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}

void StartDefaultTask_4(void const * argument){
    for(;;){
        osMutexWait(stdio_mutex, osWaitForever);
        printf("%s\n\r", __func__);
        osMutexRelease(stdio_mutex);
        osDelay(1000);
    }
}
This is the result in the console (UART):
When I change the stack size for task 4 from 600 to 128, as below:
osThreadDef(defaultTask_4, StartDefaultTask_4, osPriorityNormal, 0, 128);
defaultTaskHandle = osThreadCreate(osThread(defaultTask_4), NULL);
then no task runs at all.
Actually I want to create many threads for my application, but this issue makes that difficult.
Could you let me know the root cause of the problem, and how to resolve it?
Thanks in advance!
There is no common, easy method of stack calculation; it depends on many factors.
I would suggest avoiding stack-greedy functions like printf, scanf, etc. Write your own versions: not as "smart" and universal, but much less resource-hungry.
Avoid large local variables, and be very careful when you allocate memory.
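As a practical way to size stacks empirically, and assuming the CMSIS-RTOS layer here is backed by FreeRTOS (the usual setup in STM32Cube projects; this sketch is illustrative, not from the original thread), each task can report how close it has come to exhausting its stack:
#include <stdio.h>
#include "FreeRTOS.h"
#include "task.h"

/* Logs the minimum free stack ever observed for the calling task, in words.
 * Requires INCLUDE_uxTaskGetStackHighWaterMark set to 1 in FreeRTOSConfig.h. */
void log_stack_headroom(const char *name)
{
    UBaseType_t words_free = uxTaskGetStackHighWaterMark(NULL);
    printf("%s: %lu stack words never used\n", name, (unsigned long)words_free);
}
Calling this at the bottom of each task loop shows which stacks are oversized and which are close to overflow.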
Following your suggestions, I checked with the debugger and found the root cause: the heap size was too small.
I resolved it in two ways:
increase heap size: #define configTOTAL_HEAP_SIZE ((size_t)5120)
decrease stack size: #define configMINIMAL_STACK_SIZE ((uint16_t)64)
osThreadDef(defaultTask_6, StartDefaultTask_6, osPriorityNormal, 0, 64);
Do you know how to determine the maximum heap size? Please let me know.
Thank you so much
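Regarding that follow-up question: assuming a FreeRTOS-backed port using heap_4.c (the STM32Cube default; the wrapper below is illustrative, while the two xPort calls are standard FreeRTOS APIs), the heap headroom can be measured at runtime instead of computed up front:
#include <stdio.h>
#include "FreeRTOS.h"

/* configTOTAL_HEAP_SIZE is the hard ceiling; these calls show how much of it
 * is actually used, so the value can be tuned empirically. */
void log_heap_headroom(void)
{
    size_t now  = xPortGetFreeHeapSize();            /* bytes free right now */
    size_t ever = xPortGetMinimumEverFreeHeapSize(); /* low-water mark (heap_4/heap_5 only) */
    printf("heap: %u bytes free now, %u bytes free at worst\n", (unsigned)now, (unsigned)ever);
}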

Uinput and Raspberry Pi

I tried to ask this question on the Raspberry Pi forums, but I received no responses at all. I thought I might query the minds of the StackOverflow community, which has been so helpful in the past.
I'm writing a userspace driver for the Raspberry Pi (it may be ported to other platforms later) which makes use of the bcm2835 library (GPIO) and uinput (Linux user-input virtual devices). I need to read GPIO pins and translate their values into simulated keypresses on a virtual keyboard. The GPIO part is complete, and so is the translation part. Unfortunately, the virtual-keyboard part is not: uinput refuses to cooperate.
Now, the exact same code works perfectly on a Debian desktop machine. The evdev and uinput modules are required, and both were loaded in all test cases. On the desktop, inputs can be triggered; on the Raspberry Pi, however, I can verify that the GPIO subsystem has registered the input, but the uinput events never fire. Does anyone have a lead on what I might do?
Thank you very much. If you need any information, logs, or anything else, please let me know and I will post it as soon as I can.
This is a complete solution that works for me. I have a custom-made keypad, and these are the keys I have defined. Here is the link to the original PDF I used.
Of course you can define whatever keys you want; just add them to the array.
Note: this code only works with elevated permissions.
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/uinput.h>

/* Assumed value: matches the four entries set up in init_keys(); the
 * original post did not show this definition. */
#define allowed_KEYS_size 4

int allowed_keys[allowed_KEYS_size][2] = {0};

void init_keys(void);
int open_uinput(void);
int getKeyEVT(int k);
long int emit(int fd, int type, int code, int val);

int main(void)
{
    init_keys();
    int fd = open_uinput();
    int key_evt = getKeyEVT(49); // ASCII code for '1'
    // simulate key press and key release
    emit(fd, EV_KEY, key_evt, 1);
    emit(fd, EV_SYN, SYN_REPORT, 0);
    emit(fd, EV_KEY, key_evt, 0);
    emit(fd, EV_SYN, SYN_REPORT, 0);
    return 0;
}

long int emit(int fd, int type, int code, int val)
{
    struct input_event ie;
    ie.type = type;
    ie.code = code;
    ie.value = val;
    /* timestamp values below are ignored */
    ie.time.tv_sec = 0;
    ie.time.tv_usec = 0;
    long int y = write(fd, &ie, sizeof(ie));
    return y;
}

int open_uinput(void)
{
    int fdui = open("/dev/uinput", O_WRONLY | O_NONBLOCK);
    if (fdui < 0)
    {
        printf("uinput fd creation failed!\n");
        exit(EXIT_FAILURE);
    }
    ioctl(fdui, UI_SET_EVBIT, EV_KEY);
    ioctl(fdui, UI_SET_EVBIT, EV_SYN); // added by behzad
    for (int i = 0; i < allowed_KEYS_size; i++)
        ioctl(fdui, UI_SET_KEYBIT, allowed_keys[i][1]);
    struct uinput_setup usetup;
    memset(&usetup, 0, sizeof(usetup));
    usetup.id.bustype = BUS_USB;
    usetup.id.vendor = 0x1234;  /* sample vendor */
    usetup.id.product = 0x5678; /* sample product */
    strcpy(usetup.name, "My Keypad. Ver 1.1");
    ioctl(fdui, UI_DEV_SETUP, &usetup);
    ioctl(fdui, UI_DEV_CREATE);
    sleep(2); /* give userspace time to pick up the new device */
    return fdui;
}

int getKeyEVT(int k)
{
    for (int i = 0; i < allowed_KEYS_size; i++)
    {
        if (allowed_keys[i][0] == k)
            return allowed_keys[i][1];
    }
    return -1;
}

void init_keys(void)
{
    // Reference:
    // https://www.alt-codes.net/arrow_alt_codes.php
    // /usr/include/linux/input-event-codes.h
    allowed_keys[0][0] = 48;    // ASCII ---> 0
    allowed_keys[0][1] = KEY_0; // LINUX
    allowed_keys[1][0] = 49;    // ASCII
    allowed_keys[1][1] = KEY_1; // LINUX
    allowed_keys[2][0] = 50;    // ASCII
    allowed_keys[2][1] = KEY_2; // LINUX
    allowed_keys[3][0] = 51;    // ASCII
    allowed_keys[3][1] = KEY_3; // LINUX
}
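The listing above never tears the virtual device down. If that matters for your use case, the device can be removed explicitly before exit; a minimal sketch using the standard uinput ioctl (fd is the descriptor returned by open_uinput()):
/* Call once you are done emitting events. */
void close_uinput(int fd)
{
    ioctl(fd, UI_DEV_DESTROY); /* remove the virtual input device */
    close(fd);
}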

what could be causing this opengl segfault in glBufferSubData?

I've been whittling down this segfault for a while, and here's a pretty minimal reproducible example on my machine (below). I have the sinking feeling that it's a driver bug, but I'm very unfamiliar with OpenGL, so it's more likely I'm just doing something wrong.
Is this correct OpenGL 3.3 code? Should it be fine regardless of platform, compiler, and all that?
Here's the code, compiled with gcc -ggdb -lGL -lSDL2
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "GL/gl.h"
#include "GL/glext.h"
#include "SDL2/SDL.h"
// this section is for loading OpenGL things from later versions.
typedef void (APIENTRY *GLGenVertexArrays) (GLsizei n, GLuint *arrays);
typedef void (APIENTRY *GLGenBuffers) (GLsizei n, GLuint *buffers);
typedef void (APIENTRY *GLBindVertexArray) (GLuint array);
typedef void (APIENTRY *GLBindBuffer) (GLenum target, GLuint buffer);
typedef void (APIENTRY *GLBufferData) (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage);
typedef void (APIENTRY *GLBufferSubData) (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data);
typedef void (APIENTRY *GLGetBufferSubData) (GLenum target, GLintptr offset, GLsizeiptr size, GLvoid* data);
typedef void (APIENTRY *GLFlush) (void);
typedef void (APIENTRY *GLFinish) (void);
GLGenVertexArrays glGenVertexArrays = NULL;
GLGenBuffers glGenBuffers = NULL;
GLBindVertexArray glBindVertexArray = NULL;
GLBindBuffer glBindBuffer = NULL;
GLBufferData glBufferData = NULL;
GLBufferSubData glBufferSubData = NULL;
GLGetBufferSubData glGetBufferSubData = NULL;
void load_gl_pointers() {
    glGenVertexArrays = (GLGenVertexArrays)SDL_GL_GetProcAddress("glGenVertexArrays");
    glGenBuffers = (GLGenBuffers)SDL_GL_GetProcAddress("glGenBuffers");
    glBindVertexArray = (GLBindVertexArray)SDL_GL_GetProcAddress("glBindVertexArray");
    glBindBuffer = (GLBindBuffer)SDL_GL_GetProcAddress("glBindBuffer");
    glBufferData = (GLBufferData)SDL_GL_GetProcAddress("glBufferData");
    glBufferSubData = (GLBufferSubData)SDL_GL_GetProcAddress("glBufferSubData");
    glGetBufferSubData = (GLGetBufferSubData)SDL_GL_GetProcAddress("glGetBufferSubData");
}
// end OpenGL loading stuff
#define CAPACITY (1 << 8)
// return nonzero if an OpenGL error has occurred.
int opengl_checkerr(const char* const label) {
    GLenum err;
    switch(err = glGetError()) {
    case GL_INVALID_ENUM:
        printf("GL_INVALID_ENUM");
        break;
    case GL_INVALID_VALUE:
        printf("GL_INVALID_VALUE");
        break;
    case GL_INVALID_OPERATION:
        printf("GL_INVALID_OPERATION");
        break;
    case GL_INVALID_FRAMEBUFFER_OPERATION:
        printf("GL_INVALID_FRAMEBUFFER_OPERATION");
        break;
    case GL_OUT_OF_MEMORY:
        printf("GL_OUT_OF_MEMORY");
        break;
    case GL_STACK_UNDERFLOW:
        printf("GL_STACK_UNDERFLOW");
        break;
    case GL_STACK_OVERFLOW:
        printf("GL_STACK_OVERFLOW");
        break;
    default: return 0;
    }
    printf(" %s\n", label);
    return 1;
}
int main(int nargs, const char* args[]) {
    printf("initializing..\n");
    SDL_Init(SDL_INIT_EVERYTHING);
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
    SDL_Window* const w =
        SDL_CreateWindow(
            "broken",
            SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
            1, 1,
            SDL_WINDOW_OPENGL
        );
    if(w == NULL) {
        printf("window was null\n");
        return 0;
    }
    SDL_GLContext context = SDL_GL_CreateContext(w);
    if(context == NULL) {
        printf("context was null\n");
        return 0;
    }
    load_gl_pointers();
    if(opengl_checkerr("init")) {
        return 1;
    }
    printf("GL_VENDOR: %s\n", glGetString(GL_VENDOR));
    printf("GL_RENDERER: %s\n", glGetString(GL_RENDERER));
    float* const vs = malloc(CAPACITY * sizeof(float));
    memset(vs, 0, CAPACITY * sizeof(float));
    unsigned int i = 0;
    while(i < 128000) {
        GLuint vertex_array;
        GLuint vertex_buffer;
        glGenVertexArrays(1, &vertex_array);
        glBindVertexArray(vertex_array);
        glGenBuffers(1, &vertex_buffer);
        glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
        if(opengl_checkerr("gen/binding")) {
            return 1;
        }
        glBufferData(
            GL_ARRAY_BUFFER,
            CAPACITY * sizeof(float),
            vs, // initialize with `vs` just to make sure it's allocated.
            GL_DYNAMIC_DRAW
        );
        // verify that the memory is allocated by reading it back into `vs`.
        glGetBufferSubData(
            GL_ARRAY_BUFFER,
            0,
            CAPACITY * sizeof(float),
            vs
        );
        if(opengl_checkerr("creating buffer")) {
            return 1;
        }
        glFlush();
        glFinish();
        // segfault occurs here..
        glBufferSubData(
            GL_ARRAY_BUFFER,
            0,
            CAPACITY * sizeof(float),
            vs
        );
        glFlush();
        glFinish();
        ++i;
    }
    return 0;
}
When I bump the iterations from 64k to 128k, I start getting:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff754c859 in __memcpy_sse2_unaligned () from /usr/lib/libc.so.6
(gdb) bt
#0 0x00007ffff754c859 in __memcpy_sse2_unaligned () from /usr/lib/libc.so.6
#1 0x00007ffff2ea154d in ?? () from /usr/lib/xorg/modules/dri/i965_dri.so
#2 0x0000000000400e5c in main (nargs=1, args=0x7fffffffe8d8) at opengl-segfault.c:145
However, I can more than double the capacity (keeping the number of iterations at 64k) without segfaulting.
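One thing worth noting: the loop never frees the objects it creates, so each iteration leaks one VAO and one buffer. To rule out simple object exhaustion as a factor, the objects can be deleted at the end of each iteration; a sketch in the loader style of the listing (glDeleteVertexArrays and glDeleteBuffers are core GL functions that the original program does not load):
typedef void (APIENTRY *GLDeleteVertexArrays) (GLsizei n, const GLuint *arrays);
typedef void (APIENTRY *GLDeleteBuffers) (GLsizei n, const GLuint *buffers);
GLDeleteVertexArrays glDeleteVertexArrays = NULL;
GLDeleteBuffers glDeleteBuffers = NULL;

// in load_gl_pointers():
glDeleteVertexArrays = (GLDeleteVertexArrays)SDL_GL_GetProcAddress("glDeleteVertexArrays");
glDeleteBuffers = (GLDeleteBuffers)SDL_GL_GetProcAddress("glDeleteBuffers");

// at the end of each loop iteration in main():
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glDeleteBuffers(1, &vertex_buffer);
glDeleteVertexArrays(1, &vertex_array);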
GL_VENDOR: Intel Open Source Technology Center
GL_RENDERER: Mesa DRI Intel(R) Haswell Mobile
I had a very similar issue when calling glGenTextures and glBindTexture. I tried debugging, and when I stepped through these lines I would get something like:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff26eaaa8 in ?? () from /usr/lib/x86_64-linux-gnu/dri/i965_dri.so
Note that prior to adding textures, I could successfully run programs with VBOs and VAOs and generate meshes fine. After looking into the answer suggesting switching from the xf86-video-intel driver to the xf86-video-fbdev driver, I would advise against it. (There really isn't much information on this issue or on users facing segfaults on Linux with integrated Intel graphics; perhaps a good question to ask the folks over at Intel Open Source.)
The solution I found was to stop using freeglut: switch to GLFW instead. Whether there actually is some problem with the Intel Linux graphics stack is beside the point; the solvable problem is freeglut. If you want to use GLFW with your machine's most recent OpenGL core profile, you need just the following:
glfwWindowHint (GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint (GLFW_CONTEXT_VERSION_MINOR, 0);
glfwWindowHint (GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
Setting forward compatibility (although I've seen lots of posts arguing you shouldn't do this) means Mesa is free to select a core context, provided one sets the minimum context version to 3.0 or higher. I guess freeglut must be going wrong somewhere in its interactions with Mesa; if anyone can shed some light on this, that would be great!
This is a bug in the intel graphics drivers for Linux. Switching from the xf86-video-intel driver to xf86-video-fbdev driver solves the problem.
Edit: I'm not recommending switching to fbdev, just using it as an experiment to see whether the segfault goes away.

error:capture is NULL in OpenCV on Windows 7

I've seen other posts here similar to this question and have even Googled and attempted every possible method stated, but none of them worked for me.
The following code just captures images from the webcam indefinitely. The code builds successfully, but I keep getting the error "error:capture is NULL".
Is OpenCV 2.2.0 supported on Windows 7? I have seen many posts saying to use DirectShow for video capture on Windows 7.
#include <opencv/cxcore.h>
#include <opencv/highgui.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[])
{
    CvSize size640x480 = cvSize(640, 480);
    CvCapture* p_capWebcam;
    IplImage* p_imgOriginal;
    p_capWebcam = cvCaptureFromCAM(0); // also tried cvCaptureFromCAM(CV_CAP_ANY)
                                       // and indexes from -1 to 10, but nothing worked
    if(p_capWebcam == NULL)
    {
        printf("error:capture is NULL");
        getchar();
        return -1;
    }
    cvNamedWindow("Original", CV_WINDOW_AUTOSIZE);
    while(1)
    {
        p_imgOriginal = cvQueryFrame(p_capWebcam);
        if(p_imgOriginal == NULL) // the original post had `=` here, an assignment rather than a comparison
        {
            printf("error :frame is NULL \n");
            break;
        }
        cvWaitKey(10);
        cvShowImage("Original", p_imgOriginal);
    }
    return 0;
}
The IDE is Microsoft Visual C++ 2010 Express. The webcam (Frontech, USB 2.0) supports the following formats:
{'YUY2_160x120' 'YUY2_176x144' 'YUY2_320x240' 'YUY2_352x288' 'YUY2_640x480'}
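Since the camera only advertises specific YUY2 modes, it may also help to request one of those sizes explicitly before the first cvQueryFrame; a sketch using the old C API's property setters (illustrative, not from the original thread):
/* Ask the driver for one of the camera's advertised modes (640x480 here). */
cvSetCaptureProperty(p_capWebcam, CV_CAP_PROP_FRAME_WIDTH, 640);
cvSetCaptureProperty(p_capWebcam, CV_CAP_PROP_FRAME_HEIGHT, 480);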
You're lacking a call to cvWaitKey(10); after the cvShowImage() (thus your window does not get updated).
And please, move over to the C++ API; the outdated C API won't be supported for long.
So the whole thing should look more like this:
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core/core.hpp"
using namespace cv;
int main()
{
VideoCapture cap(0);
while( cap.isOpened() )
{
Mat frame;
if ( ! cap.read(frame) )
break;
imshow("lalala",frame);
int k = waitKey(10);
if ( k==27 )
break;
}
return 0;
}

Direct access to Linux framebuffer - copyarea

I want to move a rectangle very quickly over a framebuffer in an embedded Linux application. I have found that the function cfb_copyarea may be useful, but I cannot find any ioctl on the /dev/fb device to call it. Or can this function be called directly?
Here is code to initialize and close the framebuffer:
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/fb.h>

#define FB_NAME "/dev/fb0" /* assumed; the original snippet leaves FB_NAME undefined */

class CFrameBuffer
{
    void* m_FrameBuffer;
    struct fb_fix_screeninfo m_FixInfo;
    struct fb_var_screeninfo m_VarInfo;
    int m_FBFD;

    int InitFB()
    {
        int iFrameBufferSize;
        /* Open the framebuffer device in read write */
        m_FBFD = open(FB_NAME, O_RDWR);
        if (m_FBFD < 0) {
            printf("Unable to open %s.\n", FB_NAME);
            return 1;
        }
        /* Do Ioctl. Retrieve fixed screen info. */
        if (ioctl(m_FBFD, FBIOGET_FSCREENINFO, &m_FixInfo) < 0) {
            printf("get fixed screen info failed: %s\n", strerror(errno));
            close(m_FBFD);
            return 1;
        }
        /* Do Ioctl. Get the variable screen info. */
        if (ioctl(m_FBFD, FBIOGET_VSCREENINFO, &m_VarInfo) < 0) {
            printf("Unable to retrieve variable screen info: %s\n", strerror(errno));
            close(m_FBFD);
            return 1;
        }
        /* Calculate the size to mmap */
        iFrameBufferSize = m_FixInfo.line_length * m_VarInfo.yres;
        printf("Line length %d\n", m_FixInfo.line_length);
        /* Now mmap the framebuffer. */
        m_FrameBuffer = mmap(NULL, iFrameBufferSize, PROT_READ | PROT_WRITE,
                             MAP_SHARED, m_FBFD, 0);
        if (m_FrameBuffer == MAP_FAILED) { /* mmap() returns MAP_FAILED, not NULL, on error */
            printf("mmap failed:\n");
            close(m_FBFD);
            return 1;
        }
        return 0;
    }

    void CloseFB()
    {
        /* pass the mapped size; munmap() with length 0 fails with EINVAL */
        munmap(m_FrameBuffer, m_FixInfo.line_length * m_VarInfo.yres);
        close(m_FBFD);
    }
};
Note that this code is not entirely correct: although it will work on many Linux devices, on some it won't. To calculate the framebuffer size, do not do this:
iFrameBufferSize = m_FixInfo.line_length * m_VarInfo.yres;
Instead, do this:
iFrameBufferSize = m_FixInfo.smem_len;
And your code will be more portable.
As far as I know, after a few days of research, there is no ioctl for invoking this function. You have to write your own system call, preferably in a kernel module, or copy the algorithm from the kernel source and use it in user space via the mmapped memory.
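For the user-space route, a straightforward row-by-row copy over the mmapped buffer is often fast enough. A minimal sketch, assuming a linear framebuffer and non-overlapping source and destination rectangles (the function name and parameters are illustrative):
#include <stdint.h>
#include <string.h>

/* Copy a w x h pixel rectangle from (sx, sy) to (dx, dy) inside a mmapped,
 * linear framebuffer. line_length is fb_fix_screeninfo.line_length (bytes per
 * row); bytes_pp is bytes per pixel (e.g. 4 for 32bpp). */
static void fb_copyarea_user(uint8_t* fb, int line_length, int bytes_pp,
                             int sx, int sy, int dx, int dy, int w, int h)
{
    for (int row = 0; row < h; row++) {
        memcpy(fb + (size_t)(dy + row) * line_length + (size_t)dx * bytes_pp,
               fb + (size_t)(sy + row) * line_length + (size_t)sx * bytes_pp,
               (size_t)w * bytes_pp);
    }
}
For overlapping rectangles, copy with memmove and choose the row iteration direction based on whether the destination lies above or below the source.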
