I am learning Vulkan and started having a problem where no vertices would get displayed.
After analyzing my program with RenderDoc (https://renderdoc.org/builds),
I realized that the buffer containing the vertex and index information contained the rights values.
At the end of the same buffer, the indices data:
The problem is that when I check the data that is transmitted to the vertex shader, it is empty:
Here is the command buffer section where it is supposed to send the data to the shader:
VkDeviceSize indicesOffset = sizeof(Vertex) * this->nbVertices;
VkDeviceSize offsets[] = {0};
vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, &this->vertexBuffer, offsets);
vkCmdBindIndexBuffer(commandBuffers[i], this->vertexBuffer, indicesOffset, VK_INDEX_TYPE_UINT32);
for(size_t j = 0 ; j < this->models.size() ; j++){
Model *model = this->models[j];
uint32_t modelDynamicOffset = j * static_cast<uint32_t>(this->uniformDynamicAlignment);
VkDescriptorSet* modelDescriptorSet = model->getDescriptorSet(i);
vkCmdBindDescriptorSets(this->commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, modelDescriptorSet, 1, &modelDynamicOffset);
vkCmdDrawIndexed(commandBuffers[i], this->nbIndices, 1, 0, indicesOffset, 0);
}
Also, here is how I create the vertex buffer:
void Application::createVertexBuffers() {
for(Model *model : this->models){
for(Vertex vertex : model->getVertices()){
vertices.push_back(vertex);
}
for(uint32_t index : model->getIndices()){
indices.push_back(index);
}
}
VkDeviceSize vertexBufferSize = sizeof(vertices[0]) * vertices.size();
VkDeviceSize indexBufferSize = sizeof(uint32_t) * indices.size();
this->nbVertices = vertices.size();
this->nbIndices = indices.size();
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
//To CPU
this->createBuffer(vertexBufferSize + indexBufferSize,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
stagingBuffer,
stagingBufferMemory);
void *data;
vkMapMemory(device, stagingBufferMemory, 0, vertexBufferSize, 0, &data);
memcpy(data, vertices.data(), (size_t)vertexBufferSize);
vkUnmapMemory(device, stagingBufferMemory);
//Add the index data after vertex data
vkMapMemory(device, stagingBufferMemory, vertexBufferSize, indexBufferSize, 0, &data);
memcpy(data, indices.data(), (size_t)indexBufferSize);
vkUnmapMemory(device, stagingBufferMemory);
//To GPU
this->createBuffer(vertexBufferSize + indexBufferSize,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
this->vertexBuffer,
this->vertexBufferMemory);
this->copyBuffer(stagingBuffer, this->vertexBuffer, vertexBufferSize + indexBufferSize);
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingBufferMemory, nullptr);
}
If you need more information to help me solve my problem, please tell me.
Thank you.
The indices that renderdoc reports for the render are a bit high.
You pass indicesOffset as vertexOffset in your draw command. Which is:
vertexOffset is the value added to the vertex index before indexing into the vertex buffer.
So replace that with 0 and you should get your proper vertices again.
Related
OS : Win 10
IDE: Visual Studio 2015
Language: C++
Others: I use OpenCV 3.4
I just create a Window Form using CLR empty project,
then put on a pictureBox & three buttons.
First button: load a local image and show on the pictureBox:
pictureBox1->Image = Image::FromFile("D:/something.png");
global_mat = imread("D:/something.png", 1); // global_mat is a global Mat.
zoom_in_counter = 0; // zoom_in_counter is a global int.
Second button: zoom in the image in the pictureBox
if (zoom_in_counter < 5) // You can only enlarge the image 5 times.
{
Mat new_mat = Mat::zeros(0, 0, CV_8UC3);
resize(global_mat, new_mat, cv::Size(global_mat.cols * 2, global_mat.rows * 2));
global_mat = new_mat;
if ((pictureBox1->Width != new_mat.cols) || (pictureBox1->Height != new_mat.rows))
{
pictureBox1->Width = new_mat.cols;
pictureBox1->Height = new_mat.rows;
pictureBox1->Image = gcnew System::Drawing::Bitmap(new_mat.cols, new_mat.rows);
}
System::Drawing::Bitmap^ bmpImage = gcnew Bitmap(
new_mat.cols, new_mat.rows, new_mat.step,
System::Drawing::Imaging::PixelFormat::Format24bppRgb,
System::IntPtr(new_mat.data)
);
Graphics^ g = Graphics::FromImage(pictureBox1->Image);
g->DrawImage(bmpImage, 0, 0, new_mat.cols, new_mat.rows);
pictureBox1->Refresh();
delete g;
zoom_in_counter++;
}
Third buttin: zoom out the image in the pictureBox
if (zoom_in_counter > 0) // You can't shrink the image.
{
Mat new_mat = Mat::zeros(0, 0, CV_8UC3);
resize(global_mat, new_mat, cv::Size(global_mat.cols * 0.5, global_mat.rows * 0.5));
global_mat = new_mat;
if ((pictureBox1->Width != new_mat.cols) || (pictureBox1->Height != new_mat.rows))
{
pictureBox1->Width = new_mat.cols;
pictureBox1->Height = new_mat.rows;
pictureBox1->Image = gcnew System::Drawing::Bitmap(new_mat.cols, new_mat.rows);
}
System::Drawing::Bitmap^ bmpImage = gcnew Bitmap(
new_mat.cols, new_mat.rows, new_mat.step,
System::Drawing::Imaging::PixelFormat::Format24bppRgb,
System::IntPtr(new_mat.data)
);
Graphics^ g = Graphics::FromImage(pictureBox1->Image);
g->DrawImage(bmpImage, 0, 0, new_mat.cols, new_mat.rows);
pictureBox1->Refresh();
delete g;
zoom_in_counter--;
}
And then,
every I zoom in or zoom out, it works,
excludeingthe image is zoomed back to the original size.
I'll get such error message:
An unhandled exception of type 'System.ArgumentException' occurred in System.Drawing.dll
It's really odd!
Finally, my friend figure out what's wrong,
please refer to:
https://msdn.microsoft.com/zh-tw/library/windows/desktop/ms536315(v=vs.85).aspx
stride [in]
Type: INT
Integer that specifies the byte offset between the beginning of one scan line and the next. This is usually (but not necessarily) the number of bytes in the pixel format (for example, 2 for 16 bits per pixel) multiplied by the width of the bitmap. The value passed to this parameter must be a multiple of four.
What's the fewest number of steps needed to draw all of the cube's vertices, without picking up the pen from the paper?
So far I have reduced it to 16 steps:
0, 0, 0
0, 0, 1
0, 1, 1
1, 1, 1
1, 1, 0
0, 1, 0
0, 0, 0
1, 0, 0
1, 0, 1
0, 0, 1
0, 1, 1
0, 1, 0
1, 1, 0
1, 0, 0
1, 0, 1
1, 1, 1
I presume it can be reduced less than 16 steps as there are only 12 vertices to be drawn
You can view a working example in three.js javascript here:
http://jsfiddle.net/kmturley/5aeucehf/show/
Well I encoded a small brute force solver for this
the best solution is with 16 vertexes
took about 11.6 sec to compute
all is in C++ (visualization by OpenGL)
First the cube representation:
//---------------------------------------------------------------------------
#define a 0.5
double pnt[]=
{
-a,-a,-a, // point 0
-a,-a,+a,
-a,+a,-a,
-a,+a,+a,
+a,-a,-a,
+a,-a,+a,
+a,+a,-a,
+a,+a,+a, // point 7
1e101,1e101,1e101, // end tag
};
#undef a
int lin[]=
{
0,1,
0,2,
0,4,
1,3,
1,5,
2,3,
2,6,
3,7,
4,5,
4,6,
5,7,
6,7,
-1,-1, // end tag
};
// int solution[]={ 0, 1, 3, 1, 5, 4, 0, 2, 3, 7, 5, 4, 6, 2, 6, 7, -1 }; // found polyline solution
//---------------------------------------------------------------------------
void draw_lin(double *pnt,int *lin)
{
glBegin(GL_LINES);
for (int i=0;lin[i]>=0;)
{
glVertex3dv(pnt+(lin[i]*3)); i++;
glVertex3dv(pnt+(lin[i]*3)); i++;
}
glEnd();
}
//---------------------------------------------------------------------------
void draw_pol(double *pnt,int *pol)
{
glBegin(GL_LINE_STRIP);
for (int i=0;pol[i]>=0;i++) glVertex3dv(pnt+(pol[i]*3));
glEnd();
}
//---------------------------------------------------------------------------
Now the solver:
//---------------------------------------------------------------------------
struct _vtx // vertex
{
List<int> i; // connected to (vertexes...)
_vtx(){}; _vtx(_vtx& a){ *this=a; }; ~_vtx(){}; _vtx* operator = (const _vtx *a) { *this=*a; return this; }; /*_vtx* operator = (const _vtx &a) { ...copy... return this; };*/
};
const int _max=16; // know solution size (do not bother to find longer solutions)
int use[_max],uses=0; // temp line usage flag
int pol[_max],pols=0; // temp solution
int sol[_max+2],sols=0; // best found solution
List<_vtx> vtx; // model vertexes + connection info
//---------------------------------------------------------------------------
void _solve(int a)
{
_vtx *v; int i,j,k,l,a0,a1,b0,b1;
// add point to actual polyline
pol[pols]=a; pols++; v=&vtx[a];
// test for solution
for (l=0,i=0;i<uses;i++) use[i]=0;
for (a0=pol[0],a1=pol[1],i=1;i<pols;i++,a0=a1,a1=pol[i])
for (j=0,k=0;k<uses;k++)
{
b0=lin[j]; j++;
b1=lin[j]; j++;
if (!use[k]) if (((a0==b0)&&(a1==b1))||((a0==b1)&&(a1==b0))) { use[k]=1; l++; }
}
if (l==uses) // better solution found
if ((pols<sols)||(sol[0]==-1))
for (sols=0;sols<pols;sols++) sol[sols]=pol[sols];
// recursion only if pol not too big
if (pols+1<sols) for (i=0;i<v->i.num;i++) _solve(v->i.dat[i]);
// back to previous state
pols--; pol[pols]=-1;
}
//---------------------------------------------------------------------------
void solve(double *pnt,int *lin)
{
int i,j,a0,a1;
// init sizes
for (i=0;i<_max;i++) { use[i]=0; pol[i]=-1; sol[i]=-1; }
for(i=0,j=0;pnt[i]<1e100;i+=3,j++); vtx.allocate(j); vtx.num=j;
for(i=0;i<vtx.num;i++) vtx[i].i.num=0;
// init connections
for(uses=0,i=0;lin[i]>=0;uses++)
{
a0=lin[i]; i++;
a1=lin[i]; i++;
vtx[a0].i.add(a1);
vtx[a1].i.add(a0);
}
// start actual solution (does not matter which vertex on cube is first)
pols=0; sols=_max+1; _solve(0);
sol[sols]=-1; if (sol[0]<0) sols=0;
}
//---------------------------------------------------------------------------
Usage:
solve(pnt,lin); // call once to compute the solution
glColor3f(0.2,0.2,0.2); draw_lin(pnt,lin); // draw gray outline
glColor3f(1.0,1.0,1.0); draw_pol(pnt,sol); // overwrite by solution to visually check correctness (Z-buffer must pass also on equal values!!!)
List
is just mine template for dynamic array
List<int> x is equivalent to int x[]
x.add(5) ... adds 5 to the end of list
x.num is the used size of list in entries
x.allocate(100) preallocate list size to 100 entries (to avoid relocations slowdowns)
solve(pnt,lin) algorithm
first prepare vertex data
each vertex vtx[i] corresponds to point i-th point in pnt table
i[] list contains the index of each vertex connected to this vertex
start with vertex 0 (on cube is irrelevant the start point
otherwise there would be for loop through every vertex as start point
_solve(a)
it adds a vertex index to actual solution pol[pols]
then test how many lines is present in actual solution
and if all lines from lin[] are drawn and solution is smaller than already found one
copy it as new solution
after test if actual solution is not too long recursively add next vertex
as one of the vertex that is connected to last vertex used
to limit the number of combinations
at the end sol[sols] hold the solution vertex index list
sols is the number of vertexes used (lines-1)
[Notes]
the code is not very clean but it works (sorry for that)
hope I did not forget to copy something
I'm learning DirectX, using the book "Sherrod A., Jones W. - Beginning DirectX 11 Game Programming - 2011" Now I'm exploring the 4th chapter about drawing text.
Please, help we to fix my function, that I'm using to draw a string on the screen. I've already loaded font texture and in the function I create some sprites with letters and define texture coordinates for them. This compiles correctly, but doesn't draw anything. What's wrong?
bool DirectXSpriteGame :: DrawString(char* StringToDraw, float StartX, float StartY)
{
//VAR
HRESULT D3DResult; //The result of D3D functions
int i; //Counters
const int IndexA = static_cast<char>('A'); //ASCII index of letter A
const int IndexZ = static_cast<char>('Z'); //ASCII index of letter Z
int StringLenth = strlen(StringToDraw); //Lenth of drawing string
float ScreenCharWidth = static_cast<float>(LETTER_WIDTH) / static_cast<float>(SCREEN_WIDTH); //Width of the single char on the screen(in %)
float ScreenCharHeight = static_cast<float>(LETTER_HEIGHT) / static_cast<float>(SCREEN_HEIGHT); //Height of the single char on the screen(in %)
float TexelCharWidth = 1.0f / static_cast<float>(LETTERS_NUM); //Width of the char texel(in the texture %)
float ThisStartX; //The start x of the current letter, drawingh
float ThisStartY; //The start y of the current letter, drawingh
float ThisEndX; //The end x of the current letter, drawing
float ThisEndY; //The end y of the current letter, drawing
int LetterNum; //Letter number in the loaded font
int ThisLetter; //The current letter
D3D11_MAPPED_SUBRESOURCE MapResource; //Map resource
VertexPos* ThisSprite; //Vertecies of the current sprite, drawing
//VAR
//Clamping string, if too long
if(StringLenth > LETTERS_NUM)
{
StringLenth = LETTERS_NUM;
}
//Mapping resource
D3DResult = _DeviceContext -> Map(_vertexBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MapResource);
if(FAILED(D3DResult))
{
throw("Failed to map resource");
}
ThisSprite = (VertexPos*)MapResource.pData;
for(i = 0; i < StringLenth; i++)
{
//Creating geometry for the letter sprite
ThisStartX = StartX + ScreenCharWidth * static_cast<float>(i);
ThisStartY = StartY;
ThisEndX = ThisStartX + ScreenCharWidth;
ThisEndY = StartY + ScreenCharHeight;
ThisSprite[0].Position = XMFLOAT3(ThisEndX, ThisEndY, 1.0f);
ThisSprite[1].Position = XMFLOAT3(ThisEndX, ThisStartY, 1.0f);
ThisSprite[2].Position = XMFLOAT3(ThisStartX, ThisStartY, 1.0f);
ThisSprite[3].Position = XMFLOAT3(ThisStartX, ThisStartY, 1.0f);
ThisSprite[4].Position = XMFLOAT3(ThisStartX, ThisEndY, 1.0f);
ThisSprite[5].Position = XMFLOAT3(ThisEndX, ThisEndY, 1.0f);
ThisLetter = static_cast<char>(StringToDraw[i]);
//Defining the letter place(number) in the font
if(ThisLetter < IndexA || ThisLetter > IndexZ)
{
//Invalid character, the last character in the font, loaded
LetterNum = IndexZ - IndexA + 1;
}
else
{
LetterNum = ThisLetter - IndexA;
}
//Unwraping texture on the geometry
ThisStartX = TexelCharWidth * static_cast<float>(LetterNum);
ThisStartY = 0.0f;
ThisEndY = 1.0f;
ThisEndX = ThisStartX + TexelCharWidth;
ThisSprite[0].TextureCoords = XMFLOAT2(ThisEndX, ThisEndY);
ThisSprite[1].TextureCoords = XMFLOAT2(ThisEndX, ThisStartY);
ThisSprite[2].TextureCoords = XMFLOAT2(ThisStartX, ThisStartY);
ThisSprite[3].TextureCoords = XMFLOAT2(ThisStartX, ThisStartY);
ThisSprite[4].TextureCoords = XMFLOAT2(ThisStartX, ThisEndY);
ThisSprite[5].TextureCoords = XMFLOAT2(ThisEndX, ThisEndY);
ThisSprite += VERTEX_IN_RECT_NUM;
}
for(i = 0; i < StringLenth; i++, ThisSprite -= VERTEX_IN_RECT_NUM);
_DeviceContext -> Unmap(_vertexBuffer, 0);
_DeviceContext -> Draw(VERTEX_IN_RECT_NUM * StringLenth, 0);
return true;
}
Although the piece of code constructing the Vertex Array seems correct to me at first glance, it seems like you are trying to Draw your vertices with a Shader which has not been set yet !
It is difficult to precisely answer you without looking at the whole code, but I can guess that you will need to do something like that :
1) Create Vertex and Pixel Shaders by compiling them first from their respective buffers
2) Create the Input Layout description, which describes the Input Buffers that will be read by the Input Assembler stage. It will have to match your VertexPos structure and your shader structure.
3) Set the Shader parameters.
4) Only now you can Set Shader rendering parameters : Set the InputLayout, as well as the Vertex and Pixel Shaders that will be used to render your triangles by something like :
_DeviceContext -> Unmap(_vertexBuffer, 0);
_DeviceContext->IASetInputLayout(myInputLayout);
_DeviceContext->VSSetShader(myVertexShader, NULL, 0); // Set Vertex shader
_DeviceContext->PSSetShader(myPixelShader, NULL, 0); // Set Pixel shader
_DeviceContext -> Draw(VERTEX_IN_RECT_NUM * StringLenth, 0);
This link should help you achieve what you want to do : http://www.rastertek.com/dx11tut12.html
Also, I recommend you to set an IndexBuffer and to use the method DrawIndexed to render your triangles for performance reasons : It will allow the graphics adapter to store vertices in a vertex cache, allowing recently-used vertex to be fetched from the cache instead of reading it from the vertex buffer.
More about this concern can be found on MSDN : http://msdn.microsoft.com/en-us/library/windows/desktop/bb147325(v=vs.85).aspx
Hope this helps!
P.S : Also, don't forget to release the resources after using them by calling Release().
I am trying to use OpenCL and image2d_t objects to speed up image convolution. When I noticed that the output was a blank image of all zeros, I simplified the OpenCL kernel to a basic read from the input and write to the output (shown below). With a little bit of tweaking, I got it to write a few scattered pixels of the image into the output image.
I have verified that the image is intact up until the call to read_imageui() in the OpenCL kernel. I wrote the image to GPU memory with CommandQueue::enqueueWriteImage() and immediately read it back into a brand new buffer in CPU memory with CommandQueue::enqueueReadImage(). The result of this call matched the original input image. However, when I retrieve the pixels with read_imageui() in the kernel, the vast majority of the pixels are set to 0.
C++ source:
int height = 112;
int width = 9216;
unsigned int numPixels = height * width;
unsigned int numInputBytes = numPixels * sizeof(uint16_t);
unsigned int numDuplicatedInputBytes = numInputBytes * 4;
unsigned int numOutputBytes = numPixels * sizeof(int32_t);
cl::size_t<3> origin;
origin.push_back(0);
origin.push_back(0);
origin.push_back(0);
cl::size_t<3> region;
region.push_back(width);
region.push_back(height);
region.push_back(1);
std::ifstream imageFile("hri_vis_scan.dat", std::ifstream::binary);
checkErr(imageFile.is_open() ? CL_SUCCESS : -1, "hri_vis_scan.dat");
uint16_t *image = new uint16_t[numPixels];
imageFile.read((char *) image, numInputBytes);
imageFile.close();
// duplicate our single channel image into all 4 channels for Image2D
cl_ushort4 *imageDuplicated = new cl_ushort4[numPixels];
for (int i = 0; i < numPixels; i++)
for (int j = 0; j < 4; j++)
imageDuplicated[i].s[j] = image[i];
cl::Buffer imageBufferOut(context, CL_MEM_WRITE_ONLY, numOutputBytes, NULL, &err);
checkErr(err, "Buffer::Buffer()");
cl::ImageFormat inFormat;
inFormat.image_channel_data_type = CL_UNSIGNED_INT16;
inFormat.image_channel_order = CL_RGBA;
cl::Image2D bufferIn(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, inFormat, width, height, 0, imageDuplicated, &err);
checkErr(err, "Image2D::Image2D()");
cl::ImageFormat outFormat;
outFormat.image_channel_data_type = CL_UNSIGNED_INT16;
outFormat.image_channel_order = CL_RGBA;
cl::Image2D bufferOut(context, CL_MEM_WRITE_ONLY, outFormat, width, height, 0, NULL, &err);
checkErr(err, "Image2D::Image2D()");
int32_t *imageResult = new int32_t[numPixels];
memset(imageResult, 0, numOutputBytes);
cl_int4 *imageResultDuplicated = new cl_int4[numPixels];
for (int i = 0; i < numPixels; i++)
for (int j = 0; j < 4; j++)
imageResultDuplicated[i].s[j] = 0;
std::ifstream kernelFile("convolutionKernel.cl");
checkErr(kernelFile.is_open() ? CL_SUCCESS : -1, "convolutionKernel.cl");
std::string imageProg(std::istreambuf_iterator<char>(kernelFile), (std::istreambuf_iterator<char>()));
cl::Program::Sources imageSource(1, std::make_pair(imageProg.c_str(), imageProg.length() + 1));
cl::Program imageProgram(context, imageSource);
err = imageProgram.build(devices, "");
checkErr(err, "Program::build()");
cl::Kernel basic(imageProgram, "basic", &err);
checkErr(err, "Kernel::Kernel()");
basic.setArg(0, bufferIn);
basic.setArg(1, bufferOut);
basic.setArg(2, imageBufferOut);
queue.finish();
cl_ushort4 *imageDuplicatedTest = new cl_ushort4[numPixels];
for (int i = 0; i < numPixels; i++)
{
imageDuplicatedTest[i].s[0] = 0;
imageDuplicatedTest[i].s[1] = 0;
imageDuplicatedTest[i].s[2] = 0;
imageDuplicatedTest[i].s[3] = 0;
}
double gpuTimer = clock();
err = queue.enqueueReadImage(bufferIn, CL_FALSE, origin, region, 0, 0, imageDuplicatedTest, NULL, NULL);
checkErr(err, "CommandQueue::enqueueReadImage()");
// Output from above matches input image
err = queue.enqueueNDRangeKernel(basic, cl::NullRange, cl::NDRange(height, width), cl::NDRange(1, 1), NULL, NULL);
checkErr(err, "CommandQueue::enqueueNDRangeKernel()");
queue.flush();
err = queue.enqueueReadImage(bufferOut, CL_TRUE, origin, region, 0, 0, imageResultDuplicated, NULL, NULL);
checkErr(err, "CommandQueue::enqueueReadImage()");
queue.flush();
err = queue.enqueueReadBuffer(imageBufferOut, CL_TRUE, 0, numOutputBytes, imageResult, NULL, NULL);
checkErr(err, "CommandQueue::enqueueReadBuffer()");
queue.finish();
OpenCL kernel:
__kernel void basic(__read_only image2d_t input, __write_only image2d_t output, __global int *result)
{
const sampler_t smp = CLK_NORMALIZED_COORDS_TRUE | //Natural coordinates
CLK_ADDRESS_NONE | //Clamp to zeros
CLK_FILTER_NEAREST; //Don't interpolate
int2 coord = (get_global_id(1), get_global_id(0));
uint4 pixel = read_imageui(input, smp, coord);
result[coord.s0 + coord.s1 * 9216] = pixel.s0;
write_imageui(output, coord, pixel);
}
The coordinates in the kernel are currently mapped to (x, y) = (width, height).
The input image is a single channel greyscale image with 16 bits per pixel, which is why I had to duplicate the channels to fit into OpenCL's Image2D. The output after convolution will be 32 bits per pixel, which is why numOutputBytes is set to that. Also, although the width and height appear weird, the input image's dimensions are 9216x7824, so I'm only taking a portion of it to test the code first, so it doesn't take forever.
I added in a write to global memory after reading from the image in the kernel to see if the issue was reading the image or writing the image. After the kernel executes, this section of global memory also contains mostly zeros.
Any help would be greatly appreciated!
The documentation for read_imageui states that
Furthermore, the read_imagei and read_imageui calls that take integer coordinates must use a sampler with normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing mode set to CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; otherwise the values returned are undefined.
But you're creating a sampler with CLK_NORMALIZED_COORDS_TRUE (but seem to be passing in non-normalized coords :S ?).
I have a problem with my program written in Visual C++ using OpenCV:
i have to capture frames from webcam and find all the various rectangle (it doesn't matter the color).
I try to modify the samples in c, squares.c, but it doesn't work as well, because the program wait any key (different from 'q') to continue.
This is the code. Someone can tell me where is the problem???
Thank you in advance.
//
// Object Detection of squares
// Take images from webcam and find the square in them
//
//
#include "stdafx.h"
#include <stdio.h>
#include <math.h>
#include <string.h>
int thresh = 50;
IplImage* img = 0;
IplImage* img0 = 0;
CvMemStorage* storage = 0;
//const char* wndname = "Square Detection Demo with Webcam";
// helper function:
// finds a cosine of angle between vectors
// from pt0->pt1 and from pt0->pt2
double angle( CvPoint* pt1, CvPoint* pt2, CvPoint* pt0 )
{
double dx1 = pt1->x - pt0->x;
double dy1 = pt1->y - pt0->y;
double dx2 = pt2->x - pt0->x;
double dy2 = pt2->y - pt0->y;
return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
CvSeq* findSquares4( IplImage* img, CvMemStorage* storage )
{
CvSeq* contours;
int i, c, l, N = 11;
CvSize sz = cvSize( img->width & -2, img->height & -2 );
IplImage* timg = cvCloneImage( img ); // make a copy of input image
IplImage* gray = cvCreateImage( sz, 8, 1 );
IplImage* pyr = cvCreateImage( cvSize(sz.width/2, sz.height/2), 8, 3 );
IplImage* tgray;
CvSeq* result;
double s, t;
// create empty sequence that will contain points -
// 4 points per square (the square's vertices)
CvSeq* squares = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvPoint), storage );
// select the maximum ROI in the image
// with the width and height divisible by 2
cvSetImageROI( timg, cvRect( 0, 0, sz.width, sz.height ));
// down-scale and upscale the image to filter out the noise
cvPyrDown( timg, pyr, 7 );
cvPyrUp( pyr, timg, 7 );
tgray = cvCreateImage( sz, 8, 1 );
// find squares in every color plane of the image
for( c = 0; c < 3; c++ )
{
// extract the c-th color plane
cvSetImageCOI( timg, c+1 );
cvCopy( timg, tgray, 0 );
// try several threshold levels
for( l = 0; l < N; l++ )
{
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if( l == 0 )
{
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
cvCanny( tgray, gray, 0, thresh, 5 );
// dilate canny output to remove potential
// holes between edge segments
cvDilate( gray, gray, 0, 1 );
}
else
{
// apply threshold if l!=0:
// tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
cvThreshold( tgray, gray, (l+1)*255/N, 255, CV_THRESH_BINARY );
}
// find contours and store them all as a list
cvFindContours( gray, storage, &contours, sizeof(CvContour),
CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0) );
// test each contour
while( contours )
{
// approximate contour with accuracy proportional
// to the contour perimeter
result = cvApproxPoly( contours, sizeof(CvContour), storage,
CV_POLY_APPROX_DP, cvContourPerimeter(contours)*0.02, 0 );
// square contours should have 4 vertices after approximation
// relatively large area (to filter out noisy contours)
// and be convex.
// Note: absolute value of an area is used because
// area may be positive or negative - in accordance with the
// contour orientation
if( result->total == 4 &&
fabs(cvContourArea(result,CV_WHOLE_SEQ)) > 1000 &&
cvCheckContourConvexity(result) )
{
s = 0;
printf("ciclo for annidato fino a 5\t\n");
for( i = 0; i < 5; i++ )
{
// find minimum angle between joint
// edges (maximum of cosine)
if( i >= 2 )
{
t = fabs(angle(
(CvPoint*)cvGetSeqElem( result, i ),
(CvPoint*)cvGetSeqElem( result, i-2 ),
(CvPoint*)cvGetSeqElem( result, i-1 )));
s = s > t ? s : t;
}
}
// if cosines of all angles are small
// (all angles are ~90 degree) then write quandrange
// vertices to resultant sequence
if( s < 0.3 )
for( i = 0; i < 4; i++ )
cvSeqPush( squares,
(CvPoint*)cvGetSeqElem( result, i ));
}
// take the next contour
contours = contours->h_next;
}
}
}
// release all the temporary images
cvReleaseImage( &gray );
cvReleaseImage( &pyr );
cvReleaseImage( &tgray );
cvReleaseImage( &timg );
return squares;
}
// the function draws all the squares in the image
void drawSquares( IplImage* img, CvSeq* squares )
{
CvSeqReader reader;
IplImage* cpy = cvCloneImage( img );
int i;
// initialize reader of the sequence
cvStartReadSeq( squares, &reader, 0 );
// read 4 sequence elements at a time (all vertices of a square)
for( i = 0; i < squares->total; i += 4 )
{
CvPoint pt[4], *rect = pt;
int count = 4;
// read 4 vertices
CV_READ_SEQ_ELEM( pt[0], reader );
CV_READ_SEQ_ELEM( pt[1], reader );
CV_READ_SEQ_ELEM( pt[2], reader );
CV_READ_SEQ_ELEM( pt[3], reader );
// draw the square as a closed polyline
cvPolyLine( cpy, &rect, &count, 1, 1, CV_RGB(0,255,0), 3, CV_AA, 0 );
}
cvSaveImage("squares.jpg",cpy);
//show the resultant image
//cvShowImage( wndname, cpy );
cvReleaseImage( &cpy );
//return cpy;
}
int _tmain(int argc, _TCHAR* argv[])
{
int key = 0;
IplImage* frame =0;
IplImage* squares=0;
// create memory storage that will contain all the dynamic data
storage = cvCreateMemStorage(0);
CvCapture *camera = cvCreateCameraCapture(CV_CAP_ANY); /* Usa USB camera */
frame = cvQueryFrame(camera);
frame = cvQueryFrame(camera);
frame = cvQueryFrame(camera);
while(key!='q'){
frame = cvQueryFrame(camera);
frame = cvQueryFrame(camera);
if(frame!=NULL){
printf("Got frame\t\n");
cvSaveImage("frame.jpg", frame);
/*img0*/ img = cvLoadImage("frame.jpg");
//img = cvCloneImage( img0 );
cvNamedWindow( "img0", CV_WINDOW_AUTOSIZE);
cvShowImage("img0",/*img0*/img);
// find and draw the squares
drawSquares( img, findSquares4( img, storage ) );
squares = cvLoadImage("squares.jpg");
// create window and a trackbar (slider)
//with parent "image" and set callback
//(the slider regulates upper threshold,
//passed to Canny edge detector)
cvNamedWindow( "main", CV_WINDOW_AUTOSIZE);
cvShowImage("main", squares);
/* wait for key.
Also the function cvWaitKey takes care of event processing */
key = cvWaitKey(0);
}
}
// release both images
cvReleaseImage( &img );
cvReleaseImage( &img0 );
cvReleaseCapture(&camera);
cvDestroyWindow("main");
cvDestroyWindow("img0");
// clear memory storage - reset free space position
cvClearMemStorage( storage );
return 0;
}
I believe your problem is here:
/* wait for key.
Also the function cvWaitKey takes care of event processing */
key = cvWaitKey(0);
Try changing 0 to 10.
I see some other problems in your code. For instance, you create windows inside the while loop, which is not good. Try moving cvNamedWindow() function calls outside your while loop. Also, I'm not sure why you query camera for frames and do not process them?
If your problem is that the window dissappers without waiting for any hit from the keyboard, you can add a cvWaitKey(0) at the end of the code.
Also a getch( ) at the end would help. make sure you include in the headers.