CUDA Object copy from device to host - object
I'm trying to copy an object back from the device to the host, and it works, but if the object contains a pointer to something, I can't find the right way of calling cudaMemcpy.
This is simplified code to show what I'm trying to do. The cudaMemcpy call returns cudaSuccess, but the temp variable stays "empty".
// Minimal aggregate wrapping one raw pointer to an int array.
// It declares no constructor or destructor, so it never allocates or
// releases the memory that s refers to.
struct A {
    int *s;  // first element of the int array this object refers to
};
// Kernel that prints element [2] of the int array reached through a->s.
// Both a and a->s are dereferenced inside the kernel, so each must be an
// address the device can read.
__global__ void MethodA(A *a) {
    const int *values = a->s;
    printf("%d\n", values[2]);
}
// Repro from the question: builds an A on the host, mirrors it and its int
// array on the device, runs MethodA, then tries to copy everything back.
// None of the CUDA return codes stored in e are inspected.
int main() {
// Host object whose s points at a 5-element host array.
A *a = new A();
int asd[] = { 0, 1, 2, 3, 4 };
a->s = asd;
// Device copy of the object; after this memcpy the device-side s still holds
// the host address of asd.
A *d_a;
cudaMalloc((void**)&d_a, sizeof(A));
cudaMemcpy(d_a, a, sizeof(A), cudaMemcpyHostToDevice);
// Device buffer for the 5 ints, filled from the host array.
int * temp;
cudaError e;
e = cudaMalloc((void**)&temp, sizeof(int) * 5);
e = cudaMemcpy(temp, a->s, sizeof(int) * 5, cudaMemcpyHostToDevice);
// Patch the device object's s member so it holds the device buffer address.
// &(d_a->s) only forms the member's address; the write itself is done by
// cudaMemcpy.
e = cudaMemcpy(&(d_a->s), &temp, sizeof(int*), cudaMemcpyHostToDevice);
MethodA << <1, 1 >> > (d_a);
// Copies the device object over the host one, so a->s now holds the device
// buffer address instead of asd.
cudaMemcpy(a, d_a, sizeof(A), cudaMemcpyDeviceToHost);
// NOTE(review): the destination is &temp, i.e. the storage of the host
// pointer variable itself, while sizeof(int) * 5 bytes are requested. This
// looks like it overruns temp and overwrites the only copy of the device
// buffer address; confirm whether a host int buffer was the intended target.
e = cudaMemcpy(&temp, a->s, sizeof(int) * 5, cudaMemcpyDeviceToHost);
a->s = temp;
// Only d_a is released; the buffer obtained by the cudaMalloc into temp is
// never passed to cudaFree in this function.
cudaFree(d_a);
delete(a);
return 0;
}
The problem is here:
e = cudaMemcpy(&(d_a->s), &temp, sizeof(int*), cudaMemcpyHostToDevice);
d_a is a pointer to a device object; you cannot dereference it on the host.
You'll have to first copy s to the device, then create an object of type A on the host that holds a pointer to the device copy of s, and then copy this object to the device.
This is a known issue with CUDA, and it comes up often with structures like linked lists or trees; that's one of the reasons why Nvidia is investing a lot of effort in improving unified memory. If you can use that, and it doesn't decrease the performance of your application, it could save you a lot of trouble with problems like this.
Here is your example with the problems fixed:
// Minimal aggregate wrapping one raw pointer to an int array.
// It declares no constructor or destructor, so it never allocates or
// releases the memory that s refers to.
struct A {
    int *s;  // first element of the int array this object refers to
};
// Kernel that prints element [2] of the int array reached through a->s and
// then overwrites that element with 6, so the host can observe the change
// after copying the array back.
// Both a and a->s are dereferenced inside the kernel, so each must be an
// address the device can read and write.
__global__ void MethodA(A *a) {
    int *values = a->s;
    printf("%d\n", values[2]);
    values[2] = 6;
}
// Reports a failed CUDA runtime call on stderr as "<what> failed: <reason>".
// Returns true when status is cudaSuccess so that calls can be chained with &&.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Copies an object that contains a pointer to the device, runs MethodA on it,
// and copies the pointed-to array back to the host.
//
// The int array is copied to the device first. A host-side staging object
// (a_with_d_s) records the device address of that array and is then copied to
// the device as a whole, so the device object never holds a host address.
//
// Returns 0 on success, or 1 if any CUDA call fails (the failure is reported
// on stderr). Device allocations are released on every path.
int main() {
    const int count = 5;
    const size_t bytes = sizeof(int) * count;

    // Host object: s points at a host array.
    int asd[count] = { 0, 1, 2, 3, 4 };
    A a;
    a.s = asd;

    // Host staging object: s will hold the *device* address of the array copy.
    A a_with_d_s;
    a_with_d_s.s = NULL;
    A *d_a = NULL;

    // && short-circuits, so nothing after the first failure is attempted.
    bool ok =
        cudaOk(cudaMalloc((void **)&a_with_d_s.s, bytes), "cudaMalloc(s)") &&
        cudaOk(cudaMemcpy(a_with_d_s.s, a.s, bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(s, host to device)") &&
        cudaOk(cudaMalloc((void **)&d_a, sizeof(A)), "cudaMalloc(A)") &&
        cudaOk(cudaMemcpy(d_a, &a_with_d_s, sizeof(A), cudaMemcpyHostToDevice),
               "cudaMemcpy(A, host to device)");

    if (ok) {
        MethodA<<<1, 1>>>(d_a);
        // A launch returns no status itself: cudaGetLastError() reports launch
        // failures, and the synchronize reports faults raised while running.
        ok = cudaOk(cudaGetLastError(), "MethodA launch") &&
             cudaOk(cudaDeviceSynchronize(), "MethodA execution");
    }

    if (ok) {
        // Note: copying the whole object back with
        //   cudaMemcpy(&a, d_a, sizeof(A), cudaMemcpyDeviceToHost);
        // would make a.s point to device memory. Copy only the array contents
        // into the existing host array instead.
        ok = cudaOk(cudaMemcpy(a.s, a_with_d_s.s, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(s, device to host)");
    }

    if (ok) {
        printf("%d\n", a.s[2]);
    }

    // cudaFree accepts a null pointer, so this is safe after a partial setup.
    cudaFree(d_a);
    cudaFree(a_with_d_s.s);
    return ok ? 0 : 1;
}
Prints out:
2
6
Related
GDB: Displaying incorrect values in struct
I'm trying to implement the malloc function and it looks like that gdb is giving me some weird values from this struct: struct MemoryBlock { struct MemoryBlock * next; size_t size; signed char is_free; } startBlock; And that's the function where I'm debugging it with gdb: struct MemoryBlock * create_new_block(size_t size) { struct MemoryBlock * ret_block; // add some space for the struct block size += sizeof(struct MemoryBlock); ret_block = (void *) sbrk(size); // test first, if we can allocate that much of ram if (ret_block == (void *) -1) return NULL; ret_block->size = size - sizeof(struct MemoryBlock); ret_block->is_free = 0; ret_block->next = NULL; return ret_block; // HERE'S the breakpoint } So here's the issue (I'm at the breakpoint return ret_block): If I want to see what kind of values are inside of the ret_block pointer, than I'm getting this: (gdb) p (struct MemoryBlock) ret_block $26 = {next = 0x555555559000, size = 140737488347680, is_free = -53 '\313'} size is fine, because if I convert it into the decimal system than I'm getting 3 as expected. (the argument size from the function is currently 3) But I'm surprised that next and is_free aren't 0 since the last three lines should set both to 0. So I looked up what is in the memory: As you can see each value is correctly stored in my heap. But why am I getting these values if I do p (struct MemoryBlock) ret_block? 
If you need the whole code #include <unistd.h> #include <stdio.h> /* ============ * Structs * ============ */ struct MemoryBlock { struct MemoryBlock * next; size_t size; signed char is_free; } startBlock; /* ============== * Functions * ============== */ struct MemoryBlock * create_new_block(size_t size); void * malloc(size_t size); /* ================== * Main Programm * ================== */ int main() { char * buffer; char * b2; unsigned short index; // The start of my heap :D startBlock.is_free = 0; startBlock.size = 0; buffer = malloc(3); b2 = malloc(3); // ----- ERROR ----- if (buffer == NULL || b2 == NULL) return 1; // ----- ERROR ----- // fill the buffers with random stuff for (index=0; index<2; index++) { buffer[index] = 'a'; b2[index] = 'b'; } buffer[index] = '\0'; b2[index] = '\0'; puts(buffer); puts(b2); return 0; } struct MemoryBlock * create_new_block(size_t size) { struct MemoryBlock * ret_block; // add some space for the struct block size += sizeof(struct MemoryBlock); ret_block = (void *) sbrk(size); // test first, if we can allocate that much of ram if (ret_block == (void *) -1) return NULL; ret_block->size = size - sizeof(struct MemoryBlock); ret_block->is_free = 0; ret_block->next = NULL; return ret_block; } void * malloc (size_t size) { struct MemoryBlock * ret_block; struct MemoryBlock * prev_block; prev_block = &startBlock; ret_block = startBlock.next; // go through the linked lists and look if you can find a suitable block while (ret_block != NULL && (ret_block->size < size || !ret_block->is_free)) { prev_block = ret_block; ret_block = ret_block->next; } // couldn't find a suitable block => create a new one if (ret_block == NULL) { ret_block = create_new_block(size); if (ret_block == NULL) return NULL; } prev_block->next = ret_block; ret_block->is_free = 0; return ret_block; }
Ok, one of my friends told me my issue... The casting was wrong! Here's the solution: (gdb) p * ret_block $57 = {next = 0x0, size = 3, is_free = 0 '\000'} A star was enough to get the desired output...
What happens if two threads call fork() simultaneously
I have a process with multiple threads. I have registered prepare function and parent handler using __register_atfork(blocksigprof,restoresigprof,NULL,NULL); function. Now let us assume that two threads call fork at the same time. And I have a counter increment in blocksigprof and counter decrement in restoresigprof. Considering above scenario, will the blocksigprof and restoresigprof be called in pair always? Is there any locking mechanism which inherently done in __register_atfork. #define NUM_THREADS 8 static int go=0; static int exec = 1; static int ev_od = 0; static void * test_thread (void *arg) { int j; pid_t c, d; while(!go) // Wait, so that all threads are here. continue; // All will fork, hopefully at same time because of go signal wait. while(exec) { c = fork(); if (c < 0) { printf("SANJAY: fork() failed.\n"); exit(1); } else if (c == 0) { // Child exit(0); } else { // parent d = waitpid(c, NULL, 0); } } return NULL; } extern int __register_atfork(void (*)(void),void (*)(void),void (*)(void),void *); static sigset_t s_new; static sigset_t s_old; static int count = 0; static void blocksigprof(void){ count++; #ifdef SYS_gettid pid_t tid = syscall(SYS_gettid); if (tid % 2) { printf("sleep.\n"); usleep(1); } #else #error "SYS_gettid unavailable on this system" #endif printf("Pre-fork. Count should be one. %d\n", count); } static void restoresigprof(void){ printf("Post-fork. Count should be one. %d\n", count); count--; } int main () { pthread_t t[NUM_THREADS]; void *ptr; long size = 500 * 1024 * 1024; int i, m; volatile int result = 0; int g_iters = 100; (void) __register_atfork(blocksigprof,restoresigprof,NULL,NULL); // Increase size, so fork takes time. 
printf("SANJAY: Increasing process size.\n"); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); ptr = malloc(size); memset(ptr, 0, size); // Create threads. for (i = 0; i < NUM_THREADS; ++i) { pthread_create(&t[i], NULL, test_thread, NULL); } printf("SANJAY: Killing time.\n"); // Kill time, so that all threads are at same place post it, waiting for go. 100M cycles. for (m = 0; m < 1000000; ++m) for (i = 0; i < g_iters; ++i ) result ^= i; // Give all threads go at same time. printf("SANJAY: Let threads execute.\n"); go = 1; usleep(10000000); // Wait for 10 sec. exec = 0; // Wait for all threads to finish. for (i = 0; i < NUM_THREADS; ++i) { pthread_join(t[i], NULL); } printf("SANJAY: Done.\n"); return 0; }
The pthread_atfork specification doesn't require its implementation to serialize calls to the prepare and parent handlers, so a safe assumption is that there is no synchronization. The glibc implementation does lock an internal mutex that prevents multiple threads from entering the handlers in parallel. However, that is an implementation detail. The comments in the code say that such an implementation is not POSIX-compliant because POSIX requires pthread_atfork to be async-signal-safe, and using a mutex there makes it not async-signal-safe. To make your code robust, I recommend using atomics or a mutex to protect your shared state from race conditions.
Memory allocation of struct in C++
My struct is as follows: typedef struct KeypointSt { float row, col; float scale, ori; unsigned char *descrip; /* Vector of descriptor values */ struct KeypointSt *next; } *Keypoint; The following is a part of a code in C. How can I translate it to C++, considering allocation and de-allocation of heap. Keypoint k, keys = NULL; for (i = 0; i < num; i++) { /* Allocate memory for the keypoint. */ k = (Keypoint) malloc(sizeof(struct KeypointSt)); k->next = keys; keys = k; k->descrip = malloc(len); for (j = 0; j < len; j++) { k->descrip[j] = (unsigned char) val; } }
One possible way of converting to C++ is: #include <cstring> // memset() typedef struct KeypointSt { float row, col; float scale, ori; size_t len; unsigned char *descrip; /* Vector of descriptor values */ KeypointSt *next; KeypointSt(int p_len, int p_val) : row(0.0), col(0.0), scale(0.0), ori(0.0), len(p_len), descrip(new unsigned char[len]), next(0) { memset(descrip, len, p_val); } ~KeypointSt() { delete descrip; } } *Keypoint; extern KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val); extern void free_keypoints(KeypointSt *list); KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val) { KeypointSt *keys = NULL; for (size_t i = 0; i < num; i++) { /* Allocate memory for the keypoint. */ KeypointSt *k = new KeypointSt(len, val); k->next = keys; keys = k; } return keys; } void free_keypoints(KeypointSt *list) { while (list != 0) { KeypointSt *next = list->next; delete list; list = next; } } int main(void) { KeypointSt *keys = init_keypoints(4, 5, 6); free_keypoints(keys); return 0; } The only reason I've kept the typedef in place is because you have existing code; the C++ code would be better using KeypointSt * everywhere — or renaming the structure tag to Keypoint and using Keypoint * in place of your original Keypoint. I don't like non-opaque types where the typedef conceals a pointer. If I see a declaration XYZ xyz;, and it is a structure or class type, I expect to use xyz.pqr and not xyz->pqr. We can debate code layout of the constructor code, the absence of a default constructor (no arrays), and the absence of a copy constructor and an assignment operator (both needed because of the allocation for descrip). The code of init_keypoints() is not exception safe; a memory allocation failure will leak memory. Fixing that is left as an exercise (it isn't very hard, I think, but I don't claim exception-handling expertise). I've not attempted to consider any extra requirements imposed by C++11. 
Simply translating from C to C++ is 'easy' until you look at the extra demands that C++ makes — demands that make your life easier in the long run, but at a short-term cost in pain.
using destructors in c++ to delete pointers
I have the following: //in Matrix.h class Matrix { public: Matrix (int _row1=1, int _col1=1); Matrix(const Matrix &); ~Matrix(); int row1; //int row2; int col1; double **m; //other functions and members... void Print(int id); } //In Matrix.cpp Matrix::Matrix(int _row1, int _col1): row1(_row1),col1(_col1) { m=(double **) new double[row1]; for(long p=0;p<row1;p++) m[p]=(double *) new double[col1]; for(long p=0;p<row1;p++) for (long q=0;q<col1;q++) m[p][q]=0.0; } //copy constructor Matrix::Matrix(const Matrix &Mat){ m=(double **) new double[row1]; **m=**Mat.m;// copy the value } // Destructor Matrix::~Matrix(void){ //We need to deallocate our buffer delete[] *m; delete [] m; //Set m to null just in case m=0; cout<<" Freeing m "<<endl; } void Matrix::Print(int id) { cout<<"Element ID: "<<id<<endl; for(int i=0; i<row1; i++) { for(int j=0;j<col1;j++) { cout<<m[i][j]; if(j==col1-1) cout<<"\n"; } } system("PAUSE"); } a call like the following: elem[e].d0 = matel[i].OrgD;// Both Matrix elem[e].d0.Print(1); // checking to see if at all copied fails at: void Matrix::Print(int id){ //... cout<<m[i][j];//here ...// } in fact it fails where ever m[i][j] is used by other functions. this only happens if any object is used successively. And this error goes away if I comment out the destructor. I fail to understand? Any help! 
EDIT 1: I have changed the copy constructor to: Matrix::Matrix(const Matrix &Mat):row1(Mat.row1),col1(Mat.col1) { m= new double *[row1]; for(long p=0;p<row1;p++) m[p]=new double [col1]; for(long p=0;p<row1;p++)for (long q=0;q<col1;q++) m[p][q]=Mat.m[p][q]; // copy the Value } And have a assignment operator as: Matrix& Matrix::operator = (const Matrix& o) { if ( this == &o ) { return *this; //Self assignment : nothing to do } delete[] *m; delete[] m; row1 = o.row1; col1 = o.col1; m = new double*[row1]; for(long p=0;p<row1;p++) m[p]=new double [col1]; for(long p=0;p<row1;p++) for (long q=0;q<col1;q++) m[p][q]=o.m[p][q]; return *this; } Now it fails at: Matrix::Operator=... { o.m[p] 0xcdcdcdcd double * CXX0030: Error: expression cannot be evaluated // in the debugger } I have noticed that the same thing happens form all functions that use '.m' that is m of the calling object is not available if there is the destructor deleting 'm'. Hope to get some answers.
In your constructor you are using m=(double **) new double[row1]; for(long p=0;p<row1;p++) m[p]=(double *) new double[col1]; The expression new double[row1] allocates an array of double and yields a double*, which you then cast to double**. You should use new double*[row1] here if you really want to get an array of double pointers.
Looking for code samples for Direct3D tessellation
I am trying to learn how to use the Direct3D function D3DXTessellateRectPatch: msdn.microsoft.com/en-us/library/bb205471(v=VS.85).aspx I have found the MSDN documentation quite useful and have been able to implement tessellation of a single rectangle patch. I am now trying to tesselate a mesh that consists of thirty two bicubic Bezier 3D patches (the Utah teapot). I have tried a simple approach - tesselate each Bezier curve individually, then join the vertices and indices appropriately, taking into account vertex offsets, to create a tessellated merged mesh. However, this does not quite seem to have the desired result. If anyone has hints on this problem or, even better, code samples, much appreciated. Specifically, I have checked: Www.directxtutorial.com http://www.amazon.com/Introduction-Game-Programming-Direct-9-0c/dp/1598220160/ And another Direct3D reference, as well as Google. Thank you and look forward to your advice/pointers. Yours Misha
Tim C Schroeder has been a huge help and suggested I use ID3DXPatchMesh. Here is some sample code that generates a tessellated teapot (place in file tester.cpp): // Main D3DX framework from www.directxtutorial.com (free section) #include <assert.h> #include <stdio.h> // include the basic windows header files and the Direct3D header file #include <windows.h> #include <windowsx.h> #include <d3d9.h> #include <d3dx9.h> // define the screen resolution #define SCREEN_WIDTH 800 #define SCREEN_HEIGHT 600 // include the Direct3D Library files #pragma comment (lib, "d3d9.lib") #pragma comment (lib, "d3dx9.lib") // global declarations LPDIRECT3D9 d3d; LPDIRECT3DDEVICE9 d3ddev; LPD3DXMESH mesh = NULL; // define the mesh pointer // function prototypes void initD3D(HWND hWnd); void render_frame(void); void cleanD3D(void); void init_graphics(void); struct vertex_data { D3DXVECTOR3 position; DWORD color; }; #define FVF_VERTEX_DATA (D3DFVF_XYZ | D3DFVF_DIFFUSE) // the WindowProc function prototype LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); // the entry point for any Windows program int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) { HWND hWnd; WNDCLASSEX wc; ZeroMemory(&wc, sizeof(WNDCLASSEX)); wc.cbSize = sizeof(WNDCLASSEX); wc.style = CS_HREDRAW | CS_VREDRAW; wc.lpfnWndProc = WindowProc; wc.hInstance = hInstance; wc.hCursor = LoadCursor(NULL, IDC_ARROW); wc.lpszClassName = "WindowClass"; RegisterClassEx(&wc); hWnd = CreateWindowEx(NULL, "WindowClass", "Our Direct3D Program", WS_OVERLAPPEDWINDOW, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT, NULL, NULL, hInstance, NULL); ShowWindow(hWnd, nCmdShow); initD3D(hWnd); MSG msg; while(TRUE) { while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { TranslateMessage(&msg); DispatchMessage(&msg); } if(msg.message == WM_QUIT) break; render_frame(); } cleanD3D(); return msg.wParam; } // this is the main message handler for the program LRESULT CALLBACK WindowProc(HWND hWnd, 
UINT message, WPARAM wParam, LPARAM lParam) { switch(message) { case WM_DESTROY: PostQuitMessage(0); return 0; } return DefWindowProc (hWnd, message, wParam, lParam); } // this function initializes and prepares Direct3D for use void initD3D(HWND hWnd) { d3d = Direct3DCreate9(D3D_SDK_VERSION); D3DPRESENT_PARAMETERS d3dpp; ZeroMemory(&d3dpp, sizeof(d3dpp)); d3dpp.Windowed = TRUE; d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; d3dpp.hDeviceWindow = hWnd; d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8; d3dpp.BackBufferWidth = SCREEN_WIDTH; d3dpp.BackBufferHeight = SCREEN_HEIGHT; d3dpp.EnableAutoDepthStencil = TRUE; d3dpp.AutoDepthStencilFormat = D3DFMT_D16; d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, hWnd, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &d3dpp, &d3ddev); init_graphics(); d3ddev->SetRenderState(D3DRS_LIGHTING, FALSE); // turn off the 3D lighting d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); // turn off culling d3ddev->SetRenderState(D3DRS_ZENABLE, TRUE); // turn on the z-buffer } // this is the function used to render a single frame void render_frame(void) { d3ddev->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0); d3ddev->Clear(0, NULL, D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0); d3ddev->BeginScene(); d3ddev->SetFVF(FVF_VERTEX_DATA); // set the view transform D3DXMATRIX matView; // the view transform matrix D3DXMatrixLookAtLH(&matView, &D3DXVECTOR3 (0.0f, 8.0f, 25.0f), // the camera position &D3DXVECTOR3 (0.0f, 0.0f, 0.0f), // the look-at position &D3DXVECTOR3 (0.0f, 1.0f, 0.0f)); // the up direction d3ddev->SetTransform(D3DTS_VIEW, &matView); // set the view transform to matView // set the projection transform D3DXMATRIX matProjection; // the projection transform matrix D3DXMatrixPerspectiveFovLH(&matProjection, D3DXToRadian(45), // the horizontal field of view (FLOAT)SCREEN_WIDTH / (FLOAT)SCREEN_HEIGHT, // aspect ratio 1.0f, // the near view-plane 100.0f); // the far view-plane d3ddev->SetTransform(D3DTS_PROJECTION, 
&matProjection); // set the projection // set the world transform static float index = 0.0f; index+=0.03f; // an ever-increasing float value D3DXMATRIX matRotateY; // a matrix to store the rotation for each triangle D3DXMatrixRotationY(&matRotateY, index); // the rotation matrix d3ddev->SetTransform(D3DTS_WORLD, &(matRotateY)); // set the world transform if (mesh) mesh->DrawSubset(0); d3ddev->EndScene(); d3ddev->Present(NULL, NULL, NULL, NULL); } // this is the function that cleans up Direct3D and COM void cleanD3D(void) { if (mesh) mesh->Release(); d3ddev->Release(); d3d->Release(); } #define MAX_PATCHES 1000 #define POINTS_PER_PATCH 16 int num_patches = -1; int patches[MAX_PATCHES][POINTS_PER_PATCH]; void B_patch(int ii, int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p) { assert(ii < MAX_PATCHES); patches[ii][0] = a-1; patches[ii][1] = b-1; patches[ii][2] = c-1; patches[ii][3] = d-1; patches[ii][4] = e-1; patches[ii][5] = f-1; patches[ii][6] = g-1; patches[ii][7] = h-1; patches[ii][8] = i-1; patches[ii][9] = j-1; patches[ii][10] = k-1; patches[ii][11] = l-1; patches[ii][12] = m-1; patches[ii][13] = n-1; patches[ii][14] = o-1; patches[ii][15] = p-1; assert(POINTS_PER_PATCH==16); } #define MAX_POINTS 1000 int num_points = -1; D3DXVECTOR3 points[MAX_POINTS]; void B_point(int ii, double x, double y, double z) { ii--; assert(ii < MAX_POINTS); points[ii].x = x; /*** Y AND Z FLIPPED ***/ points[ii].y = z; points[ii].z = y; } // BEGIN http://www.sjbaker.org/teapot/teaset.tgz /* * The file input.c -- Juhana Kouhia, jk87377#cs.tut.fi, Oct. 25, 1991 * * Load_patch(filename, patches, vertices); * char *filename; int *patches, *vertices; * A sample program to read Bezier patches in. * Returns count of patches and vertices. * User defined subroutines: * B_patch(ii, a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p); * int ii, a, b, ..., p; * Defines one Bezier patch with index number ii, * indexes to points are in a, b, c, ..., p. 
* B_point(ii, x, y, z); * int ii; double x, y, z; * Defines one point with index number ii. */ #include <stdio.h> // Modified to work with g++ void Load_patch(char *filename, int *patches, int *vertices) { int ii; float x,y,z; int a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p; FILE *fp; if (!(fp = fopen(filename,"r"))) { fprintf(stderr,"Load_patch: Can't open %s\n",filename); exit(1); } (void)fscanf(fp,"%i\n",patches); for (ii = 0; ii < *patches; ii++) { (void)fscanf(fp,"%i, %i, %i, %i,",&a,&b,&c,&d); (void)fscanf(fp,"%i, %i, %i, %i,",&e,&f,&g,&h); (void)fscanf(fp,"%i, %i, %i, %i,",&i,&j,&k,&l); (void)fscanf(fp,"%i, %i, %i, %i\n",&m,&n,&o,&p); B_patch(ii, a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p); } (void)fscanf(fp,"%i\n",vertices); for (ii = 1; ii <= *vertices; ii++) { (void)fscanf(fp,"%f, %f, %f\n",&x,&y,&z); B_point(ii, (double)x,(double)y,(double)z); } } // END http://www.sjbaker.org/teapot/teaset.tgz // this is the function that puts the 3D models into video RAM void init_graphics(void) { // load patch char filename[255]; sprintf(filename,"teapot"); Load_patch(filename,&num_patches,&num_points); printf("Loaded patch %s with %d patches and %d vertices.\n", filename,num_patches,num_points); // create declarator from FVF D3DVERTEXELEMENT9 inDecl[MAX_FVF_DECL_SIZE]; if (!SUCCEEDED(D3DXDeclaratorFromFVF(FVF_VERTEX_DATA,inDecl))) assert(FALSE); // create patch mesh LPD3DXPATCHMESH p_mesh; D3DXPATCHINFO info; info.PatchType = D3DXPATCHMESH_RECT; info.Degree = D3DDEGREE_CUBIC; info.Basis = D3DBASIS_BEZIER; if (!SUCCEEDED(D3DXCreatePatchMesh(&info,num_patches,num_points,0,inDecl,d3ddev,&p_mesh))) assert(FALSE); assert(p_mesh->GetControlVerticesPerPatch()==POINTS_PER_PATCH); // copy vertices LPDIRECT3DVERTEXBUFFER9 v_buffer = NULL; if (!SUCCEEDED(p_mesh->GetVertexBuffer(&v_buffer))) assert(FALSE); struct vertex_data* vertex_data = NULL; DWORD number_of_vertices=p_mesh->GetNumVertices(); assert(number_of_vertices==num_points); if (!SUCCEEDED(v_buffer->Lock(0,number_of_vertices*sizeof(struct 
vertex_data),(void **)&vertex_data,D3DLOCK_DISCARD))) assert(FALSE); for (int i=0; i<num_points; i++) { vertex_data[i].position.x = points[i].x; vertex_data[i].position.y = points[i].y; vertex_data[i].position.z = points[i].z; vertex_data[i].color = D3DCOLOR_XRGB(255,0,0); } v_buffer->Unlock(); v_buffer->Release(); // copy indices LPDIRECT3DINDEXBUFFER9 i_buffer = NULL; if (!SUCCEEDED(p_mesh->GetIndexBuffer(&i_buffer))) assert(FALSE); D3DINDEXBUFFER_DESC i_buffer_desc; if (!SUCCEEDED(i_buffer->GetDesc(&i_buffer_desc))) assert(FALSE); assert(i_buffer_desc.Size==num_patches*POINTS_PER_PATCH*sizeof(WORD)); WORD* index_data = NULL; if (!SUCCEEDED(i_buffer->Lock(0,0,(void **)&index_data,D3DLOCK_DISCARD))) assert(FALSE); int idx=0; for (int i=0; i<num_patches; i++) { for (int j=0; j<POINTS_PER_PATCH; j++) { index_data[idx] = patches[i][j]; idx++; } } i_buffer->Unlock(); i_buffer->Release(); // create mesh for tesselation FLOAT fTessLevel=1.0f; DWORD Adaptive=FALSE; DWORD NumTriangles,NumVertices; if (!SUCCEEDED(p_mesh->GetTessSize(fTessLevel,Adaptive,&NumTriangles,&NumVertices))) assert(FALSE); if (!SUCCEEDED(D3DXCreateMeshFVF(NumTriangles,NumVertices,D3DXMESH_MANAGED,FVF_VERTEX_DATA,d3ddev,&mesh))) assert(FALSE); // tesselate assert(Adaptive==FALSE); if (!SUCCEEDED(p_mesh->Tessellate(fTessLevel,mesh))) assert(FALSE); printf("Generated tesselated mesh with %d triangles, %d vertices\n",NumTriangles,NumVertices); p_mesh->Release(); } The teapot data (place in file teapot) is (from http://www.sjbaker.org/teapot/teaset.tgz): 32 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 4,17,18,19,8,20,21,22,12,23,24,25,16,26,27,28 19,29,30,31,22,32,33,34,25,35,36,37,28,38,39,40 31,41,42,1,34,43,44,5,37,45,46,9,40,47,48,13 13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60 16,26,27,28,52,61,62,63,56,64,65,66,60,67,68,69 28,38,39,40,63,70,71,72,66,73,74,75,69,76,77,78 40,47,48,13,72,79,80,49,75,81,82,53,78,83,84,57 57,58,59,60,85,86,87,88,89,90,91,92,93,94,95,96 
60,67,68,69,88,97,98,99,92,100,101,102,96,103,104,105 69,76,77,78,99,106,107,108,102,109,110,111,105,112,113,114 78,83,84,57,108,115,116,85,111,117,118,89,114,119,120,93 121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136 124,137,138,121,128,139,140,125,132,141,142,129,136,143,144,133 133,134,135,136,145,146,147,148,149,150,151,152,69,153,154,155 136,143,144,133,148,156,157,145,152,158,159,149,155,160,161,69 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177 165,178,179,162,169,180,181,166,173,182,183,170,177,184,185,174 174,175,176,177,186,187,188,189,190,191,192,193,194,195,196,197 177,184,185,174,189,198,199,186,193,200,201,190,197,202,203,194 204,204,204,204,207,208,209,210,211,211,211,211,212,213,214,215 204,204,204,204,210,217,218,219,211,211,211,211,215,220,221,222 204,204,204,204,219,224,225,226,211,211,211,211,222,227,228,229 204,204,204,204,226,230,231,207,211,211,211,211,229,232,233,212 212,213,214,215,234,235,236,237,238,239,240,241,242,243,244,245 215,220,221,222,237,246,247,248,241,249,250,251,245,252,253,254 222,227,228,229,248,255,256,257,251,258,259,260,254,261,262,263 229,232,233,212,257,264,265,234,260,266,267,238,263,268,269,242 270,270,270,270,279,280,281,282,275,276,277,278,271,272,273,274 270,270,270,270,282,289,290,291,278,286,287,288,274,283,284,285 270,270,270,270,291,298,299,300,288,295,296,297,285,292,293,294 270,270,270,270,300,305,306,279,297,303,304,275,294,301,302,271 306 1.4,0.0,2.4 1.4,-0.784,2.4 0.784,-1.4,2.4 0.0,-1.4,2.4 1.3375,0.0,2.53125 1.3375,-0.749,2.53125 0.749,-1.3375,2.53125 0.0,-1.3375,2.53125 1.4375,0.0,2.53125 1.4375,-0.805,2.53125 0.805,-1.4375,2.53125 0.0,-1.4375,2.53125 1.5,0.0,2.4 1.5,-0.84,2.4 0.84,-1.5,2.4 0.0,-1.5,2.4 -0.784,-1.4,2.4 -1.4,-0.784,2.4 -1.4,0.0,2.4 -0.749,-1.3375,2.53125 -1.3375,-0.749,2.53125 -1.3375,0.0,2.53125 -0.805,-1.4375,2.53125 -1.4375,-0.805,2.53125 -1.4375,0.0,2.53125 -0.84,-1.5,2.4 -1.5,-0.84,2.4 -1.5,0.0,2.4 -1.4,0.784,2.4 -0.784,1.4,2.4 0.0,1.4,2.4 
-1.3375,0.749,2.53125 -0.749,1.3375,2.53125 0.0,1.3375,2.53125 -1.4375,0.805,2.53125 -0.805,1.4375,2.53125 0.0,1.4375,2.53125 -1.5,0.84,2.4 -0.84,1.5,2.4 0.0,1.5,2.4 0.784,1.4,2.4 1.4,0.784,2.4 0.749,1.3375,2.53125 1.3375,0.749,2.53125 0.805,1.4375,2.53125 1.4375,0.805,2.53125 0.84,1.5,2.4 1.5,0.84,2.4 1.75,0.0,1.875 1.75,-0.98,1.875 0.98,-1.75,1.875 0.0,-1.75,1.875 2.0,0.0,1.35 2.0,-1.12,1.35 1.12,-2.0,1.35 0.0,-2.0,1.35 2.0,0.0,0.9 2.0,-1.12,0.9 1.12,-2.0,0.9 0.0,-2.0,0.9 -0.98,-1.75,1.875 -1.75,-0.98,1.875 -1.75,0.0,1.875 -1.12,-2.0,1.35 -2.0,-1.12,1.35 -2.0,0.0,1.35 -1.12,-2.0,0.9 -2.0,-1.12,0.9 -2.0,0.0,0.9 -1.75,0.98,1.875 -0.98,1.75,1.875 0.0,1.75,1.875 -2.0,1.12,1.35 -1.12,2.0,1.35 0.0,2.0,1.35 -2.0,1.12,0.9 -1.12,2.0,0.9 0.0,2.0,0.9 0.98,1.75,1.875 1.75,0.98,1.875 1.12,2.0,1.35 2.0,1.12,1.35 1.12,2.0,0.9 2.0,1.12,0.9 2.0,0.0,0.45 2.0,-1.12,0.45 1.12,-2.0,0.45 0.0,-2.0,0.45 1.5,0.0,0.225 1.5,-0.84,0.225 0.84,-1.5,0.225 0.0,-1.5,0.225 1.5,0.0,0.15 1.5,-0.84,0.15 0.84,-1.5,0.15 0.0,-1.5,0.15 -1.12,-2.0,0.45 -2.0,-1.12,0.45 -2.0,0.0,0.45 -0.84,-1.5,0.225 -1.5,-0.84,0.225 -1.5,0.0,0.225 -0.84,-1.5,0.15 -1.5,-0.84,0.15 -1.5,0.0,0.15 -2.0,1.12,0.45 -1.12,2.0,0.45 0.0,2.0,0.45 -1.5,0.84,0.225 -0.84,1.5,0.225 0.0,1.5,0.225 -1.5,0.84,0.15 -0.84,1.5,0.15 0.0,1.5,0.15 1.12,2.0,0.45 2.0,1.12,0.45 0.84,1.5,0.225 1.5,0.84,0.225 0.84,1.5,0.15 1.5,0.84,0.15 -1.6,0.0,2.025 -1.6,-0.3,2.025 -1.5,-0.3,2.25 -1.5,0.0,2.25 -2.3,0.0,2.025 -2.3,-0.3,2.025 -2.5,-0.3,2.25 -2.5,0.0,2.25 -2.7,0.0,2.025 -2.7,-0.3,2.025 -3.0,-0.3,2.25 -3.0,0.0,2.25 -2.7,0.0,1.8 -2.7,-0.3,1.8 -3.0,-0.3,1.8 -3.0,0.0,1.8 -1.5,0.3,2.25 -1.6,0.3,2.025 -2.5,0.3,2.25 -2.3,0.3,2.025 -3.0,0.3,2.25 -2.7,0.3,2.025 -3.0,0.3,1.8 -2.7,0.3,1.8 -2.7,0.0,1.575 -2.7,-0.3,1.575 -3.0,-0.3,1.35 -3.0,0.0,1.35 -2.5,0.0,1.125 -2.5,-0.3,1.125 -2.65,-0.3,0.9375 -2.65,0.0,0.9375 -2.0,-0.3,0.9 -1.9,-0.3,0.6 -1.9,0.0,0.6 -3.0,0.3,1.35 -2.7,0.3,1.575 -2.65,0.3,0.9375 -2.5,0.3,1.125 -1.9,0.3,0.6 -2.0,0.3,0.9 1.7,0.0,1.425 
1.7,-0.66,1.425 1.7,-0.66,0.6 1.7,0.0,0.6 2.6,0.0,1.425 2.6,-0.66,1.425 3.1,-0.66,0.825 3.1,0.0,0.825 2.3,0.0,2.1 2.3,-0.25,2.1 2.4,-0.25,2.025 2.4,0.0,2.025 2.7,0.0,2.4 2.7,-0.25,2.4 3.3,-0.25,2.4 3.3,0.0,2.4 1.7,0.66,0.6 1.7,0.66,1.425 3.1,0.66,0.825 2.6,0.66,1.425 2.4,0.25,2.025 2.3,0.25,2.1 3.3,0.25,2.4 2.7,0.25,2.4 2.8,0.0,2.475 2.8,-0.25,2.475 3.525,-0.25,2.49375 3.525,0.0,2.49375 2.9,0.0,2.475 2.9,-0.15,2.475 3.45,-0.15,2.5125 3.45,0.0,2.5125 2.8,0.0,2.4 2.8,-0.15,2.4 3.2,-0.15,2.4 3.2,0.0,2.4 3.525,0.25,2.49375 2.8,0.25,2.475 3.45,0.15,2.5125 2.9,0.15,2.475 3.2,0.15,2.4 2.8,0.15,2.4 0.0,0.0,3.15 0.0,-0.002,3.15 0.002,0.0,3.15 0.8,0.0,3.15 0.8,-0.45,3.15 0.45,-0.8,3.15 0.0,-0.8,3.15 0.0,0.0,2.85 0.2,0.0,2.7 0.2,-0.112,2.7 0.112,-0.2,2.7 0.0,-0.2,2.7 -0.002,0.0,3.15 -0.45,-0.8,3.15 -0.8,-0.45,3.15 -0.8,0.0,3.15 -0.112,-0.2,2.7 -0.2,-0.112,2.7 -0.2,0.0,2.7 0.0,0.002,3.15 -0.8,0.45,3.15 -0.45,0.8,3.15 0.0,0.8,3.15 -0.2,0.112,2.7 -0.112,0.2,2.7 0.0,0.2,2.7 0.45,0.8,3.15 0.8,0.45,3.15 0.112,0.2,2.7 0.2,0.112,2.7 0.4,0.0,2.55 0.4,-0.224,2.55 0.224,-0.4,2.55 0.0,-0.4,2.55 1.3,0.0,2.55 1.3,-0.728,2.55 0.728,-1.3,2.55 0.0,-1.3,2.55 1.3,0.0,2.4 1.3,-0.728,2.4 0.728,-1.3,2.4 0.0,-1.3,2.4 -0.224,-0.4,2.55 -0.4,-0.224,2.55 -0.4,0.0,2.55 -0.728,-1.3,2.55 -1.3,-0.728,2.55 -1.3,0.0,2.55 -0.728,-1.3,2.4 -1.3,-0.728,2.4 -1.3,0.0,2.4 -0.4,0.224,2.55 -0.224,0.4,2.55 0.0,0.4,2.55 -1.3,0.728,2.55 -0.728,1.3,2.55 0.0,1.3,2.55 -1.3,0.728,2.4 -0.728,1.3,2.4 0.0,1.3,2.4 0.224,0.4,2.55 0.4,0.224,2.55 0.728,1.3,2.55 1.3,0.728,2.55 0.728,1.3,2.4 1.3,0.728,2.4 0.0,0.0,0.0 1.5,0.0,0.15 1.5,0.84,0.15 0.84,1.5,0.15 0.0,1.5,0.15 1.5,0.0,0.075 1.5,0.84,0.075 0.84,1.5,0.075 0.0,1.5,0.075 1.425,0.0,0.0 1.425,0.798,0.0 0.798,1.425,0.0 0.0,1.425,0.0 -0.84,1.5,0.15 -1.5,0.84,0.15 -1.5,0.0,0.15 -0.84,1.5,0.075 -1.5,0.84,0.075 -1.5,0.0,0.075 -0.798,1.425,0.0 -1.425,0.798,0.0 -1.425,0.0,0.0 -1.5,-0.84,0.15 -0.84,-1.5,0.15 0.0,-1.5,0.15 -1.5,-0.84,0.075 -0.84,-1.5,0.075 0.0,-1.5,0.075 
-1.425,-0.798,0.0 -0.798,-1.425,0.0 0.0,-1.425,0.0 0.84,-1.5,0.15 1.5,-0.84,0.15 0.84,-1.5,0.075 1.5,-0.84,0.075 0.798,-1.425,0.0 1.425,-0.798,0.0 Finally, to compile using mingw on Ubuntu 10.04 amd64 with proper software installed: #!/bin/bash rm tester.exe > /dev/null 2>&1 i586-mingw32msvc-g++ tester.cpp -o tester.exe -fcheck-new -Idxsdk/DXSDK/Include -ld3d9 dxsdk/DXSDK/Lib/x86/d3dx9.lib