Unexpected output from CUDA program - reference

I am having a problem with the code in CUDA. The code compiles properly but it gives an unexpected output when the program runs completely.
In this case, step is supposed to increase until stepcount is reached. However, only one step is outputted. What exactly did I do wrong?
Also, how do I make reference to a particular xcord, ycord or zcord. E.g. using arrays in CPU code I can refer to a particular element by xcord[1]. In CUDA's case, do I use xcord[threadidx.x]?
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
/* Kernel code */
/*
 * Advance every ball by one time step.
 *
 * Each thread handles one ball: it subtracts 9.81 from the ball's vertical
 * velocity and then adds the three velocity components to the matching
 * coordinates.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per ball. The ball index is
 * the flat global thread index, so a single-block launch behaves exactly as
 * indexing by threadIdx.x did.
 * Precondition: every array holds at least gridDim.x * blockDim.x elements;
 * there is no length parameter, so the kernel cannot bounds-check on its own.
 * No shared memory is used.
 */
__global__
void run(float *lvelox, float *lveloy, float *lveloz, float *xcord, float *ycord, float *zcord)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;

    /* 9.81f (not 9.81) keeps the arithmetic in single precision. */
    const float vy = lveloy[i] - 9.81f;
    lveloy[i] = vy;

    xcord[i] = xcord[i] + lvelox[i];
    ycord[i] = ycord[i] + vy;
    zcord[i] = zcord[i] + lveloz[i];
}
/* Host code */
/* Report a failed CUDA runtime call and terminate; does nothing on cudaSuccess. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error while %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

/* Print a prompt on stdout and read one float from stdin; exits on malformed input. */
static float promptFloat(const char *prompt)
{
    float value;
    printf("%s", prompt);
    if (scanf("%f", &value) != 1) {
        fprintf(stderr, "Invalid numeric input\n");
        exit(1);
    }
    return value;
}

/*
 * Simulate BALL_COUNT balls for a user-supplied number of steps and write one
 * CSV row per ball per step to Output.txt.
 *
 * The initial velocity components and the step count are read from stdin.
 * Device buffers are allocated and filled once; each step launches run() on
 * the resident data and copies the state back only so it can be logged.
 *
 * Returns 0 on success; exits with status 1 on any I/O, input or CUDA error.
 */
int main(void) {
    enum { BALL_COUNT = 1000 };
    const size_t fsize = BALL_COUNT * sizeof(float);
    char outputFilename[] = "Output.txt";

    float lvelox[BALL_COUNT] = {};
    float lveloy[BALL_COUNT] = {};
    float lveloz[BALL_COUNT] = {};
    float xcord[BALL_COUNT] = {};
    float ycord[BALL_COUNT] = {};
    float zcord[BALL_COUNT] = {};
    int stepcount = 0;

    FILE *ofp = fopen(outputFilename, "w");
    if (ofp == NULL) {
        fprintf(stderr, "Can't open output file %s!\n", outputFilename);
        exit(1);
    }

    /* Obtaining velocity */
    const float dlvelox = promptFloat("Enter the initial horizontal velocity of the balls:\n");
    fprintf(ofp, "Initial horizontal velocity: %f\n", dlvelox);
    const float dlveloy = promptFloat("Enter the initial vertical velocity of the balls:\n");
    fprintf(ofp, "Initial vertical velocity: %f\n", dlveloy);
    const float dlveloz = promptFloat("Enter the initial Z velocity of the balls:\n");
    fprintf(ofp, "Initial Z velocity: %f\n", dlveloz);

    for (int i = 0; i < BALL_COUNT; i++) {
        lvelox[i] = dlvelox;
        lveloy[i] = dlveloy;
        lveloz[i] = dlveloz;
    }

    /* Obtain number of steps */
    printf("Enter the number of steps wanted:\n");
    if (scanf("%d", &stepcount) != 1) {
        fprintf(stderr, "Invalid step count\n");
        exit(1);
    }
    fprintf(ofp, "Number of steps: %d\n", stepcount);

    /* Column header of the per-ball rows written below */
    fprintf(ofp, "\n");
    fprintf(ofp, "X-cord, Y-cord, Z-cord, Horizontal Velo, Vertical Velo, Z Velo, Ball ID, Step\n");
    fprintf(ofp, "\n");

    /* GPU setup: allocate and upload once, outside the step loop */
    float *lveloxd = NULL, *lveloyd = NULL, *lvelozd = NULL;
    float *xcordd = NULL, *ycordd = NULL, *zcordd = NULL;
    checkCuda(cudaMalloc((void**)&lveloxd, fsize), "allocating lvelox");
    checkCuda(cudaMalloc((void**)&lveloyd, fsize), "allocating lveloy");
    checkCuda(cudaMalloc((void**)&lvelozd, fsize), "allocating lveloz");
    checkCuda(cudaMalloc((void**)&xcordd, fsize), "allocating xcord");
    checkCuda(cudaMalloc((void**)&ycordd, fsize), "allocating ycord");
    checkCuda(cudaMalloc((void**)&zcordd, fsize), "allocating zcord");
    checkCuda(cudaMemcpy(lveloxd, lvelox, fsize, cudaMemcpyHostToDevice), "uploading lvelox");
    checkCuda(cudaMemcpy(lveloyd, lveloy, fsize, cudaMemcpyHostToDevice), "uploading lveloy");
    checkCuda(cudaMemcpy(lvelozd, lveloz, fsize, cudaMemcpyHostToDevice), "uploading lveloz");
    checkCuda(cudaMemcpy(xcordd, xcord, fsize, cudaMemcpyHostToDevice), "uploading xcord");
    checkCuda(cudaMemcpy(ycordd, ycord, fsize, cudaMemcpyHostToDevice), "uploading ycord");
    checkCuda(cudaMemcpy(zcordd, zcord, fsize, cudaMemcpyHostToDevice), "uploading zcord");

    /* One block with one thread per ball, the layout run() is written for. */
    const dim3 dimBlock(BALL_COUNT);
    const dim3 dimGrid(1);

    for (int step = 0; step < stepcount; ++step) {
        run<<<dimGrid, dimBlock>>>(lveloxd, lveloyd, lvelozd, xcordd, ycordd, zcordd);
        /* A launch returns no status itself; a rejected configuration only shows up here. */
        checkCuda(cudaGetLastError(), "launching run");

        /* Blocking copies: they wait for the kernel and also surface execution errors. */
        checkCuda(cudaMemcpy(lvelox, lveloxd, fsize, cudaMemcpyDeviceToHost), "downloading lvelox");
        checkCuda(cudaMemcpy(lveloy, lveloyd, fsize, cudaMemcpyDeviceToHost), "downloading lveloy");
        checkCuda(cudaMemcpy(lveloz, lvelozd, fsize, cudaMemcpyDeviceToHost), "downloading lveloz");
        checkCuda(cudaMemcpy(xcord, xcordd, fsize, cudaMemcpyDeviceToHost), "downloading xcord");
        checkCuda(cudaMemcpy(ycord, ycordd, fsize, cudaMemcpyDeviceToHost), "downloading ycord");
        checkCuda(cudaMemcpy(zcord, zcordd, fsize, cudaMemcpyDeviceToHost), "downloading zcord");

        /* %f needs a float value, so index the arrays; one row per ball matches the header. */
        for (int id = 0; id < BALL_COUNT; ++id) {
            fprintf(ofp, "%f, %f, %f, %f, %f, %f, %d, %d\n",
                    xcord[id], ycord[id], zcord[id],
                    lvelox[id], lveloy[id], lveloz[id], id, step);
        }
    }

    checkCuda(cudaFree(lveloxd), "freeing lvelox");
    checkCuda(cudaFree(lveloyd), "freeing lveloy");
    checkCuda(cudaFree(lvelozd), "freeing lveloz");
    checkCuda(cudaFree(xcordd), "freeing xcord");
    checkCuda(cudaFree(ycordd), "freeing ycord");
    checkCuda(cudaFree(zcordd), "freeing zcord");

    if (fclose(ofp) != 0) {
        fprintf(stderr, "Can't close output file %s!\n", outputFilename);
        exit(1);
    }
    return 0;
}

The probable answer is the following if it is due to SEGFAULT in the kernel:
You are allocating arrays of 1000 entities, and launching one block of 1000 threads. Due to cuda having a granularity of 32 threads (warpsize) and you want 1000 threads, 1024 threads will be launched as ceil(1000/32) = 32 warps = 1024 threads.
Now as you only have arrays of 1000 elements, the 24 threads you did not request but was spawned, will access outside allocated memory resulting in a SEGFAULT, i.e. lveloy[threadIdx.x] where threadIdx.x = 1000-1023.
To prevent that, just enclose as such:
// Only threads that map onto one of the 1000 array elements do any work.
if(threadIdx.x < 1000) {
    // 9.81f keeps the subtraction in single precision (a bare 9.81 is a double).
    lveloy[threadIdx.x] = lveloy[threadIdx.x] - 9.81f;
    xcord[threadIdx.x] = xcord[threadIdx.x] + lvelox[threadIdx.x];
    ycord[threadIdx.x] = ycord[threadIdx.x] + lveloy[threadIdx.x];
    zcord[threadIdx.x] = zcord[threadIdx.x] + lveloz[threadIdx.x];
}
Also, how do I make reference to a particular xcord, ycord or zcord.
E.g. using arrays in CPU code I can refer to a particular element by
xcord[1]. In CUDA's case, do I use xcord[threadidx.x]?
You can access it just as you would on the CPU: through a literal index such as xcord[1], through a thread-dependent index such as xcord[threadIdx.x], or through a combination of both such as xcord[threadIdx.x+1] (not recommended, because the accesses are then no longer coalesced).

Related

Reduction in running time of multiple modules due to linking

This question may seem very vague, hence I have included the code snippets for the modules mentioned. I have written a program that collects data from various sensors on an I2C bus and stores the formatted values in a file. This shall run on an ARM cortex A9 processor (single core) in an SoC configuration called Zedboard by Xilinx, and uses the petalinux operating system with the vanilla linux kernel. The time is being measured using clock_gettime(). I have noticed significant reduction in a single sensor access time when all of the sensors are being accessed sequentially within a single process. The comparison of this time was done with that of individual processes that access a single sensor only and do not write the data to a file, but print it to stdout instead.
Sensors used along with modules:
GY521 Module:
#include <linux/i2c-dev-user.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <inttypes.h>
#include "GY521.h"
#include <time.h>
// I2C slave address handed to ioctl(I2C_SLAVE) in init_gy521()
#define ADDR 0x68
// descriptor of the /dev/i2c-N device opened by init_gy521()
static int file;
// result of the most recent i2c_smbus_* call made by this module
static __s32 res;
// register address written by set_sleep_gy521()
static __u8 reg;
// filled by get_values(); the get_* accessors decode byte pairs from it
static __u8 values[14]; //array to hold all the register values
/*
 * Wake the device (flag == 0) or put it to sleep (any other flag) by writing
 * one byte to register 0x6B (107). Failures are reported with perror() only.
 */
void set_sleep_gy521(int flag)
{
    const int wake = (flag == 0);
    /* 0x01 wakes, 0x41 sleeps; 0x00 / 0x40 would select the internal 8MHz oscillator as CLK */
    const uint8_t power_byte = wake ? 0x01 : 0x41;

    reg = 0x6B;
    res = i2c_smbus_write_byte_data(file, reg, power_byte);
    if (res < 0)
        perror(wake ? "Failed to wake it up" : "Failed to go to sleep");
}
/*
 * Open /dev/i2c-0, select the slave at ADDR, write 0x00 to the gyro (0x1B)
 * and accelerometer (0x1C) range registers, then wake the device.
 * Exits the process if the bus cannot be opened or the slave cannot be
 * selected; range-write failures are only reported.
 */
void init_gy521()
{
    const int adapter_no = 0;
    char filename[20];

    snprintf(filename, 19, "/dev/i2c-%d", adapter_no);
    if ((file = open(filename, O_RDWR)) < 0)
    {
        perror("File not opened");
        exit(1);
    }

    if (ioctl(file, I2C_SLAVE, ADDR) < 0)
    {
        perror("Not able to access the device");
        exit(EXIT_FAILURE);
    }

    /* sensitivity of the gyroscope */
    if ((res = i2c_smbus_write_byte_data(file, 0x1B, 0x00)) < 0)
        perror("Failed to set gyro range");

    /* sensitivity of the accelerometer */
    if ((res = i2c_smbus_write_byte_data(file, 0x1C, 0x00)) < 0)
        perror("Failed to set the accelerometer range");

    /* waking also selects the X-axis gyro reference as clock source, which is
       slightly better than the internal 8MHz oscillator */
    set_sleep_gy521(0);
}
//get_values() stores all the register measurements in the array values
int get_values()
{
//reading all the values needed at once in a block
res = i2c_smbus_read_i2c_block_data(file, 0x3B, 14, (__u8*)values);
if(res<0)
perror("Failed to read using Block");
return res;
}
/*
 * Refresh values[] via get_values() and return the X acceleration decoded
 * from values[0..1], scaled by 9.8/16384. Exits the process when the refresh
 * does not return a positive count.
 */
float get_Ax()
{
    /* a fresh block read, so every accessor sees the same time instant */
    if (get_values() <= 0)
    {
        perror("Can't get the values");
        exit(EXIT_FAILURE);
    }

    const int16_t high = values[0];
    const int16_t raw = (high << 8) | values[1];
    return raw/16384.0*9.8;
}
/* Return the Y acceleration decoded from values[2..3] (no new bus read), scaled by 9.8/16384. */
float get_Ay()
{
    const int16_t high = values[2];
    /* high byte in the upper 8 bits, low byte below it */
    const int16_t raw = (high << 8) | values[3];
    return raw/16384.0*9.8;
}
/* Return the Z acceleration decoded from values[4..5] (no new bus read), scaled by 9.8/16384. */
float get_Az()
{
    const int16_t high = values[4];
    const int16_t raw = (high << 8) | values[5];
    return raw/16384.0*9.8;
}
/* Return the temperature decoded from values[6..7] (no new bus read) as raw/340 + 36.53. */
float get_temp_gy521()
{
    const int16_t high = values[6];
    const __s16 raw = (high << 8) | values[7];
    return (raw/340.0 + 36.53);
}
/* Return the X gyro reading decoded from values[8..9] (no new bus read), divided by 131. */
float get_Wx()
{
    const int16_t high = values[8];
    const __s16 raw = (high << 8) | values[9];
    return raw/131.0;
}
/* Return the Y gyro reading decoded from values[10..11] (no new bus read), divided by 131. */
float get_Wy()
{
    const int16_t high = values[10];
    const __s16 raw = (high << 8) | values[11];
    return raw/131.0;
}
/* Return the Z gyro reading decoded from values[12..13] (no new bus read), divided by 131. */
float get_Wz()
{
    const int16_t high = values[12];
    const __s16 raw = (high << 8) | values[13];
    return raw/131.0;
}
/* Release the I2C device descriptor opened by init_gy521(). */
void clear_gy521()
{
    const int fd = file;
    close(fd);
}
/*
 * Stand-alone timing harness for the GY521: initialise the sensor, print the
 * gyro and accelerometer readings, and report the CPU time consumed.
 */
int main()
{
    struct timespec start, end;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    init_gy521();

    /* get_Ax() is the only accessor that refreshes values[]; call it first so
       the gyro accessors below decode real data instead of the initial zeros. */
    const float ax = get_Ax();

    printf("Wx: %f\n", get_Wx());
    printf("Wy: %f\n", get_Wy());
    printf("Wz: %f\n", get_Wz());
    printf("Ax: %f\n", ax);
    printf("Ay: %f\n", get_Ay());
    printf("Az: %f\n", get_Az());
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

    /* the elapsed value is a long expression, so it needs %ld rather than %d */
    printf("Time taken by GY521 is %ld MuS\n",
           (long)((end.tv_sec-start.tv_sec)*1000000L+(end.tv_nsec-start.tv_nsec)/1000));

    /* closed after the second timestamp so the measurement is not affected */
    clear_gy521();
    return 0;
}
LM75 Module:
#include <linux/i2c-dev-user.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <time.h>
// I2C slave address handed to ioctl(I2C_SLAVE) in init_LM75()
#define ADDRESS 0x48
static int file; //use static keyword to ensure that the scope of this variable is limited to this file.
// destination of the 2-byte block read performed by get_temp()
static __u8 buffer[2];
/*
 * Read the two bytes at register 0x00 and return the temperature in whole
 * degrees.
 *
 * Per the LM75 datasheet the first byte is the two's-complement integer part
 * of the reading, so it is sign-extended rather than masked with 127 (masking
 * turned every sub-zero reading into a large positive value).
 * A failed read is reported with perror(); the function then returns whatever
 * buffer[] held before the call, because the int return type has no error
 * value.
 */
int get_temp()
{
    if(i2c_smbus_read_i2c_block_data(file, 0x00, 2, buffer)<0)
        perror("Failed to read the block");
    return (signed char)buffer[0];
}
/*
 * Open /dev/i2c-0 and select the LM75 at ADDRESS.
 * [IMPORTANT] Must run once per process before any other function of this
 * device is called. Exits the process on failure.
 */
void init_LM75()
{
    char filename[20];
    const int adapter_number = 0; //check this.

    snprintf(filename, 19, "/dev/i2c-%d", adapter_number);
    if ((file = open(filename, O_RDWR)) < 0)
    {
        perror("File not opened");
        exit(1);
    }

    const int selected = ioctl(file, I2C_SLAVE, ADDRESS);
    if (selected < 0)
    {
        perror("ioctl could not open file");
        exit(1);
    }
}
/*
 * Stand-alone timing harness for the LM75: initialise the sensor, print one
 * temperature reading, and report the CPU time consumed in microseconds.
 */
int main()
{
    struct timespec start, end;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    init_LM75();
    printf("Temperature is %d\n", get_temp());
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

    /* the elapsed value is a long expression, so it needs %ld rather than %d */
    printf("Time taken %ld\n",
           (long)((end.tv_sec-start.tv_sec)*1000000L+(end.tv_nsec-start.tv_nsec)/1000));
    return 0;
}
HMC5883L Module:
#include <linux/i2c-dev-user.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include "HMC5883L.h"
#include <time.h>
// I2C slave address handed to ioctl(I2C_SLAVE) in init_magnetometer()
#define ADDRESS 0x1e
static int file; //use static keyword to ensure that the scope of this variable is limited to this file.
// multiplier applied to raw readings; set in init_magnetometer() and set_magnetometer_gain()
static float factor;
// destination of the 6-byte block read performed by get_B()
static __u8 buffer[6];
//register addresses
__u8 config_reg_A = 0x00;
__u8 mode_reg = 0x02;
__u8 gain_reg = 0x01;
// data registers: the Z pair (0x05/0x06) sits between the X and Y pairs, so a
// block read starting at data_X_H yields the axes in X, Z, Y order
__u8 data_X_H = 0x03;
__u8 data_X_L = 0x04;
__u8 data_Y_H = 0x07;
__u8 data_Y_L = 0x08;
__u8 data_Z_H = 0x05;
__u8 data_Z_L = 0x06;
/**
* The value of mode must be according to the following table:
* Value Mode
* 0 Continuous
* 1 Single (Default)
* 2 Idle
* 3 Idle
*
* After any mode change care must be taken to set it back to continuous mode before reading any values.
**/
void set_magnetometer_mode(int mode)
{
__u8 value = 0x00;
value |= mode;
if(i2c_smbus_write_byte_data(file, mode_reg, value)<0)
perror("Failed to change magnetometer mode");
}
/* Block-read the six data bytes starting at data_X_H into buffer[]; a failure is reported with perror(). */
void get_B()
{
    const int rc = i2c_smbus_read_i2c_block_data(file, data_X_H, 6, buffer);
    if (rc < 0)
    {
        perror("Failed to read the block");
    }
}
/*
 * [IMPORTANT] get_Bx(), get_By() and get_Bz() return the field in milli gauss
 * decoded from buffer[]. Only get_Bx() refreshes the buffer, so call it first.
 */
float get_Bx()
{
    get_B();

    /* upper byte shifted into place, lower byte OR-ed in below it */
    const int16_t raw = (((int16_t)buffer[0]) << 8) | buffer[1];
    return (float)raw*factor;
}
/* Return the Y field decoded from buffer[4..5] times factor; does not read the bus. */
float get_By()
{
    /* upper byte shifted into place, lower byte OR-ed in below it */
    const int16_t raw = (((int16_t)buffer[4]) << 8) | buffer[5];
    return (float)raw*factor;
}
/* Return the Z field decoded from buffer[2..3] times factor; does not read the bus. */
float get_Bz()
{
    /* upper byte shifted into place, lower byte OR-ed in below it */
    const int16_t raw = (((int16_t)buffer[2]) << 8) | buffer[3];
    return (float)raw*factor;
}
/*
 * Open /dev/i2c-0, select the magnetometer at ADDRESS, set the scaling factor
 * to 0.92 and switch the device to mode 0.
 * [IMPORTANT] Must run once per process before any other function of this
 * device is called. Exits the process on failure.
 */
void init_magnetometer()
{
    char filename[20];
    const int adapter_number = 0; //check this.

    snprintf(filename, 19, "/dev/i2c-%d", adapter_number);
    if ((file = open(filename, O_RDWR)) < 0)
    {
        perror("File not opened");
        exit(1);
    }

    const int selected = ioctl(file, I2C_SLAVE, ADDRESS);
    if (selected < 0)
    {
        perror("ioctl could not open file");
        exit(1);
    }

    factor = 0.92;
    set_magnetometer_mode(0);
}
/* Release the I2C device descriptor opened by init_magnetometer(). */
void clear_magnetometer()
{
    const int fd = file;
    close(fd);
}
/**
* The value of freq must be according to the following table:
* Value Rate (Hz)
* 0 0.75
* 1 1.5
* 2 3
* 3 7.5
* 4 15 (Default)
* 5 30
* 6 75
**/
void set_magnetometer_frequency(int freq)
{
__u8 value = 0x00;
value |= freq<<2;
if(i2c_smbus_write_byte_data(file, config_reg_A, value)<0)
perror("Failed to change data rate");
}
/**
* The value of gain must be according to the following table:
* Value Field Range (+/- Gauss)
* 0 0.88
* 1 1.3 (Default)
* 2 1.9
* 3 2.5
* 4 4.0
* 5 4.7
* 6 5.6
* 7 8.1
*
* This function will also set the value of the factor to be multiplied to the raw data.
**/
void set_magnetometer_gain(int gain)
{
__u8 value = 0x00;
value |= gain<<5;
if(i2c_smbus_write_byte_data(file, gain_reg, value)<0)
perror("Failed to change magnetometer gain");
else
{
switch(gain)
{
case 0: factor = 0.73; break;
case 1: factor = 0.92; break;
case 2: factor = 1.22; break;
case 3: factor = 1.52; break;
case 4: factor = 2.27; break;
case 5: factor = 2.56; break;
case 6: factor = 3.03; break;
case 7: factor = 4.35; break;
}
}
}
/*
 * Stand-alone timing harness for the HMC5883L: initialise the sensor, print
 * the three field components, close it, and report the CPU time consumed.
 */
int main()
{
    struct timespec start, end;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    init_magnetometer();

    /* Function arguments are evaluated in unspecified order, but get_Bx() must
       run first because it is the call that refreshes the buffer. Sequence the
       reads explicitly instead of nesting them in the printf() call. */
    const float bx = get_Bx();
    const float by = get_By();
    const float bz = get_Bz();
    printf("%f\t%f\t%f\n", bx, by, bz);

    clear_magnetometer();
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

    /* the elapsed value is a long expression, so it needs %ld rather than %d */
    printf("Time taken by HMC is %ld MuS\n",
           (long)((end.tv_sec-start.tv_sec)*1000000L+(end.tv_nsec-start.tv_nsec)/1000));
    return 0;
}
Single module that clubs all the three together and also writes data in a file:
#include <stdio.h>
#include <stdlib.h>
#include "hwfunctions.h"
#include <time.h>
/*
 * Housekeeping run: append one timestamped block of readings from every
 * sensor to fullhk.txt and print how much CPU time each sensor section and
 * the whole run consumed.
 */
int main()
{
    struct timespec start_hk, end_hk, start_hmc, end_hmc, start_gy, end_gy, start_lm, end_lm;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_hk);

    FILE *f = fopen("fullhk.txt", "a");
    if(f==NULL)
    {
        perror("Couldn't open file");
        exit(EXIT_FAILURE); /* a failure must not report success */
    }

    /* Each line goes straight to the file with fprintf(); the former fixed
       50-byte scratch buffer could overflow on a long %f expansion. */
    time_t curt;
    time(&curt);
    fprintf(f, "Time: %s\n", ctime(&curt));

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_hmc);
    init_magnetometer();
    fprintf(f, "Bx: %f\n", get_Bx()); /* get_Bx() refreshes the buffer for By/Bz */
    fprintf(f, "By: %f\n", get_By());
    fprintf(f, "Bz: %f\n", get_Bz());
    clear_magnetometer();
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_hmc);

    fprintf(f, "S1: %f\n", get_S1());
    fprintf(f, "S2: %f\n", get_S2());
    fprintf(f, "S3: %f\n", get_S3());
    fprintf(f, "S4: %f\n", get_S4());
    fprintf(f, "S5: %f\n", get_S5());

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_lm);
    init_LM75();
    fprintf(f, "Temperature: %d\n", get_temp());
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_lm);

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_gy);
    init_gy521();
    /* get_Ax() is the accessor that fetches a fresh register block; read it
       first so Wy/Wz are decoded from real data, then keep the file order. */
    const float ax = get_Ax();
    fprintf(f, "Wy: %f\n", get_Wy());
    fprintf(f, "Wz: %f\n", get_Wz());
    fprintf(f, "Ax: %f\n", ax);
    fprintf(f, "Ay: %f\n", get_Ay());
    const float az = get_Az();
    clear_gy521();
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_gy);
    fprintf(f, "Az: %f *end of block*\n\n", az);

    fclose(f);
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_hk);

    printf("Time taken by single hmc instance: %ld microseconds\n", (long)((end_hmc.tv_sec-start_hmc.tv_sec)*1000000L + (end_hmc.tv_nsec-start_hmc.tv_nsec)/1000));
    printf("Time taken by single gy instance: %ld microseconds\n", (long)((end_gy.tv_sec-start_gy.tv_sec)*1000000L + (end_gy.tv_nsec-start_gy.tv_nsec)/1000));
    printf("Time taken by single lm instance: %ld microseconds\n", (long)((end_lm.tv_sec-start_lm.tv_sec)*1000000L + (end_lm.tv_nsec-start_lm.tv_nsec)/1000));
    printf("Time taken by single housekeeping instance: %ld microseconds\n", (long)((end_hk.tv_sec-start_hk.tv_sec)*1000000L + (end_hk.tv_nsec-start_hk.tv_nsec)/1000));
    return 0;
}
Housekeeping is the name of the single module and the outputs above the housekeeping output are for the individual sensor modules. The housekeeping module has been compiled and linked with the sensor modules without the main function, and the O2 optimization flag has been used during cross compilation. This difference in the times is same even if the time is measured by CLOCK_BOOTTIME to include kernel pre-emption.
Please comment if any more information is needed to debunk this mystery!
I would suspect something happening in the background, when you use library functions for the first time.
Try to disable lazy binding, for example, by setting environment variable LD_BIND_NOW = 1 (Is there a linker flag to force it to load all shared libraries at start time?)

Deadlock without an explicit lock

I am testing a pthread program.
This program is simple. The main thread creates a child thread.
The main thread and the child thread are both operating on a queue.
The child thread keeps scanning the queue in an infinite loop and reports the minimal element and its position.
The main thread also runs a loop; each iteration deletes from the queue the minimal element calculated by the child thread and inserts some new elements at the end of the queue.
The minimal element and its position, and the queue are all declared as global variables.
The main ends when the queue is empty and it will cancel the child thread.
This progress is some like a breadth-first search.
The queue is implemented as an array with a size counter. The deletion operation is implemented as replacing the element to be deleted by the last element and decreasing the size counter by one.
No lock is used here. But when running, the program will get stuck.
What's more amazing, if I insert some printf statements to view the status, it may finish.
I want to know what causes this program to run endlessly.
// State shared between main_thread() and child_thread(). No lock protects it.
struct multiblocks_pthread_args {
// Latest minimum found by child_thread(): its queue element, value and position.
// Written by child_thread(), read by main_thread().
volatile int local_e;
volatile int local_v;
volatile int local_pos;
// Queue elements; set to main_thread()'s Q array.
int* Q;
// Value of each queue slot; allocated in main_thread().
int* val;
// Points at main_thread()'s local queue-size counter.
volatile int* size;
} para;
// Set to 1 by main_thread() after it changes the queue; child_thread() clears
// it at the start of a scan and rescans if it is non-zero when the scan ends.
volatile int update = 0;
// Worker thread: scans the value array for its minimum in an endless loop and
// publishes the element, position and value through the shared argument
// struct. args must point to a multiblocks_pthread_args.
void* child_thread ( void* args ) {
// Asynchronous cancellation: the loop below has no exit of its own.
pthread_setcanceltype ( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );
multiblocks_pthread_args* arglist = ( multiblocks_pthread_args* ) args;
bindToCore ( 1 );
int* list = arglist -> Q, * value = arglist -> val;
while ( true ) {
int size, e, v, pos;
// Repeat the scan until a whole pass finishes with update still 0.
do {
// Comma expression: snapshot the shared size, then reset v and pos.
// The bare `e` in the middle is an expression with no effect.
size = * ( arglist->size ), e, v = INF, pos = 0;
update = 0;
// Linear search for the smallest value among the first `size` slots.
for ( int i = 0; i < size; i++ ) {
int vi = value[i];
if ( vi < v ) {
pos = i;
v = vi;
}
}
} while ( update );
// NOTE(review): when size == 0, e is never assigned, yet it is stored below.
if ( size > 0 ) e = list[pos];
// The three results are stored one after another, not as one atomic unit.
arglist->local_e = e;
arglist->local_pos = pos;
arglist->local_v = v;
}
// Not reached: the loop above contains no break.
return NULL;
}
// Driver loop: repeatedly removes the minimum reported by child_thread() from
// the queue and appends or updates neighbouring elements, until the queue is
// empty. The three `/* ... */` placeholders below are elided in the original
// post, so this function does not compile as shown.
void main_thread () {
int size;
// NOTE(review): both NumNode and numNode are used here; possibly the same
// constant misspelled. Confirm against the full program.
int* Q = ( int* ) malloc ( sizeof ( int ) * NumNode );
int** hash = ( int** ) malloc ( sizeof ( int* ) * numNode );
NodeColor* color = ( NodeColor* ) malloc ( sizeof ( NodeColor ) * numNode );
// NodeColor is an enum with 3 values: WHITE, GRAY, BLACK
memset ( color, 0, sizeof ( NodeColor ) * numNode );
pthread_t tid;
para.val = ( int* ) malloc ( sizeof ( int ) * NumNode );
para.Q = Q;
para.size = &size;
// NOTE(review): the child is started before size is assigned below, so its
// first read of *para.size may see an indeterminate value.
pthread_create ( &tid, NULL, child_thread, &para );
// Only one element is in the queue
size = 0;
para.Q[size] = 0;
para.val[size] = 0;
hash[0] = &para.val[size]; // hash is used to modify the value of particular element
++size;
color[0] = GRAY;
while ( true ) {
int global_e, global_v = INF, global_pos;
// Take the child's latest result; the three fields are read separately, so
// they may come from different scans.
global_e = para.local_e, global_v = para.local_v, global_pos = para.local_pos;
if ( size == 0 ) break;
// Skip results that name an element which was already finalised.
if ( color[global_e] != BLACK ) {
// NOTE(review): `value` is not declared in this function.
value[global_e] = global_v, color[global_e] = BLACK;
if ( size > 0 ) {
// Delete by moving the last slot into the vacated position.
--size;
para.Q[global_pos] = para.Q[size];
para.val[global_pos] = para.val[size];
hash[para.Q[global_pos]] = & para.val[global_pos];
// Tell the child its current scan is stale.
update = 1;
}
for ( int i = 0; i < MAXDEG; ++i ) {
int ee = ;// new element;
int vv = ;// value of new element;
if ( /* if new element is valid */ ) {
if ( color[ee] == WHITE ) { // WHITE means ee is not in the queue
// Append ee at the end of the queue.
para.Q[size] = ee;
para.val[size] = vv;
hash[ee] = &para.val[size];
++size, color[ee] = GRAY;
} else {
// ee is already queued: overwrite its value in place.
*hash[ee] = vv;
}
update = 1;
}
}
}
}
// NOTE(review): Q is freed before the child is cancelled, and the child reads
// it through para.Q. hash, color and para.val are never freed here.
free ( Q );
pthread_cancel ( tid );
printf ( "Computation finishes!!!" );
return ;
}
That's not a deadlock but a race condition.
The overall structure of your hang is, you start with WHITE item at index 0 and this loop goes on forever:
// Schematic of the hang: each pass removes one item and adds one back.
size = 1;
while (size != 0) {
// removal branch: the queue shrinks by one
if (WHITE) --size;
for (...) {
// insertion branch: the queue grows by one again
if (WHITE) ++size;
}
}
The only way this changes is if your child thread sets pos to something other than 0. But your child thread needs size to be greater than 1 before it can produce a pos other than 0. There you have your race condition.
My diagnosis may not be accurate. Cleaner code would help a lot. Names like Q, e and v save you a couple of keystrokes but can easily cost you days, as in this example. You also use numbers and enums interchangeably, which is bad practice.

Problems with SDL Audio (No output)

I'm facing some problems with understanding how the SDL audio callback works.
I have this simple code, which should generate a simple square wave:
#include "SDL.h"
#include "SDL_audio.h"
#include <stdlib.h>
#include <math.h>
// Video surface returned by SDL_SetVideoMode() in init_sdl().
SDL_Surface *screen;
// Audio format request filled in by play().
SDL_AudioSpec spec;
// Size in bytes of sound_buffer; play() sets it again.
Uint32 sound_len=512;
// Scratch buffer allocated in play() and read by Callback().
Uint8 *sound_buffer;
// Not referenced by the functions shown here.
int sound_pos = 0;
int counter;
// Square-wave generator state used by Callback(): phase advances by
// phase_delta per generated sample, and out holds the newest sample.
unsigned int phase_delta=600;
unsigned int phase;
unsigned char out;
// Start the SDL video and audio subsystems and open a 640x480, 16 bpp
// hardware surface. Exits with status -1 if either step fails.
void init_sdl (void)
{
    const bool sdl_started = SDL_Init (SDL_INIT_VIDEO|SDL_INIT_AUDIO) >= 0;
    if (!sdl_started)
    {
        exit (-1);
    }

    // SDL is shut down automatically when the process exits
    atexit (SDL_Quit);

    screen = SDL_SetVideoMode (640, 480, 16, SDL_HWSURFACE);
    if (!screen)
    {
        exit (-1);
    }
}
//Generates a new sample and outputs it to the audio card
void Callback (void *userdata, Uint8 *stream, int len)
{
Uint8 *waveptr;
//Generates a new sample
phase+=phase_delta;
if ((phase>>8)<127) out=255; else out=0;
//End
//Output the current sample to the audio card
waveptr = sound_buffer;
SDL_MixAudio(stream, waveptr, 1, SDL_MIX_MAXVOLUME);
}
// Allocate the scratch buffer, request 22050 Hz mono signed 16-bit audio with
// 512-sample buffers, open the audio device with Callback as the fill
// function, and start playback. Exits with status -1 if the device cannot be
// opened.
void play (void)
{
    sound_len= 512;
    // Value-initialised: Callback reads from this buffer, so its contents must
    // not be indeterminate.
    sound_buffer = new Uint8[sound_len]();

    spec.freq = 22050;
    spec.format = AUDIO_S16SYS;
    spec.channels = 1;
    spec.silence = 0;
    spec.samples = 512;
    spec.padding = 0;
    spec.size = 0;
    spec.userdata = 0;
    spec.callback = Callback;
    if (SDL_OpenAudio (&spec, NULL) < 0)
    { //Throw an error
        printf ("I don't think you like this: %s\n", SDL_GetError ());
        exit (-1);
    }
    SDL_PauseAudio (0);//Start the audio
}
int main(int argc, char* argv[])
{
init_sdl ();
play ();
SDL_Delay (250);
return 0;
}
I know that the callback is not done right, because I have no idea how to output to the buffer. Each time the callback is called, the first part of the callback function generates the new sample and stores it in the variable out.
Can anyone here modify this code so that the new samples go from Out to the correct position in the audio buffer?
Also, I don't want to have the code modified in a very super-complex way just to generate the square wave - I have already taken care of that. The wave is generated correctly, each new sample appearing in the variable Out. I just need these samples to be routed correctly to the audio buffer.
You need to cast stream to a actual.format-appropriate datatype and then overwrite the values in stream with len / sizeof( <format's datatype> ) samples.
The square wave will be hard to hear because the given algorithm only generates a brief high pulse every ~7.1 million samples (~5 minutes @ 22050 Hz), when phase wraps around.
Try something like this:
#include <SDL.h>
#include <SDL_audio.h>
#include <iostream>
using namespace std;
//Generates new samples and outputs them to the audio card
void Callback( void* userdata, Uint8* stream, int len )
{
// the format of stream depends on actual.format in main()
// we're assuming it's AUDIO_S16SYS
short* samples = reinterpret_cast< short* >( stream );
size_t numSamples = len / sizeof( short );
const unsigned int phase_delta = 600;
static unsigned int phase = 0;
// loop over all our samples
for( size_t i = 0; i < numSamples; ++i )
{
phase+=phase_delta;
short out = 0;
if ((phase>>8)<127) out=SHRT_MAX; else out=0;
samples[i] = out;
}
}
// Start SDL, open a 640x480 window, open the audio device for 22050 Hz mono
// signed 16-bit output driven by Callback, and play until the window receives
// SDL_QUIT. Returns -1 on any setup failure, 0 otherwise.
int main( int argc, char* argv[] )
{
    const int initStatus = SDL_Init( SDL_INIT_VIDEO | SDL_INIT_AUDIO );
    if( initStatus < 0 )
    {
        return -1;
    }
    atexit( SDL_Quit );

    SDL_Surface* screen = SDL_SetVideoMode( 640, 480, 16, SDL_ANYFORMAT );
    if( !screen )
    {
        return -1;
    }

    // what we ask for ...
    SDL_AudioSpec spec;
    spec.freq = 22050;
    spec.format = AUDIO_S16SYS;
    spec.channels = 1;
    spec.samples = 4096;
    spec.callback = Callback;
    spec.userdata = NULL;

    // ... and what the device actually gives us
    SDL_AudioSpec actual;
    if( SDL_OpenAudio( &spec, &actual ) < 0 )
    {
        cerr << "I don't think you like this: " << SDL_GetError() << endl;
        return -1;
    }

    // Callback assumes the requested sample format, so refuse anything else
    if( actual.format != spec.format )
    {
        cerr << "format mismatch!" << endl;
        return -1;
    }

    SDL_PauseAudio( 0 );

    // keep playing until the wait fails or a quit event arrives
    SDL_Event ev;
    for( ;; )
    {
        if( !SDL_WaitEvent( &ev ) )
            break;
        if( ev.type == SDL_QUIT )
            break;
    }

    SDL_CloseAudio();
    SDL_Quit();
    return 0;
}

Why slaves are not working in PVM (parallel virtual machine)

I am trying to build a PVM program that has one master and one slave (I am working on CentOS 5.5).
When I run the command aimk master1 slave1, it is expected to give the output below:
Spawning 3 worker tasks ... SUCCESSFUL
I got 100.000000 from 1; (expecting 100.000000)
I got 200.000000 from 0; (expecting 200.000000)
I got 300.000000 from 2; (expecting 300.000000)
But it shows
pvm> [1:t80002] EOF
[1:t80001] Spawning 6 worker tasks.....
[1:t80001] Trouble spawning slaves. Aborting.Error codes are:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
[1:t80001] libpvm [t80005] : pvm_mcast() : Bad parameter
[1:t80003] EOF
[1:t80004] EOF
Why does it give this error? Why are the slaves not working?
My code is below. Please help me with this problem.
Master1.c
static char rcsid[] =
"$Id: master1.c,v 1.4 1997/07/09 13:25:09 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
#define SLAVENAME "slave1"
/*
 * PVM master: spawns up to 32 copies of SLAVENAME (three per host),
 * multicasts the task-id table and a data vector to them, then collects one
 * result from each slave and prints it next to the expected value.
 *
 * Returns 0 on success and 1 if enrolling, querying the configuration or
 * spawning fails.
 */
int main()
{
    int mytid; /* my task id */
    int tids[32]; /* slave task ids */
    int n, nproc, numt, i, who, msgtype, nhost, narch;
    float data[100], result[32];
    struct pvmhostinfo *hostp;

    /* enroll in pvm */
    mytid = pvm_mytid();
    if( mytid < 0 ){
        fprintf(stderr, "Could not enroll in PVM (error %d)\n", mytid);
        return 1;
    }

    /* Set number of slaves to start */
    if( pvm_config( &nhost, &narch, &hostp ) < 0 ){
        fprintf(stderr, "Could not query the PVM configuration\n");
        pvm_exit();
        return 1;
    }
    nproc = nhost * 3;
    if( nproc > 32 ) nproc = 32 ;
    printf("Spawning %d worker tasks ... " , nproc);

    /* start up slave tasks */
    numt=pvm_spawn(SLAVENAME, (char**)0, 0, "", nproc, tids);
    if( numt < nproc ){
        printf("\n Trouble spawning slaves. Aborting. Error codes are:\n");
        if( numt < 0 ){
            /* A negative count is itself the error code; tids[] must not be
               indexed with it. */
            printf("pvm_spawn failed with %d\n", numt);
        }
        else {
            /* the last nproc-numt slots of tids[] hold per-task error codes */
            for( i=numt ; i<nproc ; i++ ) {
                printf("TID %d %d\n",i,tids[i]);
            }
            for( i=0 ; i<numt ; i++ ){
                pvm_kill( tids[i] );
            }
        }
        pvm_exit();
        return 1;
    }
    printf("SUCCESSFUL\n");

    /* Begin User Program */
    n = 100;
    /* initialize_data( data, n ); */
    for( i=0 ; i<n ; i++ ){
        data[i] = 1.0;
    }

    /* Broadcast initial data to slave tasks */
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&nproc, 1, 1);
    pvm_pkint(tids, nproc, 1);
    pvm_pkint(&n, 1, 1);
    pvm_pkfloat(data, n, 1);
    pvm_mcast(tids, nproc, 0);

    /* Wait for results from slaves */
    msgtype = 5;
    for( i=0 ; i<nproc ; i++ ){
        pvm_recv( -1, msgtype );
        pvm_upkint( &who, 1, 1 );
        /* who comes off the wire; never index result[] with it unchecked */
        if( who < 0 || who >= nproc ){
            fprintf(stderr, "Ignoring result from unknown slave index %d\n", who);
            continue;
        }
        pvm_upkfloat( &result[who], 1, 1 );
        printf("I got %f from %d; ",result[who],who);
        if (who == 0)
            printf( "(expecting %f)\n", (nproc - 1) * 100.0);
        else
            printf( "(expecting %f)\n", (2 * who - 1) * 100.0);
    }

    /* Program Finished exit PVM before stopping */
    pvm_exit();
    return 0;
}
slave1.c
static char rcsid[] =
"$Id: slave1.c,v 1.2 1997/07/09 13:25:18 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
/*
 * PVM slave: receives the task-id table and data vector from the master,
 * works out its own index in the table, runs work() and sends the index and
 * result back to the parent task.
 *
 * Returns 0 on success and 1 if the received message does not fit the local
 * buffers or this task is not listed in the table.
 */
int main()
{
    int mytid; /* my task id */
    int tids[32]; /* task ids */
    int n, me, i, nproc, master, msgtype;
    float data[100], result;
    float work();

    /* enroll in pvm */
    mytid = pvm_mytid();

    /* Receive data from master */
    msgtype = 0;
    pvm_recv( -1, msgtype );
    pvm_upkint(&nproc, 1, 1);
    /* the counts come off the wire; check them before they size an unpack */
    if( nproc < 1 || nproc > 32 ){
        fprintf(stderr, "Bad task count %d in message\n", nproc);
        pvm_exit();
        return 1;
    }
    pvm_upkint(tids, nproc, 1);
    pvm_upkint(&n, 1, 1);
    if( n < 0 || n > 100 ){
        fprintf(stderr, "Bad data length %d in message\n", n);
        pvm_exit();
        return 1;
    }
    pvm_upkfloat(data, n, 1);

    /* Determine which slave I am (0 -- nproc-1) */
    me = -1;
    for( i=0; i<nproc ; i++ )
        if( mytid == tids[i] ){ me = i; break; }
    if( me < 0 ){
        /* without an index, work() would run on an indeterminate value */
        fprintf(stderr, "Task %d is not in the task table\n", mytid);
        pvm_exit();
        return 1;
    }

    /* Do calculations with data */
    result = work( me, n, data, tids, nproc );

    /* Send result to master */
    pvm_initsend( PvmDataDefault );
    pvm_pkint( &me, 1, 1 );
    pvm_pkfloat( &result, 1, 1 );
    msgtype = 5;
    master = pvm_parent();
    pvm_send( master, msgtype );

    /* Program finished. Exit PVM before stopping */
    pvm_exit();
    return 0;
}
float
work(me, n, data, tids, nproc )
/*
 * Simple example: slaves exchange data with a neighbor (wrapping).
 * Computes this slave's partial sum (me * data[i] over the n entries), sends
 * it with message tag 22 to the task at index me+1 (index 0 for the last
 * slave), receives one tag-22 message in return, and yields the sum of the
 * local and the received value.
 */
int me, n, *tids, nproc;
float *data;
{
    float received = 0.0;
    float local = 0.0;
    int idx = 0;
    int neighbor;

    while( idx < n ){
        local += me * data[idx];
        idx++;
    }

    /* illustrate node-to-node communication */
    neighbor = ( me+1 == nproc ) ? 0 : me+1;
    pvm_initsend( PvmDataDefault );
    pvm_pkfloat( &local, 1, 1 );
    pvm_send( tids[neighbor], 22 );

    pvm_recv( -1, 22 );
    pvm_upkfloat( &received, 1, 1 );
    return( local+received );
}
Obviously PVM is not finding your slaves' executable. Examine this portion of the output:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
All task IDs are -7, which is PvmNoFile. Ensure that SLAVENAME (slave1 in your case) is either an absolute file path (which it's not in your case) or is the name of an executable file, located in the PVM search path. By default the PVM search path is:
$HOME/pvm3/bin/$PVM_ARCH/
where $HOME is your user's home directory path and $PVM_ARCH is the name of the PVM architecture.

Looking for code samples for Direct3D tessellation

I am trying to learn how to use the Direct3D function D3DXTessellateRectPatch:
msdn.microsoft.com/en-us/library/bb205471(v=VS.85).aspx
I have found the MSDN documentation quite useful and have been able to implement tessellation of a
single rectangle patch.
I am now trying to tesselate a mesh that consists of thirty two bicubic Bezier 3D patches (the Utah teapot).
I have tried a simple approach - tesselate each Bezier curve individually, then join the vertices and
indices appropriately, taking into account vertex offsets, to create a tessellated merged mesh.
However, this does not quite seem to have the desired result.
If anyone has hints on this problem or, even better, code samples, much appreciated.
Specifically, I have checked:
www.directxtutorial.com
http://www.amazon.com/Introduction-Game-Programming-Direct-9-0c/dp/1598220160/
And another Direct3D reference, as well as Google.
Thank you and look forward to your advice/pointers.
Yours
Misha
Tim C Schroeder has been a huge help and suggested I use ID3DXPatchMesh. Here is some sample code that generates a tessellated teapot (place in file tester.cpp):
// Main D3DX framework from www.directxtutorial.com (free section)
#include <assert.h>
#include <stdio.h>
// include the basic windows header files and the Direct3D header file
#include <windows.h>
#include <windowsx.h>
#include <d3d9.h>
#include <d3dx9.h>
// define the screen resolution
#define SCREEN_WIDTH 800
#define SCREEN_HEIGHT 600
// include the Direct3D Library files
#pragma comment (lib, "d3d9.lib")
#pragma comment (lib, "d3dx9.lib")
// global declarations
// Direct3D interface object; assigned from Direct3DCreate9() in initD3D()
LPDIRECT3D9 d3d;
// rendering device; filled in by CreateDevice() in initD3D()
LPDIRECT3DDEVICE9 d3ddev;
LPD3DXMESH mesh = NULL; // define the mesh pointer
// function prototypes
// creates the Direct3D object and device for the given window
void initD3D(HWND hWnd);
// called once per pass of the message loop in WinMain()
void render_frame(void);
// called by WinMain() after the message loop ends
void cleanD3D(void);
// called from initD3D() once the device exists
void init_graphics(void);
// One vertex: a 3D position followed by a DWORD colour.
struct vertex_data
{
D3DXVECTOR3 position;
DWORD color;
};
// flexible vertex format flags matching vertex_data (position + diffuse colour)
#define FVF_VERTEX_DATA (D3DFVF_XYZ | D3DFVF_DIFFUSE)
// the WindowProc function prototype
LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
// the entry point for any Windows program
//
// Registers the window class, creates the main window, initializes Direct3D
// and then alternates between draining the message queue and rendering one
// frame until WM_QUIT is received.
//
// Returns the wParam of the WM_QUIT message, or 0 when the window class or
// the window could not be created (the message loop is never entered).
int WINAPI WinMain(HINSTANCE hInstance,
                   HINSTANCE hPrevInstance,
                   LPSTR lpCmdLine,
                   int nCmdShow)
{
    // required by the WinMain signature but not used by this program
    (void)hPrevInstance;
    (void)lpCmdLine;

    WNDCLASSEX wc;
    ZeroMemory(&wc, sizeof(WNDCLASSEX));
    wc.cbSize = sizeof(WNDCLASSEX);
    wc.style = CS_HREDRAW | CS_VREDRAW;
    wc.lpfnWndProc = WindowProc;
    wc.hInstance = hInstance;
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.lpszClassName = "WindowClass";
    if (!RegisterClassEx(&wc))
        return 0;

    // the first argument is the extended style (a DWORD), so pass 0 rather than NULL
    HWND hWnd = CreateWindowEx(0, "WindowClass", "Our Direct3D Program",
                               WS_OVERLAPPEDWINDOW, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT,
                               NULL, NULL, hInstance, NULL);
    if (hWnd == NULL)
        return 0;

    ShowWindow(hWnd, nCmdShow);
    initD3D(hWnd);

    // zero the message so msg.wParam is well defined even if no message was ever retrieved
    MSG msg;
    ZeroMemory(&msg, sizeof(msg));

    bool running = true;
    while (running)
    {
        // handle everything that is pending before drawing the next frame
        while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
        {
            if (msg.message == WM_QUIT)
            {
                running = false;
                break;
            }
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
        if (running)
            render_frame();
    }

    cleanD3D();
    return (int)msg.wParam;
}
// Window procedure for the main window.
// WM_DESTROY posts a quit message (exit code 0) and reports the message as
// handled by returning 0; every other message is passed to DefWindowProc.
LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    if (message == WM_DESTROY)
    {
        PostQuitMessage(0);
        return 0;
    }

    return DefWindowProc(hWnd, message, wParam, lParam);
}
// this function initializes and prepares Direct3D for use
//
// hWnd: the window the device presents into.
//
// Creates the global Direct3D object (d3d) and device (d3ddev), builds the
// scene through init_graphics() and then sets the render states used by
// render_frame(). If the Direct3D object or the device cannot be created,
// a message is written to stderr and the process exits with status 1,
// because every later step dereferences those pointers.
void initD3D(HWND hWnd)
{
    d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL)
    {
        fprintf(stderr, "initD3D: Direct3DCreate9 failed\n");
        exit(1);
    }

    D3DPRESENT_PARAMETERS d3dpp;
    ZeroMemory(&d3dpp, sizeof(d3dpp));
    d3dpp.Windowed = TRUE;
    d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
    d3dpp.hDeviceWindow = hWnd;
    d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
    d3dpp.BackBufferWidth = SCREEN_WIDTH;
    d3dpp.BackBufferHeight = SCREEN_HEIGHT;
    d3dpp.EnableAutoDepthStencil = TRUE;
    d3dpp.AutoDepthStencilFormat = D3DFMT_D16;

    d3ddev = NULL;
    if (!SUCCEEDED(d3d->CreateDevice(D3DADAPTER_DEFAULT,
                                     D3DDEVTYPE_HAL,
                                     hWnd,
                                     D3DCREATE_SOFTWARE_VERTEXPROCESSING,
                                     &d3dpp,
                                     &d3ddev)) || d3ddev == NULL)
    {
        fprintf(stderr, "initD3D: CreateDevice failed\n");
        d3d->Release();
        d3d = NULL;
        exit(1);
    }

    // the mesh is created on the device, so this has to come after CreateDevice
    init_graphics();

    d3ddev->SetRenderState(D3DRS_LIGHTING, FALSE); // turn off the 3D lighting
    d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); // turn off culling
    d3ddev->SetRenderState(D3DRS_ZENABLE, TRUE); // turn on the z-buffer
}
// this is the function used to render a single frame
void render_frame(void)
{
d3ddev->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
d3ddev->Clear(0, NULL, D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
d3ddev->BeginScene();
d3ddev->SetFVF(FVF_VERTEX_DATA);
// set the view transform
D3DXMATRIX matView; // the view transform matrix
D3DXMatrixLookAtLH(&matView,
&D3DXVECTOR3 (0.0f, 8.0f, 25.0f), // the camera position
&D3DXVECTOR3 (0.0f, 0.0f, 0.0f), // the look-at position
&D3DXVECTOR3 (0.0f, 1.0f, 0.0f)); // the up direction
d3ddev->SetTransform(D3DTS_VIEW, &matView); // set the view transform to matView
// set the projection transform
D3DXMATRIX matProjection; // the projection transform matrix
D3DXMatrixPerspectiveFovLH(&matProjection,
D3DXToRadian(45), // the horizontal field of view
(FLOAT)SCREEN_WIDTH / (FLOAT)SCREEN_HEIGHT, // aspect ratio
1.0f, // the near view-plane
100.0f); // the far view-plane
d3ddev->SetTransform(D3DTS_PROJECTION, &matProjection); // set the projection
// set the world transform
static float index = 0.0f; index+=0.03f; // an ever-increasing float value
D3DXMATRIX matRotateY; // a matrix to store the rotation for each triangle
D3DXMatrixRotationY(&matRotateY, index); // the rotation matrix
d3ddev->SetTransform(D3DTS_WORLD, &(matRotateY)); // set the world transform
if (mesh) mesh->DrawSubset(0);
d3ddev->EndScene();
d3ddev->Present(NULL, NULL, NULL, NULL);
}
// this is the function that cleans up Direct3D and COM
//
// Releases the mesh, the device and the Direct3D object, in that order.
// Each pointer is checked first and reset to NULL afterwards, so the
// function is safe to call after a partial initialization and safe to call
// more than once.
void cleanD3D(void)
{
    if (mesh)
    {
        mesh->Release();
        mesh = NULL;
    }
    if (d3ddev)
    {
        d3ddev->Release();
        d3ddev = NULL;
    }
    if (d3d)
    {
        d3d->Release();
        d3d = NULL;
    }
}
#define MAX_PATCHES 1000
#define POINTS_PER_PATCH 16
// number of patches in the loaded file; -1 until Load_patch() has stored a count
int num_patches = -1;
// patches[n][k] is the zero-based point index of control point k of patch n
int patches[MAX_PATCHES][POINTS_PER_PATCH];

// Callback used by Load_patch(): stores one Bezier patch.
//
// ii     zero-based patch number, must be in [0, MAX_PATCHES)
// a..p   the sixteen control-point indices as they appear in the file
//        (one-based); they are stored zero-based in patches[ii]
void B_patch(int ii, int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p)
{
    // validate before touching the table, including the lower bound
    assert(POINTS_PER_PATCH==16);
    assert(ii >= 0 && ii < MAX_PATCHES);

    const int control[POINTS_PER_PATCH] = { a, b, c, d, e, f, g, h,
                                            i, j, k, l, m, n, o, p };
    for (int slot = 0; slot < POINTS_PER_PATCH; slot++)
        patches[ii][slot] = control[slot] - 1; // file indices are one-based
}
#define MAX_POINTS 1000
// number of points in the loaded file; -1 until Load_patch() has stored a count
int num_points = -1;
// control points, indexed zero-based
D3DXVECTOR3 points[MAX_POINTS];

// Callback used by Load_patch(): stores one control point.
//
// ii       point number as it appears in the file (one-based), must be in
//          [1, MAX_POINTS]
// x, y, z  coordinates from the file; y and z are swapped when stored
void B_point(int ii, double x, double y, double z)
{
    const int slot = ii - 1; // convert to a zero-based index
    assert(slot >= 0 && slot < MAX_POINTS);

    points[slot].x = (float)x;
    /*** Y AND Z FLIPPED ***/
    points[slot].y = (float)z;
    points[slot].z = (float)y;
}
// BEGIN http://www.sjbaker.org/teapot/teaset.tgz
/*
* The file input.c -- Juhana Kouhia, jk87377#cs.tut.fi, Oct. 25, 1991
*
* Load_patch(filename, patches, vertices);
* char *filename; int *patches, *vertices;
* A sample program to read Bezier patches in.
* Returns count of patches and vertices.
* User defined subroutines:
* B_patch(ii, a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p);
* int ii, a, b, ..., p;
* Defines one Bezier patch with index number ii,
* indexes to points are in a, b, c, ..., p.
* B_point(ii, x, y, z);
* int ii; double x, y, z;
* Defines one point with index number ii.
*/
#include <stdio.h>
// Modified to work with g++
// Reports a malformed input file on stderr, closes it and terminates the
// program with status 1 (the same policy Load_patch uses for a file that
// cannot be opened).
static void Load_patch_fail(FILE *fp, const char *filename, const char *what)
{
    fprintf(stderr,"Load_patch: %s in %s\n",what,filename);
    fclose(fp);
    exit(1);
}

// Reads a Bezier patch file.
//
// filename  path of the file to read
// patches   receives the patch count read from the file
// vertices  receives the vertex count read from the file
//
// File layout: a patch count, that many lines of sixteen comma-separated
// point indices, a vertex count, then that many lines of three
// comma-separated coordinates. Each patch is handed to B_patch() with a
// zero-based patch number and each vertex to B_point() with a one-based
// point number; range checking of those numbers is left to the callbacks.
// Any open or parse error is fatal (message on stderr, exit status 1).
void Load_patch(char *filename, int *patches, int *vertices)
{
    FILE *fp = fopen(filename,"r");
    if (!fp) {
        fprintf(stderr,"Load_patch: Can't open %s\n",filename);
        exit(1);
    }

    if (fscanf(fp,"%i\n",patches) != 1)
        Load_patch_fail(fp, filename, "missing patch count");

    for (int ii = 0; ii < *patches; ii++) {
        int c[16];
        // Four groups of four indices; every conversion must succeed.
        // A failed call returns fewer than 4 (or EOF), so the sum can only
        // reach 16 when all sixteen values were read.
        int got = fscanf(fp,"%i, %i, %i, %i,",&c[0],&c[1],&c[2],&c[3]);
        if (got == 4)
            got += fscanf(fp,"%i, %i, %i, %i,",&c[4],&c[5],&c[6],&c[7]);
        if (got == 8)
            got += fscanf(fp,"%i, %i, %i, %i,",&c[8],&c[9],&c[10],&c[11]);
        if (got == 12)
            got += fscanf(fp,"%i, %i, %i, %i\n",&c[12],&c[13],&c[14],&c[15]);
        if (got != 16)
            Load_patch_fail(fp, filename, "malformed patch line");

        B_patch(ii, c[0],c[1],c[2],c[3],c[4],c[5],c[6],c[7],
                c[8],c[9],c[10],c[11],c[12],c[13],c[14],c[15]);
    }

    if (fscanf(fp,"%i\n",vertices) != 1)
        Load_patch_fail(fp, filename, "missing vertex count");

    for (int ii = 1; ii <= *vertices; ii++) {
        float x,y,z;
        if (fscanf(fp,"%f, %f, %f\n",&x,&y,&z) != 3)
            Load_patch_fail(fp, filename, "malformed vertex line");
        B_point(ii, (double)x,(double)y,(double)z);
    }

    fclose(fp);
}
// END http://www.sjbaker.org/teapot/teaset.tgz
// this is the function that puts the 3D models into video RAM
void init_graphics(void)
{
// load patch
char filename[255];
sprintf(filename,"teapot");
Load_patch(filename,&num_patches,&num_points);
printf("Loaded patch %s with %d patches and %d vertices.\n",
filename,num_patches,num_points);
// create declarator from FVF
D3DVERTEXELEMENT9 inDecl[MAX_FVF_DECL_SIZE];
if (!SUCCEEDED(D3DXDeclaratorFromFVF(FVF_VERTEX_DATA,inDecl)))
assert(FALSE);
// create patch mesh
LPD3DXPATCHMESH p_mesh;
D3DXPATCHINFO info;
info.PatchType = D3DXPATCHMESH_RECT;
info.Degree = D3DDEGREE_CUBIC;
info.Basis = D3DBASIS_BEZIER;
if (!SUCCEEDED(D3DXCreatePatchMesh(&info,num_patches,num_points,0,inDecl,d3ddev,&p_mesh)))
assert(FALSE);
assert(p_mesh->GetControlVerticesPerPatch()==POINTS_PER_PATCH);
// copy vertices
LPDIRECT3DVERTEXBUFFER9 v_buffer = NULL;
if (!SUCCEEDED(p_mesh->GetVertexBuffer(&v_buffer)))
assert(FALSE);
struct vertex_data* vertex_data = NULL;
DWORD number_of_vertices=p_mesh->GetNumVertices();
assert(number_of_vertices==num_points);
if (!SUCCEEDED(v_buffer->Lock(0,number_of_vertices*sizeof(struct vertex_data),(void **)&vertex_data,D3DLOCK_DISCARD)))
assert(FALSE);
for (int i=0; i<num_points; i++)
{
vertex_data[i].position.x = points[i].x;
vertex_data[i].position.y = points[i].y;
vertex_data[i].position.z = points[i].z;
vertex_data[i].color = D3DCOLOR_XRGB(255,0,0);
}
v_buffer->Unlock();
v_buffer->Release();
// copy indices
LPDIRECT3DINDEXBUFFER9 i_buffer = NULL;
if (!SUCCEEDED(p_mesh->GetIndexBuffer(&i_buffer)))
assert(FALSE);
D3DINDEXBUFFER_DESC i_buffer_desc;
if (!SUCCEEDED(i_buffer->GetDesc(&i_buffer_desc)))
assert(FALSE);
assert(i_buffer_desc.Size==num_patches*POINTS_PER_PATCH*sizeof(WORD));
WORD* index_data = NULL;
if (!SUCCEEDED(i_buffer->Lock(0,0,(void **)&index_data,D3DLOCK_DISCARD)))
assert(FALSE);
int idx=0;
for (int i=0; i<num_patches; i++)
{
for (int j=0; j<POINTS_PER_PATCH; j++)
{
index_data[idx] = patches[i][j];
idx++;
}
}
i_buffer->Unlock();
i_buffer->Release();
// create mesh for tesselation
FLOAT fTessLevel=1.0f;
DWORD Adaptive=FALSE;
DWORD NumTriangles,NumVertices;
if (!SUCCEEDED(p_mesh->GetTessSize(fTessLevel,Adaptive,&NumTriangles,&NumVertices)))
assert(FALSE);
if (!SUCCEEDED(D3DXCreateMeshFVF(NumTriangles,NumVertices,D3DXMESH_MANAGED,FVF_VERTEX_DATA,d3ddev,&mesh)))
assert(FALSE);
// tesselate
assert(Adaptive==FALSE);
if (!SUCCEEDED(p_mesh->Tessellate(fTessLevel,mesh)))
assert(FALSE);
printf("Generated tesselated mesh with %d triangles, %d vertices\n",NumTriangles,NumVertices);
p_mesh->Release();
}
The teapot data, taken from http://www.sjbaker.org/teapot/teaset.tgz, follows; place it in a file named teapot:
32
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
4,17,18,19,8,20,21,22,12,23,24,25,16,26,27,28
19,29,30,31,22,32,33,34,25,35,36,37,28,38,39,40
31,41,42,1,34,43,44,5,37,45,46,9,40,47,48,13
13,14,15,16,49,50,51,52,53,54,55,56,57,58,59,60
16,26,27,28,52,61,62,63,56,64,65,66,60,67,68,69
28,38,39,40,63,70,71,72,66,73,74,75,69,76,77,78
40,47,48,13,72,79,80,49,75,81,82,53,78,83,84,57
57,58,59,60,85,86,87,88,89,90,91,92,93,94,95,96
60,67,68,69,88,97,98,99,92,100,101,102,96,103,104,105
69,76,77,78,99,106,107,108,102,109,110,111,105,112,113,114
78,83,84,57,108,115,116,85,111,117,118,89,114,119,120,93
121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136
124,137,138,121,128,139,140,125,132,141,142,129,136,143,144,133
133,134,135,136,145,146,147,148,149,150,151,152,69,153,154,155
136,143,144,133,148,156,157,145,152,158,159,149,155,160,161,69
162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177
165,178,179,162,169,180,181,166,173,182,183,170,177,184,185,174
174,175,176,177,186,187,188,189,190,191,192,193,194,195,196,197
177,184,185,174,189,198,199,186,193,200,201,190,197,202,203,194
204,204,204,204,207,208,209,210,211,211,211,211,212,213,214,215
204,204,204,204,210,217,218,219,211,211,211,211,215,220,221,222
204,204,204,204,219,224,225,226,211,211,211,211,222,227,228,229
204,204,204,204,226,230,231,207,211,211,211,211,229,232,233,212
212,213,214,215,234,235,236,237,238,239,240,241,242,243,244,245
215,220,221,222,237,246,247,248,241,249,250,251,245,252,253,254
222,227,228,229,248,255,256,257,251,258,259,260,254,261,262,263
229,232,233,212,257,264,265,234,260,266,267,238,263,268,269,242
270,270,270,270,279,280,281,282,275,276,277,278,271,272,273,274
270,270,270,270,282,289,290,291,278,286,287,288,274,283,284,285
270,270,270,270,291,298,299,300,288,295,296,297,285,292,293,294
270,270,270,270,300,305,306,279,297,303,304,275,294,301,302,271
306
1.4,0.0,2.4
1.4,-0.784,2.4
0.784,-1.4,2.4
0.0,-1.4,2.4
1.3375,0.0,2.53125
1.3375,-0.749,2.53125
0.749,-1.3375,2.53125
0.0,-1.3375,2.53125
1.4375,0.0,2.53125
1.4375,-0.805,2.53125
0.805,-1.4375,2.53125
0.0,-1.4375,2.53125
1.5,0.0,2.4
1.5,-0.84,2.4
0.84,-1.5,2.4
0.0,-1.5,2.4
-0.784,-1.4,2.4
-1.4,-0.784,2.4
-1.4,0.0,2.4
-0.749,-1.3375,2.53125
-1.3375,-0.749,2.53125
-1.3375,0.0,2.53125
-0.805,-1.4375,2.53125
-1.4375,-0.805,2.53125
-1.4375,0.0,2.53125
-0.84,-1.5,2.4
-1.5,-0.84,2.4
-1.5,0.0,2.4
-1.4,0.784,2.4
-0.784,1.4,2.4
0.0,1.4,2.4
-1.3375,0.749,2.53125
-0.749,1.3375,2.53125
0.0,1.3375,2.53125
-1.4375,0.805,2.53125
-0.805,1.4375,2.53125
0.0,1.4375,2.53125
-1.5,0.84,2.4
-0.84,1.5,2.4
0.0,1.5,2.4
0.784,1.4,2.4
1.4,0.784,2.4
0.749,1.3375,2.53125
1.3375,0.749,2.53125
0.805,1.4375,2.53125
1.4375,0.805,2.53125
0.84,1.5,2.4
1.5,0.84,2.4
1.75,0.0,1.875
1.75,-0.98,1.875
0.98,-1.75,1.875
0.0,-1.75,1.875
2.0,0.0,1.35
2.0,-1.12,1.35
1.12,-2.0,1.35
0.0,-2.0,1.35
2.0,0.0,0.9
2.0,-1.12,0.9
1.12,-2.0,0.9
0.0,-2.0,0.9
-0.98,-1.75,1.875
-1.75,-0.98,1.875
-1.75,0.0,1.875
-1.12,-2.0,1.35
-2.0,-1.12,1.35
-2.0,0.0,1.35
-1.12,-2.0,0.9
-2.0,-1.12,0.9
-2.0,0.0,0.9
-1.75,0.98,1.875
-0.98,1.75,1.875
0.0,1.75,1.875
-2.0,1.12,1.35
-1.12,2.0,1.35
0.0,2.0,1.35
-2.0,1.12,0.9
-1.12,2.0,0.9
0.0,2.0,0.9
0.98,1.75,1.875
1.75,0.98,1.875
1.12,2.0,1.35
2.0,1.12,1.35
1.12,2.0,0.9
2.0,1.12,0.9
2.0,0.0,0.45
2.0,-1.12,0.45
1.12,-2.0,0.45
0.0,-2.0,0.45
1.5,0.0,0.225
1.5,-0.84,0.225
0.84,-1.5,0.225
0.0,-1.5,0.225
1.5,0.0,0.15
1.5,-0.84,0.15
0.84,-1.5,0.15
0.0,-1.5,0.15
-1.12,-2.0,0.45
-2.0,-1.12,0.45
-2.0,0.0,0.45
-0.84,-1.5,0.225
-1.5,-0.84,0.225
-1.5,0.0,0.225
-0.84,-1.5,0.15
-1.5,-0.84,0.15
-1.5,0.0,0.15
-2.0,1.12,0.45
-1.12,2.0,0.45
0.0,2.0,0.45
-1.5,0.84,0.225
-0.84,1.5,0.225
0.0,1.5,0.225
-1.5,0.84,0.15
-0.84,1.5,0.15
0.0,1.5,0.15
1.12,2.0,0.45
2.0,1.12,0.45
0.84,1.5,0.225
1.5,0.84,0.225
0.84,1.5,0.15
1.5,0.84,0.15
-1.6,0.0,2.025
-1.6,-0.3,2.025
-1.5,-0.3,2.25
-1.5,0.0,2.25
-2.3,0.0,2.025
-2.3,-0.3,2.025
-2.5,-0.3,2.25
-2.5,0.0,2.25
-2.7,0.0,2.025
-2.7,-0.3,2.025
-3.0,-0.3,2.25
-3.0,0.0,2.25
-2.7,0.0,1.8
-2.7,-0.3,1.8
-3.0,-0.3,1.8
-3.0,0.0,1.8
-1.5,0.3,2.25
-1.6,0.3,2.025
-2.5,0.3,2.25
-2.3,0.3,2.025
-3.0,0.3,2.25
-2.7,0.3,2.025
-3.0,0.3,1.8
-2.7,0.3,1.8
-2.7,0.0,1.575
-2.7,-0.3,1.575
-3.0,-0.3,1.35
-3.0,0.0,1.35
-2.5,0.0,1.125
-2.5,-0.3,1.125
-2.65,-0.3,0.9375
-2.65,0.0,0.9375
-2.0,-0.3,0.9
-1.9,-0.3,0.6
-1.9,0.0,0.6
-3.0,0.3,1.35
-2.7,0.3,1.575
-2.65,0.3,0.9375
-2.5,0.3,1.125
-1.9,0.3,0.6
-2.0,0.3,0.9
1.7,0.0,1.425
1.7,-0.66,1.425
1.7,-0.66,0.6
1.7,0.0,0.6
2.6,0.0,1.425
2.6,-0.66,1.425
3.1,-0.66,0.825
3.1,0.0,0.825
2.3,0.0,2.1
2.3,-0.25,2.1
2.4,-0.25,2.025
2.4,0.0,2.025
2.7,0.0,2.4
2.7,-0.25,2.4
3.3,-0.25,2.4
3.3,0.0,2.4
1.7,0.66,0.6
1.7,0.66,1.425
3.1,0.66,0.825
2.6,0.66,1.425
2.4,0.25,2.025
2.3,0.25,2.1
3.3,0.25,2.4
2.7,0.25,2.4
2.8,0.0,2.475
2.8,-0.25,2.475
3.525,-0.25,2.49375
3.525,0.0,2.49375
2.9,0.0,2.475
2.9,-0.15,2.475
3.45,-0.15,2.5125
3.45,0.0,2.5125
2.8,0.0,2.4
2.8,-0.15,2.4
3.2,-0.15,2.4
3.2,0.0,2.4
3.525,0.25,2.49375
2.8,0.25,2.475
3.45,0.15,2.5125
2.9,0.15,2.475
3.2,0.15,2.4
2.8,0.15,2.4
0.0,0.0,3.15
0.0,-0.002,3.15
0.002,0.0,3.15
0.8,0.0,3.15
0.8,-0.45,3.15
0.45,-0.8,3.15
0.0,-0.8,3.15
0.0,0.0,2.85
0.2,0.0,2.7
0.2,-0.112,2.7
0.112,-0.2,2.7
0.0,-0.2,2.7
-0.002,0.0,3.15
-0.45,-0.8,3.15
-0.8,-0.45,3.15
-0.8,0.0,3.15
-0.112,-0.2,2.7
-0.2,-0.112,2.7
-0.2,0.0,2.7
0.0,0.002,3.15
-0.8,0.45,3.15
-0.45,0.8,3.15
0.0,0.8,3.15
-0.2,0.112,2.7
-0.112,0.2,2.7
0.0,0.2,2.7
0.45,0.8,3.15
0.8,0.45,3.15
0.112,0.2,2.7
0.2,0.112,2.7
0.4,0.0,2.55
0.4,-0.224,2.55
0.224,-0.4,2.55
0.0,-0.4,2.55
1.3,0.0,2.55
1.3,-0.728,2.55
0.728,-1.3,2.55
0.0,-1.3,2.55
1.3,0.0,2.4
1.3,-0.728,2.4
0.728,-1.3,2.4
0.0,-1.3,2.4
-0.224,-0.4,2.55
-0.4,-0.224,2.55
-0.4,0.0,2.55
-0.728,-1.3,2.55
-1.3,-0.728,2.55
-1.3,0.0,2.55
-0.728,-1.3,2.4
-1.3,-0.728,2.4
-1.3,0.0,2.4
-0.4,0.224,2.55
-0.224,0.4,2.55
0.0,0.4,2.55
-1.3,0.728,2.55
-0.728,1.3,2.55
0.0,1.3,2.55
-1.3,0.728,2.4
-0.728,1.3,2.4
0.0,1.3,2.4
0.224,0.4,2.55
0.4,0.224,2.55
0.728,1.3,2.55
1.3,0.728,2.55
0.728,1.3,2.4
1.3,0.728,2.4
0.0,0.0,0.0
1.5,0.0,0.15
1.5,0.84,0.15
0.84,1.5,0.15
0.0,1.5,0.15
1.5,0.0,0.075
1.5,0.84,0.075
0.84,1.5,0.075
0.0,1.5,0.075
1.425,0.0,0.0
1.425,0.798,0.0
0.798,1.425,0.0
0.0,1.425,0.0
-0.84,1.5,0.15
-1.5,0.84,0.15
-1.5,0.0,0.15
-0.84,1.5,0.075
-1.5,0.84,0.075
-1.5,0.0,0.075
-0.798,1.425,0.0
-1.425,0.798,0.0
-1.425,0.0,0.0
-1.5,-0.84,0.15
-0.84,-1.5,0.15
0.0,-1.5,0.15
-1.5,-0.84,0.075
-0.84,-1.5,0.075
0.0,-1.5,0.075
-1.425,-0.798,0.0
-0.798,-1.425,0.0
0.0,-1.425,0.0
0.84,-1.5,0.15
1.5,-0.84,0.15
0.84,-1.5,0.075
1.5,-0.84,0.075
0.798,-1.425,0.0
1.425,-0.798,0.0
Finally, to compile using mingw on Ubuntu 10.04 amd64 with proper software installed:
#!/bin/bash
# Delete any previous build output; all rm output (including the error
# printed when the file does not exist) is discarded.
rm tester.exe > /dev/null 2>&1
# Cross-compile tester.cpp into tester.exe with the mingw g++ compiler.
# -I points at the DirectX SDK headers, -ld3d9 links d3d9, and the D3DX
# import library is passed by path. The dxsdk/ paths are relative to the
# current directory.
i586-mingw32msvc-g++ tester.cpp -o tester.exe -fcheck-new -Idxsdk/DXSDK/Include -ld3d9 dxsdk/DXSDK/Lib/x86/d3dx9.lib

Resources