ESP8266 painless mesh, sometimes does not connect - arduino-esp8266

I have a simple setup between 2 wemos d1 boards. They work with a painless mesh.
The devices each have buttons and LEDs with which users can interact with one another.
The problem that I am having, is that sometimes the two units don't connect after one of them is turned off. I am extensively testing what happens when one of the 2 nodes falls off and then comes back on again. Sometimes they connect fast, sometimes slow, and sometimes they won't connect at all.
Retrying to reset the turned-off module mostly works but sometimes I need to reset the first module as well or they will never connect again. Judging by the LEDs and operation the program keeps running. Resetting both devices always works to reconnect the two.
// #define wrong_led // I may or may not have made a slight error in soldering the duo leds
// Connection flag: set by newConnection() and by loop() once connectionTimeout has passed; read by updateLEDs().
bool connected = 0 ;
// NOTE(review): R1/R2 presumably are the resistor values (ohm) of the button ladder on A0 -- confirm against the schematic.
const uint32_t R1 = 1000 ;
const uint32_t R2 = 4700 ;
// Per-section ADC reference levels. debounceInputs() feeds 1 to button[i] when a sample lies within
// +/-35 counts of entry i (entries below 35 are raised to 35 first), otherwise 0.
// The commented-out expressions are kept next to the hard-coded values that replaced them.
const uint32_t threshold[] =
{
/*4 * 1023 * R1 / ( R2 + ( 4 * R1) ) , // 470 ->*/ 682,
/*3 * 1023 * R1 / ( R2 + ( 3 * R1) ) , // 398 ->*/ 579,
/*2 * 1023 * R1 / ( R2 + ( 2 * R1) ) , // 305 ->*/ 446,
/*1 * 1023 * R1 / ( R2 + ( 1 * R1) ) , // 179 ->*/ 262,
/*0 * 1023 * R1 / ( R2 + ( 0 * R1) ) , // 0 */ 0,
} ;
// Number of sections; the per-section arrays below are indexed 0 .. nSections-1.
const int nSections = 5 ;
// Pin numbers of the green and the red LED of each section; defining wrong_led swaps the two tables.
#ifdef wrong_led
const int red[] = { D0, D2, D4, D6, 3 } ;
const int green[] = { D1, D3, D5, D7, 1 } ;
#else
const int green[] = { D0, D2, D4, D6, 3 } ;
const int red[] = { D1, D3, D5, D7, 1 } ;
#endif
// Analog input sampled by debounceInputs().
const int switchesPin = A0 ;
// millis() timestamp per section, written by incomingMessage() when a token becomes TAKEN.
// Only four initialisers are listed; the fifth element is zero-initialised as well.
uint32_t timeOut[nSections] = {0,0,0,0} ;
// Not referenced by any function in this listing.
const int debugPin = 2 ; // DEBUG TEST ME
// The three values below are compared against millis(), i.e. they are in milliseconds.
// timeOutInterval  : age after which transceiveTokens() resets a token we do not hold to AVAILABLE.
// sendInterval     : divided by nSections to form the REPEAT_MS argument in transceiveTokens().
// connectionTimeout: once millis() exceeds it, loop() sets connected even without a peer.
const uint32_t timeOutInterval = 3000 ;
const uint32_t sendInterval = 2000 ;
const uint32_t connectionTimeout = 10000 ;
// State of one token as seen by this node (updateLEDs(): green / yellow / red).
enum tokenStates
{
AVAILABLE,
IN_POSSESSION,
TAKEN,
} ;
// Current state of every section's token; holds tokenStates values.
uint8_t token[ nSections ] ;
// One debouncer per section.
// NOTE(review): the meaning of the constructor argument 255 is defined by the Debounce class, not visible here.
Debounce button[] =
{
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 )
} ;
/************** FUNCTIONS **************/
void updateLEDs()
{
if( !connected )
{
REPEAT_MS( 500 )
{
for (int i = 0; i < nSections ; i++) digitalWrite( red[i], !digitalRead(red[i] )) ; // toggle all red lights during connecting
}
END_REPEAT
}
else for (int i = 0; i < nSections ; i++)
{
switch (token[i])
{
case AVAILABLE: analogWrite( green[i], 32 ) ; // green
digitalWrite( red[i], LOW ) ; break;
case IN_POSSESSION:analogWrite( green[i], 32 ) ; // yellow
digitalWrite( red[i], HIGH ) ; break;
case TAKEN: digitalWrite( green[i], LOW ) ; // red
digitalWrite( red[i], HIGH ) ; break;
}
}
} ;
// Callback registered with mesh.onNewConnection() in setup(): marks this node as connected.
void newConnection(uint32_t nodeId)
{
    (void) nodeId ;        // the peer's id is not used
    connected = true ;
}
// Samples the shared analog button input inside the REPEAT_MS( 50 ) block and feeds every
// debouncer with 1 when the sample falls into that section's window, otherwise with 0.
void debounceInputs()
{
    REPEAT_MS( 50 )
    {
        const int adc = analogRead( switchesPin ) ;

        for( int btn = 0 ; btn < nSections ; ++btn )
        {
            // window centre; clamped so that "centre - 35" never drops below zero
            const uint16_t centre = ( threshold[btn] < 35 ) ? 35 : threshold[btn] ;

            const bool outside = ( adc < centre - 35 ) || ( adc > centre + 35 ) ;
            if( outside ) button[btn].debounceInputs( 0 ) ;
            else          button[btn].debounceInputs( 1 ) ;
        }
    } END_REPEAT
}
// Handles button presses (FALLING result of the debouncer) for every section.
//   - token AVAILABLE     -> claim it locally and broadcast "<section>,<TAKEN>"
//   - token IN_POSSESSION -> release it locally and broadcast "<section>,<AVAILABLE>"
//   - anything else       -> the press is discarded (token is held by another node)
// The message String is only built when something is actually sent, so an idle loop()
// pass no longer allocates one String per section.
void processInputs( )
{
    for (int i = 0; i < nSections ; i++ )
    {
        if( button[i].readInput() != FALLING ) continue ;   // no new press on this section

        int announcedState ;                                 // state the other nodes should adopt
        if( token[i] == AVAILABLE )
        {
            token[i] = IN_POSSESSION ;                       // claim the token
            announcedState = TAKEN ;
        }
        else if( token[i] == IN_POSSESSION )
        {
            token[i] = AVAILABLE ;                           // free up the token
            announcedState = AVAILABLE ;
        }
        else
        {
            continue ;                                       // TAKEN (or unknown): never send a half-built message
        }

        String message = "" ;
        message += i ;
        message += ',' ;
        message += announcedState ;
        mesh.sendBroadcast( message ) ;
    }
}
void transceiveTokens()
{
static uint8_t index = 0 ;
REPEAT_MS( sendInterval / nSections ) // if we claimed atleast 1 token, transmitt this once every second
{
if( token[index] == IN_POSSESSION )
{
String message = "" ;
message += index ;
message += ',' ;
message += TAKEN ;
mesh.sendBroadcast( message ) ;
}
if( ++ index == nSections ) index = 0 ;
}
END_REPEAT
for (int i = 0; i < nSections ; i++ )
{
if( token[i] != IN_POSSESSION // if a node which claimed a token is turned off while still possessing the token
&& millis() - timeOut[i] >= timeOutInterval ) // the token becomes available again after a timeout
{
token[i] = AVAILABLE ;
}
}
}
// Mesh receive callback. Expects a message of the form "<tokenID>,<tokenState>".
//
// Messages come from the network, so they are validated before use:
//   - both numbers must parse (previously an unparsable message left the variables
//     uninitialised and they were still used as array index),
//   - tokenID must be a valid section index (it indexes token[] and timeOut[]),
//   - tokenState must be one of the tokenStates values.
// Invalid messages are ignored. The text is parsed straight from msg, so no fixed-size
// copy buffer can overflow any more.
//
// For valid messages the behaviour is unchanged:
//   - if we hold the token and the sender claims it too, "<tokenID>,<AVAILABLE>" is broadcast,
//   - the local token then takes the state from the message,
//   - a TAKEN token gets a fresh timeout stamp.
void incomingMessage( uint32 from, String msg )
{
    (void) from ;                                   // sender id is not needed

    unsigned int tokenID    = 0 ;
    unsigned int tokenState = 0 ;
    if( sscanf( msg.c_str(), "%u,%u", &tokenID, &tokenState ) != 2 ) return ;   // malformed
    if( tokenID >= (unsigned int) nSections ) return ;                          // index out of range
    if( tokenState > (unsigned int) TAKEN ) return ;                            // unknown state

    if( token[ tokenID ] == IN_POSSESSION && tokenState == TAKEN )
    {
        // both nodes claim the same token: tell the others that it is free again
        String message = "" ;
        message += tokenID ;
        message += ',' ;
        message += AVAILABLE ;
        mesh.sendBroadcast( message ) ;
    }

    token[ tokenID ] = tokenState ;                 // adopt the state from the message

    if( tokenState == TAKEN )
    {
        timeOut[ tokenID ] = millis() ;             // (re)start the hold timeout
    }
}
void setup()
{
debounceInputs() ; // to be sure
mesh.init( MESH_PREFIX, MESH_PASSWORD, MESH_PORT );
mesh.onReceive(&incomingMessage );
mesh.onNewConnection( &newConnection );
for( int i = 0 ; i < nSections ; i ++ )
{
pinMode( green[i], OUTPUT ) ;
pinMode( red[i], OUTPUT ) ;
digitalWrite( green[i], LOW ) ;
digitalWrite( red[i], LOW ) ;
}
}
// Main loop: inputs, LEDs, token exchange and the mesh housekeeping call, in that order.
// Afterwards the node declares itself connected once millis() exceeds connectionTimeout,
// so that the first node powered on becomes usable even when it is alone.
void loop()
{
    debounceInputs() ;
    processInputs( ) ;
    updateLEDs() ;
    transceiveTokens() ;
    mesh.update() ;

    const bool graceElapsed = ( millis() > connectionTimeout ) ;
    if( graceElapsed ) connected = true ;
}
I am yet to build three more units. I am hoping that having a network with at least 2 active nodes at all times will solve this problem.
I am curious as to why it sometimes does work and sometimes it does not work.
AFAIK I am not making any obvious mistakes. None of the functions in loop() take incredibly long, but I do not know how fast mesh.update() ; is to be called. For all I know, the functions together take too long. However, if both nodes are not turned off there seem to be no problems at all. Intervals between messages are also larger than 100ms. About mesh.update() the painless mesh website only states that:
This routine runs various maintainance tasks... Not super interesting, but things don't work without it.
What could it be?

Related

my program does not continue compiling if I don't type any letter

I am solving the "credit" problem from problem set 1 of CS50. The program works fine; however, after entering the credit card number, I need to type any letter on the keyboard before the program gives me the answer.
here is my code
# include <cs50.h>
# include <stdio.h>
# include <math.h>
long credit(void);
/*
 * Reads a card number through credit(), checks it with Luhn's algorithm and prints
 * exactly one line: AMEX, MASTERCARD, VISA or INVALID.
 *
 * Changes compared with the previous version:
 *  - no scanf() calls: the number has already been read by credit(), the extra scanf()
 *    calls only made the program wait for further keyboard input;
 *  - the checksum starts at 0 (it was used uninitialised);
 *  - the digits live in a fixed buffer large enough for any long long instead of
 *    variable-length arrays that were one element too short;
 *  - length checks use the real digit count, and each issuer name is printed once.
 */
int main(void)
{
    long long num = credit();

    /* Split the number into decimal digits, least significant digit first. */
    int digits[20];                 /* a long long has at most 19 decimal digits */
    int count = 0;
    while (num > 0 && count < 20)
    {
        digits[count] = (int) (num % 10);
        num /= 10;
        count++;
    }

    /*
     * Luhn checksum: starting from the second-to-last digit, every other digit is
     * doubled and the digits of each product are added; all other digits are added as is.
     */
    int sum = 0;
    for (int i = 0; i < count; i++)
    {
        int d = digits[i];
        if (i % 2 == 1)
        {
            d *= 2;
            d = d / 10 + d % 10;
        }
        sum += d;
    }

    /* Leading two digits decide the issuer. */
    int first  = (count >= 1) ? digits[count - 1] : -1;
    int second = (count >= 2) ? digits[count - 2] : -1;

    if (count == 0 || sum % 10 != 0)
    {
        printf("INVALID\n");
    }
    else if (count == 15 && first == 3 && (second == 4 || second == 7))
    {
        printf("AMEX\n");
    }
    else if (count == 16 && first == 5 && second >= 1 && second <= 5)
    {
        printf("MASTERCARD\n");
    }
    else if ((count == 13 || count == 16) && first == 4)
    {
        printf("VISA\n");
    }
    else
    {
        printf("INVALID\n");
    }
    return 0;
}
// Prompt for the card number until it lies between 10^12 and 10^16 (inclusive).
// The range is checked with integer comparisons: the former log10() test yielded NaN for
// negative input, and since every comparison with NaN is false such input was accepted.
long credit(void)
{
    long long n;
    do
    {
        n = get_long_long("Number: ");
    }
    while (n < 1000000000000LL || n > 10000000000000000LL);
    return n;
}
I would be very grateful if anyone could help me solve this issue.
thanks in advance.
From man scanf:
The scanf() function reads input from the standard input stream stdin
The program is waiting for keyboard input (stdin) at one of the several scanf calls.
NB This question description is misleading because the program does compile, it (seemingly) stops running until keyboard input.

Raspberry Pi: SPI not working, spi_bcm2835 not showing with lsmod

I'm trying to control my WS2801 LED Stripe with my Raspberry Pi 4 over the SPI interface.
The Pi is running Ubuntu 20.04 with the kernel version: Linux ubuntu 5.3.0-1030-raspi2 #32-Ubuntu SMP Sun Jul 12 21:20:28 UTC 2020 aarch64 aarch64 aarch64 GNU/Linux
The setup and connection to the LED Strip should be fine. I've validated it with a test script on my Pi3 and it worked properly. With the test script running on the PI there is no error, but nothing happens on the LED stripe.
So far, I have the understanding that in order to communicate over the SPI we need the spidev and the spi-bcm2835 module to be loaded.
Output of lsmod | grep spi:
spidev 28672 0
IMO, there should be spi_bcm2835 as well. In the /dev/ folder there is spidev0.0 and spidev0.1, as it should be. Calling sudo modprobe spi_bcm2835 gives no error but does not solve the issue.
My /boot/firmware/config.txt:
[pi4]
kernel=uboot_rpi_4.bin
max_framebuffers=2
[pi3]
kernel=uboot_rpi_3.bin
[all]
arm_64bit=1
device_tree_address=0x03000000
start_x=1
gpu_mem=512
dtparam=spi=on
dtparam=sound=on
dtparam=i2c_arm=on
dtparam=i2s=on
dtoverlay=spi-bcm2835
My /etc/modules:
i2c-dev
spi-bcm2835
spi-dev
snd-bcm2835
There is no blacklist entry for SPI in any of the files in /etc/modprobe.d/.
The only related entry in dmesg is [ 1.559971] spi-bcm2835 fe204000.spi: could not get clk: -517. But as far as I know -517 means that the module is just loaded later because it's not ready at this timestep.
Does anybody know what the issue might be here?
Thanks!
This is my test_script:
/******************************************************************************/
/* */
/* FILE: ledstrip.cpp */
/* */
/* Displays the contents of a file on a LED strip */
/* ============================================== */
/* */
/* V0.01 18-DEC-2015 Te */
/* */
/******************************************************************************/
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/spi/spidev.h>
/* Path of the spidev device node opened by OpenSPI(). */
static const char *device = "/dev/spidev0.0" ;
/* Clock rate in Hz; written with SPI_IOC_WR_MAX_SPEED_HZ and copied into the transfer. */
static uint32_t speed = 500000 ;
/* SPI mode written with SPI_IOC_WR_MODE. */
static uint8_t mode = SPI_MODE_0 ;
/* Word size written with SPI_IOC_WR_BITS_PER_WORD and copied into the transfer. */
static uint8_t bits = 8 ;
/* Transmit buffer: Token() sets byte value*3+offset for value 0..24 and offset 0..2 (r/g/b). */
static unsigned char tx[75] ;
/* Receive buffer of the same size; only handed to the driver, never read in this file. */
static unsigned char rx[75] ;
/* The single transfer descriptor filled in by OpenSPI() and submitted by ShowState(). */
static struct spi_ioc_transfer parameters[1];
/* Colour channel selected by the last 'r'/'g'/'b' token (0, 1 or 2). */
static int offset = 0 ;
/* File descriptor of the opened device. */
static int spi ;
/******************************************************************************/
/* OpenSPI: opens the SPI device, writes mode, word size and clock rate, and  */
/* prepares the single transfer descriptor used by ShowState().               */
/*                                                                            */
/* Returns 0 on success, otherwise 1 (open), 2 (mode), 3 (bits) or 4 (speed). */
/* On codes 2..4 the device has been closed again. A message is written to    */
/* stderr for every failure.                                                  */
/******************************************************************************/
int OpenSPI()
{
    spi = open( device, O_RDWR ) ;
    if( spi < 0 )
    {
        fprintf( stderr, "can't open device\n" ) ;
        return 1 ;
    }

    int ret = ioctl( spi, SPI_IOC_WR_MODE, &mode ) ;
    if( ret == -1 )
    {
        close( spi ) ;
        fprintf( stderr, "can't set mode\n" ) ;
        return 2 ;
    }

    ret = ioctl( spi, SPI_IOC_WR_BITS_PER_WORD, &bits ) ;
    if( ret == -1 )
    {
        close( spi ) ;
        fprintf( stderr, "can't set bits\n" ) ;
        return 3 ;
    }

    ret = ioctl( spi, SPI_IOC_WR_MAX_SPEED_HZ, &speed ) ;
    if( ret == -1 )
    {
        close( spi ) ;
        fprintf( stderr, "can't set speed\n" ) ;
        return 4 ;
    }

    /* Clear the WHOLE descriptor. The size used to be sizeof(spi), i.e. the size of */
    /* the int file descriptor, which left most of the structure untouched.          */
    memset( parameters, 0, sizeof(parameters) ) ;
    parameters[0].tx_buf        = (unsigned long)tx ;
    parameters[0].rx_buf        = (unsigned long)rx ;
    parameters[0].len           = 75 ;
    parameters[0].delay_usecs   = 0 ;
    parameters[0].speed_hz      = speed ;
    parameters[0].bits_per_word = bits ;
    parameters[0].cs_change     = 0 ;
    return 0 ;
}
/*------------------------------------------------------------*/
/* CloseSPI: closes the descriptor that OpenSPI() stored.      */
/*------------------------------------------------------------*/
void CloseSPI()
{
    (void) close( spi ) ;    /* result intentionally ignored */
}
/*------------------------------------------------------------*/
/* ResetState: sets every byte of the transmit buffer to 0.    */
/*------------------------------------------------------------*/
void ResetState()
{
    for( size_t n = 0 ; n < sizeof(tx) ; ++n )
        tx[n] = 0 ;
}
/*------------------------------------------------------------*/
/* ShowState: submits the prepared transfer to the SPI driver  */
/* and reports a failure on stderr.                            */
/*------------------------------------------------------------*/
void ShowState()
{
    const int result = ioctl( spi, SPI_IOC_MESSAGE(1), &parameters ) ;
    if( result != -1 )
        return ;
    fprintf( stderr, "can't transfer data\n" ) ;
}
/******************************************************************************/
/* Token: interprets one whitespace-separated word of an input line.          */
/*                                                                            */
/*  - a word starting with a digit is a number 1..25; the byte                */
/*    (number-1)*3 + offset of the transmit buffer is set to 255;             */
/*    numbers outside 1..25 are ignored                                       */
/*  - a word starting with r/g/b (either case) selects offset 0/1/2           */
/*  - everything else is ignored                                              */
/*                                                                            */
/* The first character is converted to unsigned char before it is passed to   */
/* isdigit()/tolower(); passing a negative plain char to them is undefined.   */
/******************************************************************************/
void Token( const char *token )
{
    const unsigned char first = static_cast<unsigned char>( *token ) ;

    if( isdigit(first) )
    {
        int value = atoi( token ) - 1 ;
        if( (value >= 0) && (value <= 24) )
            tx[value*3+offset] = 255 ;
    }
    else
    {
        switch( tolower(first) )
        {
            case 'r' : offset = 0 ;
                       break ;
            case 'g' : offset = 1 ;
                       break ;
            case 'b' : offset = 2 ;
                       break ;
        }
    }
}
/******************************************************************************/
/* Line: processes one line of input.                                         */
/*                                                                            */
/* The transmit buffer is cleared, the line is split at whitespace, every     */
/* word is handed to Token(), and the resulting buffer is sent with           */
/* ShowState(). Words longer than 24 characters are truncated.                */
/*                                                                            */
/* A word that ends at the terminating '\0' (last line of a file without a    */
/* trailing newline) is processed as well; it used to be dropped.             */
/* Characters are converted to unsigned char before isspace() is called.      */
/******************************************************************************/
void Line( const char *ps )
{
    ResetState() ;

    char   token[25] ;
    size_t i = 0 ;

    for( ; *ps != '\0' ; ++ps )
    {
        if( isspace( static_cast<unsigned char>(*ps) ) )
        {
            if( i > 0 )
            {
                token[i] = '\0' ;
                Token( token ) ;
            }
            i = 0 ;
        }
        else if( i < sizeof(token) - 1 )
        {
            token[i++] = *ps ;
        }
    }

    /* flush a word that was not followed by whitespace */
    if( i > 0 )
    {
        token[i] = '\0' ;
        Token( token ) ;
    }

    ShowState() ;
}
/***************************/
void ReadFile( FILE *file )
/***************************/
{
char line[1024] ;
struct timespec in ;
struct timespec out ;
while( fgets(line,sizeof(line),file) != NULL )
{
Line( line ) ;
in.tv_sec = 0 ;
in.tv_nsec = 125000000 ;
nanosleep( &in, &out ) ;
}
}
/******************************************************************************/
/* File: opens the named file for reading, plays it with ReadFile() and       */
/* closes it again.                                                           */
/*                                                                            */
/* A file that cannot be opened is reported on stderr (it used to be skipped  */
/* without any message); processing of further files continues.               */
/******************************************************************************/
void File( const char *filename )
{
    FILE *file = fopen( filename, "r" ) ;
    if( file == NULL )
    {
        fprintf( stderr, "can't open file %s\n", filename ) ;
        return ;
    }
    ReadFile( file ) ;
    fclose( file ) ;
}
/*------------------------------------------------------------*/
/* main: opens the SPI device, then plays either stdin (no     */
/* arguments) or every file named on the command line.         */
/* Exit status is 1 when the device cannot be set up, else 0.  */
/*------------------------------------------------------------*/
int main( int argc, char *argv[] )
{
    if( OpenSPI() != 0 )
        return 1 ;

    if( argc != 1 )
    {
        int n = 1 ;
        while( n < argc )
        {
            File( argv[n] ) ;
            ++n ;
        }
    }
    else
    {
        ReadFile( stdin ) ;
    }

    CloseSPI() ;
    return 0 ;
}
I had a similar problem. This answer has helped me.
Change usercfg.txt instead of config.txt.

AMD Codexl profiling: Opencl Memory leak detected [Ref =1] Object created by clEnqueueNDRangeKernel

I have no idea why I keep getting this warning as I've gone through the code numerous times and I can't figure out where this is coming from as I'm pretty sure (but clearly not) releasing all the memory. Hopefully someone who knows more than I can take a look at my code and point out where and why this is happening.
Thanks!
/*
 * Runs the "convolution" kernel contained in kernelSource on one image.
 *
 *   anImage        image whose columns/rows give the width/height
 *   imagePixels    host pixel data used for the input image object
 *   imageSizeBytes size of the host buffer allocated for the output image
 *   kernelSource   NUL-terminated OpenCL C source containing the kernel
 *
 * Returns 0 when the sequence ran to the end, -1 when no platform or device is available
 * (or the id arrays cannot be allocated). As before, the status of the remaining OpenCL
 * calls is stored in `status` but not evaluated.
 *
 * Clean-up order (this is what changed):
 *   - the event is released first, then kernel, program, sampler, image objects, queue and
 *     finally the context, so no object outlives the queue/context it belongs to;
 *   - host memory is freed only after the OpenCL objects are released: clOutput is created
 *     with CL_MEM_USE_HOST_PTR on outputPixels, so that buffer must stay valid for as long
 *     as clOutput exists (it used to be freed before clReleaseMemObject).
 */
int runKernel( Image *anImage,
               PixelPacket *imagePixels,
               MagickSizeType imageSizeBytes,
               const char *kernelSource )
{
    cl_context       myContext ;
    cl_command_queue myQueue ;
    cl_event         clEvent ;
    int width  = anImage[0].columns ;
    int height = anImage[0].rows ;

    /****************************
    Setup the Opencl environment
    ****************************/
    // Use this to check the output of each API call
    cl_int status ;

    // Retrieve the number of platforms
    cl_uint numPlatforms = 0 ;
    status = clGetPlatformIDs( 0, NULL, &numPlatforms ) ;
    if( status != CL_SUCCESS || numPlatforms == 0 )
        return -1 ;                       // platforms[0] below would not exist

    // Allocate enough space for each platform
    cl_platform_id *platforms = NULL ;
    platforms = (cl_platform_id *) malloc( numPlatforms * sizeof(cl_platform_id) ) ;
    if( platforms == NULL )
        return -1 ;

    // Fill in the platforms
    status = clGetPlatformIDs( numPlatforms, platforms, NULL ) ;

    // Retrieve the number of devices for the 1st platform
    cl_uint numDevices = 0 ;
    status = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices ) ;
    if( status != CL_SUCCESS || numDevices == 0 )
    {
        free( platforms ) ;
        return -1 ;                       // devices[0] below would not exist
    }

    // Allocate enough space for each device
    cl_device_id *devices ;
    devices = (cl_device_id *) malloc( numDevices * sizeof(cl_device_id) ) ;
    if( devices == NULL )
    {
        free( platforms ) ;
        return -1 ;
    }

    // Fill in the devices
    status = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, numDevices,
                             devices, NULL ) ;

    // Create the context
    myContext = clCreateContext( NULL, numDevices, devices, NULL, NULL, &status ) ;

    // Create the command queue with the 1st device
    myQueue = clCreateCommandQueue( myContext, devices[0], 0, &status ) ;

    /****************************
    Create Images and Move Data
    ****************************/
    // Set format and descriptor to proper values according to image type
    cl_image_format *image_format = NULL ;
    cl_image_desc   *image_desc   = NULL ;
    get_cl_image_format( anImage, &image_desc, &image_format ) ;

    // Create the image sampler
    cl_sampler clSampler = clCreateSampler(
        myContext,
        CL_FALSE,                   // use pixel based addressing not normalized
        CL_ADDRESS_CLAMP_TO_EDGE,   // set equal to the pixel at the edge of the image
        CL_FILTER_NEAREST,
        &status);

    // Set input Image region parameters
    size_t origin[3] = {0, 0, 0} ;                                // Offset within the image to copy from
    size_t region[3] = {(size_t) width, (size_t) height, 1} ;     // Elements per dimension for 2d image

    // Create cl memory object for the input image
    cl_mem_flags flagsRead = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR ;
    cl_mem clInput = clCreateImage( myContext, flagsRead,
                                    ( const cl_image_format *)image_format,
                                    ( const cl_image_desc *)image_desc,
                                    imagePixels,
                                    &status ) ;

    // Allocate space for output image and create cl memory object
    float *outputPixels = (float *) malloc( imageSizeBytes ) ;
    cl_mem_flags flagsWrite = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR ;
    cl_mem clOutput = clCreateImage( myContext, flagsWrite,
                                     (const cl_image_format *)image_format,
                                     ( const cl_image_desc *)image_desc,
                                     outputPixels,
                                     &status ) ;

    //Copy input image to the device
    status = clEnqueueWriteImage( myQueue, clInput, CL_FALSE, origin, region,
                                  0, 0, imagePixels, 0, NULL, NULL ) ;

    /*****************************
    Compile the kernel from source
    *****************************/
    // kernelSource stores the kernel code and must be NULL terminated
    cl_program myProgram = clCreateProgramWithSource( myContext, 1,
                                                      &kernelSource,
                                                      NULL,
                                                      &status ) ;

    // Compile the program
    const char buildOptions[] = "-cl-std=CL1.2 -cl-mad-enable\0";
    status = clBuildProgram( myProgram, 1, devices, buildOptions, NULL, NULL ) ;

    // Create the kernel
    cl_kernel myKernel = clCreateKernel( myProgram, "convolution", &status ) ;

    /**********************************
    Set kernel args and run the program
    **********************************/
    // Set the kernel arguments
    clSetKernelArg( myKernel, 0, sizeof( cl_mem ), &clInput ) ;
    clSetKernelArg( myKernel, 1, sizeof( cl_mem ), &clOutput ) ;
    clSetKernelArg( myKernel, 2, sizeof( int ), &height ) ;
    clSetKernelArg( myKernel, 3, sizeof( int ), &width ) ;
    clSetKernelArg( myKernel, 4, sizeof( cl_sampler ), &clSampler ) ;

    //Execute the kernel
    status = clEnqueueTask( myQueue, myKernel, 0, NULL, NULL ) ;

    //Read the output buffer back to the host (blocking read)
    status = clEnqueueReadImage( myQueue, clOutput, CL_TRUE, origin, region, 0, 0,
                                 (void *) outputPixels, 0, NULL, &clEvent ) ;

    /**********************************
    Free Resources
    **********************************/
    /* Wait for the read (and therefore the kernel before it) to finish */
    clWaitForEvents( 1, &clEvent ) ;

    /* OpenCL objects first: event, then kernel/program/sampler/images, queue, context last */
    clReleaseEvent( clEvent ) ;
    clReleaseKernel( myKernel ) ;
    clReleaseProgram( myProgram ) ;
    clReleaseSampler( clSampler ) ;
    clReleaseMemObject( clInput ) ;
    clReleaseMemObject( clOutput ) ;
    clReleaseCommandQueue( myQueue ) ;
    clReleaseContext( myContext ) ;

    /* Host memory last: outputPixels backs clOutput (CL_MEM_USE_HOST_PTR) */
    free( refImage ) ;        /* NOTE(review): refImage is not declared in this function -- confirm where it comes from */
    free( platforms ) ;
    free( devices ) ;
    free( outputPixels ) ;
    free( image_desc ) ;
    free( image_format ) ;
    return 0;
}
It's possible that destroying the queue, kernel, context, or program before releasing the event (clEvent) is causing the warning. You might try the following:
clReleaseEvent( clEvent ) ; // <<< THIS ONE FIRST
clReleaseSampler( clSampler ) ;
clReleaseMemObject( clInput ) ;
clReleaseMemObject( clOutput ) ;
clReleaseProgram( myProgram ) ;
clReleaseCommandQueue( myQueue ) ;
clReleaseKernel( myKernel ) ;
clReleaseContext( myContext ) ;
Alternately, debug the release ops line by line until you get the warning.

Dead lock without a explicit lock

I am testing a pthread program.
This program is simple. The main thread creates a child thread.
The main thread and the child thread are both operating on a queue.
The child thread keeps scanning the queue in an infinite loop and reports the minimal element and its position.
The main thread also is running a loop, each iteration of which delete the minimal element calculated by the child thread from the queue, and insert some new elements to the end of the queue.
The minimal element and its position, and the queue are all declared as global variables.
The main ends when the queue is empty and it will cancel the child thread.
This progress is some like a breadth-first search.
The queue is implemented as an array with a size counter. The deletion operation is implemented as replacing the element to be deleted by the last element and decreasing the size counter by one.
No lock is used here. But when running, the program will get stuck.
What's more amazing, if I insert some printf statements to view the status, it may finish.
I want to know what causes this program to run endlessly.
// Data shared between main_thread() and child_thread(); `para` is the single global instance.
// No lock protects these fields -- both threads access them concurrently.
struct multiblocks_pthread_args {
// Result of the worker's last scan: queue element (local_e), its value (local_v)
// and its index in the queue (local_pos). Written by child_thread(), read by main_thread().
volatile int local_e;
volatile int local_v;
volatile int local_pos;
// Queue elements and, at the same index, their values.
int* Q;
int* val;
// Points to the queue's size counter (a local variable of main_thread()).
volatile int* size;
} para;
// Set to 1 by main_thread() whenever it changes the queue; child_thread() clears it before a
// scan and repeats the scan while it is non-zero.
// NOTE(review): volatile does not provide atomicity or ordering between threads.
volatile int update = 0;
// Worker thread: scans the queue for the smallest value in an endless loop and publishes the
// element, its index and its value in *args (a multiblocks_pthread_args).
// The thread never returns on its own; it enables asynchronous cancellation and is stopped
// with pthread_cancel().
//
// Fix: a scan over an empty queue no longer publishes anything. Before, the element `e` was
// only assigned when size > 0 but was stored in local_e unconditionally, i.e. an uninitialised
// value was read and handed to the main thread. The stray `e` operand in the comma expression
// is gone as well.
//
// NOTE(review): the three result fields are still written one after another without a lock,
// so the reader can observe a mixture of two scans -- confirm whether the caller tolerates that.
void* child_thread ( void* args ) {
    pthread_setcanceltype ( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );
    multiblocks_pthread_args* arglist = ( multiblocks_pthread_args* ) args;
    bindToCore ( 1 );
    int* list = arglist -> Q, * value = arglist -> val;
    while ( true ) {
        int size, v, pos;
        do {
            size = * ( arglist->size );
            v = INF;
            pos = 0;
            update = 0;              // cleared before the scan; set again by the main thread on changes
            for ( int i = 0; i < size; i++ ) {
                int vi = value[i];
                if ( vi < v ) {
                    pos = i;
                    v = vi;
                }
            }
        } while ( update );          // queue changed during the scan: result is stale, scan again
        if ( size <= 0 ) continue;   // empty queue: there is no minimum to publish
        arglist->local_e = list[pos];
        arglist->local_pos = pos;
        arglist->local_v = v;
    }
    return NULL;
}
// Main side of the search: repeatedly takes the minimum reported by child_thread(), removes it
// from the queue (the last element is moved into its slot) and appends or updates neighbours.
// Ends when the queue is empty.
//
// Changes (the expansion step still contains the placeholders of the original listing):
//  - the queue and `size` are initialised BEFORE the worker is created; the worker used to be
//    started while `size` was still uninitialised;
//  - the worker is cancelled and joined before Q is freed, so it can no longer read freed memory
//    and cannot keep using the stack variable `size` after this function returns;
//  - the local arrays `hash` and `color` are freed.
// NOTE(review): the race between the two threads on the queue itself is NOT solved by this;
// that needs a mutex/condition variable (or a single-threaded design).
// NOTE(review): para.val is left allocated because para is global and `value` (not declared in
// this function) may refer to it -- confirm ownership.
void main_thread () {
    int size = 0;
    int* Q = ( int* ) malloc ( sizeof ( int ) * NumNode );
    int** hash = ( int** ) malloc ( sizeof ( int* ) * numNode );
    NodeColor* color = ( NodeColor* ) malloc ( sizeof ( NodeColor ) * numNode );
    // NodeColor is a enum with 3 values: WHITE, GRAY, BLACK
    memset ( color, 0, sizeof ( NodeColor ) * numNode );
    pthread_t tid;
    para.val = ( int* ) malloc ( sizeof ( int ) * NumNode );
    para.Q = Q;
    para.size = &size;
    // Only one element is in the queue
    para.Q[size] = 0;
    para.val[size] = 0;
    hash[0] = &para.val[size]; // hash is used to modify the value of particular element
    ++size;
    color[0] = GRAY;
    // start the worker only now that everything it reads has a defined value
    pthread_create ( &tid, NULL, child_thread, &para );
    while ( true ) {
        int global_e, global_v = INF, global_pos;
        global_e = para.local_e, global_v = para.local_v, global_pos = para.local_pos;
        if ( size == 0 ) break;
        if ( color[global_e] != BLACK ) {
            value[global_e] = global_v, color[global_e] = BLACK;
            if ( size > 0 ) {
                --size;
                para.Q[global_pos] = para.Q[size];
                para.val[global_pos] = para.val[size];
                hash[para.Q[global_pos]] = & para.val[global_pos];
                update = 1;
            }
            for ( int i = 0; i < MAXDEG; ++i ) {
                int ee = ;// new element;
                int vv = ;// value of new element;
                if ( /* if new element is valid */ ) {
                    if ( color[ee] == WHITE ) { // WHITE means ee is not in the queue
                        para.Q[size] = ee;
                        para.val[size] = vv;
                        hash[ee] = &para.val[size];
                        ++size, color[ee] = GRAY;
                    } else {
                        *hash[ee] = vv;
                    }
                    update = 1;
                }
            }
        }
    }
    // stop the worker and wait until it is really gone before releasing what it reads
    pthread_cancel ( tid );
    pthread_join ( tid, NULL );
    free ( Q );
    free ( hash );
    free ( color );
    printf ( "Computation finishes!!!" );
    return ;
}
That's not a deadlock but a race condition.
The overall structure of your hang is, you start with WHITE item at index 0 and this loop goes on forever:
size = 1;
while (size != 0) {
if (WHITE) --size;
for (...) {
if (WHITE) ++size;
}
}
The only way this changes is that your child thread would set the pos something else than 0. But your child thread depends on size to be greater than 1 to make it other than 0. There you have your race condition.
My diagnosis may not be accurate. A cleaner code would help a lot. The names like Q, e, v would save you couple of keystrokes but can easily lose you days, as in this example. You also interchangeably use numbers and enums, a bad practice.

Why slaves are not working in PVM (parallel virtual machine)

I am trying to build a code of PVM which have one master and one slave, (I am working on centOS 5.5 OS)
when I run a command aimk master1 slave1, it is expected to give below output:-
Spawning 3 worker tasks ... SUCCESSFUL
I got 100.000000 from 1; (expecting 100.000000)
I got 200.000000 from 0; (expecting 200.000000)
I got 300.000000 from 2; (expecting 300.000000)
But it shows
pvm> [1:t80002] EOF
[1:t80001] Spawning 6 worker tasks.....
[1:t80001] Trouble spawning slaves. Aborting.Error codes are:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
[1:t80001] libpvm [t80005] : pvm_mcast() : Bad parameter
[1:t80003] EOF
[1:t80004] EOF
Why it gives this error? why slaves are not working?
My codes are below, Help me in this problem.
Master1.c
static char rcsid[] =
"$Id: master1.c,v 1.4 1997/07/09 13:25:09 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
#define SLAVENAME "slave1"
/*
 * Master: spawns three SLAVENAME tasks per configured host (at most 32), multicasts the task
 * list and a data vector to them and prints one result per slave.
 *
 * Returns 0 on success and 1 when PVM cannot be reached or not all slaves could be started.
 *
 * Changes:
 *  - explicit `int main(void)` with return values; exit() was called without <stdlib.h>;
 *  - failures of pvm_mytid()/pvm_config() are detected instead of continuing with
 *    undefined host counts;
 *  - a negative pvm_spawn() result is reported as such; it used to be used as the start
 *    index of the loop over tids[];
 *  - the slave number received from the network is range-checked before it indexes result[].
 */
int main(void)
{
    int mytid;                  /* my task id */
    int tids[32];               /* slave task ids */
    int n, nproc, numt, i, who, msgtype, nhost, narch;
    float data[100], result[32];
    struct pvmhostinfo *hostp;

    /* enroll in pvm */
    mytid = pvm_mytid();
    if( mytid < 0 ){
        printf("Cannot enroll in PVM (error %d)\n", mytid);
        return 1;
    }

    /* Set number of slaves to start */
    if( pvm_config( &nhost, &narch, &hostp ) < 0 ){
        printf("Cannot read the PVM configuration\n");
        pvm_exit();
        return 1;
    }
    nproc = nhost * 3;
    if( nproc > 32 ) nproc = 32 ;
    printf("Spawning %d worker tasks ... " , nproc);

    /* start up slave tasks */
    numt=pvm_spawn(SLAVENAME, (char**)0, 0, "", nproc, tids);
    if( numt < nproc ){
        printf("\n Trouble spawning slaves. Aborting. Error codes are:\n");
        if( numt < 0 ){
            /* system error: numt itself is the error code, tids[] carries no information */
            printf("pvm_spawn %d\n", numt);
            numt = 0;
        }
        else {
            /* the last nproc-numt entries of tids[] hold the per-task error codes */
            for( i=numt ; i<nproc ; i++ ) {
                printf("TID %d %d\n",i,tids[i]);
            }
        }
        for( i=0 ; i<numt ; i++ ){
            pvm_kill( tids[i] );
        }
        pvm_exit();
        return 1;
    }
    printf("SUCCESSFUL\n");

    /* Begin User Program */
    n = 100;
    /* initialize_data( data, n ); */
    for( i=0 ; i<n ; i++ ){
        data[i] = 1.0;
    }

    /* Broadcast initial data to slave tasks */
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&nproc, 1, 1);
    pvm_pkint(tids, nproc, 1);
    pvm_pkint(&n, 1, 1);
    pvm_pkfloat(data, n, 1);
    pvm_mcast(tids, nproc, 0);

    /* Wait for results from slaves */
    msgtype = 5;
    for( i=0 ; i<nproc ; i++ ){
        pvm_recv( -1, msgtype );
        pvm_upkint( &who, 1, 1 );
        if( who < 0 || who >= nproc ){
            /* never index result[] with an unchecked value from a message */
            printf("Ignoring result from unknown slave %d\n", who);
            continue;
        }
        pvm_upkfloat( &result[who], 1, 1 );
        printf("I got %f from %d; ",result[who],who);
        if (who == 0)
            printf( "(expecting %f)\n", (nproc - 1) * 100.0);
        else
            printf( "(expecting %f)\n", (2 * who - 1) * 100.0);
    }

    /* Program Finished exit PVM before stopping */
    pvm_exit();
    return 0;
}
slave1.c
static char rcsid[] =
"$Id: slave1.c,v 1.2 1997/07/09 13:25:18 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
/*
 * Slave: receives the task list and the data vector from the master (message type 0),
 * determines its own position in the task list, calls work() and sends the position and
 * the result back to the parent task (message type 5).
 *
 * Returns 0 on success and 1 when the received message cannot be used.
 *
 * Changes:
 *  - explicit `int main(void)` with return values;
 *  - nproc and n are taken from a message and are now checked against the sizes of
 *    tids[] (32) and data[] (100) before they are used as unpack counts;
 *  - `me` was left uninitialised when the own task id was not in the list; this case
 *    now ends the task instead of using an undefined index.
 */
int main(void)
{
    int mytid;                  /* my task id */
    int tids[32];               /* task ids */
    int n, me = -1, i, nproc, master, msgtype;
    float data[100], result;
    float work();

    /* enroll in pvm */
    mytid = pvm_mytid();

    /* Receive data from master */
    msgtype = 0;
    pvm_recv( -1, msgtype );
    pvm_upkint(&nproc, 1, 1);
    if( nproc < 1 || nproc > 32 ){          /* would overflow tids[] */
        pvm_exit();
        return 1;
    }
    pvm_upkint(tids, nproc, 1);
    pvm_upkint(&n, 1, 1);
    if( n < 0 || n > 100 ){                 /* would overflow data[] */
        pvm_exit();
        return 1;
    }
    pvm_upkfloat(data, n, 1);

    /* Determine which slave I am (0 -- nproc-1) */
    for( i=0; i<nproc ; i++ )
        if( mytid == tids[i] ){ me = i; break; }
    if( me < 0 ){                           /* own task id is not in the list */
        pvm_exit();
        return 1;
    }

    /* Do calculations with data */
    result = work( me, n, data, tids, nproc );

    /* Send result to master */
    pvm_initsend( PvmDataDefault );
    pvm_pkint( &me, 1, 1 );
    pvm_pkfloat( &result, 1, 1 );
    msgtype = 5;
    master = pvm_parent();
    pvm_send( master, msgtype );

    /* Program finished. Exit PVM before stopping */
    pvm_exit();
    return 0;
}
/*
 * Simple example: slaves exchange data with left neighbor (wrapping).
 *
 *   me     index of this slave in tids (0 .. nproc-1)
 *   n      number of valid entries in data
 *   data   input vector
 *   tids   task ids of all slaves
 *   nproc  number of slaves
 *
 * Computes sum = me * (data[0] + ... + data[n-1]), sends it with message type 22 to the slave
 * at index me+1 (index 0 for the last slave), receives one message of type 22 from any task
 * and returns the own sum plus the received one.
 *
 * The definition now uses a prototype instead of the old K&R parameter list, so calls are
 * type-checked; parameter names, order and types are unchanged.
 */
float
work( int me, int n, float *data, int *tids, int nproc )
{
    int i, dest;
    float psum = 0.0;
    float sum = 0.0;

    for( i=0 ; i<n ; i++ ){
        sum += me * data[i];
    }

    /* illustrate node-to-node communication */
    pvm_initsend( PvmDataDefault );
    pvm_pkfloat( &sum, 1, 1 );
    dest = me+1;
    if( dest == nproc ) dest = 0;
    pvm_send( tids[dest], 22 );
    pvm_recv( -1, 22 );
    pvm_upkfloat( &psum, 1, 1 );

    return( sum+psum );
}
Obviously PVM is not finding your slaves' executable. Examine this portion of the output:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
All task IDs are -7, which is PvmNoFile. Ensure that SLAVENAME (slave1 in your case) is either an absolute file path (which it's not in your case) or is the name of an executable file, located in the PVM search path. By default the PVM search path is:
$HOME/pvm3/bin/$PVM_ARCH/
where $HOME is your user's home directory path and $PVM_ARCH is the name of the PVM architecture.

Resources