I am trying to build a PVM program that has one master and one slave (I am working on CentOS 5.5).
When I build with aimk master1 slave1 and run the master, it is expected to give the output below:
Spawning 3 worker tasks ... SUCCESSFUL
I got 100.000000 from 1; (expecting 100.000000)
I got 200.000000 from 0; (expecting 200.000000)
I got 300.000000 from 2; (expecting 300.000000)
But it shows
pvm> [1:t80002] EOF
[1:t80001] Spawning 6 worker tasks.....
[1:t80001] Trouble spawning slaves. Aborting.Error codes are:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
[1:t80001] libpvm [t80005] : pvm_mcast() : Bad parameter
[1:t80003] EOF
[1:t80004] EOF
Why does it give this error? Why are the slaves not working?
My code is below. Please help me with this problem.
Master1.c
static char rcsid[] =
"$Id: master1.c,v 1.4 1997/07/09 13:25:09 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
#define SLAVENAME "slave1"
/*
 * PVM master: enrolls in PVM, spawns three slave tasks per configured host
 * (capped at 32), multicasts the work description to them and prints the
 * result each slave sends back with message tag 5.
 *
 * Returns 0 on success and 1 when enrolling, reading the configuration or
 * spawning the slaves fails.
 */
int main(void)
{
    int mytid;                  /* my task id */
    int tids[32];               /* slave task ids */
    int n, nproc, numt, i, who, msgtype, nhost, narch;
    float data[100], result[32];
    struct pvmhostinfo *hostp;

    /* enroll in pvm */
    mytid = pvm_mytid();
    if( mytid < 0 ){
        printf("Could not enroll in PVM (error %d).\n", mytid);
        return 1;
    }

    /* Set number of slaves to start */
    if( pvm_config( &nhost, &narch, &hostp ) < 0 ){
        printf("Could not read the PVM configuration.\n");
        pvm_exit();
        return 1;
    }
    nproc = nhost * 3;
    if( nproc > 32 ) nproc = 32 ;
    printf("Spawning %d worker tasks ... " , nproc);

    /* start up slave tasks */
    numt = pvm_spawn(SLAVENAME, (char**)0, 0, "", nproc, tids);
    if( numt < nproc ){
        /* numt is negative on a system error: nothing was started then,
           so clamp it before it is used as an index into tids[] */
        if( numt < 0 ) numt = 0;
        printf("\n Trouble spawning slaves. Aborting. Error codes are:\n");
        for( i=numt ; i<nproc ; i++ ) {
            printf("TID %d %d\n",i,tids[i]);
        }
        for( i=0 ; i<numt ; i++ ){
            pvm_kill( tids[i] );
        }
        pvm_exit();
        return 1;
    }
    printf("SUCCESSFUL\n");

    /* Begin User Program */
    n = 100;
    /* initialize_data( data, n ); */
    for( i=0 ; i<n ; i++ ){
        data[i] = 1.0;
    }

    /* Broadcast initial data to slave tasks */
    pvm_initsend(PvmDataDefault);
    pvm_pkint(&nproc, 1, 1);
    pvm_pkint(tids, nproc, 1);
    pvm_pkint(&n, 1, 1);
    pvm_pkfloat(data, n, 1);
    pvm_mcast(tids, nproc, 0);

    /* Wait for results from slaves */
    msgtype = 5;
    for( i=0 ; i<nproc ; i++ ){
        if( pvm_recv( -1, msgtype ) < 0 ){
            printf("Receiving a result failed.\n");
            break;
        }
        pvm_upkint( &who, 1, 1 );
        /* 'who' comes from the message: never index result[] with it unchecked */
        if( who < 0 || who >= nproc ){
            printf("Ignoring result from unknown slave index %d\n", who);
            continue;
        }
        pvm_upkfloat( &result[who], 1, 1 );
        printf("I got %f from %d; ",result[who],who);
        if (who == 0)
            printf( "(expecting %f)\n", (nproc - 1) * 100.0);
        else
            printf( "(expecting %f)\n", (2 * who - 1) * 100.0);
    }

    /* Program Finished exit PVM before stopping */
    pvm_exit();
    return 0;
}
slave1.c
static char rcsid[] =
"$Id: slave1.c,v 1.2 1997/07/09 13:25:18 pvmsrc Exp $";
#include <stdio.h>
#include "pvm3.h"
/*
 * PVM slave: receives the work description multicast by the master
 * (message tag 0), finds its own index in the task list, runs work() and
 * sends the index and the result back to its parent with message tag 5.
 *
 * Returns 0 on success and 1 when the received description does not fit the
 * local buffers or does not list this task.
 */
int main(void)
{
    int mytid;                  /* my task id */
    int tids[32];               /* task ids */
    int n, me, i, nproc, master, msgtype;
    float data[100], result;
    float work();

    /* enroll in pvm */
    mytid = pvm_mytid();

    /* Receive data from master */
    msgtype = 0;
    pvm_recv( -1, msgtype );
    pvm_upkint(&nproc, 1, 1);
    /* the counts come from the message: check them against the array sizes */
    if( nproc < 1 || nproc > (int)(sizeof tids / sizeof tids[0]) ){
        fprintf(stderr, "slave: unsupported task count %d\n", nproc);
        pvm_exit();
        return 1;
    }
    pvm_upkint(tids, nproc, 1);
    pvm_upkint(&n, 1, 1);
    if( n < 0 || n > (int)(sizeof data / sizeof data[0]) ){
        fprintf(stderr, "slave: unsupported data length %d\n", n);
        pvm_exit();
        return 1;
    }
    pvm_upkfloat(data, n, 1);

    /* Determine which slave I am (0 -- nproc-1) */
    me = -1;
    for( i=0; i<nproc ; i++ )
        if( mytid == tids[i] ){ me = i; break; }
    if( me < 0 ){
        /* without this check 'me' would be used uninitialized below */
        fprintf(stderr, "slave: my task id is not in the task list\n");
        pvm_exit();
        return 1;
    }

    /* Do calculations with data */
    result = work( me, n, data, tids, nproc );

    /* Send result to master */
    pvm_initsend( PvmDataDefault );
    pvm_pkint( &me, 1, 1 );
    pvm_pkfloat( &result, 1, 1 );
    msgtype = 5;
    master = pvm_parent();
    pvm_send( master, msgtype );

    /* Program finished. Exit PVM before stopping */
    pvm_exit();
    return 0;
}
float
work(me, n, data, tids, nproc )
/*
 * Sums me * data[i] over the n input values, sends that sum to task me+1
 * (wrapping to task 0 after the last one) with message tag 22, receives one
 * tag-22 message and returns the local sum plus the received value.
 */
int me, n, *tids, nproc;
float *data;
{
    int k;
    int neighbour;
    float received = 0.0;
    float local_sum = 0.0;

    for( k = 0; k < n; k++ )
        local_sum += me * data[k];

    /* pick the neighbour that gets our partial sum */
    neighbour = ( me + 1 == nproc ) ? 0 : me + 1;

    /* illustrate node-to-node communication */
    pvm_initsend( PvmDataDefault );
    pvm_pkfloat( &local_sum, 1, 1 );
    pvm_send( tids[neighbour], 22 );

    pvm_recv( -1, 22 );
    pvm_upkfloat( &received, 1, 1 );

    return( local_sum + received );
}
Obviously PVM is not finding your slaves' executable. Examine this portion of the output:
[1:t80001] TID 3 -7
[1:t80001] TID 4 -7
[1:t80001] TID 5 -7
All task IDs are -7, which is PvmNoFile. Ensure that SLAVENAME (slave1 in your case) is either an absolute file path (which it's not in your case) or is the name of an executable file, located in the PVM search path. By default the PVM search path is:
$HOME/pvm3/bin/$PVM_ARCH/
where $HOME is your user's home directory path and $PVM_ARCH is the name of the PVM architecture.
Related
I have a simple setup between 2 wemos d1 boards. They work with a painless mesh.
The devices each have buttons and LEDs with which users can interact with one another.
The problem that I am having, is that sometimes the two units don't connect after one of them is turned off. I am extensively testing what happens when one of the 2 nodes falls off and then comes back on again. Sometimes they connect fast, sometimes slow, and sometimes they won't connect at all.
Retrying to reset the turned-off module mostly works but sometimes I need to reset the first module as well or they will never connect again. Judging by the LEDs and operation the program keeps running. Resetting both devices always works to reconnect the two.
// #define wrong_led // I may or may not have made a slight error in soldering the duo leds
// Set by newConnection() and, after connectionTimeout ms of uptime, by loop(); never cleared again.
bool connected = 0 ;
// R1 and R2 only appear in the commented-out formulas below.
const uint32_t R1 = 1000 ;
const uint32_t R2 = 4700 ;
// Analog reference value per section; debounceInputs() treats a sample within +/-35 of an entry as "pressed".
// The commented-out expressions and numbers are kept next to the values actually in use.
const uint32_t threshold[] =
{
/*4 * 1023 * R1 / ( R2 + ( 4 * R1) ) , // 470 ->*/ 682,
/*3 * 1023 * R1 / ( R2 + ( 3 * R1) ) , // 398 ->*/ 579,
/*2 * 1023 * R1 / ( R2 + ( 2 * R1) ) , // 305 ->*/ 446,
/*1 * 1023 * R1 / ( R2 + ( 1 * R1) ) , // 179 ->*/ 262,
/*0 * 1023 * R1 / ( R2 + ( 0 * R1) ) , // 0 */ 0,
} ;
// Number of sections; every per-section array below is indexed 0 .. nSections-1.
const int nSections = 5 ;
// Pin numbers of the red and green LED of each section; wrong_led swaps the two tables.
#ifdef wrong_led
const int red[] = { D0, D2, D4, D6, 3 } ;
const int green[] = { D1, D3, D5, D7, 1 } ;
#else
const int green[] = { D0, D2, D4, D6, 3 } ;
const int red[] = { D1, D3, D5, D7, 1 } ;
#endif
// Analog input sampled by debounceInputs().
const int switchesPin = A0 ;
// millis() stamp of the last TAKEN message per section (written in incomingMessage()).
// Only four initializers are given; the fifth element is zero-initialized as well.
uint32_t timeOut[nSections] = {0,0,0,0} ;
const int debugPin = 2 ; // DEBUG TEST ME
// A token that is not ours becomes AVAILABLE again once its stamp is this many ms old.
const uint32_t timeOutInterval = 3000 ;
// transceiveTokens() visits one section every sendInterval / nSections ms.
const uint32_t sendInterval = 2000 ;
// Uptime in ms after which loop() sets 'connected' even without a mesh connection.
const uint32_t connectionTimeout = 10000 ;
// State of a section token; the numeric values are also what is sent in mesh messages.
enum tokenStates
{
AVAILABLE,
IN_POSSESSION,
TAKEN,
} ;
// Current token state per section.
uint8_t token[ nSections ] ;
// One debouncer per section, fed by debounceInputs() and read by processInputs().
Debounce button[] =
{
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 ),
Debounce ( 255 )
} ;
/************** FUNCTIONS **************/
void updateLEDs()
{
if( !connected )
{
REPEAT_MS( 500 )
{
for (int i = 0; i < nSections ; i++) digitalWrite( red[i], !digitalRead(red[i] )) ; // toggle all red lights during connecting
}
END_REPEAT
}
else for (int i = 0; i < nSections ; i++)
{
switch (token[i])
{
case AVAILABLE: analogWrite( green[i], 32 ) ; // green
digitalWrite( red[i], LOW ) ; break;
case IN_POSSESSION:analogWrite( green[i], 32 ) ; // yellow
digitalWrite( red[i], HIGH ) ; break;
case TAKEN: digitalWrite( green[i], LOW ) ; // red
digitalWrite( red[i], HIGH ) ; break;
}
}
} ;
// Callback registered with mesh.onNewConnection(): marks the node as connected.
// The id of the new node is not used.
void newConnection(uint32_t nodeId)
{
    (void) nodeId ;
    connected = true ;
}
// Every 50 ms: samples the analog switch input once and feeds each section's
// debouncer with 1 when the sample lies within +/-35 of that section's
// threshold, otherwise with 0.
void debounceInputs()
{
    REPEAT_MS( 50 )
    {
        const int reading = analogRead( switchesPin ) ;

        for( int section = 0 ; section < nSections ; section++ )
        {
            // keep the window centre at 35 or above so the lower bound never drops below zero
            uint16_t centre = 35 ;
            if( threshold[section] >= 35 ) centre = threshold[section] ;

            const bool inWindow = ( reading >= centre - 35 ) && ( reading <= centre + 35 ) ;
            if( inWindow ) button[section].debounceInputs( 1 ) ;
            else           button[section].debounceInputs( 0 ) ;
        }
    } END_REPEAT
}
void processInputs( )
{
for (int i = 0; i < nSections ; i++ )
{
String message = "" ;
message += i ;
message += ',' ;
if( button[i].readInput() == FALLING )
{
if( token[i] == TAKEN ) { continue ; } // token is claimed by another discard button press
else if( token[i] == AVAILABLE ) // if the token is available.....
{
token[i] = IN_POSSESSION ; // claim the token
message += TAKEN ;
}
else if( token[i] == IN_POSSESSION ) // if the token is in possession
{
token[i] = AVAILABLE ; // free up the token
message += AVAILABLE ;
}
mesh.sendBroadcast( message ) ;
}
}
}
void transceiveTokens()
{
static uint8_t index = 0 ;
REPEAT_MS( sendInterval / nSections ) // if we claimed atleast 1 token, transmitt this once every second
{
if( token[index] == IN_POSSESSION )
{
String message = "" ;
message += index ;
message += ',' ;
message += TAKEN ;
mesh.sendBroadcast( message ) ;
}
if( ++ index == nSections ) index = 0 ;
}
END_REPEAT
for (int i = 0; i < nSections ; i++ )
{
if( token[i] != IN_POSSESSION // if a node which claimed a token is turned off while still possessing the token
&& millis() - timeOut[i] >= timeOutInterval ) // the token becomes available again after a timeout
{
token[i] = AVAILABLE ;
}
}
}
// Callback registered with mesh.onReceive(): applies a "<tokenID>,<tokenState>"
// message from another node.
//
//  from  id of the sending node (unused)
//  msg   message text; anything that does not parse as two numbers, or that
//        names an unknown section or state, is ignored
//
// If we hold the token and the sender claims it too, an AVAILABLE message is
// broadcast so that both sides release it. The received state is then stored,
// and for TAKEN the section's timeout stamp is refreshed.
void incomingMessage( uint32 from, String msg )
{
    (void) from ;

    unsigned int tokenID    = 0 ;
    unsigned int tokenState = 0 ;

    // Parse straight from the String: no fixed-size copy that a long message could overflow.
    if( sscanf( msg.c_str(), "%u,%u", &tokenID, &tokenState ) != 2 ) return ;

    // The values come from the network: never use them as index/state unchecked.
    if( tokenID >= (unsigned int) nSections ) return ;
    if( tokenState > TAKEN ) return ;

    if( token[ tokenID ] == IN_POSSESSION && tokenState == TAKEN )  // if we have the token and another also claims it...
    {                                                               // ...announce that the token is free again
        String message = "" ;
        message += tokenID ;
        message += ',' ;
        message += AVAILABLE ;
        mesh.sendBroadcast( message ) ;
    }

    token[ tokenID ] = tokenState ;                                 // update token with the received state

    if( token[ tokenID ] == TAKEN )
    {
        timeOut[ tokenID ] = millis() ;                             // set timeout
    }
}
void setup()
{
debounceInputs() ; // to be sure
mesh.init( MESH_PREFIX, MESH_PASSWORD, MESH_PORT );
mesh.onReceive(&incomingMessage );
mesh.onNewConnection( &newConnection );
for( int i = 0 ; i < nSections ; i ++ )
{
pinMode( green[i], OUTPUT ) ;
pinMode( red[i], OUTPUT ) ;
digitalWrite( green[i], LOW ) ;
digitalWrite( red[i], LOW ) ;
}
}
// Main loop: inputs, button handling, LEDs, token traffic and mesh maintenance,
// in that order, on every pass.
void loop()
{
    debounceInputs() ;
    processInputs( ) ;
    updateLEDs() ;
    transceiveTokens() ;
    mesh.update() ;

    // the first node which is powered on does need to work eventually,
    // even when it is the only one
    if( !connected && millis() > connectionTimeout )
    {
        connected = true ;
    }
}
I am yet to build three more units. I am hoping that having a network with at least 2 active nodes at all times will solve this problem.
I am curious as to why it sometimes does work and sometimes it does not work.
AFAIK I am not making any obvious mistakes. None of the functions in loop() take incredibly long, but I do not know how fast mesh.update() ; is to be called. For all I know, the functions together take too long. However, if both nodes are not turned off there seem to be no problems at all. Intervals between messages are also larger than 100ms. About mesh.update() the painless mesh website only states that:
This routine runs various maintainance tasks... Not super interesting, but things don't work without it.
What could it be?
I am getting a Segmentation fault while reading two different serial communication lines on a BeagleBone Black running Debian GNU/Linux 7.4. One of them carries CAN bus data. I am using Waveshare's RS485/CAN CAPE module for this, together with the can-utils package: "https://github.com/linux-can/can-utils/blob/master/candump.c"
CAN log file
And the other one is UART data by a GPS module called uBlox GY-NEO6MV2 module. For the GPS I have this code which works perfectly;
#include <stdio.h>
#include <fcntl.h> /* File Control Definitions */
#include <termios.h> /* POSIX Terminal Control Definitions */
#include <unistd.h> /* UNIX Standard Definitions */
#include <errno.h> /* ERROR Number Definitions */
#include <string.h> /* Array to String */
/*
 * Endless GPS reader: opens /dev/ttyO2 (9600 baud, 8N1, raw, blocking),
 * reads 42-byte blocks until one starts with "$GPGG", prints everything after
 * the first 7 bytes of that block, closes the port and starts over.
 *
 * The loop never terminates on its own; when the port cannot be opened or
 * queried it retries after one second.
 */
int main(void){
    enum { GPS_BLOCK_SIZE = 42, GPS_HEADER_LEN = 7 };

    while(1){
        int fd;                                 /* File Descriptor */
        struct termios SerialPortSettings;      /* Create the structure */
        char read_buffer[GPS_BLOCK_SIZE];       /* Buffer to store the data received */
        /* payload of one block plus the terminating NUL */
        char curr_read[GPS_BLOCK_SIZE - GPS_HEADER_LEN + 1];
        int test = 0;

        /*------------------------------- Opening the Serial Port -------------------------------*/
        /* O_RDWR   - Read/Write access to serial port     */
        /* O_NOCTTY - No terminal will control the process */
        /* Open in blocking mode, read will wait           */
        fd = open("/dev/ttyO2", O_RDWR | O_NOCTTY);
        if(fd == -1){
            printf("\n Error! in Opening ttyO2 ");
            sleep(1);                           /* do not spin on a missing device */
            continue;
        }
        printf("\n ttyO2 Opened Successfully ");

        /*---------- Setting the Attributes of the serial port using termios structure --------- */
        if(tcgetattr(fd, &SerialPortSettings) != 0){    /* Get the current attributes of the Serial port */
            printf("\n ERROR ! in Setting attributes");
            close(fd);
            sleep(1);
            continue;
        }

        /* Setting the Baud rate */
        cfsetispeed(&SerialPortSettings, B9600);        /* Set Read Speed as 9600  */
        cfsetospeed(&SerialPortSettings, B9600);        /* Set Write Speed as 9600 */

        /* 8N1 Mode */
        SerialPortSettings.c_cflag &= ~PARENB;          /* No Parity */
        SerialPortSettings.c_cflag &= ~CSTOPB;          /* 1 Stop bit */
        SerialPortSettings.c_cflag &= ~CSIZE;           /* Clears the mask for setting the data size */
        SerialPortSettings.c_cflag |= CS8;              /* Set the data bits = 8 */
        SerialPortSettings.c_cflag &= ~CRTSCTS;         /* No Hardware flow Control */
        SerialPortSettings.c_cflag |= CREAD | CLOCAL;   /* Enable receiver, Ignore Modem Control lines */
        SerialPortSettings.c_iflag &= ~(IXON | IXOFF | IXANY);          /* Disable XON/XOFF flow control */
        /* ICANON, ECHO, ECHOE and ISIG are local-mode flags: they live in c_lflag, not c_iflag */
        SerialPortSettings.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG);  /* Non Canonical mode */
        SerialPortSettings.c_oflag &= ~OPOST;           /* No Output Processing */

        /* Setting Time outs */
        SerialPortSettings.c_cc[VMIN] = GPS_BLOCK_SIZE; /* Read at least 42 characters */
        SerialPortSettings.c_cc[VTIME] = 0;             /* Wait indefinitely */

        if((tcsetattr(fd, TCSANOW, &SerialPortSettings)) != 0)
            printf("\n ERROR ! in Setting attributes");
        else
            printf("\n BaudRate = 9600 \n StopBits = 1 \n Parity = none \n\n");

        /*------------------------------- Read data from serial port -----------------------------*/
        tcflush(fd, TCIFLUSH);                  /* Discards old data in the rx buffer */

        do{
            ssize_t bytes_read = read(fd, read_buffer, sizeof read_buffer);
            if(bytes_read < 0){
                if(errno == EINTR)
                    continue;                   /* interrupted by a signal: just read again */
                printf("\n Error! in Reading ttyO2 ");
                break;
            }
            if(bytes_read > GPS_HEADER_LEN && memcmp(read_buffer, "$GPGG", 5) == 0){
                size_t len = (size_t)bytes_read - GPS_HEADER_LEN;

                /* bounded copy plus explicit terminator: the payload is raw
                   bytes, not a C string, so strcpy() must not be used on it */
                memcpy(curr_read, read_buffer + GPS_HEADER_LEN, len);
                curr_read[len] = '\0';

                fwrite(curr_read, 1, len, stdout);  /* printing only the received characters */
                printf("\n%s \n", curr_read);
                test = 1;
            }
        }while(test == 0);

        close(fd);                              /* Close the serial port */
    }
    return 0;
}
For the CAN logging I am using the code in the link above. What I am trying to achieve is logging both data streams into the same log file. I modified the code above a little to keep only the data that I need, which is the timestamp and the location coordinates.
GPS edited data
The GPS module gives data every second, so I am trying to take one reading from the GPS, attach it to the next 1000 CAN frames, write them into a .log file, and then read a new value from the GPS. The GPS module's communication bitrate is 9600 bps and the CAN bitrate is 125000 bps. The GPS is connected to the UART2 pins, the CAN interface to UART1. When I try to combine the two programs into one I get the Segmentation fault error. I did a little research: it is the UNIX error raised when a program violates a restricted memory area. But these two programs work perfectly when run separately. This is where I got stuck.
The code I tried to merge is like;
/* for hardware timestamps - since Linux 2.6.30 */
#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37
#endif
/* from #include <linux/net_tstamp.h> - since Linux 2.6.30 */
#define SOF_TIMESTAMPING_SOFTWARE (1<<4)
#define SOF_TIMESTAMPING_RX_SOFTWARE (1<<3)
#define SOF_TIMESTAMPING_RAW_HARDWARE (1<<6)
#define MAXSOCK 16 /* max. number of CAN interfaces given on the cmdline */
#define MAXIFNAMES 30 /* size of receive name index to omit ioctls */
#define MAXCOL 6 /* number of different colors for colorized output */
#define ANYDEV "any" /* name of interface to receive from any CAN interface */
#define ANL "\r\n" /* newline in ASC mode */
#define SILENT_INI 42 /* detect user setting on commandline */
#define SILENT_OFF 0 /* no silent mode */
#define SILENT_ANI 1 /* silent mode with animation */
#define SILENT_ON 2 /* silent mode (completely silent) */
/* per-socket state, indexed like the socket array s[] in main() */
static char *cmdlinename[MAXSOCK]; /* command line argument each socket was opened for */
static __u32 dropcnt[MAXSOCK]; /* latest drop counter received via SO_RXQ_OVFL */
static __u32 last_dropcnt[MAXSOCK]; /* drop counter value that was reported last */
/* interface name cache maintained by idx2dindex(): devname[i] belongs to interface index dindex[i] */
static char devname[MAXIFNAMES][IFNAMSIZ+1];
static int dindex[MAXIFNAMES]; /* 0 marks a free cache entry */
static int max_devname_len; /* to prevent frazzled device name output */
const int canfd_on = 1; /* option value passed to setsockopt(CAN_RAW_FD_FRAMES) */
#define MAXANI 4
const char anichar[MAXANI] = {'|', '/', '-', '\\'}; /* animation frames for SILENT_ANI */
const char extra_m_info[4][4] = {"- -", "B -", "- E", "B E"}; /* selected by the two low bits of frame.flags */
extern int optind, opterr, optopt;
/* cleared by sigterm() to make the receive loop in main() stop */
static volatile int running = 1;
/*
 * Signal handler installed by main() for SIGTERM, SIGHUP and SIGINT:
 * asks the receive loop to stop by clearing the 'running' flag.
 */
void sigterm(int signo)
{
	(void)signo;	/* all three signals are handled the same way */
	running = 0;
}
int idx2dindex(int ifidx, int socket) {
int i;
struct ifreq ifr;
for (i=0; i < MAXIFNAMES; i++) {
if (dindex[i] == ifidx)
return i;
}
/* create new interface index cache entry */
/* remove index cache zombies first */
for (i=0; i < MAXIFNAMES; i++) {
if (dindex[i]) {
ifr.ifr_ifindex = dindex[i];
if (ioctl(socket, SIOCGIFNAME, &ifr) < 0)
dindex[i] = 0;
}
}
for (i=0; i < MAXIFNAMES; i++)
if (!dindex[i]) /* free entry */
break;
if (i == MAXIFNAMES) {
fprintf(stderr, "Interface index cache only supports %d interfaces.\n",
MAXIFNAMES);
exit(1);
}
dindex[i] = ifidx;
ifr.ifr_ifindex = ifidx;
if (ioctl(socket, SIOCGIFNAME, &ifr) < 0)
perror("SIOCGIFNAME");
if (max_devname_len < strlen(ifr.ifr_name))
max_devname_len = strlen(ifr.ifr_name);
strcpy(devname[i], ifr.ifr_name);
#ifdef DEBUG
printf("new index %d (%s)\n", i, devname[i]);
#endif
return i;
}
int main(int argc, char **argv)
{
fd_set rdfs;
int s[MAXSOCK];
int bridge = 0;
useconds_t bridge_delay = 0;
unsigned char timestamp = 0;
unsigned char hwtimestamp = 0;
unsigned char down_causes_exit = 1;
unsigned char dropmonitor = 0;
unsigned char extra_msg_info = 0;
unsigned char silent = SILENT_INI;
unsigned char silentani = 0;
unsigned char color = 0;
unsigned char view = 0;
unsigned char log = 0;
unsigned char logfrmt = 0;
int count = 0;
int rcvbuf_size = 0;
int opt, ret;
int currmax, numfilter;
int join_filter;
char *ptr, *nptr;
struct sockaddr_can addr;
char ctrlmsg[CMSG_SPACE(sizeof(struct timeval) + 3*sizeof(struct timespec) + sizeof(__u32))];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
struct can_filter *rfilter;
can_err_mask_t err_mask;
struct canfd_frame frame;
int nbytes, i, maxdlen;
struct ifreq ifr;
struct timeval tv, last_tv;
struct timeval timeout, timeout_config = { 0, 0 }, *timeout_current = NULL;
FILE *logfile = NULL;
int fd;/*File Descriptor*/
struct termios SerialPortSettings; /* Create the structure */
signal(SIGTERM, sigterm);
signal(SIGHUP, sigterm);
signal(SIGINT, sigterm);
last_tv.tv_sec = 0;
last_tv.tv_usec = 0;
if (optind == argc) {
print_usage(basename(argv[0]));
exit(0);
}
if (logfrmt && view) {
fprintf(stderr, "Log file format selected: Please disable ASCII/BINARY/SWAP options!\n");
exit(0);
}
if (silent == SILENT_INI) {
if (log) {
fprintf(stderr, "Disabled standard output while logging.\n");
silent = SILENT_ON; /* disable output on stdout */
} else
silent = SILENT_OFF; /* default output */
}
currmax = argc - optind; /* find real number of CAN devices */
if (currmax > MAXSOCK) {
fprintf(stderr, "More than %d CAN devices given on commandline!\n", MAXSOCK);
return 1;
}
for (i=0; i < currmax; i++) {
ptr = argv[optind+i];
nptr = strchr(ptr, ',');
#ifdef DEBUG
printf("open %d '%s'.\n", i, ptr);
#endif
s[i] = socket(PF_CAN, SOCK_RAW, CAN_RAW);
if (s[i] < 0) {
perror("socket");
return 1;
}
cmdlinename[i] = ptr; /* save pointer to cmdline name of this socket */
if (nptr)
nbytes = nptr - ptr; /* interface name is up the first ',' */
else
nbytes = strlen(ptr); /* no ',' found => no filter definitions */
if (nbytes >= IFNAMSIZ) {
fprintf(stderr, "name of CAN device '%s' is too long!\n", ptr);
return 1;
}
if (nbytes > max_devname_len)
max_devname_len = nbytes; /* for nice printing */
addr.can_family = AF_CAN;
memset(&ifr.ifr_name, 0, sizeof(ifr.ifr_name));
strncpy(ifr.ifr_name, ptr, nbytes);
#ifdef DEBUG
printf("using interface name '%s'.\n", ifr.ifr_name);
#endif
if (strcmp(ANYDEV, ifr.ifr_name)) {
if (ioctl(s[i], SIOCGIFINDEX, &ifr) < 0) {
perror("SIOCGIFINDEX");
exit(1);
}
addr.can_ifindex = ifr.ifr_ifindex;
} else
addr.can_ifindex = 0; /* any can interface */
if (nptr) {
/* found a ',' after the interface name => check for filters */
/* determine number of filters to alloc the filter space */
numfilter = 0;
ptr = nptr;
while (ptr) {
numfilter++;
ptr++; /* hop behind the ',' */
ptr = strchr(ptr, ','); /* exit condition */
}
rfilter = malloc(sizeof(struct can_filter) * numfilter);
if (!rfilter) {
fprintf(stderr, "Failed to create filter space!\n");
return 1;
}
numfilter = 0;
err_mask = 0;
join_filter = 0;
while (nptr) {
ptr = nptr+1; /* hop behind the ',' */
nptr = strchr(ptr, ','); /* update exit condition */
if (sscanf(ptr, "%x:%x",
&rfilter[numfilter].can_id,
&rfilter[numfilter].can_mask) == 2) {
rfilter[numfilter].can_mask &= ~CAN_ERR_FLAG;
numfilter++;
} else if (sscanf(ptr, "%x~%x",
&rfilter[numfilter].can_id,
&rfilter[numfilter].can_mask) == 2) {
rfilter[numfilter].can_id |= CAN_INV_FILTER;
rfilter[numfilter].can_mask &= ~CAN_ERR_FLAG;
numfilter++;
} else if (*ptr == 'j' || *ptr == 'J') {
join_filter = 1;
} else if (sscanf(ptr, "#%x", &err_mask) != 1) {
fprintf(stderr, "Error in filter option parsing: '%s'\n", ptr);
return 1;
}
}
if (err_mask)
setsockopt(s[i], SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
&err_mask, sizeof(err_mask));
if (join_filter && setsockopt(s[i], SOL_CAN_RAW, CAN_RAW_JOIN_FILTERS,
&join_filter, sizeof(join_filter)) < 0) {
perror("setsockopt CAN_RAW_JOIN_FILTERS not supported by your Linux Kernel");
return 1;
}
if (numfilter)
setsockopt(s[i], SOL_CAN_RAW, CAN_RAW_FILTER,
rfilter, numfilter * sizeof(struct can_filter));
free(rfilter);
} /* if (nptr) */
/* try to switch the socket into CAN FD mode */
setsockopt(s[i], SOL_CAN_RAW, CAN_RAW_FD_FRAMES, &canfd_on, sizeof(canfd_on));
if (rcvbuf_size) {
int curr_rcvbuf_size;
socklen_t curr_rcvbuf_size_len = sizeof(curr_rcvbuf_size);
/* try SO_RCVBUFFORCE first, if we run with CAP_NET_ADMIN */
if (setsockopt(s[i], SOL_SOCKET, SO_RCVBUFFORCE,
&rcvbuf_size, sizeof(rcvbuf_size)) < 0) {
#ifdef DEBUG
printf("SO_RCVBUFFORCE failed so try SO_RCVBUF ...\n");
#endif
if (setsockopt(s[i], SOL_SOCKET, SO_RCVBUF,
&rcvbuf_size, sizeof(rcvbuf_size)) < 0) {
perror("setsockopt SO_RCVBUF");
return 1;
}
if (getsockopt(s[i], SOL_SOCKET, SO_RCVBUF,
&curr_rcvbuf_size, &curr_rcvbuf_size_len) < 0) {
perror("getsockopt SO_RCVBUF");
return 1;
}
/* Only print a warning the first time we detect the adjustment */
/* n.b.: The wanted size is doubled in Linux in net/sore/sock.c */
if (!i && curr_rcvbuf_size < rcvbuf_size*2)
fprintf(stderr, "The socket receive buffer size was "
"adjusted due to /proc/sys/net/core/rmem_max.\n");
}
}
if (timestamp || log || logfrmt) {
if (hwtimestamp) {
const int timestamping_flags = (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RX_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE);
if (setsockopt(s[i], SOL_SOCKET, SO_TIMESTAMPING,
×tamping_flags, sizeof(timestamping_flags)) < 0) {
perror("setsockopt SO_TIMESTAMPING is not supported by your Linux kernel");
return 1;
}
} else {
const int timestamp_on = 1;
if (setsockopt(s[i], SOL_SOCKET, SO_TIMESTAMP,
×tamp_on, sizeof(timestamp_on)) < 0) {
perror("setsockopt SO_TIMESTAMP");
return 1;
}
}
}
if (dropmonitor) {
const int dropmonitor_on = 1;
if (setsockopt(s[i], SOL_SOCKET, SO_RXQ_OVFL,
&dropmonitor_on, sizeof(dropmonitor_on)) < 0) {
perror("setsockopt SO_RXQ_OVFL not supported by your Linux Kernel");
return 1;
}
}
if (bind(s[i], (struct sockaddr *)&addr, sizeof(addr)) < 0) {
perror("bind");
return 1;
}
}
if (log) {
time_t currtime;
struct tm now;
char fname[sizeof("candump-2006-11-20_202026.log")+1];
if (time(&currtime) == (time_t)-1) {
perror("time");
return 1;
}
localtime_r(&currtime, &now);
sprintf(fname, "candump-%04d-%02d-%02d_%02d%02d%02d.log",
now.tm_year + 1900,
now.tm_mon + 1,
now.tm_mday,
now.tm_hour,
now.tm_min,
now.tm_sec);
if (silent != SILENT_ON)
printf("\nWarning: console output active while logging!");
fprintf(stderr, "\nEnabling Logfile '%s'\n\n", fname);
logfile = fopen(fname, "w");
if (!logfile) {
perror("logfile");
return 1;
}
}
/* these settings are static and can be held out of the hot path */
iov.iov_base = &frame;
msg.msg_name = &addr;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = &ctrlmsg;
while (running) {
/*------------------------------- Opening the Serial Port -------------------------------*/
/* Change /dev/ttyUSB0 to the one corresponding to your system */
fd = open("/dev/ttyO2",O_RDWR | O_NOCTTY); /* ttyUSB0 is the FT232 based USB2SERIAL Converter */
/* O_RDWR - Read/Write access to serial port */
/* O_NOCTTY - No terminal will control the process */
/* Open in blocking mode,read will wait */
/* Error Checking */
if(fd == -1)
printf("\n Error! in Opening ttyO2 ");
else
printf("\n ttyO2 Opened Successfully ");
/*---------- Setting the Attributes of the serial port using termios structure --------- */
//struct termios SerialPortSettings; /* Create the structure */
tcgetattr(fd, &SerialPortSettings); /* Get the current attributes of the Serial port */
/* Setting the Baud rate */
cfsetispeed(&SerialPortSettings,B9600); /* Set Read Speed as 9600 */
cfsetospeed(&SerialPortSettings,B9600); /* Set Write Speed as 9600 */
/* 8N1 Mode */
SerialPortSettings.c_cflag &= ~PARENB; /* Disables the Parity Enable bit(PARENB),So No Parity */
SerialPortSettings.c_cflag &= ~CSTOPB; /* CSTOPB = 2 Stop bits,here it is cleared so 1 Stop bit */
SerialPortSettings.c_cflag &= ~CSIZE; /* Clears the mask for setting the data size */
SerialPortSettings.c_cflag |= CS8; /* Set the data bits = 8 */
SerialPortSettings.c_cflag &= ~CRTSCTS; /* No Hardware flow Control */
SerialPortSettings.c_cflag |= CREAD | CLOCAL; /* Enable receiver,Ignore Modem Control lines */
SerialPortSettings.c_iflag &= ~(IXON | IXOFF | IXANY); /* Disable XON/XOFF flow control both i/p and o/p */
SerialPortSettings.c_iflag &= ~(ICANON | ECHO | ECHOE | ISIG); /* Non Cannonical mode */
SerialPortSettings.c_oflag &= ~OPOST;/*No Output Processing*/
/* Setting Time outs */
SerialPortSettings.c_cc[VMIN] = 42; /* Read at least 42 characters */
SerialPortSettings.c_cc[VTIME] = 0; /* Wait indefinetly */
if((tcsetattr(fd,TCSANOW,&SerialPortSettings)) != 0) /* Set the attributes to the termios structure*/
printf("\n ERROR ! in Setting attributes");
else
printf("\n BaudRate = 9600 \n StopBits = 1 \n Parity = none \n\n");
/*------------------------------- Read data from serial port -----------------------------*/
tcflush(fd, TCIFLUSH); /* Discards old data in the rx buffer */
char read_buffer[42]; /* Buffer to store the data received */
int bytes_read = 0; /* Number of bytes read by the read() system call */
int ia = 0; int a;
int test = 0;
char new_read[38];
char curr_read[33];
int countc = 0;
a = 0;
do{
bytes_read = read(fd,&read_buffer,42); /* Read the data */
if(read_buffer[0] == '$')
if(read_buffer[1] == 'G')
if(read_buffer[2] == 'P')
if(read_buffer[3] == 'G')
if(read_buffer[4] == 'G'){
for(ia=7;ia<bytes_read;ia++){ /*printing only the received characters*/
new_read[a] = read_buffer[ia];
//printf("%c",read_buffer[ia]);
a = a+1;
test = 1;
}
strcpy(curr_read, new_read);
//printf("\n%s \n", curr_read);
}
else
test = 0;
else
test = 0;
else
test = 0;
else
test = 0;
else
test = 0;
}while(test == 0);
//tcflush(fd, TCIFLUSH); /* Discards old data in the rx buffer */
close(fd); /* Close the serial port */
while(countc < 1000){
FD_ZERO(&rdfs);
for (i=0; i<currmax; i++)
FD_SET(s[i], &rdfs);
if (timeout_current)
*timeout_current = timeout_config;
if ((ret = select(s[currmax-1]+1, &rdfs, NULL, NULL, timeout_current)) <= 0) {
//perror("select");
running = 0;
continue;
}
for (i=0; i<currmax; i++) { /* check all CAN RAW sockets */
if (FD_ISSET(s[i], &rdfs)) {
int idx;
/* these settings may be modified by recvmsg() */
iov.iov_len = sizeof(frame);
msg.msg_namelen = sizeof(addr);
msg.msg_controllen = sizeof(ctrlmsg);
msg.msg_flags = 0;
nbytes = recvmsg(s[i], &msg, 0);
idx = idx2dindex(addr.can_ifindex, s[i]);
if (nbytes < 0) {
if ((errno == ENETDOWN) && !down_causes_exit) {
fprintf(stderr, "%s: interface down\n", devname[idx]);
continue;
}
perror("read");
return 1;
}
if ((size_t)nbytes == CAN_MTU)
maxdlen = CAN_MAX_DLEN;
else if ((size_t)nbytes == CANFD_MTU)
maxdlen = CANFD_MAX_DLEN;
else {
fprintf(stderr, "read: incomplete CAN frame\n");
return 1;
}
if (count && (--count == 0))
running = 0;
if (bridge) {
if (bridge_delay)
usleep(bridge_delay);
nbytes = write(bridge, &frame, nbytes);
if (nbytes < 0) {
perror("bridge write");
return 1;
} else if ((size_t)nbytes != CAN_MTU && (size_t)nbytes != CANFD_MTU) {
fprintf(stderr,"bridge write: incomplete CAN frame\n");
return 1;
}
}
for (cmsg = CMSG_FIRSTHDR(&msg);
cmsg && (cmsg->cmsg_level == SOL_SOCKET);
cmsg = CMSG_NXTHDR(&msg,cmsg)) {
if (cmsg->cmsg_type == SO_TIMESTAMP) {
memcpy(&tv, CMSG_DATA(cmsg), sizeof(tv));
} else if (cmsg->cmsg_type == SO_TIMESTAMPING) {
struct timespec *stamp = (struct timespec *)CMSG_DATA(cmsg);
/*
* stamp[0] is the software timestamp
* stamp[1] is deprecated
* stamp[2] is the raw hardware timestamp
* See chapter 2.1.2 Receive timestamps in
* linux/Documentation/networking/timestamping.txt
*/
tv.tv_sec = stamp[2].tv_sec;
tv.tv_usec = stamp[2].tv_nsec/1000;
} else if (cmsg->cmsg_type == SO_RXQ_OVFL)
memcpy(&dropcnt[i], CMSG_DATA(cmsg), sizeof(__u32));
}
/* check for (unlikely) dropped frames on this specific socket */
if (dropcnt[i] != last_dropcnt[i]) {
__u32 frames = dropcnt[i] - last_dropcnt[i];
if (silent != SILENT_ON)
printf("DROPCOUNT: dropped %d CAN frame%s on '%s' socket (total drops %d)\n",
frames, (frames > 1)?"s":"", devname[idx], dropcnt[i]);
if (log)
fprintf(logfile, "DROPCOUNT: dropped %d CAN frame%s on '%s' socket (total drops %d)\n",
frames, (frames > 1)?"s":"", devname[idx], dropcnt[i]);
last_dropcnt[i] = dropcnt[i];
}
/* once we detected a EFF frame indent SFF frames accordingly */
if (frame.can_id & CAN_EFF_FLAG)
view |= CANLIB_VIEW_INDENT_SFF;
if (log) { /* CODE GETS IN TO THIS PART */
char buf[CL_CFSZ]; /* max length */ /* WHEN PRINTING INTO FILE */
/* */
/* log CAN frame with absolute timestamp & device */ /* */
sprint_canframe(buf, &frame, 0, maxdlen); /* */
fprintf(logfile, "%s %*s %s\n", /* */
curr_read, /* */
max_devname_len, devname[idx], buf); /* */
} /* */
if (logfrmt) {
char buf[CL_CFSZ]; /* max length */
/* print CAN frame in log file style to stdout */
sprint_canframe(buf, &frame, 0, maxdlen);
printf("(%010ld.%06ld) %*s %s\n",
tv.tv_sec, tv.tv_usec,
max_devname_len, devname[idx], buf);
goto out_fflush; /* no other output to stdout */
}
if (silent != SILENT_OFF){
if (silent == SILENT_ANI) {
printf("%c\b", anichar[silentani%=MAXANI]);
silentani++;
}
goto out_fflush; /* no other output to stdout */
}
printf(" %s", (color>2)?col_on[idx%MAXCOL]:"");
switch (timestamp) {
case 'a': /* absolute with timestamp */
printf("(%010ld.%06ld) ", tv.tv_sec, tv.tv_usec);
break;
case 'A': /* absolute with date */
{
struct tm tm;
char timestring[25];
tm = *localtime(&tv.tv_sec);
strftime(timestring, 24, "%Y-%m-%d %H:%M:%S", &tm);
printf("(%s.%06ld) ", timestring, tv.tv_usec);
}
break;
case 'd': /* delta */
case 'z': /* starting with zero */
{
struct timeval diff;
if (last_tv.tv_sec == 0) /* first init */
last_tv = tv;
diff.tv_sec = tv.tv_sec - last_tv.tv_sec;
diff.tv_usec = tv.tv_usec - last_tv.tv_usec;
if (diff.tv_usec < 0)
diff.tv_sec--, diff.tv_usec += 1000000;
if (diff.tv_sec < 0)
diff.tv_sec = diff.tv_usec = 0;
printf("(%03ld.%06ld) ", diff.tv_sec, diff.tv_usec);
if (timestamp == 'd')
last_tv = tv; /* update for delta calculation */
}
break;
default: /* no timestamp output */
break;
}
printf(" %s", (color && (color<3))?col_on[idx%MAXCOL]:"");
printf("%*s", max_devname_len, devname[idx]);
if (extra_msg_info) {
if (msg.msg_flags & MSG_DONTROUTE)
printf (" TX %s", extra_m_info[frame.flags & 3]);
else
printf (" RX %s", extra_m_info[frame.flags & 3]);
}
printf("%s ", (color==1)?col_off:"");
fprint_long_canframe(stdout, &frame, NULL, view, maxdlen);
printf("%s", (color>1)?col_off:"");
printf("\n");
}
out_fflush:
fflush(stdout);
}
countc = countc +1;
}
}
for (i=0; i<currmax; i++)
close(s[i]);
if (bridge)
close(bridge);
if (log)
fclose(logfile);
return 0;
}
Everything that matters happens in the while(running) block. When I comment out bytes_read = read(fd,&read_buffer,42); inside this block, nothing is written, but the Segmentation fault does not occur either. The same happens depending on whether the GPS' TX pin is connected to the BBB. So the problem starts to occur when data is coming from the GPS and is read by the BBB.
Segmentation Fault Err
What should I do about it?
Thanks.
Your GPS reading code
char new_read[38];
char curr_read[33];
strcpy(curr_read, new_read);
is copying a 38 char buffer into a 33 char buffer, which can result in bad things.
strcpy copies the contents of the source buffer into the destination buffer until it reads the terminating NUL character ('\0') from the source buffer. If the NUL character is at, say, the 36th position in new_read, strcpy will write past the end of the 33-byte curr_read into unrelated memory, which can cause the segmentation fault.
I am guessing that when you run your GPS reading code as stand-alone, the writing into random memory goes un-noticed, but when you combine it with the CAN bus reading, it writes into allocated space and the error happens.
I'm trying to send a variable length string via UART, using HAL function.
I cannot find a way to send a string whose length changes at runtime. I have tried various declarations, inside and outside the while loop, but unless I declare a fixed-length string (char buffer[30], for example), HAL does not accept it.
I tried char buffer;, char buffer[] = "", and even char buffer = malloc(sizeof(char)); none of them works. On the terminal I see only a few characters or nothing at all.
Is there a way to pass a string that is variable in length to HAL?
Thanks.
/**
 * Firmware entry point.
 *
 * Initialises the HAL, system clock, GPIO and USART2, then loops forever:
 * every 500 ms it formats a status line (which one depends on the B1 pin
 * state), transmits it over huart2, and toggles LD2.
 *
 * The line is formatted into a fixed-size stack buffer with snprintf(), and
 * the number of bytes handed to HAL_UART_Transmit() is the length snprintf()
 * reports. The original wrote through an uninitialised `char *` and passed
 * `sizeof(pointer) - 1` as the length, which is undefined behaviour and
 * unrelated to the actual string length.
 */
int main(void)
{
  /* USER CODE BEGIN 1 */
  /* USER CODE END 1 */
  /* MCU Configuration----------------------------------------------------------*/
  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();
  /* USER CODE BEGIN Init */
  /* USER CODE END Init */
  /* Configure the system clock */
  SystemClock_Config();
  /* USER CODE BEGIN SysInit */
  /* USER CODE END SysInit */
  /* Initialize all configured peripherals */
  MX_GPIO_Init();
  MX_USART2_UART_Init();
  /* USER CODE BEGIN 2 */
  uint32_t index1 = 0, index2 = 0;
  const char message[] = "Hello from Nucleo64";
  const char message2[] = "Pressed!";
  /* USER CODE END 2 */
  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */
    /* USER CODE BEGIN 3 */
    /* Large enough for the longest message plus a 10-digit index. */
    char buffer[64];
    int len;

    if (HAL_GPIO_ReadPin(B1_GPIO_Port, B1_Pin)) {
      /* uint32_t is not guaranteed to be `int`; print it as unsigned long. */
      len = snprintf(buffer, sizeof buffer, "%s - index=%lu\n",
                     message, (unsigned long)index1);
      index1 += 1;
    } else {
      len = snprintf(buffer, sizeof buffer, "%s - index=%lu\n",
                     message2, (unsigned long)index2);
      index2 += 1;
    }

    if (len > 0) {
      /* snprintf returns the untruncated length; clamp to what was stored. */
      if (len >= (int)sizeof buffer) {
        len = (int)sizeof buffer - 1;
      }
      HAL_UART_Transmit(&huart2, (uint8_t *)buffer, (uint16_t)len, 10);
    }

    HAL_GPIO_TogglePin(LD2_GPIO_Port, LD2_Pin);
    HAL_Delay(500);
  }
  /* USER CODE END 3 */
}
As suggested, I also tried snprintf. On the terminal I see nothing but [00] when I reset the board, and after that nothing is transmitted.
/* Per-branch message counters; each is incremented after its message is sent. */
uint32_t index1 = 0, index2 = 0;
char message[] = "Hello from Nucleo64";
char message2[] = "Pressed!";
/* USER CODE END 2 */
/* Infinite loop */
/* USER CODE BEGIN WHILE */
while (1)
{
/* USER CODE END WHILE */
/* USER CODE BEGIN 3 */
if(HAL_GPIO_ReadPin(B1_GPIO_Port, B1_Pin)){
/* First snprintf call only measures: with a NULL buffer and size 0 it returns
 * the formatted length, and +1 leaves room for the terminating NUL. */
size_t needed = snprintf(NULL, 0, "%s - index=%d\n", message, index1) + 1;
/* NOTE(review): the malloc result is used without a NULL check. */
char *buffer = malloc(needed);
snprintf(buffer, needed, "%s - index=%d\n", message, index1);
/* Transmit exactly the formatted characters (no NUL), with a timeout of 10. */
HAL_UART_Transmit(&huart2, (uint8_t *)buffer, strlen(buffer), 10);
index1 += 1;
free(buffer);
} else
{
/* Same measure / allocate / format / send / free sequence for message2. */
size_t needed = snprintf(NULL, 0, "%s - index=%d\n", message2, index2) + 1;
/* NOTE(review): the malloc result is used without a NULL check. */
char *buffer2 = malloc(needed);
snprintf(buffer2, needed, "%s - index=%d\n", message2, index2);
HAL_UART_Transmit(&huart2, (uint8_t *)buffer2, strlen(buffer2), 10);
index2 += 1;
free(buffer2);
}
/* Toggle LD2 and wait 500 between iterations. */
HAL_GPIO_TogglePin(LD2_GPIO_Port, LD2_Pin);
HAL_Delay(500);
}
/* USER CODE END 3 */
}
SOLVED *****************************
I used printf after declaring the putchar prototype (the __GNUC__ variant):
/*
 * PUTCHAR_PROTOTYPE expands to the signature of the character-output hook:
 * `int __io_putchar(int ch)` when __GNUC__ is defined, otherwise
 * `int fputc(int ch, FILE *f)`.
 */
#ifdef __GNUC__
#define PUTCHAR_PROTOTYPE int __io_putchar(int ch)
#else
#define PUTCHAR_PROTOTYPE int fputc(int ch, FILE *f)
#endif
and implementing it with HAL_UART_Transmit:
/*
 * Character-output hook: transmits one byte over huart2 and hands the
 * character back to the caller.
 */
PUTCHAR_PROTOTYPE
{
  /* One byte is sent, starting at the address of ch; timeout is 0xFFFF. */
  uint8_t *first_byte = (uint8_t *)&ch;

  HAL_UART_Transmit(&huart2, first_byte, 1, 0xFFFF);
  return ch;
}
I am working on an MPI I/O problem. Rank 0 reads positions from a parameter file and sends them to ranks 1, 2 and 3. Each of these processes (1, 2, 3) reads text from the input file at the position rank 0 gave it and writes it to a different line of an output file. When I run the program on a single computer, everything is fine. But when I use 2 computers (still 4 processes, ranks 0 and 1 on the server and ranks 2 and 3 on the client), some random lines of the output file go missing! Here is my code:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
//define the message
#define MSG_MISSION_COMPLETE 78
#define MSG_EXIT 79
//define a structural message of MPI
/*
 * Description of the derived datatype `location`, which mirrors the task
 * record exchanged between master and slaves:
 *
 *     struct { float pause; int stand; int offset; }
 *
 * One element per field, at displacements 0, sizeof(float) and
 * sizeof(float) + sizeof(int). The element types must match the fields
 * (float, int, int); the second entry used to be MPI_FLOAT, which does not
 * describe the `int stand` field.
 */
int array_of_blocklengths[3] = { 1, 1, 1 };
MPI_Aint array_of_displacements[3] = { 0, sizeof(float), sizeof(float) + sizeof(int) };
MPI_Datatype array_of_types[3] = { MPI_FLOAT, MPI_INT, MPI_INT };
/* Handle for the committed datatype; created by master() and slave(). */
MPI_Datatype location;
int master();
int slave(MPI_File fhr, MPI_File fhw);
int main(int argc, char* argv[])
{
int rank;
MPI_File fhr, fhw;
char read[] = "./sharedReadSample1.txt";
char write[] = "./sharedWriteSample1.txt";
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("%d is speaking\n", rank);
MPI_File_open(MPI_COMM_WORLD, read, MPI_MODE_RDONLY, MPI_INFO_NULL, &fhr);
MPI_File_open(MPI_COMM_WORLD, write, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fhw);
if (rank == 0)//rank 0, dispatch the tasks
master();
else//other processes
slave(fhr, fhw);
MPI_Finalize();
printf("%d said byebye\n", rank);
MPI_File_close(&fhr);
MPI_File_close(&fhw);
return 0;
}
/*
 * Master (rank 0): reads task records from "sharedAttributeSample1.txt",
 * hands them to the slaves one at a time, and gives the next record to
 * whichever slave reports completion first.
 *
 * Protocol (unchanged):
 *   - a task is one `location` element sent with tag = task index (< 10000);
 *   - a slave answers with one MPI_INT equal to MSG_MISSION_COMPLETE;
 *   - a message with tag >= 10000 tells a slave to quit.
 *
 * Fixes relative to the original:
 *   - only records that fscanf actually parsed are dispatched (the original
 *     always dispatched 10000 records, including unparsed all-zero ones);
 *   - every dispatched task's completion message is received before the quit
 *     messages go out, so no messages are left pending;
 *   - the quit message on the "file not opened" path is sent with the
 *     `location` datatype the slaves receive with (it used to be MPI_INT);
 *   - the log file is only written/closed when it was opened successfully;
 *   - running with zero slaves no longer blocks forever in MPI_Recv;
 *   - MPI_Type_create_struct replaces the removed MPI_Type_struct.
 *
 * Always returns 0.
 */
int master()
{
    enum { MAX_TASKS = 10000 };   /* also the first tag value meaning "quit" */
    int i, size, nslave;
    int ntasks = 0;               /* records successfully parsed */
    int next = 0;                 /* index of the next task to hand out */
    int outstanding = 0;          /* tasks sent but not yet acknowledged */
    int buf[256];
    struct{
        float pause;  /* pause time */
        int stand;    /* starting position in the file */
        int offset;   /* offset */
    }buf_str[MAX_TASKS] = { {0.0,0,0} };
    FILE* fp;
    FILE* fpm = NULL;             /* master log; may stay NULL */
    MPI_Status status;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    nslave = size - 1;            /* the number of slaves */

    /* Build the datatype first: the quit messages on every path need it. */
    MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
                           array_of_types, &location);
    MPI_Type_commit(&location);

    fp = fopen("sharedAttributeSample1.txt", "rb");
    if (fp == NULL)
    {
        printf("The file was not opened\n");
        getchar();
    }
    else
    {
        printf("The file was opened\n");
        fpm = fopen("./logs/log_master.txt","wb");
        if (fpm == NULL)
            printf("master log system failed to load!\n");

        /* Stop at the first record that does not parse completely. */
        while (ntasks < MAX_TASKS &&
               fscanf(fp,"%f,%d,%d", &buf_str[ntasks].pause,
                      &buf_str[ntasks].stand, &buf_str[ntasks].offset) == 3)
        {
            ntasks++;
        }
        fclose(fp);

        /* Prime every slave with one task (or fewer if tasks run out). */
        for (i = 0; i < nslave && next < ntasks; i++)
        {
            MPI_Send(&buf_str[next], 1, location, i+1, next, MPI_COMM_WORLD);
            if (fpm != NULL)
                fprintf(fpm, "initial message %d sent\n", next);
            next++;
            outstanding++;
        }

        /* Collect completions; refill the reporting slave while tasks remain. */
        while (outstanding > 0)
        {
            MPI_Recv(buf, 256, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &status);
            outstanding--;
            if (fpm != NULL)
                fprintf(fpm, "task %d complete massage received\n", status.MPI_TAG);
            if (buf[0] == MSG_MISSION_COMPLETE && next < ntasks)
            {
                if (fpm != NULL)
                    fprintf(fpm, "task %d is sent to %d \n", next, status.MPI_SOURCE);
                MPI_Send(&buf_str[next], 1, location, status.MPI_SOURCE, next,
                         MPI_COMM_WORLD);
                next++;
                outstanding++;
            }
        }
    }

    /* Tell every slave to quit: the tag (>= MAX_TASKS) carries the meaning,
     * the payload is ignored by the receiver. */
    for (i = 0; i < nslave; i++)
    {
        MPI_Send(&buf_str[0], 1, location, i+1, MAX_TASKS + i, MPI_COMM_WORLD);
    }

    if (fpm != NULL)
        fclose(fpm);
    MPI_Type_free(&location);
    return 0;
}
/*
 * Slave (rank > 0): repeatedly receives a task record from rank 0, sleeps for
 * the requested time, copies `offset` characters starting at byte `stand` of
 * the input file into line number <tag> of the output file (each line is
 * offset + 1 bytes including the trailing '\n'), and reports completion.
 * A message whose tag is >= 10000 ends the loop.
 *
 * fhr: handle of the input file, fhw: handle of the output file.
 * Always returns 0.
 *
 * Fixes relative to the original:
 *   - `offset` is validated against the 256-byte line buffer before use (it
 *     was used as an index unchecked); an invalid task is skipped but still
 *     acknowledged so the master keeps going;
 *   - bytes a short or failed read did not deliver are padded with spaces
 *     instead of writing uninitialised memory to the output file;
 *   - the write position is computed as MPI_Offset, not int;
 *   - a missing log file no longer causes fprintf/fclose on NULL;
 *   - the log file name is built with a bounded snprintf;
 *   - MPI_Type_create_struct replaces the removed MPI_Type_struct.
 */
int slave(MPI_File fhr, MPI_File fhw)
{
    enum { EXIT_TAG_BASE = 10000 };  /* tags at or above this mean "quit" */
    struct{
        float pause;
        int stand;
        int offset;
    }buf_str;
    char buf[256];
    int done_msg = MSG_MISSION_COMPLETE;
    int rank;
    char name[64];
    FILE* log;                       /* this slave's own log; may be NULL */
    MPI_Status status;
    MPI_Status status_read;
    MPI_Status status_write;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* open this slave's own log file */
    snprintf(name, sizeof name, "./logs/logfile_slave%d", rank);
    log = fopen(name, "w");
    if (log == NULL)
        printf("failed to open logfile of slave %d\n", rank);

    MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements,
                           array_of_types, &location);
    MPI_Type_commit(&location);

    for (;;)
    {
        /* receive the message from master */
        MPI_Recv(&buf_str, 1, location, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (log != NULL)
            fprintf(log, "process %d message %d received\n", rank, status.MPI_TAG);

        if (status.MPI_TAG >= EXIT_TAG_BASE)
            break;                   /* quit message */

        /* sleep, to simulate a computing process (whole seconds only) */
        if (buf_str.pause > 0)
            sleep((unsigned int)buf_str.pause);
        if (log != NULL)
            fprintf(log, "process %d sleep for %f seconds\n", rank, buf_str.pause);

        if (buf_str.offset < 0 || buf_str.offset >= (int)sizeof buf)
        {
            /* Needs offset characters plus '\n' inside buf. */
            if (log != NULL)
                fprintf(log, "%d skipped task %d: invalid length %d\n",
                        rank, status.MPI_TAG, buf_str.offset);
        }
        else
        {
            int nread = 0;
            int k;
            int rc;

            /* read from the position given */
            rc = MPI_File_read_at(fhr, (MPI_Offset)buf_str.stand, buf,
                                  buf_str.offset, MPI_CHAR, &status_read);
            if (rc == MPI_SUCCESS)
                MPI_Get_count(&status_read, MPI_CHAR, &nread);
            if (nread < 0)
                nread = 0;
            if (nread < buf_str.offset && log != NULL)
                fprintf(log, "%d short read in task %d: %d of %d bytes\n",
                        rank, status.MPI_TAG, nread, buf_str.offset);
            /* Keep the fixed line width without emitting uninitialised bytes. */
            for (k = nread; k < buf_str.offset; k++)
                buf[k] = ' ';

            buf[buf_str.offset] = '\n';  /* line terminator */
            rc = MPI_File_write_at(fhw,
                                   (MPI_Offset)status.MPI_TAG * (buf_str.offset + 1),
                                   buf, buf_str.offset + 1, MPI_CHAR, &status_write);
            if (rc != MPI_SUCCESS && log != NULL)
                fprintf(log, "%d failed to write task %d\n", rank, status.MPI_TAG);

            if (log != NULL)
                fprintf(log, "%d has done task %d\n", rank, status.MPI_TAG);
        }

        /* send task complete message to master */
        MPI_Send(&done_msg, 1, MPI_INT, 0, status.MPI_TAG, MPI_COMM_WORLD);
    }

    if (log != NULL)
        fclose(log);
    MPI_Type_free(&location);
    return 0;
}
I have to find the overhead associated with NUMA memory page migration under Linux.
Can you please tell me which tools I can use?
If possible could you show an example.
If you want to understand whether your system is doing excessive remote-node memory accesses and you're using Intel CPUs, Intel's PMU has a utility called vtbwrun to report the QPI/uncore activity.
If you want to see how long it takes to execute a page migration, you can measure the duration of calls to numa_move_pages (provided by the numactl package).
Here's an example:
/*
* Test program to test the moving of a processes pages.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter#sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include "../numa.h"
#include <unistd.h>
#include <errno.h>
/* System page size; main() sets it from getpagesize(). */
unsigned int pagesize;
/* Number of pages under test; defaults to 32, main() overwrites it from argv[1]. */
unsigned int page_count = 32;
/* Raw malloc'ed buffer that backs the test pages. */
char *page_base;
/* page_base rounded up to a page boundary; page i starts at pages + i * pagesize. */
char *pages;
/* Per-page address array passed to numa_move_pages(). */
void **addr;
/* Per-page status array filled in by numa_move_pages(). */
int *status;
/* Per-page target node array passed to numa_move_pages(); main() sets every entry to 1. */
int *nodes;
/* Count of failures detected by main(); decides the exit code. */
int errors;
/* numa_max_node() + 1, computed in main(). */
int nr_nodes;
/* Bitmask with bit 1 set; passed to numa_migrate_pages() as the source nodes. */
struct bitmask *old_nodes;
/* Bitmask with bit 0 set; passed to numa_migrate_pages() as the destination nodes. */
struct bitmask *new_nodes;
/*
 * Test driver for page migration.
 *
 * Allocates page_count page-aligned pages (default 32, or argv[1]), touches
 * all of them except page 2, moves them to node 1 with numa_move_pages(),
 * verifies their placement, then migrates the whole process from node 1 to
 * node 0 with numa_migrate_pages() and checks that every touched page kept
 * its contents and now reports node 0.
 *
 * Returns 0 when no errors were counted, 1 otherwise; exits with 1 on setup
 * failures (no NUMA support, fewer than 2 nodes, allocation failure, a failed
 * initial move, or wrong placement before migration).
 *
 * Fixes relative to the original:
 *   - the backing buffer is pagesize * (page_count + 1) bytes; the original
 *     (pagesize + 1) * page_count left too little slack once `pages` was
 *     rounded up to a page boundary, so the last page could overrun it;
 *   - argv[1] is parsed with %u to match the unsigned page_count, and an
 *     unparsable or zero value falls back to the default;
 *   - status/nodes are sized by their element type, not sizeof(int *);
 *   - %p receives a void *;
 *   - numa_available() is checked before any other libnuma call;
 *   - the node-count check runs before bitmask bits are set.
 */
int main(int argc, char **argv)
{
	unsigned int i;
	int rc;

	if (numa_available() < 0) {
		printf("NUMA is not available on this system.\n");
		exit(1);
	}

	pagesize = getpagesize();

	nr_nodes = numa_max_node()+1;

	if (nr_nodes < 2) {
		printf("A minimum of 2 nodes is required for this test.\n");
		exit(1);
	}

	old_nodes = numa_bitmask_alloc(nr_nodes);
	new_nodes = numa_bitmask_alloc(nr_nodes);
	numa_bitmask_setbit(old_nodes, 1);
	numa_bitmask_setbit(new_nodes, 0);

	setbuf(stdout, NULL);
	printf("migrate_pages() test ......\n");
	if (argc > 1) {
		if (sscanf(argv[1], "%u", &page_count) != 1 || page_count == 0) {
			printf("Invalid page count \"%s\", using 32.\n", argv[1]);
			page_count = 32;
		}
	}

	/* One extra page of slack so the region can be aligned up to a page. */
	page_base = malloc((size_t)pagesize * ((size_t)page_count + 1));
	addr = malloc(sizeof *addr * page_count);
	status = malloc(sizeof *status * page_count);
	nodes = malloc(sizeof *nodes * page_count);
	if (!page_base || !addr || !status || !nodes) {
		printf("Unable to allocate memory\n");
		exit(1);
	}

	/* Round page_base down to a page boundary, then step to the next page. */
	pages = (char *) ((((unsigned long)page_base) &
			   ~((unsigned long)pagesize - 1)) + pagesize);

	for (i = 0; i < page_count; i++) {
		if (i != 2)
			/* We leave page 2 unallocated */
			pages[ (size_t)i * pagesize ] = (char) i;
		addr[i] = pages + (size_t)i * pagesize;
		nodes[i] = 1;
		status[i] = -123;
	}

	/* Move to starting node */
	rc = numa_move_pages(0, page_count, addr, nodes, status, 0);
	if (rc < 0 && errno != ENOENT) {
		perror("move_pages");
		exit(1);
	}

	/* Verify correct startup locations */
	printf("Page location at the beginning of the test\n");
	printf("------------------------------------------\n");

	/* A NULL node list only queries the current node of each page. */
	numa_move_pages(0, page_count, addr, NULL, status, 0);
	for (i = 0; i < page_count; i++) {
		printf("Page %u vaddr=%p node=%d\n", i,
		       (void *)(pages + (size_t)i * pagesize), status[i]);
		if (i != 2 && status[i] != 1) {
			printf("Bad page state before migrate_pages. Page %u status %d\n",i, status[i]);
			exit(1);
		}
	}

	/* Repeat the move request; every entry of nodes[] is still 1 here. */
	numa_move_pages(0, page_count, addr, nodes, status, 0);

	printf("\nMigrating the current processes pages ...\n");
	rc = numa_migrate_pages(0, old_nodes, new_nodes);

	if (rc < 0) {
		perror("numa_migrate_pages failed");
		errors++;
	}

	/* Get page state after migration */
	numa_move_pages(0, page_count, addr, NULL, status, 0);
	for (i = 0; i < page_count; i++) {
		printf("Page %u vaddr=%lx node=%d\n", i,
		       (unsigned long)(pages + (size_t)i * pagesize), status[i]);
		if (i != 2) {
			if (pages[ (size_t)i * pagesize ] != (char) i) {
				printf("*** Page contents corrupted.\n");
				errors++;
			} else if (status[i]) {
				/* Anything other than node 0 is wrong after migration. */
				printf("*** Page on the wrong node\n");
				errors++;
			}
		}
	}

	if (!errors)
		printf("Test successful.\n");
	else
		printf("%d errors.\n", errors);

	free(nodes);
	free(status);
	free(addr);
	free(page_base);
	numa_bitmask_free(old_nodes);
	numa_bitmask_free(new_nodes);

	return errors > 0 ? 1 : 0;
}