STM32F105, arm-none-eabi-gcc, Contiki: Storing float in struct and printing float in C fails - struct

I have two typedef struct as shown below:
typedef struct{
UInt32 length;
void* data;
UInt16 value;
} my_type;
typedef struct{
UInt8 type;
UInt32 length;
void* value;
} tlv_t;
What I trying next is to allocate memory for an my_type struct, a tlv_t struct that is pointed to from the created my_type object and for a float number, which is pointed to from the tlv_t object.If I'm executing the code without the last line of the code below it is working well. I can store the value and I can access it.But as soon as I try to access it a second time the uploaded code isn't running at all anymore on the STM32F105 Contiki-based board. The odd part is that this is only the case when using floating point numbers. No problems at all with other datatypes like int. Unfortunately, I really need to use float... What am I doing wrong?
Another problem is that printf doesn't support some flags like %f or %ul. Does anybody know how to add support for it on Contiki?
my_type* t = malloc(sizeof(my_type));
t->data = malloc(sizeof(tlv_t));
tlv_t* tv = t->data;
tv->type = 10;
tv->length = sizeof(float);
tv->value = malloc(sizeof(float));
*(float*) tv->value = 212.32;
printf("tv->value: %i\n", (int) *(float*) tv->value);
printf("tv->value: %i\n", (int) *(float*) tv->value); // without this line it is working
EDIT:
I forgot to add these typedefs:
typedef unsigned char UInt8;
typedef unsigned short UInt16;
typedef unsigned long UInt32;
EDIT2:
Here is the complete code:
#include <contiki.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cfs/cfs.h>
#include <cfs/cfs-coffee.h>
#include "arg.h"
/*---------------------------------------------------------------------------*/
PROCESS(main_process, "Contiki CLV build015_1");
AUTOSTART_PROCESSES(&main_process);
/*---------------------------------------------------------------------------*/
PROCESS_THREAD(main_process, ev, data)
{
PROCESS_BEGIN();
my_type* t = malloc(sizeof(my_type));
t->data = malloc(sizeof(tlv_t));
tlv_t* tv = t->data;
tv->type = 10;
tv->length = sizeof(float);
tv->value = malloc(sizeof(float));
*(float*) tv->value = 212.32;
printf("tv->value: %i\n", (int) *(float*) tv->value);
printf("tv->value: %i\n", (int) *(float*) tv->value); // without this line it is working
while (1) {
PROCESS_YIELD();
}
PROCESS_END();
}
EDIT3:
I'm using the latest arm-none-eabi-gcc (version 4_8-2013q4-20131204). Are there any known issues when dealing with structs, floats or memory management?

Try
PROCESS_THREAD(main_process, ev, data)
{
static my_type *t;
static tlv_t *tv;
static float f = 212.32;
PROCESS_BEGIN();
t = (my_type *)malloc(sizeof(my_type));
t->data = malloc(sizeof(tlv_t));
tv = (tlv_t *)t->data;
tv->type = 10;
tv->length = sizeof(float);
tv->value = malloc(sizeof(float));
//*(float *) tv->value = 212.32;
memmove(tv->value, &f, 4);
printf("tv->value: %i\n", (int) *(float*) tv->value);
printf("tv->value: %i\n", (int) *(float*) tv->value); // without this line it is working
printf("t address: %x \n", (unsigned int)t);
while (1) {
PROCESS_YIELD();
}
PROCESS_END();
}

I suggest you fix your code, so you get no compiler warnings anymore (don't turn them off). Add casts as needed.
After I did those fixes your code worked for me, so the code is ugly but ok.
#define UInt32 unsigned int
#define UInt16 unsigned short
#define UInt8 unsigned char
typedef struct{
UInt32 length;
void* data;
UInt16 value;
} my_type;
typedef struct{
UInt8 type;
UInt32 length;
void* value;
} tlv_t;
int _tmain(int argc, _TCHAR* argv[])
{
my_type* t = (my_type*)malloc(sizeof(my_type));
t->data = malloc(sizeof(tlv_t));
tlv_t* tv = (tlv_t*)t->data;
tv->type = 10;
tv->length = sizeof(float);
tv->value = malloc(sizeof(float));
*(float*) tv->value = (float)212.32;
printf("tv->value: %i\n", (int) *(float*) tv->value);
printf("tv->value: %i\n", (int) *(float*) tv->value); // without this line it
getchar();
}
gives
tv->value: 212
tv->value: 212

Related

How to get the size of the VDSO on a Linux x86_64 system

I'd like to dump the VDSO to disk in a way that I can verify it is correct with objdump -D.
We can get the base address of the VDSO with getauxval(AT_SYSINFO_EHDR) as documented in vdso(7). But how does one get the size of the object?
I happen to know it is exactly two pages long, but I'm not certain I can rely on that.
I can't see anything in the ELF header that would indicate the size of the object as a whole, and I've also tried iterating and dumping the sections via dl_iterate_phdr(3) to no joy (presumably this would skip the ELF header?).
Any ideas? Do I really have to scrape the size out of the proc maps?
The VDSO header gives you the start of the file. To find the end, calculate the maximum of:
the end of all segments from the program header table (offset + size)
the end of the section header table
the end of the program header table
Then write everything between the start and end to disk. The following program should do the trick:
#include <stdlib.h>
#include <stdio.h>
#include <sys/auxv.h>
struct elf_fhdr_64
{
uint32_t magic;
uint8_t ei_class;
uint8_t ei_data;
uint8_t ei_version;
uint8_t ei_osabi;
uint8_t ei_abiversion;
uint8_t pad[7];
uint16_t e_type;
uint16_t e_machine;
uint32_t e_version;
uint64_t e_entry;
uint64_t e_phoff; // program header offset
uint64_t e_shoff;
uint32_t e_flags;
uint16_t e_ehsize;
uint16_t e_phentsize;
uint16_t e_phnum; // number of program headers
uint16_t e_shentsize;
uint16_t e_shnum;
// ...
};
struct elf_phdr_64
{
uint32_t p_type;
uint32_t p_flags;
uint64_t p_offset; // offset in file
uint64_t p_vaddr;
uint64_t p_paddr;
uint64_t p_filesz; // size in file
// ...
};
struct elf_shdr_64
{
uint32_t sh_name;
uint32_t sh_type;
uint64_t sh_flags;
uint64_t sh_addr; // virtual addr when loaded
uint64_t sh_offset; // offset in file
uint64_t sh_size; // size in file
// ...
};
int main(int argc, char **argv)
{
unsigned long vdso_hdr = getauxval(AT_SYSINFO_EHDR);
uint32_t elf_magic = * (uint32_t *)vdso_hdr;
if (elf_magic == 0x464C457F) {
printf("has elf magic, proceeding...\n");
}
else {
printf("no elf magic.\n");
exit(1);
}
struct elf_fhdr_64 * fhdrp = (struct elf_fhdr_64 *) vdso_hdr;
int num_phdrs = fhdrp->e_phnum;
uint16_t phentsize = fhdrp->e_phentsize;
long max_offs = 0;
for (int i = 0; i < num_phdrs; i++) {
struct elf_phdr_64 * phdr = (struct elf_phdr_64 *)(vdso_hdr
+ fhdrp->e_phoff + i * phentsize);
long file_offs = phdr->p_offset + phdr->p_filesz;
if (max_offs < file_offs) max_offs = file_offs;
}
int num_shdrs = fhdrp->e_shnum;
int shentsize = fhdrp->e_shentsize;
for (int i = 0; i < num_shdrs; i++) {
struct elf_shdr_64 * shdr = (struct elf_shdr_64 *)(vdso_hdr
+ fhdrp->e_shoff + i * shentsize);
long file_offs = shdr->sh_offset + shdr->sh_size;
if (max_offs < file_offs) max_offs = file_offs;
}
// section table:
int section_table_max = fhdrp->e_shoff + (num_shdrs * shentsize);
if (max_offs < section_table_max) max_offs = section_table_max;
// phdrs table:
int phdr_table_max = fhdrp->e_phoff + (num_phdrs * phentsize);
if (max_offs < phdr_table_max) max_offs = section_table_max;
FILE * outfile = fopen("test-vdso.so", "wb");
if (outfile) {
fwrite((void *) vdso_hdr, 1, max_offs, outfile);
fclose(outfile);
}
return 0;
}

Zero copy in using vmsplice/splice in Linux

I am trying to get zero copy semantics working in linux using
vmsplice()/splice() but I don't see any performance improvement. This
is on linux 3.10, tried on 3.0.0 and 2.6.32. The following code tries
to do file writes, I have tried network socket writes() also, couldn't
see any improvement.
Can somebody tell what am I doing wrong ?
Has anyone gotten improvement using vmsplice()/splice() in production ?
#include <assert.h>
#include <fcntl.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <vector>
const char *filename = "Test-File";
const int block_size = 4 * 1024;
const int file_size = 4 * 1024 * 1024;
using namespace std;
int pipes[2];
vector<char *> file_data;
static int NowUsecs() {
struct timeval tv;
const int err = gettimeofday(&tv, NULL);
assert(err >= 0);
return tv.tv_sec * 1000000LL + tv.tv_usec;
}
void CreateData() {
for (int xx = 0; xx < file_size / block_size; ++xx) {
// The data buffer to fill.
char *data = NULL;
assert(posix_memalign(reinterpret_cast<void **>(&data), 4096, block_size) == 0);
file_data.emplace_back(data);
}
}
int SpliceWrite(int fd, char *buf, int buf_len) {
int len = buf_len;
struct iovec iov;
iov.iov_base = buf;
iov.iov_len = len;
while (len) {
int ret = vmsplice(pipes[1], &iov, 1, SPLICE_F_GIFT);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
if (len) {
auto ptr = static_cast<char *>(iov.iov_base);
ptr += ret;
iov.iov_base = ptr;
iov.iov_len -= ret;
}
}
len = buf_len;
while (len) {
int ret = splice(pipes[0], NULL, fd, NULL, len, SPLICE_F_MOVE);
assert(ret >= 0);
if (!ret)
break;
len -= ret;
}
return 1;
}
int WriteToFile(const char *filename, bool use_splice) {
// Open and write to the file.
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
int fd = open(filename, O_CREAT | O_RDWR, mode);
assert(fd >= 0);
const int start = NowUsecs();
for (int xx = 0; xx < file_size / block_size; ++xx) {
if (use_splice) {
SpliceWrite(fd, file_data[xx], block_size);
} else {
assert(write(fd, file_data[xx], block_size) == block_size);
}
}
const int time = NowUsecs() - start;
// Close file.
assert(close(fd) == 0);
return time;
}
void ValidateData() {
// Open and read from file.
const int fd = open(filename, O_RDWR);
assert(fd >= 0);
char *read_buf = (char *)malloc(block_size);
for (int xx = 0; xx < file_size / block_size; ++xx) {
assert(read(fd, read_buf, block_size) == block_size);
assert(memcmp(read_buf, file_data[xx], block_size) == 0);
}
// Close file.
assert(close(fd) == 0);
assert(unlink(filename) == 0);
}
int main(int argc, char **argv) {
auto res = pipe(pipes);
assert(res == 0);
CreateData();
const int without_splice = WriteToFile(filename, false /* use splice */);
ValidateData();
const int with_splice = WriteToFile(filename, true /* use splice */);
ValidateData();
cout << "TIME WITH SPLICE: " << with_splice << endl;
cout << "TIME WITHOUT SPLICE: " << without_splice << endl;
return 0;
}
I did a proof-of-concept some years ago where I got as 4x speedup using an optimized, specially tailored, vmsplice() code. This was measured against a generic socket/write() based solution. This blog post from natsys-lab echoes my findings. But I believe you need to have the exact right use case to get near this number.
So what are you doing wrong? Primarily I think you are measuring the wrong thing. When writing directly to a file you have 1 system call, which is write(). And you are not actually copying data (except to the kernel). When you have a buffer with data that you want to write to disk, it's not gonna get faster than that.
In you vmsplice/splice setup you are still copying you data into the kernel, but you have a total of 2 system calls vmsplice()+splice() to get it to disk. The speed being identical to write() is probably just a testament to Linux system call speed :-)
A more "fair" setup would be to write one program that read() from stdin and write() the same data to stdout. Write an identical program that simply splice() stdin into a file (or point stdout to a file when you run it). Although this setup might be too simple to really show anything.
Aside: an (undocumented?) feature of vmsplice() is that you can also use to to read data from a pipe. I used this in my old POC. It was basically just an IPC layer based on the idea of passing memory pages around using vmsplice().
Note: NowUsecs() probably overflows the int

seperate the cuda host code in .cpp file

main.cpp
#include<iostream>
#include "cuda.h"
using namespace std;
void cuda_calculation();
int main()
{
cuda_calculation();
return 0;
}
cu.h
void call(int , int ,float* , int );
cuda.cpp
#include <stdio.h>
#include <cuda.h>
#include "cu.h"
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
void call(n_blocks, block_size,&a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h); cudaFree(a_d);
}
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
__global__ void square_array(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N) a[idx] = a[idx] * a[idx];
}
//}
void call(int a,int b,float* c,int d)
{
square_array <<< 3,4 >>> (c,d);
}
I tried to seperate the kernal code and host code in a cpp file, however the following error prevails:
Error 'cudaMemcpy': identifier not found and the other cuda related identifier is not identified.
how to use the cuda related identifier in cpp file and call the kernal functions
There are some errors: void cuda_calculation(); needs to be visible to main.cpp through a header file (cu.h).
Also make sure to compile your .cu files with nvcc and NOT as a standard C++ file. Use CUDA compilation rules to make this process easy (installed by default as part of CUDA toolkit)
after a long trial ,I came with the proper output,
to include the cuda identifier in the cpp files we not only need to include cuda.h but also we need to include cuda_runtime.h as
cuda.cpp as
#include <stdio.h>
#include <cuda.h>
#include<cuda_runtime.h>
#include "cu.h"
#include "cud.h"
//void call(int , int ,float * , int );
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
call(n_blocks, block_size,a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h);
cudaFree(a_d);
}
so the others files are
main.cpp
#include<iostream>
#include "cud.h"
using namespace std;
int main()
{
cuda_calculation();
return 0;
}
cud.h
void cuda_calculation();
cu.h
void call(int , int ,float* , int );
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
__global__ void square_array(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N) a[idx] = a[idx] * a[idx];
}
//}
void call(int a,int b,float* c,int d)
{
square_array <<< 3,4 >>> (c,d);
}

No sound using mpg123 visual c++

I'm trying to play a song using mpg123 in a visual c++ project, I'm using a code from here but I can't find how to make sound. The code doesn't have errors, it works right but close immediately, This is the code, Thanks:
#include "stdafx.h"
#include <mpg123.h>
#define INBUFF 16384
#define OUTBUFF 32768
int _tmain(int argc, _TCHAR* argv[])
{
mpg123_handle *mh;
const char* filename;
filename="D:/Jose.mp3";
unsigned char *buffer;
size_t buffer_size;
size_t done;
int err;
int channels, encoding;
long rate;
mpg123_init();
mh = mpg123_new(NULL, &err);
buffer_size = mpg123_outblock(mh);
buffer = (unsigned char*) malloc(buffer_size * sizeof(unsigned char));
/* open the file and get the decoding format */
mpg123_open(mh, filename);
mpg123_getformat(mh, &rate, &channels, &encoding);
/* set the output format and open the output device */
int m_bits = mpg123_encsize(encoding);
int m_rate = rate;
int m_channels = channels;
/* decode and play */
for (int totalBtyes = 0 ; mpg123_read(mh, buffer, buffer_size, &done) == MPG123_OK;) {
totalBtyes += done;
}
/* clean up */
free(buffer);
mpg123_close(mh);
mpg123_delete(mh);
mpg123_exit();
return 0;
}

Memory allocation of struct in C++

My struct is as follows:
typedef struct KeypointSt {
float row, col;
float scale, ori;
unsigned char *descrip; /* Vector of descriptor values */
struct KeypointSt *next;
} *Keypoint;
The following is a part of a code in C. How can I translate it to C++, considering allocation and de-allocation of heap.
Keypoint k, keys = NULL;
for (i = 0; i < num; i++) {
/* Allocate memory for the keypoint. */
k = (Keypoint) malloc(sizeof(struct KeypointSt));
k->next = keys;
keys = k;
k->descrip = malloc(len);
for (j = 0; j < len; j++) {
k->descrip[j] = (unsigned char) val;
}
}
One possible way of converting to C++ is:
#include <cstring> // memset()
typedef struct KeypointSt
{
float row, col;
float scale, ori;
size_t len;
unsigned char *descrip; /* Vector of descriptor values */
KeypointSt *next;
KeypointSt(int p_len, int p_val) : row(0.0), col(0.0), scale(0.0),
ori(0.0), len(p_len),
descrip(new unsigned char[len]), next(0)
{ memset(descrip, len, p_val); }
~KeypointSt() { delete descrip; }
} *Keypoint;
extern KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val);
extern void free_keypoints(KeypointSt *list);
KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val)
{
KeypointSt *keys = NULL;
for (size_t i = 0; i < num; i++)
{
/* Allocate memory for the keypoint. */
KeypointSt *k = new KeypointSt(len, val);
k->next = keys;
keys = k;
}
return keys;
}
void free_keypoints(KeypointSt *list)
{
while (list != 0)
{
KeypointSt *next = list->next;
delete list;
list = next;
}
}
int main(void)
{
KeypointSt *keys = init_keypoints(4, 5, 6);
free_keypoints(keys);
return 0;
}
The only reason I've kept the typedef in place is because you have existing code; the C++ code would be better using KeypointSt * everywhere — or renaming the structure tag to Keypoint and using Keypoint * in place of your original Keypoint. I don't like non-opaque types where the typedef conceals a pointer. If I see a declaration XYZ xyz;, and it is a structure or class type, I expect to use xyz.pqr and not xyz->pqr.
We can debate code layout of the constructor code, the absence of a default constructor (no arrays), and the absence of a copy constructor and an assignment operator (both needed because of the allocation for descrip). The code of init_keypoints() is not exception safe; a memory allocation failure will leak memory. Fixing that is left as an exercise (it isn't very hard, I think, but I don't claim exception-handling expertise). I've not attempted to consider any extra requirements imposed by C++11. Simply translating from C to C++ is 'easy' until you look at the extra demands that C++ makes — demands that make your life easier in the long run, but at a short-term cost in pain.

Resources