Boost Managed Mapped File std::unordered_map Linux Segmentation Fault

I am trying to map a std::unordered_map with Boost.Interprocess.
On Windows everything works fine; on Linux I get a segmentation fault.
#include <boost/interprocess/managed_mapped_file.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/functional/hash.hpp>
#include <unordered_map>
#include <cstdint>
#include <iostream>

using namespace boost::interprocess;

struct State {
    uint8_t state = 0;
    State(uint8_t _state) {
        state = _state;
    }
};

managed_mapped_file file(open_or_create, "MySharedMemory", 65536);

typedef uint32_t KeyType;
typedef State MappedType;
typedef std::pair<const KeyType, MappedType> ValueType;
typedef boost::interprocess::allocator<ValueType, boost::interprocess::managed_mapped_file::segment_manager> UnorderedMapAllocator;
typedef std::unordered_map<KeyType, MappedType, boost::hash<KeyType>, std::equal_to<KeyType>, UnorderedMapAllocator> unordered_map;

unordered_map* map = file.find_or_construct<unordered_map>("fault_states")(3, boost::hash<KeyType>(), std::equal_to<KeyType>(), file.get_segment_manager());

uint32_t size = (uint32_t)map->size();
for (uint32_t i = size; i < size + 100; ++i) {
    State state(46);
    map->insert(ValueType(i, state));
}
for (auto x : *map) {
    std::cout << x.first << " ";
    std::cout << x.second.state << std::endl;
}
On the first run everything works fine. On the second run I get the segmentation fault.
If I use boost::unordered_map instead, everything works fine, but I have to use the std version.
Boost version 1.6.4
GCC version 7.4.0
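For reference, here is a minimal sketch of the boost::unordered_map variant that reportedly works, assuming the same KeyType, MappedType, UnorderedMapAllocator typedefs and managed_mapped_file as above; the typedef name shared_map and the variable bmap are only illustrative:

#include <boost/unordered_map.hpp>

typedef boost::unordered_map<KeyType, MappedType, boost::hash<KeyType>,
                             std::equal_to<KeyType>, UnorderedMapAllocator> shared_map;

// Constructed in the mapped file exactly like the std::unordered_map above.
shared_map* bmap = file.find_or_construct<shared_map>("fault_states")(
    3, boost::hash<KeyType>(), std::equal_to<KeyType>(), file.get_segment_manager());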

Related

How to identify the signal 11 error reason?

I'm running a C++ program on a Linux system (kernel 4.15, Ubuntu 16.04). When I run it, I get a signal 11 (segmentation fault) that points to line 10 (the "for" loop) of the following code:
void ProfilerBlock::ReadStack(const trace::EventValue& event,
                              std::vector<std::string>* stack)
{
    process_t pid = ProcessForEvent(event);
    std::vector<std::string> symbolizedStack;
    const auto* stackField =
        value::ArrayValue::Cast(event.getEventField("stack"));
    for (const auto& addressValue : *stackField)
    {
        uint64_t address = addressValue.AsULong();
        symbols::Symbol symbol;
        uint64_t offset = 0;
        if (!_symbols.LookupSymbol(address, _images[pid], &symbol,
                                   &offset))
            symbol.set_name("Unknown Symbol");
        if (boost::starts_with(symbol.name(), "lttng_profile"))
            continue;
        stack->push_back(symbol.name());
        if (_dumpStacks)
            std::cout << symbol.name() << " - " << address << std::endl;
    }
    if (_dumpStacks)
        std::cout << std::endl;
}
Does anybody have any idea what is causing it?

CUDA object copy

I'm trying to use CUDA with objects. This is a little test code I put together to try things out, but I ran into a problem. When I do anything to the device version of the variable, the copy back to the host fails with "cudaErrorIllegalAddress", but if I just copy the object to the device and back, it works.
If I comment out the printf... line, it then works.
class A {
public:
    int s;
};

__device__ A *d_a;

__global__ void MethodA() {
    printf("%d\n", d_a->s);
}

int main() {
    A *a = new A();
    a->s = 10;
    cudaError e;
    e = cudaMalloc((void**)&d_a, sizeof(A));
    e = cudaMemcpy(d_a, a, sizeof(A), cudaMemcpyHostToDevice);
    MethodA<<<1, 1>>>();
    e = cudaMemcpy(a, d_a, sizeof(A), cudaMemcpyDeviceToHost);
    std::cout << cudaGetErrorName(e) << std::endl;
    delete(a);
    std::getchar();
    return 0;
}
Use of the __device__ variable is causing difficulty. It is intended to be used for static allocations, known at compile time.
Your methodology would be simplified if you used an ordinary host-based pointer, pointing to a dynamic allocation created at runtime (which you are doing anyway), and then pass that host-based pointer to the device, via a kernel parameter.
Some problems with your approach:
You are using an incorrect API for modifying a __device__ variable. We don't use cudaMemcpy. We use cudaMemcpyToSymbol, etc.
You are not allowed to take the address of a device entity in host code:
e = cudaMalloc((void**)&d_a, sizeof(A));
                       ^
cudaMalloc expects to store the allocated pointer value in host memory, not in device memory. It will point to a location in device memory, but it should be stored in a host variable.
If you want to stay with your method, the following modifications should make it correct:
$ cat t89.cu
#include <iostream>
#include <stdio.h>

class A {
public:
    int s;
};

__device__ A *d_a;

__global__ void MethodA() {
    printf("%d\n", d_a->s);
}

int main() {
    A *a = new A();
    a->s = 10;
    A *temp_d_a;
    cudaMalloc((void**)&temp_d_a, sizeof(A));
    cudaMemcpy(temp_d_a, a, sizeof(A), cudaMemcpyHostToDevice);
    cudaMemcpyToSymbol(d_a, &temp_d_a, sizeof(A *));
    MethodA<<<1, 1>>>();
    cudaMemcpy(a, temp_d_a, sizeof(A), cudaMemcpyDeviceToHost);
    std::cout << cudaGetErrorString(cudaGetLastError()) << std::endl;
    cudaFree(temp_d_a);
    delete(a);
    return 0;
}
$ nvcc t89.cu -o t89
$ cuda-memcheck ./t89
========= CUDA-MEMCHECK
10
no error
========= ERROR SUMMARY: 0 errors
$
EDIT: Regarding my previous statement:
Your methodology would be simplified if you used an ordinary host-based pointer, pointing to a dynamic allocation created at runtime (which you are doing anyway), and then pass that host-based pointer to the device, via a kernel parameter.
which was asked about in the comments below, here is a worked example showing that approach:
$ cat t89.cu
#include <iostream>
#include <stdio.h>

class A {
public:
    int s;
};

__global__ void MethodA(A *a) {
    printf("%d\n", a->s);
}

int main() {
    A *a = new A();
    a->s = 10;
    A *d_a;                               // an ordinary host-based pointer
    cudaMalloc((void**)&d_a, sizeof(A));  // dynamic allocation created at runtime
    cudaMemcpy(d_a, a, sizeof(A), cudaMemcpyHostToDevice);
    MethodA<<<1, 1>>>(d_a);               // passed to the kernel via a parameter
    cudaMemcpy(a, d_a, sizeof(A), cudaMemcpyDeviceToHost);
    std::cout << cudaGetErrorString(cudaGetLastError()) << std::endl;
    cudaFree(d_a);
    delete(a);
    return 0;
}
$ nvcc -o t89 t89.cu
$ cuda-memcheck ./t89
========= CUDA-MEMCHECK
10
no error
========= ERROR SUMMARY: 0 errors
$

POSIX semaphore for synchronisation between two different processes [duplicate]

According to my understanding, a semaphore should be usable across related processes without it being placed in shared memory. If so, why does the following code deadlock?
#include <iostream>
#include <semaphore.h>
#include <sys/wait.h>
#include <unistd.h>

using namespace std;

static int MAX = 100;

int main(int argc, char* argv[]) {
    int retval;
    sem_t mutex;
    cout << sem_init(&mutex, 1, 0) << endl;
    pid_t pid = fork();
    if (0 == pid) {
        // sem_wait(&mutex);
        cout << endl;
        for (int i = 0; i < MAX; i++) {
            cout << i << ",";
        }
        cout << endl;
        sem_post(&mutex);
    } else if (pid > 0) {
        sem_wait(&mutex);
        cout << endl;
        for (int i = 0; i < MAX; i++) {
            cout << i << ",";
        }
        cout << endl;
        // sem_post(&mutex);
        wait(&retval);
    } else {
        cerr << "fork error" << endl;
        return 1;
    }
    // sem_destroy(&mutex);
    return 0;
}
When I run this on Gentoo/Ubuntu Linux, the parent hangs. Apparently it did not receive the post from the child. Uncommenting sem_destroy does not help. Am I missing something?
Update 1:
This code works
mutex = (sem_t *) mmap(NULL, sizeof(sem_t), PROT_READ | PROT_WRITE,
                       MAP_ANONYMOUS | MAP_SHARED, -1, 0);
if (mutex == MAP_FAILED) {  // mmap reports failure with MAP_FAILED, not NULL
    perror("mmap failed");
    exit(1);
}
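For completeness, the semaphore in that mapping presumably still has to be initialized with the pshared argument set before the fork; a minimal sketch, assuming the rest of the original program is unchanged apart from mutex now being a sem_t pointer:

// Initialize the semaphore inside the shared mapping with pshared != 0,
// so parent and child operate on the same object after fork().
if (sem_init(mutex, 1, 0) == -1) {
    perror("sem_init");
    exit(1);
}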
Thanks,
Nilesh.
The wording in the manual page is kind of ambiguous.
If pshared is nonzero, then the semaphore is shared between processes,
and should be located in a region of shared memory.
Since a child created by fork(2) inherits its parent's memory
mappings, it can also access the semaphore.
Yes, but it still has to be in a shared region. Otherwise the memory is simply duplicated with the usual copy-on-write (CoW) after fork, so the child posts to its own private copy and the parent never sees it.
You can solve this in at least two ways (a minimal sketch of the first is shown after the link below):
Use sem_open("my_sem", ...)
Use shm_open and mmap to create a shared region
An excellent article on this topic, for future passers-by:
http://blog.superpat.com/2010/07/14/semaphores-on-linux-sem_init-vs-sem_open/
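For illustration, a minimal sketch of the first option (sem_open with a named semaphore), assuming the name "/my_sem" is free and with error handling reduced to perror:

#include <fcntl.h>      // O_CREAT
#include <semaphore.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

int main() {
    // A named semaphore is a kernel-managed object, so parent and child
    // refer to the same semaphore without any explicit shared memory.
    sem_t* sem = sem_open("/my_sem", O_CREAT, 0644, 0);
    if (sem == SEM_FAILED) { perror("sem_open"); return 1; }

    pid_t pid = fork();
    if (pid == 0) {
        // Child: do its work, then signal the parent.
        sem_post(sem);
        return 0;
    } else if (pid > 0) {
        // Parent: block until the child has posted, then reap it.
        sem_wait(sem);
        wait(nullptr);
        sem_close(sem);
        sem_unlink("/my_sem");  // remove the name once both sides are done
        return 0;
    }
    perror("fork");
    return 1;
}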

C++ async sometimes resulting in std::system_error, and sometimes not

I am trying to get the code example from the following tutorial to work:
https://solarianprogrammer.com/2012/10/17/cpp-11-async-tutorial/
#include <future>
#include <iostream>
#include <vector>

int twice(int m) {
    return 2 * m;
}

int main() {
    std::vector<std::future<int>> futures;
    for (int i = 0; i < 10; ++i) {
        futures.push_back(std::async(twice, i));
    }
    for (auto &e : futures) {
        std::cout << e.get() << std::endl;
    }
    return 0;
}
This code results in:
terminate called after throwing an instance of 'std::system_error'
what(): Unknown error -1
I am using these flags for compilation:
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")
The code below results in the same error (we just instantiate some minimal and unused object):
int twice(int m) {
    return 2 * m;
}

class Foo {
public:
    Foo();
};

Foo::Foo() {}

int main() {
    Foo foo;
    std::vector<std::future<int>> futures;
    for (int i = 0; i < 10; ++i) {
        futures.push_back(std::async(twice, i));
    }
    for (auto &e : futures) {
        std::cout << e.get() << std::endl;
    }
    return 0;
}
This ends with a similar result:
terminate called after throwing an instance of 'std::system_error'
what(): Unknown error -1
But this works fine (i.e. prints: 0 2 4 ... 18 as expected):
int twice(int m) {
    return 2 * m;
}

int main() {
    nsp::Foo foo; // <---- difference here!
    std::vector<std::future<int>> futures;
    for (int i = 0; i < 10; ++i) {
        futures.push_back(std::async(twice, i));
    }
    for (auto &e : futures) {
        std::cout << e.get() << std::endl;
    }
    return 0;
}
nsp::Foo is now defined and declared in another library (with the same code). This library is built from the same CMakeLists.txt with the same compilation flags, and the executable links against it.
What is going on?

Is boost::asio::strand broken on Ubuntu 11.04 (boost_all_dev 1.42)?

I have a program which uses an io_service and several threads.
It instantiates some number of socket objects. These objects each have a strand for synchronization. All calls to async_read(), async_write(), and similar functions go through strand_.wrap(boost::bind(...)). Each object also has an int interlock_ variable that is initialized to 0.
Inside one of these functions (the on-data-receive callback), I do the following:
Class::startRead(...)
{
    ...
    boost::asio::async_read(socket_, boost::asio::buffer(ptr, 16384), boost::asio::transfer_at_least(1),
        strand_.wrap(boost::bind(&EagerConnection::on_read, this,
            placeholders::error, placeholders::bytes_transferred)));
}

Class::on_read(...)
{
    ...
    startRead();
    assert(0 == __sync_fetch_and_add(&interlock_, 1));
    onData_();
    assert(1 == __sync_fetch_and_add(&interlock_, -1));
}
Because everything is synchronized through the strand, that first assert should never fire. However, it does fire! When I check the value in GDB, the end value of interlock_ is 2, which means that two separate calls to on_read() are active at the same time.
Does this mean that boost::asio::strand is broken? (I've already checked that I don't have any re-entrancy within the completion function -- the onData_ signal handler does not re-call on_data()).
Can the "early" startRead somehow cause an immediate re-entry? (Both the semantics of async_x and strand seem to indicate it can't)
If you really, really want to see the full context of the class, it's available as a gist: https://gist.github.com/979212
I have spotted a few minor(?) issues:
Minor: the initialization order of interlock_ and strand_ is switched. Fix it by declaring interlock_ after the strand_ member (see the sketch below);
The readIn function returns no value (callers see uninitialized data). You probably intend to return n?
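For illustration, a simplified sketch of that first point (not the full class from the gist; the name ConnectionSketch is only illustrative): members are constructed in declaration order, regardless of the order they appear in the constructor's initializer list, so strand_ should be declared before interlock_ if the strand is meant to be built first.

#include <boost/asio.hpp>

class ConnectionSketch {
    // Members are constructed in declaration order, not in initializer-list
    // order, so strand_ is built before interlock_ here.
    boost::asio::io_service::strand strand_;
    int interlock_;
public:
    explicit ConnectionSketch(boost::asio::io_service &svc)
        : strand_(svc), interlock_(0) {}
};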
Good news:
Running with valgrind turned up clear.
Running with helgrind turned up clear (but: I'm not using threads in my minimal example, I guess; Don't know about boost::asio and boost::signals internals).
I am trying to reproduce things, but my installation fails to raise the asserts when doing this.
I tacked on the following fragment at the end of the gist:
int split(std::string const &src, char ch, std::string &oLeft, std::string &oRight)
{
    std::size_t pos = src.find(ch);
    if (pos == std::string::npos)
    {
        oLeft = src;
        oRight.clear();
        return 1;
    } else
    {
        oLeft = src.substr(0, pos);
        oRight = src.substr(pos + 1);
        return 2;
    }
}
namespace {
    boost::asio::io_service svc;
    EagerConnection c(svc);

    void onconnect()
    {
        std::cout << "ONCONNECT" << std::endl;
        const char data[] = "GET / HTTP/1.0\r\n\r\n";
        c.writeOut(data, sizeof(data));
    }

    void ondata()
    {
        std::cout << "ONDATA" << std::endl;
        std::ostringstream oss;
        char buf[1024];
        int read;
        while ((read = c.readIn(buf, 1024)))
            oss.write(buf, read);
        std::cout << "response: " << oss.str() << std::endl;
    }

    void ondisconnect()
    {
        std::cout << "ON__DIS__CONNECT" << std::endl;
    }
}

int main(int argc, char* argv[])
{
    if (argc > 1 && argv[1])
    {
        c.onConnect_.connect(&onconnect);
        c.onData_.connect(&ondata);
        c.onDisconnect_.connect(&ondisconnect);
        c.open(argv[1]);
        svc.run();
    }
    return 0;
}
As you can see, I'm really trying to do the SimplestThingThatCouldPossiblyWork. My connect/reconnect is working nicely (including the increasing backoff time).
I compile this with
strand: strand.cpp
	g++ -Wall -Werror -o $@ $^ -g -O0 -lboost_system -lboost_thread -lboost_signals -lpthread
And invoke it with
./strand 127.0.0.1:6767
I have a responding script sitting there doing (basically)
netcat -l -p 6767 -e rev
One other thing to note: the write buffer never seems to actually be sent/flushed until I interrupt the strand tester (client side). This happens regardless of how large I make data... This is probably due to a step I'm missing?
Edit:
Tested with identical results on:
Ubuntu Maverick Meerkat, gcc 4.4.5, boost 1.42.0
Debian sid, gcc 4.5.2-8, boost 1.46.1
