context switch in the source - linux

The following source is a queue class:
// Bounded queue whose enqueue() claims a slot by advancing enqueue_pos_ with a
// compare-and-swap.
// NOTE(review): buffer_, buffer_mask_ and enqueue_pos_ are used below but are
// not declared in this excerpt.
template<typename T>
class mpmc_bounded_queue
{
public:
// Tries to store a copy of `data`.
// Returns true once the value has been written into its cell, false when the
// cell's sequence number is behind the enqueue position (presumably: the
// queue is full — TODO confirm against dequeue()).
bool enqueue(T const& data)
{
cell_t* cell;
size_t pos ;
for (;;)
{
pos = enqueue_pos_ ;
cell = &buffer_[pos & buffer_mask_];
size_t seq = cell->sequence_;
// Signed distance between the cell's sequence number and the position.
intptr_t dif = (intptr_t)seq - (intptr_t)pos;
if (dif == 0)
{
//spot A
// Claim position `pos`; the loop is left only when this CAS succeeds.
if (__sync_bool_compare_and_swap(&enqueue_pos_,pos,pos+1) )
break;
} else if (dif < 0)
{
return false;
}else{
// Redundant: pos is reloaded at the top of the loop anyway.
pos = enqueue_pos_;
}
}
// spot B
cell->data_ = data;
// The sequence number is advanced only after the data has been stored.
cell->sequence_ = pos + 1 ;
return true;
} //enqueue
private:
// One slot of the buffer: a sequence number plus the stored value.
struct cell_t
{
size_t sequence_;
T data_;
};
} ;
In Red Hat Enterprise Linux 7.0 x86_64, is it possible for a context switch
to happen after __sync_bool_compare_and_swap (spot A) has finished but before
cell->data_ = data (spot B) has been executed?
I've been told that context switches happen in calls such as recv, send and usleep,
i.e. things that have to do with I/O. In this case, while many threads execute
enqueue, is it possible that a thread finishes
__sync_bool_compare_and_swap with a true result but is switched out at that critical moment, before it executes cell->data_ = data?
Or is a context switch impossible between spot A and spot B?

Is it true ?
Yes: a context switch can happen at any point between A and B.
That shouldn't affect the algorithm though, which appears to be correct: if __sync_bool_compare_and_swap returned true, then you have atomically reserved the cell at pos, and nobody else will interfere with that cell, so whether the context switch happens between A and B or not is irrelevant.

Related

how to implement std::weak_ptr::lock with atomic operations?

I recently tried to implement an atomic reference counter in C, so I referred to the implementation of std::shared_ptr in STL, and I am very confused about the implementation of weak_ptr::lock.
When executing the compare-and-exchange, clang specifies memory_order_seq_cst, g++ specifies memory_order_acq_rel, and MSVC specifies memory_order_relaxed.
I think memory_order_relaxed is enough, since there is no data that needs to be synchronized if the use count is non-zero.
I am not an expert in this area, can anyone provide some advice?
Following are code snippets:
MSVC
// Attempts to take one more reference on _Uses, but only while the count is
// non-zero: a compare-exchange loop raises the count from the value observed.
bool _Incref_nz() noexcept { // increment use count if not zero, return true if successful
auto& _Volatile_uses = reinterpret_cast<volatile long&>(_Uses);
#ifdef _M_CEE_PURE
long _Count = *_Atomic_address_as<const long>(&_Volatile_uses);
#else
long _Count = __iso_volatile_load32(reinterpret_cast<volatile int*>(&_Volatile_uses));
#endif
while (_Count != 0) {
// Try to replace _Count with _Count + 1; the result is compared with _Count
// below to decide whether the exchange took place.
const long _Old_value = _INTRIN_RELAXED(_InterlockedCompareExchange)(&_Volatile_uses, _Count + 1, _Count);
if (_Old_value == _Count) {
return true;
}
// The count changed underneath us: retry from the value actually seen.
_Count = _Old_value;
}
// The count was observed to be zero; no reference was taken.
return false;
}
clang/libcxx
// Attempts to add one shared owner.
// Returns this when __shared_owners_ was incremented, nullptr once the counter
// is observed to be -1.
// NOTE(review): -1 appears to be the "no owners left" value — confirm against
// the rest of libc++.
__shared_weak_count*
__shared_weak_count::lock() noexcept
{
long object_owners = __libcpp_atomic_load(&__shared_owners_);
while (object_owners != -1)
{
// object_owners is passed by address; presumably a failed exchange refreshes
// it with the current counter value, which the loop condition then re-tests
// — TODO confirm.
if (__libcpp_atomic_compare_exchange(&__shared_owners_,
&object_owners,
object_owners+1))
return this;
}
return nullptr;
}
gcc/libstdc++
// Specialisation for the atomic lock policy.
// Returns true if the use count was incremented, false if it was observed to
// be zero.
template<>
inline bool
_Sp_counted_base<_S_atomic>::
_M_add_ref_lock_nothrow() noexcept
{
// Perform lock-free add-if-not-zero operation.
_Atomic_word __count = _M_get_use_count();
do
{
if (__count == 0)
return false;
// Replace the current counter value with the old value + 1, as
// long as it's not changed meanwhile.
}
// Weak compare-exchange (4th argument true): ACQ_REL on success, RELAXED on
// failure. On failure the builtin stores the current value into __count, so
// the zero test above is repeated with fresh data.
while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1,
true, __ATOMIC_ACQ_REL,
__ATOMIC_RELAXED));
return true;
}
I am trying to answer this question myself.
The standard only says that weak_ptr::lock must execute as an atomic operation; it says nothing more about the memory order. Different threads can therefore call weak_ptr::lock in parallel without a race condition, and each implementation is free to choose its own memory order for that operation.
But no matter what, all the above implementations are correct.

How does generating an interrupt (calling irqfd) calls an interrupt on the KVM VM?

The KVM irqfd ioctl starts the irqfd for a file descriptor.
It does this:
case KVM_IRQFD: {
struct kvm_irqfd data;
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_irqfd(kvm, &data);
break;
}
where kvm_irqfd is here
and calls kvm_irqfd_assign which initiates a wakeup queue:
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
That is, irqfd_wakeup does this:
if (flags & EPOLLIN) {
u64 cnt;
eventfd_ctx_do_read(irqfd->eventfd, &cnt);
idx = srcu_read_lock(&kvm->irq_srcu);
do {
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
if (kvm_arch_set_irq_inatomic(&irq, kvm,
KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
srcu_read_unlock(&kvm->irq_srcu, idx);
ret = 1;
}
As you can see in schedule_work(&irqfd->inject), it schedules the inject function, which is here:
/*
 * Work callback: recovers the kvm_kernel_irqfd that embeds 'work' (its
 * 'inject' member) and drives the irqfd's GSI through kvm_set_irq().
 * Without a resampler the line is set to 1 and then to 0; with a resampler it
 * is only set to 1, using the resample IRQ source id.
 */
static void
irqfd_inject(struct work_struct *work)
{
struct kvm_kernel_irqfd *irqfd =
container_of(work, struct kvm_kernel_irqfd, inject);
struct kvm *kvm = irqfd->kvm;
if (!irqfd->resampler) {
/* Raise, then immediately lower, the line. */
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
false);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
false);
} else
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
irqfd->gsi, 1, false);
}
It calls kvm_set_irq defined here which does this:
/*
 * Applies 'level' to every routing entry that kvm_irq_map_gsi() returns for
 * 'irq' by calling each entry's set() callback.
 * Returns -1 if no callback returned a non-negative value, otherwise the sum
 * of the non-negative callback results.
 */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
int ret = -1, i, idx;
trace_kvm_set_irq(irq, level, irq_source_id);
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
/* The entries are copied into irq_set under the SRCU read lock; the
 * callbacks below run after the lock has been dropped. */
idx = srcu_read_lock(&kvm->irq_srcu);
i = kvm_irq_map_gsi(kvm, irq_set, irq);
srcu_read_unlock(&kvm->irq_srcu, idx);
while (i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
line_status);
/* Failed entries are skipped and do not contribute to ret. */
if (r < 0)
continue;
ret = r + ((ret < 0) ? 0 : ret);
}
return ret;
}
It looks like it finally calls something at:
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
line_status);
This set function is filled by this.
It sets to this function:
/*
 * set() callback for a routing entry: converts the entry's irqchip pin into an
 * SPI id by adding VGIC_NR_PRIVATE_IRQS and forwards it to
 * kvm_vgic_inject_irq().
 * Returns -EINVAL when vgic_valid_spi() rejects the id, otherwise the result
 * of kvm_vgic_inject_irq(). irq_source_id and line_status are not used here.
 */
static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id,
int level, bool line_status)
{
unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
}
which calls kvm_vgic_inject_irq which finally calls vgic_put_irq which calls this:
/*
 * Drops one reference on 'irq'. Only when kref_put() returns non-zero is the
 * irq unlinked from the LPI list, the list count decremented and the
 * structure freed.
 * This is reference-count bookkeeping only; nothing in this function delivers
 * an interrupt.
 * NOTE(review): the "_locked" suffix suggests the caller must already hold the
 * lock protecting lpi_list — confirm at the call sites.
 */
void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
/* Other references remain: nothing to tear down. */
if (!kref_put(&irq->refcount, vgic_irq_release))
return;
list_del(&irq->lpi_list);
dist->lpi_list_count--;
kfree(irq);
}
but I don't see how the GIC is called here, I only see the list being deleted.
I thought here it would send the interrupt to the GIC, which would then call the VM somehow.
I'm trying to understand how calling the irqfd file descriptor ends up calling an interrupt in the VM.
VGIC is for ARM, so you should look at the ARM support files, whereas x86 uses either the APIC or the PIC (mostly the APIC nowadays).
You can check the specifications of those IRQ chips to see how they deliver an external signal to the destination core (vCPU).
For example, if you are using an x86 virtual machine (I have no knowledge of VGIC) with an emulated IOAPIC, which has for example 24 pins, you should first understand how the hardware APIC works; then you will know how the emulated one works.
https://elixir.bootlin.com/linux/v5.2.12/source/arch/x86/kvm/irq_comm.c#L271
https://elixir.bootlin.com/linux/v5.2.12/source/arch/x86/kvm/irq_comm.c#L38

C++11 Thread Safety of Atomic Containers

I am trying to implement a thread safe STL vector without mutexes. So I followed through this post and implemented a wrapper for the atomic primitives.
However, when I ran the code below, it printed Failed! twice (only two instances of race conditions), so it doesn't seem to be thread safe. How can I fix that?
Wrapper Class
// Copyable wrapper around std::atomic<T>. std::atomic itself cannot be copied,
// so the copy operations below go through load()/store(), which lets the
// wrapper be stored in containers such as std::vector.
template<typename T>
struct AtomicVariable
{
std::atomic<T> atomic;
AtomicVariable() : atomic(T()) {}
explicit AtomicVariable(T const& v) : atomic(v) {}
explicit AtomicVariable(std::atomic<T> const& a) : atomic(a.load()) {}
// Copying reads the source with load(); the copy is a snapshot of its value.
AtomicVariable(AtomicVariable const&other) :
atomic(other.atomic.load()) {}
inline AtomicVariable& operator=(AtomicVariable const &rhs) {
atomic.store(rhs.atomic.load());
return *this;
}
// Two separate loads followed by a store: each step is atomic on its own, but
// the sequence is not a single atomic read-modify-write, so a concurrent
// update made between the load and the store is overwritten.
inline AtomicVariable& operator+=(AtomicVariable const &rhs) {
atomic.store(rhs.atomic.load() + atomic.load());
return *this;
}
// Compares snapshots of both values. Not declared const, so it cannot be
// called on a const AtomicVariable.
inline bool operator!=(AtomicVariable const &rhs) {
return !(atomic.load() == rhs.atomic.load());
}
};
typedef AtomicVariable<int> AtomicInt;
Functions and Testing
// Vector of 100 elements.
vector<AtomicInt> common(100, AtomicInt(0));
// Adds 10 to every element of `param`, front to back.
void add10(vector<AtomicInt> &param){
    const AtomicInt increment(10);
    for (AtomicInt &slot : param){
        slot += increment;
    }
}
// Adds 100 to every element of `param`, front to back.
void add100(vector<AtomicInt> &param){
    const AtomicInt increment(100);
    for (AtomicInt &slot : param){
        slot += increment;
    }
}
void doParallelProcessing(){
// Create threads
std::thread t1(add10, std::ref(common));
std::thread t2(add100, std::ref(common));
// Join 'em
t1.join();
t2.join();
// Print vector again
for (vector<AtomicInt>::iterator it = common.begin();
it != common.end(); ++it){
if (*it != AtomicInt(110)){
cout << "Failed!" << endl;
}
}
}
// Repeats the parallel test 100000 times, each time on a vector reset to 100
// zero-valued elements.
int main(int argc, char *argv[]) {
    const int totalRounds = 100000;
    int round = 0;
    while (round < totalRounds){
        // Start every round from a clean vector.
        common.clear();
        common.resize(100, AtomicInt(0));
        doParallelProcessing();
        ++round;
    }
}
Is there such a thing as an atomic container? I've also tested this with a regular vector<int>; it didn't produce any Failed output, but that might just be a coincidence.
Just write operator += as:
// Delegates to std::atomic's own operator+=, so the addition happens as one
// atomic read-modify-write on this object's value.
// rhs.atomic is read through std::atomic's conversion to T.
// NOTE(review): std::atomic only provides operator+= for arithmetic and
// pointer types — confirm T is one of those wherever this is instantiated.
inline AtomicVariable& operator+=(AtomicVariable const &rhs) {
atomic += rhs.atomic;
return *this;
}
In documentation: http://en.cppreference.com/w/cpp/atomic/atomic operator += is atomic.
Your example fails because below scenario of execution is possible:
Thread1 - rhs.atomic.load() - returns 10 ; Thread2 - rhs.atomic.load() - returns 100
Thread1 - atomic.load() - returns 0 ; Thread2 - atomic.load - returns 0
Thread1 - add values (0 + 10 = 10) ; Thread2 - add values (0 + 100)
Thread1 - atomic.store(10) ; Thread2 - atomic.store(100)
In this case the final atomic value might be 10 or 100, depending on which thread executes atomic.store last.
please note that
atomic.store(rhs.atomic.load() + atomic.load());
is not atomic
You have two options to solve it.
memoery
1) Use a mutex.
EDIT: as T.C. mentioned in the comments, memory ordering is irrelevant here, since the operation is a load(), then another load(), then a store() (none of them in relaxed mode) — so memory order is not the issue.
2) Use memory order http://bartoszmilewski.com/2008/12/01/c-atomics-and-memory-ordering/
memory_order_acquire: guarantees that subsequent loads are not moved before the current load or any preceding loads.
memory_order_release: preceding stores are not moved past the current store or any subsequent stores.
I'm still not sure about 2, but I think if the stores will not be on parallel, it will work.

Using malloc to create a linked list

I used malloc to allocate new nodes in the list, yet I am facing an error in a certain part of my code.
The following code covers only deleting and inserting:
#include <stdio.h>
#include <malloc.h>
/* One element of the doubly linked list. */
struct Node{
    int value;
    struct Node * Next;      /* following node, NULL for the last node */
    struct Node * Previous;  /* preceding node, NULL for the first node */
};
typedef struct Node Node;

/* List header: element count, running sum of the values, and both ends. */
struct List{
    int Count;
    int Total;
    Node * First;
    Node * Last;
};
typedef struct List List;

List Create();
void Add(List a,int value);
void Remove(List a,Node * b);

/*
 * Returns an empty list.
 * Every field is initialised: Add() does "a.Total+=value", and an emptiness
 * test such as "a.First == NULL" only works if the fields start out defined.
 */
List Create()
{
    List a;
    a.Count=0;
    a.Total=0;
    a.First=NULL;
    a.Last=NULL;
    return a;
}
/*
 * Allocates a node holding 'value' and links it after a.Last (or makes it the
 * first node when a.Count is 0).
 * On allocation failure a message is printed and nothing else is done.
 *
 * NOTE: 'a' is received by value, so the updates to Count, Total, First and
 * Last below change this function's copy only; the caller's List is not
 * modified. Fixing that requires passing a List* and is left to the caller-
 * facing API.
 */
void Add(List a,int value)
{
    Node * b = (Node *)malloc(sizeof(Node));
    if(b==NULL)
    {
        printf("Memory allocation error \n");
        return; /* nothing was allocated: b must not be dereferenced */
    }
    b->value=value;
    b->Next=NULL; /* the new node always becomes the last one */
    if(a.Count==0)
    {
        b->Previous=NULL;
        a.First=b;
    }
    else
    {
        b->Previous=a.Last;
        a.Last->Next=b;
    }
    ++a.Count;
    a.Total+=value;
    a.Last=b;
}
/*
 * Unlinks node 'b' from its neighbours and frees it. A NULL 'b' is ignored.
 *
 * The head node has no predecessor, so it gets its own branch; without it the
 * final else would dereference b->Previous == NULL.
 *
 * NOTE: 'a' is received by value, so Count, Total, First and Last of the
 * caller's List cannot be updated from here.
 */
void Remove(List a,Node * b)
{
    if(b==NULL)
        return;
    if(a.Count>1)
    {
        if(a.Last==b)
        {
            /* Tail: the predecessor becomes the new end of the chain. */
            b->Previous->Next=NULL;
        }
        else if(b->Previous==NULL)
        {
            /* Head: the successor becomes the new start of the chain. */
            b->Next->Previous=NULL;
        }
        else
        {
            /* Middle: make both neighbours point past b. */
            b->Previous->Next=b->Next;
            b->Next->Previous=b->Previous;
        }
    }
    free(b);
}
In the delete function, in the last else branch, I am not certain whether using b->Next->Previous is okay and will work. When using the -> operator, am I addressing the node pointer or its value?
The short answer: Yes, b->Next->Previous is fine -- it's a struct Node*, just like the right hand side b->Previous.
I think that your error lies with the handling of Count: It is incremented by Add(), but Remove() doesn't decrement it. In fact, as the list itself only needs to know whether it is empty or not, you can remove it and instead see if a.First == NULL. (Your a.Count == 1 test can likewise be replaced with a.First != NULL && a.First->Next == NULL test.)
If you're promising Count in your API, you can add it back when you've got the list itself working. The same "remove-then-add-back" might be useful with Total. Think of both of these as caches.
An even better solution would be to implement a circular list:
// List header for the circular variant: the anchor node is embedded in the
// list object itself and stands in for both ends of the list.
struct List
{
Node Anchor;
//...
};
// Builds an empty circular list: the anchor's Next and Previous both refer to
// the anchor itself. The links are Node pointers, so they must take the
// address of the Anchor member, not of the enclosing List.
//
// NOTE(review): the list is returned by value, so the copy the caller receives
// still links to the local 'l', whose lifetime has ended. Callers must re-point
// Anchor.Next/Anchor.Previous at their own object's Anchor (or the API should
// initialise a List in place through a pointer) before using the list.
List Create()
{
    List l;
    l.Anchor.Next = l.Anchor.Previous = &l.Anchor;
    return l;
}
// Returns true when the list holds no nodes besides its anchor, i.e. when the
// anchor links back to itself. The comparison is against the address of the
// Anchor member so that both sides are Node pointers.
bool IsEmpty(List const* l)
{
    Node const* anchor = &l->Anchor;
    // Both or neither point at the anchor.
    assert((l->Anchor.Next == anchor) == (l->Anchor.Previous == anchor));
    return l->Anchor.Next == anchor;
}
// Links node 'n' into a list directly behind 'ln'.
void AddAfter(Node* n, Node* ln)
{
    Node* successor = ln->Next;
    // Wire up the new node first ...
    n->Previous = ln;
    n->Next = successor;
    // ... then make both neighbours refer to it.
    ln->Next = n;
    successor->Previous = n;
}
// Unlinks 'n' from the circular list it is in and returns it, so the caller
// can either re-insert it elsewhere or free it.
Node* Remove(Node* n)
{
    // Make the neighbours point past 'n'.
    n->Previous->Next = n->Next;
    n->Next->Previous = n->Previous;
    n->Next = n->Previous = n; // nice and proper: 'n' is now linked to itself only
    return n;
}
Now you no longer need special cases for empty lists. I let Remove() return the node itself, to make it easy to either move nodes between lists (AddAfter(Remove(somenode), &otherlist.Anchor)) or remove and delete nodes (free(Remove(somenode))).
One wart here is that my Anchor node now wastes space for data that never will be used -- but that is easily fixable.

Copying objects in C++/CLI and message passing in multithreading

I'm trying to transfer a command line code that I have to a more visual program with a
GUI to enable easier use. The original code was in C++, so I'm using Visual C++ that is
available in Visual Studio Express 2012, but I have problems understanding the "new"
managed C++/CLI way of handling objects. Being new to CLI and managed C++, I was wondering
if someone can explain what I am doing wrong, and why it doesn't work. Now here is a
description of the code and the problem.
The program is essentially an optimization program:
There are multiple boxes (modes) in a system, each mode, depending on its type has a
few numerical coefficients that control its behavior and the way it responds to outside
excitation.
The program asks the user to specify the number of boxes and the type of each box.
Then tries to find the numerical coefficients that minimize the difference between
the system response with those obtained experimentally.
So, the UI has means for user to open the experimental result files, specify the number
of modes, and specify the type of each mode. Then, the user can initiate the processing
function by clicking on a start button, that initiates a background worker.
Following the example given in MSDN, I created a class that performs the work:
// Worker object for the optimisation: holds the experimental data, the system
// model being fitted and the fit() routine that is run with a BackgroundWorker.
ref class curveFit
{
// Progress snapshot; fit() passes an instance to BackgroundWorker::ReportProgress.
public: ref class CurrentState{
public:
int percentage;
int iterationNo;
int stage;
bool done;
multimode systemModel;
};
public:
int modes;
int returncode;
array<double> ^expExcitations;
array<double> ^expResults;
multimode systemModel;
private:
// Wrapped in an fcndelegate by fit() and handed to the optimiser as a native
// function pointer.
void fcn(int, int, double*, double*, int*);
double totalError(std::vector<double> &);
public:
// Delegate type with the same signature as fcn().
delegate void fcndelegate(int, int, double*, double*, int*);
public:
curveFit(void);
curveFit^ fit(System::ComponentModel::BackgroundWorker^, System::ComponentModel::DoWorkEventArgs^, Options^);
};
multimode is just a container class: a list of different boxes.
// Container of box models: a managed List of genericBoxModel handles plus a
// separately maintained element count.
ref class multimode
{
private:
// The list stores handles (^), i.e. references to the model objects.
Collections::Generic::List<genericBoxModel ^>^ models;
// Number of models; kept in step with 'models' by Add(), Clear() and the copy operations.
int modes;
public:
multimode(void);
multimode(const multimode%);
int modeNo(void);
void Add(genericBoxModel^);
void Clear();
genericBoxModel^ operator[](int);
multimode% operator=(const multimode%);
double result(double);
bool isValid();
// Model coefficients -> vector of doubles.
std::vector<double> MapData();
// Vector of doubles -> model coefficients.
void MapData(std::vector<double> &);
};
// Creates an empty container: zero modes and a fresh, empty model list.
multimode::multimode(void)
{
    modes = 0;
    models = gcnew Collections::Generic::List<genericBoxModel ^>();
}
// Copy constructor: builds a new list, but fills it with the same handles that
// rhs holds. The genericBoxModel objects themselves are not duplicated, so the
// copy and rhs refer to the same model instances.
multimode::multimode(const multimode% rhs)
{
models = gcnew Collections::Generic::List<genericBoxModel ^>();
for(int ind = 0; ind < rhs.modes; ind++)
models->Add(rhs.models[ind]);
modes = rhs.modes;
}
// Number of models currently held.
int multimode::modeNo(void)
{
    const int count = modes;
    return count;
}
// Appends the given model handle and bumps the mode count.
void multimode::Add(genericBoxModel^ model)
{
    this->models->Add(model);
    modes += 1;
}
void multimode::Clear()
{
models->Clear();
modes = 0;
}
// Returns the handle stored at position 'ind' of the model list.
genericBoxModel^ multimode::operator[](int ind)
{
    genericBoxModel^ entry = models[ind];
    return entry;
}
// Assignment: empties this list and refills it with the handles held by rhs.
// As in the copy constructor, only handles are copied; afterwards both
// containers refer to the same genericBoxModel instances.
// NOTE(review): on self-assignment the Clear() below also empties rhs.models
// while rhs.modes keeps its old value, so the loop would index an empty list —
// confirm whether self-assignment can occur.
multimode% multimode::operator=(const multimode% rhs)
{
models->Clear();
for(int ind = 0; ind < rhs.modes; ind++)
models->Add(rhs.models[ind]);
modes = rhs.modes;
return *this;
}
// Sums the responses of all contained models to the same excitation.
double multimode::result(double excitation)
{
    double total = 0.0;
    int ind = 0;
    while(ind < modes)
    {
        total += models[ind]->result(excitation);
        ++ind;
    }
    return total;
}
// A container is valid when it holds at least one model and every model in it
// reports itself valid. Models after the first invalid one are not queried.
bool multimode::isValid()
{
    if(modes < 1)
        return false;
    for(int ind = 0; ind < modes; ++ind)
    {
        if(!models[ind]->isValid())
            return false;
    }
    return true;
}
// Maps the model coefficients to a vector of doubles.
// Named MapData to match the declaration in the multimode class and the call
// in curveFit::fit (systemModel.MapData()); the class declares no fullMap.
std::vector<double> multimode::MapData()
{
//Map the model coefficients to a vector of doubles
...
}
// Maps a vector of doubles back to the model coefficients.
// Named MapData to match the declaration in the multimode class and the call
// in curveFit::fit (systemModel.MapData(data)); the class declares no fullMap.
void multimode::MapData(std::vector<double> &data)
{
//Map a vector of doubles to the model coefficients
...
}
and genericBoxModel is an abstract class that all box models are based on.
The curvefit::fit function does the optimization based on the options passed to it:
// Runs the optimisation selected by opts->optimizationMethod and reports
// progress through 'worker' after every step.
// NOTE(review): iterationNo and maxIterations are used below but not declared
// in this excerpt, and no return statement is visible here.
curveFit^ curveFit::fit(System::ComponentModel::BackgroundWorker^ worker, System::ComponentModel::DoWorkEventArgs^ e, Options^ opts)
{
// Wrap fcn() in a delegate so that it can be passed to the native optimiser.
fcndelegate^ del = gcnew fcndelegate(this, &curveFit::fcn);
std::vector<double> data;
CurrentState^ state = gcnew CurrentState;
state->done = false;
state->stage = 0;
state->percentage = 0;
// multimode assignment copies model handles, so 'state' refers to the same
// model objects as systemModel.
state->systemModel = systemModel;
worker->ReportProgress(state->percentage, state);
switch(opts->optimizationMethod)
{
case 0:
while(iterationNo < maxIterations)
{
// Coefficients -> flat vector, one optimiser step, flat vector -> coefficients.
data = systemModel.MapData();
OptimizationMethod0::step(some_parameters, data, (optmethods::costfunction)Runtime::InteropServices::Marshal::GetFunctionPointerForDelegate(del).ToPointer());
systemModel.MapData(data);
iterationNo++;
// The percentage is reported as 0 on every iteration.
state->percentage = 0;
state->systemModel = systemModel;
worker->ReportProgress(state->percentage, state);
}
...
}
}
I'm passing the system model inside the state so that I can display the results of the
latest step on the screen, which doesn't work, but that is another question :-)
The start button calls the curvefit::fit function after initializing the system model:
// Start-button handler: rebuilds systemModel from scratch with freshly created
// models and initial coefficients, switches the UI into its "calculating"
// state and starts the background worker with a new curveFit object.
private: System::Void btnStart_Click(System::Object^ sender, System::EventArgs^ e) {
systemModel.Clear();
for(int mode = 0; mode < modes; mode++)
{
switch(model)
{
case 0:
// A brand-new model object is created for every mode.
systemModel.Add(gcnew model0);
systemModel[mode]->coefficients[0] = 100.0 / double(mode + 1);
...
break;
...
}
}
btnStart->Enabled = false;
stStatusText->Text = "Calculating!";
Application::UseWaitCursor = true;
// The curveFit object is handed to fitCurve_DoWork as the worker argument.
curveFit^ cf = gcnew curveFit;
fitCurve->RunWorkerAsync(cf);
}
// DoWork handler of the background worker: copies the experimental data and
// the current system model into the curveFit object received as argument, runs
// fit() and stores its return value as the worker result.
private: System::Void fitCurve_DoWork(System::Object^ sender, System::ComponentModel::DoWorkEventArgs^ e) {
System::ComponentModel::BackgroundWorker^ worker;
worker = dynamic_cast<System::ComponentModel::BackgroundWorker^>(sender);
curveFit^ cf = safe_cast<curveFit^>(e->Argument);
// The experimental data is copied into new arrays owned by cf.
cf->expExcitations = gcnew array<double>(expExcitations.Count);
expExcitations.CopyTo(cf->expExcitations);
cf->expResults = gcnew array<double>(expResults.Count);
expResults.CopyTo(cf->expResults);
// multimode assignment copies handles only: cf->systemModel and this form's
// systemModel refer to the same model objects.
cf->systemModel = systemModel;
cf->modes = modes;
e->Result = cf->fit(worker, e, options);
}
This works perfectly! But, in order to make the optimization process faster and more
successful, I wanted to use the results of previous optimizations as the initial guess
for the next run (if possible):
multimode oldmodel(systemModel);
systemModel.Clear();
for(int mode = 0; mode < modes; mode++)
{
switch(model)
{
case 0:
if(mode < oldmodel.modeNo() && oldmodel.isValid() && (oldmodel[mode]->model == 0))
systemModel.Add(oldmodel[mode]);
else
{
systemModel.Add(gcnew model0);
systemModel[mode]->coefficients[0] = 100.0 / double(mode + 1);
...
}
break;
...
Now, my problem is, after this change, it seems that the messages don't get passed
correctly: the first time the start button is clicked everything functions as it should,
but from then on, if the statement systemModel.Add(oldmodel[mode]); gets executed,
results remain the same as the initial guesses, and don't get updated after the fit
function is called.
So, why should these two lines(Add(oldmodel[mode]) and Add(gcnew model0)) give
such different results?

Resources