Dividing processes equally among users, linux kernel programming - linux

I want to equally distribute the processes on the CPU to the users.
For example, I have 4 users, user A and B have 2 processes , User B and C have 4 processes , in total there are 10 processes. All users can use 25% on CPU for these processes. I edited a certain part of the sched.c file in the Linux kernel, but there is a part where I am stuck.
What I want to do is suppose we have 4 users for this example, no matter how many processes the users have, they should all use the CPU equally. For example, let's say that user A has 2, user B has 2, user C has 3, user D has 3, and a total of 10 processes. User A and user B will use the CPU at 25% per user and 12.5% per processes. User C and D will use the CPU at 25% per user and 8.5% per processes.
The CPU should behave equally to all user processes, how can I do this?
asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
struct task_struct *prev, *next, *p;
struct list_head *tmp;
int this_cpu, c;
/* our variables */
unsigned int rnd;
unsigned int found = 0;
gid_t runGid;
unsigned int sumOfAllFlags = 0;
spin_lock_prefetch(&runqueue_lock);
BUG_ON(!current->active_mm);
need_resched_back:
prev = current;
this_cpu = prev->processor;
if (unlikely(in_interrupt())) {
printk("Scheduling in interrupt\n");
BUG();
}
release_kernel_lock(prev, this_cpu);
/*
* 'sched_data' is protected by the fact that we can run
* only one process per CPU.
*/
sched_data = & aligned_data[this_cpu].schedule_data;
spin_lock_irq(&runqueue_lock);
/* move an exhausted RR process to be last.. */
if (unlikely(prev->policy == SCHED_RR))
if (!prev->counter)
{
prev->counter = NICE_TO_TICKS(prev->nice);
move_last_runqueue(prev);
}
switch (prev->state)
{
case TASK_INTERRUPTIBLE:
if (signal_pending(prev))
{
prev->state = TASK_RUNNING;
break;
}
default:
del_from_runqueue(prev);
case TASK_RUNNING:;
}
prev->need_resched = 0;
/*
* this is the scheduler proper:
*/
repeat_schedule:
/*
* Default process to select..
*/
next = idle_task(this_cpu);
//prev->willBeChoosen = 0; //willBeChoosen of prev process = 0
if (sched_type == SCHED_DEFAULT)
{
// next = idle_task(this_cpu);
c = -1000;
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
if (can_schedule(p, this_cpu))
{
int weight = goodness(p, this_cpu, prev->active_mm);
if (weight > c)
c = weight, next = p;
}
}
/* Do we need to re-calculate counters? */
if (unlikely(!c))
{
struct task_struct *p;
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
for_each_task(p)
{
p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
goto repeat_schedule;
}
}
else if (sched_type == GTICKET)
{
current->prevJiffies = jiffies;
// sum of group flags of all processes
sumOfAllFlags = 0;
// calculate sum of flags of all processes
// -> are there any unprocessed waiting processes
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
//p->willBeChoosen = 0;
sumOfAllFlags = sumOfAllFlags + p->groupFlag;
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
// Check if all processed
// If so go to repeat schedule
// Mark all existing as unprocessed
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
if (sumOfAllFlags == 0)
{
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
p->groupFlag = 1;
}
// goto repeat_schedule;
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
if((current->prevJiffies - p->prevJiffies) > 1)
{
p->counter--;
}
else if((current->prevJiffies - p->prevJiffies) < 4)
{
p->counter--;
}
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
// Random Selection of next process
// Random Selection is between 1 and 15
/*get_random_bytes(&rnd, sizeof(unsigned int));
if(rnd < 0)
rnd = rnd*(-1);
if(maxTicket>0)
{
rnd = (rnd % maxTicket);
rnd++;
}*/
// if process's ticket is greater or equal to rnd
// next process <- that process
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
if (can_schedule(p, this_cpu))
{
if (p->groupFlag>0)
{
p->willBeChoosen = 20;
runGid = get_gid(p->user->uid);
//p->prevJiffies = jiffies;
break;
}
}
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
//choosing next process using goodness with the integrated will be chosen variable
next = idle_task(this_cpu);
c = -1000;
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
if (can_schedule(p, this_cpu))
{
int weight = goodness(p, this_cpu, prev->active_mm);
if (weight > c)
{
c = weight, next = p;
}
}
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
p->willBeChoosen = 0;
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
/* Do we need to re-calculate counters? */
if (unlikely(!c))
{
struct task_struct *p;
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
for_each_task(p)
{
p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
goto repeat_schedule;
}
// Assign all processes with the same group id as processed
// So that a group based fair scheduler can be achived
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
list_for_each(tmp, &runqueue_head)
{
p = list_entry(tmp, struct task_struct, run_list);
if (get_gid(p->user->uid) == runGid)
p->groupFlag = 0;
}
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
}
/*
* from this point on nothing can prevent us from
* switching to the next task, save this fact in
* sched_data.
*/
sched_data->curr = next;
task_set_cpu(next, this_cpu);
spin_unlock_irq(&runqueue_lock);
if (unlikely(prev == next))
{
/* We won't go through the normal tail, so do this by hand */
prev->policy &= ~SCHED_YIELD;
goto same_process;
}

Related

Why is my multithreaded C program not working on macOS, but completely fine on Linux?

I have written a multithreaded program in C using pthreads to solve the N-queens problem. It uses the producer consumer programming model. One producer who creates all possible combinations and consumers who evaluate if the combination is valid. I use a shared buffer that can hold one combination at a time.
Once I have 2+ consumers the program starts to behave strange. I get more consumptions than productions. 1.5:1 ratio approx (should be 1:1). The interesting part is that this only happens on my MacBook and is nowhere to be seen when I run it on the Linux machine (Red Hat Enterprise Linux Workstation release 6.10 (Santiago)) I have access to over SSH.
I'm quite sure that my implementation is correct with locks and conditional variables too, the program runs for 10+ seconds which should reveal if there are any mistakes with the synchronization.
I compile with GCC (Apple clang version 12.0.5) via xcode developer tools on my MacBook Pro (2020, x86_64) and GCC on Linux too, but version 4.4.7 20120313 (Red Hat 4.4.7-23).
compile: gcc -o 8q 8q.c
run: ./8q <producers> <N>, NxN chess board, N queens to place
parameters: ./8q 2 4 Enough to highlight the problem (should yield 2 solutions, but every other run yields 3+ solutions, i.e duplicate solutions exist
note: print(printouts) Visualizes the valid solutions (duplicates shown)
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <assert.h>
typedef struct stack_buf {
int positions[8];
int top;
} stack_buf;
typedef struct global_buf {
int positions[8];
volatile int buf_empty;
volatile long done;
} global_buf;
typedef struct print_buf {
int qpositions[100][8];
int top;
} print_buf;
stack_buf queen_comb = { {0}, 0 };
global_buf global = { {0}, 1, 0 };
print_buf printouts = { {{0}}, -1 };
int N; //NxN board and N queens to place
clock_t start, stop, diff;
pthread_mutex_t buffer_mutex, print_mutex;
pthread_cond_t empty, filled;
/* ##########################################################################################
################################## VALIDATION FUNCTIONS ##################################
########################################################################################## */
/* Validate that no queens are placed on the same row */
int valid_rows(int qpositions[]) {
int rows[N];
memset(rows, 0, N * sizeof(int));
int row;
for (int i = 0; i < N; i++) {
row = qpositions[i] / N;
if (rows[row] == 0) rows[row] = 1;
else return 0;
}
return 1;
}
/* Validate that no queens are placed in the same column */
int valid_columns(int qpositions[]) {
int columns[N];
memset(columns, 0, N*sizeof(int));
int column;
for (int i = 0; i < N; i++) {
column = qpositions[i] % N;
if (columns[column] == 0) columns[column] = 1;
else return 0;
}
return 1;
}
/* Validate that left and right diagonals aren't used by another queen */
int valid_diagonals(int qpositions[]) {
int left_bottom_diagonals[N];
int right_bottom_diagonals[N];
int row, col, temp_col, temp_row, fill_value, index;
for (int queen = 0; queen < N; queen++) {
row = qpositions[queen] / N;
col = qpositions[queen] % N;
/* position --> left down diagonal endpoint (index) */
fill_value = col < row ? col : row; //min of col and row
temp_row = row - fill_value;
temp_col = col - fill_value;
index = temp_row * N + temp_col; // position
for (int i = 0; i < queen; i++) { // check if interference occurs
if (left_bottom_diagonals[i] == index) return 0;
}
left_bottom_diagonals[queen] = index; // no interference
/* position --> right down diagonal endpoint (index) */
fill_value = (N-1) - col < row ? N - col - 1 : row; // closest to bottom or right wall
temp_row = row - fill_value;
temp_col = col + fill_value;
index = temp_row * N + temp_col; // position
for (int i = 0; i < queen; i++) { // check if interference occurs
if (right_bottom_diagonals[i] == index) return 0;
}
right_bottom_diagonals[queen] = index; // no interference
};
return 1;
}
/* ##########################################################################################
#################################### HELPER FUNCTIONS ####################################
########################################################################################## */
/* print the collected solutions */
void print(print_buf printouts) {
static int solution_number = 1;
int placement;
for (int sol = 0; sol <= printouts.top; sol++) { // number of solutions
printf("Solution %d: [ ", solution_number++);
for (int pos = 0; pos < N; pos++) {
printf("%d ", printouts.qpositions[sol][pos]+1);
}
printf("]\n");
printf("Placement:\n");
for (int i = 1; i <= N; i++) { // rows
printf("[ ");
placement = printouts.qpositions[sol][N-i];
for (int j = (N-i)*N; j < (N-i)*N+N; j++) { // physical position
if (j == placement) {
printf(" Q ");
} else printf("%2d ", j+1);
}
printf("]\n");
}
printf("\n");
}
}
/* push value to top of list instance */
void push(stack_buf *instance, int value) {
assert(instance->top <= 8 || instance->top >= 0);
instance->positions[instance->top++] = value;
}
/* pop top element of list instance */
void pop(stack_buf *instance) {
assert(instance->top > 0);
instance->positions[--instance->top] = -1;
}
/* ##########################################################################################
#################################### THREAD FUNCTIONS ####################################
########################################################################################## */
static int consumptions = 0;
/* entry point for each worker (consumer)
workers will check each queen's row, column and
diagonal to evaluate satisfactory placements */
void *eval_positioning(void *id) {
long thr_id = (long)id;
int qpositions[N];
while (!global.done) {
pthread_mutex_lock(&buffer_mutex);
while (global.buf_empty == 1) {
if (global.done) break; // consumers who didn't get last production
pthread_cond_wait(&filled, &buffer_mutex);
}
if (global.done) break;
consumptions++;
memcpy(qpositions, global.positions, N * sizeof(int)); // retrieve queen combination
global.buf_empty = 1;
pthread_cond_signal(&empty);
pthread_mutex_unlock(&buffer_mutex);
if (valid_rows(qpositions) && valid_columns(qpositions) && valid_diagonals(qpositions)) {
/* save for printing later */
pthread_mutex_lock(&print_mutex);
memcpy(printouts.qpositions[++printouts.top], qpositions, N * sizeof(int));
pthread_mutex_unlock(&print_mutex);
}
}
return NULL;
}
static int productions = 0;
/* recursively generate all possible queen_combs */
void rec_positions(int pos, int queens) {
if (queens == 0) { // base case
pthread_mutex_lock(&buffer_mutex);
while (global.buf_empty == 0) {
pthread_cond_wait(&empty, &buffer_mutex);
}
productions++;
memcpy(global.positions, queen_comb.positions, N * sizeof(int));
global.buf_empty = 0;
pthread_mutex_unlock(&buffer_mutex);
pthread_cond_broadcast(&filled); // wake one worker
return;
}
for (int i = pos; i <= N*N - queens; i++) {
push(&queen_comb, i); // physical chess box
rec_positions(i+1, queens-1);
pop(&queen_comb);
}
}
/* binomial coefficient | without order, without replacement
8 queens on 8x8 board: 4'426'165'368 queen combinations */
void *generate_positions(void *arg) {
rec_positions(0, N);
return (void*)1;
}
/* ##########################################################################################
########################################## MAIN ##########################################
########################################################################################## */
/* main procedure of the program */
int main(int argc, char *argv[]) {
if (argc < 3) {
printf("usage: ./8q <workers> <board width/height>\n");
exit(1);
}
int workers = atoi(argv[1]);
N = atoi(argv[2]);
pthread_t thr[workers];
pthread_t producer;
// int sol1[] = {5,8,20,25,39,42,54,59};
// int sol2[] = {2,12,17,31,32,46,51,61};
printf("\n");
start = (float)clock()/CLOCKS_PER_SEC;
pthread_create(&producer, NULL, generate_positions, NULL);
for (long i = 0; i < workers; i++) {
pthread_create(&thr[i], NULL, eval_positioning, (void*)i+1);
}
pthread_join(producer, (void*)&global.done);
pthread_cond_broadcast(&filled);
for (int i = 0; i < workers; i++) {
pthread_join(thr[i], NULL);
}
stop = clock();
diff = (double)(stop - start) / CLOCKS_PER_SEC;
/* go through all valid solutions and print */
print(printouts);
printf("board: %dx%d, workers: %d (+1), exec time: %ld, solutions: %d\n", N, N, workers, diff, printouts.top+1);
printf("productions: %d\nconsumptions: %d\n", productions, consumptions);
return 0;
}
EDIT: I have reworked sync around prod_done and made a new shared variable last_done. When producer is done, it will set prod_done and the thread currently active will either return (last element already validated) or capture the last element at set last_done to inform the other consumers.
Despite the fact that I solved the data race in my book, I still have problems with the shared combination. I have really put time looking into the synchronization but I always get back to the feeling that it should work, but it clearly doesn't when I run it.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <assert.h>
typedef struct stack_buf {
int positions[8];
int top;
} stack_buf;
typedef struct global_buf {
int positions[8];
volatile int buf_empty;
volatile long prod_done;
volatile int last_done;
} global_buf;
typedef struct print_buf {
int qpositions[100][8];
int top;
} print_buf;
stack_buf queen_comb = { {0}, 0 };
global_buf global = { {0}, 1, 0, 0 };
print_buf printouts = { {{0}}, -1 };
int N; //NxN board and N queens to place
long productions, consumptions = 0;
clock_t start, stop, diff;
pthread_mutex_t buffer_mutex, print_mutex;
pthread_cond_t empty, filled;
/* ##########################################################################################
################################## VALIDATION FUNCTIONS ##################################
########################################################################################## */
/* Validate that no queens are placed on the same row */
int valid_rows(int qpositions[]) {
int rows[N];
memset(rows, 0, N*sizeof(int));
int row;
for (int i = 0; i < N; i++) {
row = qpositions[i] / N;
if (rows[row] == 0) rows[row] = 1;
else return 0;
}
return 1;
}
/* Validate that no queens are placed in the same column */
int valid_columns(int qpositions[]) {
int columns[N];
memset(columns, 0, N*sizeof(int));
int column;
for (int i = 0; i < N; i++) {
column = qpositions[i] % N;
if (columns[column] == 0) columns[column] = 1;
else return 0;
}
return 1;
}
/* Validate that left and right diagonals aren't used by another queen */
int valid_diagonals(int qpositions[]) {
int left_bottom_diagonals[N];
int right_bottom_diagonals[N];
int row, col, temp_col, temp_row, fill_value, index;
for (int queen = 0; queen < N; queen++) {
row = qpositions[queen] / N;
col = qpositions[queen] % N;
/* position --> left down diagonal endpoint (index) */
fill_value = col < row ? col : row; // closest to bottom or left wall
temp_row = row - fill_value;
temp_col = col - fill_value;
index = temp_row * N + temp_col; // board position
for (int i = 0; i < queen; i++) { // check if interference occurs
if (left_bottom_diagonals[i] == index) return 0;
}
left_bottom_diagonals[queen] = index; // no interference
/* position --> right down diagonal endpoint (index) */
fill_value = (N-1) - col < row ? N - col - 1 : row; // closest to bottom or right wall
temp_row = row - fill_value;
temp_col = col + fill_value;
index = temp_row * N + temp_col; // board position
for (int i = 0; i < queen; i++) { // check if interference occurs
if (right_bottom_diagonals[i] == index) return 0;
}
right_bottom_diagonals[queen] = index; // no interference
}
return 1;
}
/* ##########################################################################################
#################################### HELPER FUNCTIONS ####################################
########################################################################################## */
/* print the collected solutions */
void print(print_buf printouts) {
static int solution_number = 1;
int placement;
for (int sol = 0; sol <= printouts.top; sol++) { // number of solutions
printf("Solution %d: [ ", solution_number++);
for (int pos = 0; pos < N; pos++) {
printf("%d ", printouts.qpositions[sol][pos]+1);
}
printf("]\n");
printf("Placement:\n");
for (int i = 1; i <= N; i++) { // rows
printf("[ ");
placement = printouts.qpositions[sol][N-i];
for (int j = (N-i)*N; j < (N-i)*N+N; j++) { // physical position
if (j == placement) {
printf(" Q ");
} else printf("%2d ", j+1);
}
printf("]\n");
}
printf("\n");
}
}
/* ##########################################################################################
#################################### THREAD FUNCTIONS ####################################
########################################################################################## */
/* entry point for each worker (consumer)
workers will check each queen's row, column and
diagonal to evaluate satisfactory placements */
void *eval_positioning(void *id) {
long thr_id = (long)id;
int qpositions[N];
pthread_mutex_lock(&buffer_mutex);
while (!global.last_done) {
while (global.buf_empty == 1) {
pthread_cond_wait(&filled, &buffer_mutex);
if (global.last_done) { // last_done ==> prod_done, so thread returns
pthread_mutex_unlock(&buffer_mutex);
return NULL;
}
if (global.prod_done) { // prod done, current thread takes last elem produced
global.last_done = 1;
break;
}
}
if (!global.last_done) consumptions++;
memcpy(qpositions, global.positions, N*sizeof(int)); // retrieve queen combination
global.buf_empty = 1;
pthread_mutex_unlock(&buffer_mutex);
pthread_cond_signal(&empty);
if (valid_rows(qpositions) && valid_columns(qpositions) && valid_diagonals(qpositions)) {
/* save for printing later */
pthread_mutex_lock(&print_mutex);
memcpy(printouts.qpositions[++printouts.top], qpositions, N*sizeof(int));
pthread_mutex_unlock(&print_mutex);
}
pthread_mutex_lock(&buffer_mutex);
}
pthread_mutex_unlock(&buffer_mutex);
return NULL;
}
/* recursively generate all possible queen_combs */
void rec_positions(int pos, int queens) {
if (queens == 0) { // base case
pthread_mutex_lock(&buffer_mutex);
while (global.buf_empty == 0) {
pthread_cond_wait(&empty, &buffer_mutex);
}
productions++;
memcpy(global.positions, queen_comb.positions, N*sizeof(int));
global.buf_empty = 0;
pthread_mutex_unlock(&buffer_mutex);
pthread_cond_signal(&filled);
return;
}
for (int i = pos; i <= N*N - queens; i++) {
queen_comb.positions[queen_comb.top++] = i;
rec_positions(i+1, queens-1);
queen_comb.top--;
}
}
/* binomial coefficient | without order, without replacement
8 queens on 8x8 board: 4'426'165'368 queen combinations */
void *generate_positions(void *arg) {
rec_positions(0, N);
return (void*)1;
}
/* ##########################################################################################
########################################## MAIN ##########################################
########################################################################################## */
/* main procedure of the program */
int main(int argc, char *argv[]) {
if (argc < 3) {
printf("usage: ./8q <workers> <board width/height>\n");
exit(1);
}
int workers = atoi(argv[1]);
N = atoi(argv[2]);
pthread_t thr[workers];
pthread_t producer;
printf("\n");
start = (float)clock()/CLOCKS_PER_SEC;
pthread_create(&producer, NULL, generate_positions, NULL);
for (long i = 0; i < workers; i++) {
pthread_create(&thr[i], NULL, eval_positioning, (void*)i+1);
}
pthread_join(producer, (void*)&global.prod_done);
pthread_cond_broadcast(&filled);
for (int i = 0; i < workers; i++) {
printf("thread #%d done\n", i+1);
pthread_join(thr[i], NULL);
pthread_cond_broadcast(&filled);
}
stop = clock();
diff = (double)(stop - start) / CLOCKS_PER_SEC;
/* go through all valid solutions and print */
print(printouts);
printf("board: %dx%d, workers: %d (+1), exec time: %ld, solutions: %d\n", N, N, workers, diff, printouts.top+1);
printf("productions: %ld\nconsumptions: %ld\n", productions, consumptions);
return 0;
}
I'm quite sure that my implementation is correct with locks and conditional variables
That is a bold statement, and it's provably false. Your program hangs on Linux when run with clang -g q.c -o 8q && ./8q 2 4.
When I look at the state of the program, I see one thread here:
#4 __pthread_cond_wait (cond=0x404da8 <filled>, mutex=0x404d80 <buffer_mutex>) at pthread_cond_wait.c:619
#5 0x000000000040196b in eval_positioning (id=0x1) at q.c:163
#6 0x00007ffff7f8cd80 in start_thread (arg=0x7ffff75b6640) at pthread_create.c:481
#7 0x00007ffff7eb7b6f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
and the main thread trying to join the above thread. All other threads have exited, so there is nothing to signal the condition.
One immediate problem I see is this:
void *eval_positioning(void *id) {
long thr_id = (long)id;
int qpositions[N];
while (!global.done) {
...
int main(int argc, char *argv[]) {
...
pthread_join(producer, (void*)&global.done);
If the producer thread finishes before the eval_positioning starts, then eval_positioning will do nothing at all.
You should set global.done when all positions have been evaluated, not when the producer thread is done.
Another obvious problem is that global.done is accessed without any mutexes held, yielding a data race (undefined behavior -- anything can happen).

GDB: Displaying incorrect values in struct

I'm trying to implement the malloc function and it looks like that gdb is giving me some weird values from this struct:
struct MemoryBlock {
struct MemoryBlock * next;
size_t size;
signed char is_free;
} startBlock;
And that's the function where I'm debugging it with gdb:
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block; // HERE'S the breakpoint
}
So here's the issue (I'm at the breakpoint return ret_block):
If I want to see what kind of values are inside of the ret_block pointer, than I'm getting this:
(gdb) p (struct MemoryBlock) ret_block
$26 = {next = 0x555555559000, size = 140737488347680, is_free = -53 '\313'}
size is fine, because if I convert it into the decimal system than I'm getting 3 as expected. (the argument size from the function is currently 3)
But I'm surprised that next and is_free aren't 0 since the last three lines should set both to 0.
So I looked up what is in the memory:
As you can see each value is correctly stored in my heap. But why am I getting these values if I do p (struct MemoryBlock) ret_block?
If you need the whole code
#include <unistd.h>
#include <stdio.h>
/* ============
* Structs
* ============ */
struct MemoryBlock {
struct MemoryBlock * next;
size_t size;
signed char is_free;
} startBlock;
/* ==============
* Functions
* ============== */
struct MemoryBlock * create_new_block(size_t size);
void * malloc(size_t size);
/* ==================
* Main Programm
* ================== */
int main()
{
char * buffer;
char * b2;
unsigned short index;
// The start of my heap :D
startBlock.is_free = 0;
startBlock.size = 0;
buffer = malloc(3);
b2 = malloc(3);
// ----- ERROR -----
if (buffer == NULL || b2 == NULL)
return 1;
// ----- ERROR -----
// fill the buffers with random stuff
for (index=0; index<2; index++) {
buffer[index] = 'a';
b2[index] = 'b';
}
buffer[index] = '\0';
b2[index] = '\0';
puts(buffer);
puts(b2);
return 0;
}
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block;
}
void * malloc (size_t size)
{
struct MemoryBlock * ret_block;
struct MemoryBlock * prev_block;
prev_block = &startBlock;
ret_block = startBlock.next;
// go through the linked lists and look if you can find a suitable block
while (ret_block != NULL && (ret_block->size < size || !ret_block->is_free))
{
prev_block = ret_block;
ret_block = ret_block->next;
}
// couldn't find a suitable block => create a new one
if (ret_block == NULL) {
ret_block = create_new_block(size);
if (ret_block == NULL)
return NULL;
}
prev_block->next = ret_block;
ret_block->is_free = 0;
return ret_block;
}
Ok, one of my friends told me my issue... The casting was wrong! Here's the solution:
(gdb) p * ret_block
$57 = {next = 0x0, size = 3, is_free = 0 '\000'}
A star was enough to get the desired output...

operating issues qustions - threads, processes etc. for the above code:

int S1 = 0;
int S2 = 0;
int x = 0;
int run = 1;
void Producer(void) {
while(run) {
while (S1 == S2);
x++;
__sync_synchronize();
S1 = S2;
__sync_synchronize();
}
}
void Consumer(void) {
while(run) {
while (S1 != S2);
x--;
__sync_synchronize();
S1 = !S2;
__sync_synchronize();
}
}
void* Worker(void *func) {
long func_id = (long)func & 0x1;
printf("%s %d\n",__func__, (int)func_id);
switch (func_id) {
case 0:
Producer();
break;
case 1:
Consumer();
break;
}
return NULL;
}
int main(int argc, char *argv[]) {
pthread_t t[argc];
pthread_attr_t at;
cpu_set_t cpuset;
int threads;
int i;
#define MAX_PROCESSORS 4 // Minimal processors is 2.
threads = argc > 1 ? (( atoi(argv[1]) < 4) ? atoi(argv[1]): MAX_PROCESSORS ) : 1;
for (i = 0;i < threads; i++){
CPU_ZERO(&cpuset);
CPU_SET(i, &cpuset);
pthread_attr_init(&at);
(&at, sizeof(cpuset), &cpuset);
if (pthread_create(&t[i], &at , Worker, (void *) (long)i) ) {
perror("pthread create 1 error\n"); }
}
do {
sleep(1);
} while(x < 0);
run = 0;
void *val;
for(i = 0; i < threads; i++)
pthread_join(t[i], &val);
printf("x=%d\n", x);
}
The questions:
In ex1.c (6.1), which of the following properties achieved:
(1) Mutual exclusion but not progress
(2) Progress but not mutual exclusion
(3) Neither mutual exclusion nor progress
(4) Both mutual exclusion and progress
Please explain?
1.2
To which arguments (in 6.1) is correct and which does not:
(1) always exits. when threads = 2 or threads <= 0
(2) always hangs. threads = 1 or thread > 2
Any help would be much appreeciated

Pthread Scheduling policy and priority

I have four threads which are waiting on a condition variable and fifth thread posts condition variable when all four threads are waiting. When I set thread priority to maximum that is 99, threads switch takes a lot of time which is far from acceptable. Can anybody please take a look and tell what's happening ?
#define N_WORK_THREADS 4
pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t condition_var = PTHREAD_COND_INITIALIZER;
void *functionCount1(void * arg);
void *functionCount2(void * arg);
int count = 0;
int valid = 0;
int thread_personal[N_WORK_THREADS];
static int display_thread_sched_attr(int id)
{
int policy, s;
struct sched_param param;
s = pthread_getschedparam(pthread_self(), &policy, &param);
if (s != 0) { printf("pthread_getschedparam"); return 1; }
printf("Thread Id=%d policy=%s, priority=%d\n",id,
(policy == SCHED_FIFO) ? "SCHED_FIFO" : (policy == SCHED_RR) ? "SCHED_RR" : (policy == SCHED_OTHER) ? "SCHED_OTHER" : "???",
param.sched_priority);
return 0;
}
int main(void)
{
pthread_t thread_work[N_WORK_THREADS];
pthread_t thread;
int i,s;
pthread_attr_t attr;
struct sched_param param;
s = pthread_attr_init(&attr);
if (s != 0) { printf("pthread_attr_init"); return 1; }
s = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
if (s != 0) { printf("pthread_attr_setinheritsched"); return 1; }
s = pthread_attr_setschedpolicy(&attr, SCHED_RR);
if (s != 0) { printf("pthread_attr_setschedpolicy"); return 1; }
param.sched_priority = 99;
s = pthread_attr_setschedparam(&attr, &param);
if (s != 0) { printf("pthread_attr_setschedparam"); return 1; }
for (i=0; i<N_WORK_THREADS; i++) { thread_personal[i] = 0; }
for (i=0; i<N_WORK_THREADS; i++) { pthread_create( &thread_work[i], &attr, &functionCount1, (void *)i); }
param.sched_priority = 99;
s = pthread_attr_setschedparam(&attr, &param);
if (s != 0) { printf("pthread_attr_setschedparam"); return 1; }
pthread_create( &thread, &attr, &functionCount2, (void *)N_WORK_THREADS);
for (i=0; i<N_WORK_THREADS; i++) { pthread_join( thread_work[i], NULL); }
pthread_join( thread, NULL);
for (i=0; i<N_WORK_THREADS; i++) { printf("Thread Id=%d Mutex USed=%d\n",i,thread_personal[i]); }
exit(EXIT_SUCCESS);
}
void *functionCount1(void * arg)
{
int i;
int id = (int) arg;
display_thread_sched_attr(id);
for(i=0; i<10; i++)
{
pthread_mutex_lock( &count_mutex );
thread_personal[id] += 1;
while (((count>>id) & 0x1) == 0)
{
pthread_cond_wait( &condition_var, &count_mutex );
}
count = count^ (1<<id);
printf("Thread Id %d: Valid = %d\n",id,valid);
pthread_mutex_unlock( &count_mutex );
}
return NULL;
}
void *functionCount2(void * arg)
{
int check;
int id = (int) arg;
display_thread_sched_attr(id);
check =0;
while (check < 10)
{
pthread_mutex_lock( &count_mutex );
if (count == 0)
{
pthread_cond_broadcast ( &condition_var );
count =0xF;
printf("Thread Id %d: Counter = %d\n",id,check);
valid = check++;
}
pthread_mutex_unlock( &count_mutex );
}
return NULL;
}
I'm unable to test your program with the scheduling policy code enabled because the program simply doesn't work when that's in there (as I mention in a comment: Linux 3.16.0 x86_64 with gcc 4.8.4).
But I'm guessing that your problem might be due to the loop in functionCount2():
while (check < 10)
{
pthread_mutex_lock( &count_mutex );
if (count == 0)
{
pthread_cond_broadcast ( &condition_var );
count =0xF;
printf("Thread Id %d: Counter = %d\n",id,check);
valid = check++;
}
pthread_mutex_unlock( &count_mutex );
}
In general, acquisition of mutex objects in pthreads is not guaranteed to be fair or FIFO (though to be honest, I'm not sure how thread scheduling policies might affect it). What I believe is happening is that this loop releases count_mutex then immediately re-acquires it even though other threads are blocked waiting to claim the mutex. And with the scheduling policy in place, this may occur until the thread uses its quantum.

NUMA Memory Page Migration Overhead

I have to find the overhead associated with NUMA memory page migration under Linux.
Can you please tell me which tools I can use?
If possible could you show an example.
If you want to understand whether your system is doing excessive remote node memory accesses and you're using intel CPUs, Intel's PMU has a utility called vtbwrun to report the QPI/uncore activity.
If you want to see how long it takes to execute a page migration, you can measure the duration of calls to numa_move_pages (provided by the numactl package).
Here's an example:
/*
* Test program to test the moving of a processes pages.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter#sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include "../numa.h"
#include <unistd.h>
#include <errno.h>
unsigned int pagesize;
unsigned int page_count = 32;
char *page_base;
char *pages;
void **addr;
int *status;
int *nodes;
int errors;
int nr_nodes;
struct bitmask *old_nodes;
struct bitmask *new_nodes;
int main(int argc, char **argv)
{
int i, rc;
pagesize = getpagesize();
nr_nodes = numa_max_node()+1;
old_nodes = numa_bitmask_alloc(nr_nodes);
new_nodes = numa_bitmask_alloc(nr_nodes);
numa_bitmask_setbit(old_nodes, 1);
numa_bitmask_setbit(new_nodes, 0);
if (nr_nodes < 2) {
printf("A minimum of 2 nodes is required for this test.\n");
exit(1);
}
setbuf(stdout, NULL);
printf("migrate_pages() test ......\n");
if (argc > 1)
sscanf(argv[1], "%d", &page_count);
page_base = malloc((pagesize + 1) * page_count);
addr = malloc(sizeof(char *) * page_count);
status = malloc(sizeof(int *) * page_count);
nodes = malloc(sizeof(int *) * page_count);
if (!page_base || !addr || !status || !nodes) {
printf("Unable to allocate memory\n");
exit(1);
}
pages = (void *) ((((long)page_base) & ~((long)(pagesize - 1))) + pagesize);
for (i = 0; i < page_count; i++) {
if (i != 2)
/* We leave page 2 unallocated */
pages[ i * pagesize ] = (char) i;
addr[i] = pages + i * pagesize;
nodes[i] = 1;
status[i] = -123;
}
/* Move to starting node */
rc = numa_move_pages(0, page_count, addr, nodes, status, 0);
if (rc < 0 && errno != ENOENT) {
perror("move_pages");
exit(1);
}
/* Verify correct startup locations */
printf("Page location at the beginning of the test\n");
printf("------------------------------------------\n");
numa_move_pages(0, page_count, addr, NULL, status, 0);
for (i = 0; i < page_count; i++) {
printf("Page %d vaddr=%p node=%d\n", i, pages + i * pagesize, status[i]);
if (i != 2 && status[i] != 1) {
printf("Bad page state before migrate_pages. Page %d status %d\n",i, status[i]);
exit(1);
}
}
/* Move to node zero */
numa_move_pages(0, page_count, addr, nodes, status, 0);
printf("\nMigrating the current processes pages ...\n");
rc = numa_migrate_pages(0, old_nodes, new_nodes);
if (rc < 0) {
perror("numa_migrate_pages failed");
errors++;
}
/* Get page state after migration */
numa_move_pages(0, page_count, addr, NULL, status, 0);
for (i = 0; i < page_count; i++) {
printf("Page %d vaddr=%lx node=%d\n", i,
(unsigned long)(pages + i * pagesize), status[i]);
if (i != 2) {
if (pages[ i* pagesize ] != (char) i) {
printf("*** Page contents corrupted.\n");
errors++;
} else if (status[i]) {
printf("*** Page on the wrong node\n");
errors++;
}
}
}
if (!errors)
printf("Test successful.\n");
else
printf("%d errors.\n", errors);
return errors > 0 ? 1 : 0;
}

Resources