C++ Threaded Template Vector Quicksort - multithreading

Threaded quick sort method:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "MD5.h"
#include <thread>
using namespace std;
template<typename T>
void quickSort(vector<T> &arr, int left, int right) {
int i = left, j = right; //Make local copys to modify
T tmp; //Termorary variable to use for swaping.
T pivot = arr[(left + right) / 2]; //Find the centerpoint. if 0.5 truncate.
while (i <= j) {
while (arr[i] < pivot) //is i < pivot?
i++;
while (arr[j] > pivot) //Is j > pivot?
j--;
if (i <= j) { //Swap
tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
i++;
j--;
}
};
thread left_t; //Left thread
thread right_t; //Right thread
if (left < j)
left_t = thread(quickSort<T>, ref(arr), left, j);
if (i < right)
right_t = thread(quickSort<T>, ref(arr), i, right);
if (left < j)
left_t.join();
if (left < j)
right_t.join();
}
int main()
{
vector<int> table;
for (int i = 0; i < 100; i++)
{
table.push_back(rand() % 100);
}
cout << "Before" << endl;
for each(int val in table)
{
cout << val << endl;
}
quickSort(table, 0, 99);
cout << "After" << endl;
for each(int val in table)
{
cout << val << endl;
}
char temp = cin.get();
return 0;
}
Above program lags like mad hell and Spams "abort()" has been called.
Im thinking it has something to do with vectors and it Having threading issues
Iv seen the Question asked by Daniel Makardich, His Utilizes a Vector int While mine uses Vector T

You don't have any problem with quick sort, but with passing a templated function to a thread. There is no function quickSort. You need to explicitly give type, to instantiate the function template:
#include <thread>
#include <iostream>
template<typename T>
void f(T a) { std::cout << a << '\n'; }
int main () {
std::thread t;
int a;
std::string b("b");
t = std::thread(f, a); // Won't work
t = std::thread(f<int>, a);
t.join();
t = std::thread(f<decltype(b)>, b); // a bit fancier, more dynamic way
t.join();
return 0;
}
I suspect in your case this should do:
left_t = thread(quickSort<T>, ref(arr), left, j);
And similar for right_t. Also, you have mistake there trying to use operator()() instead of constructing an object. That is why the error is different.
Can't verify though, cause there's no minimal verifiable example =/
I don't know if it's possible to make compiler to use automatic type deduction for f passed as a param, if anyone knows that would probably make it a better answer.

Problem was with thread joins and what #luk32 said
Needed to convert the threads to pointers to threads.
thread* left_t = nullptr; //Left thread
thread* right_t = nullptr; //Right thread
if (left < j)
left_t = new thread(quickSort<T>, ref(arr), left, j);
if (i < right)
right_t = new thread(quickSort<T>, ref(arr), i, right);
if (left_t)
{
left_t->join();
delete left_t;
}
if (right_t)
{
right_t->join();
delete right_t;
}
Seems like if you create a default constructed thread object. But don't use it, it still wants to be joined. and if you do join it, it will complain.

Related

Rstudio crashes with Rcpp and OpenMP function

This is a follow up question to dqrng with Rcpp for drawing from a normal and a binomial distribution. I tried to implement the answer but instead of drawing from a single distribution I'm drawing from 3. This is the code that I wrote:
// [[Rcpp::depends(dqrng, BH, RcppArmadillo)]]
#include <RcppArmadillo.h>
#include <boost/random/binomial_distribution.hpp>
#include <xoshiro.h>
#include <dqrng_distribution.h>
// [[Rcpp::plugins(openmp)]]
#include <omp.h>
// [[Rcpp::plugins(cpp11)]]
// [[Rcpp::export]]
arma::mat parallel_random_matrix(int n, int m, int ncores, double p=0.5) {
dqrng::xoshiro256plus rng(42);
arma::mat out(n*m,3);
// ok to use rng here
#pragma omp parallel num_threads(ncores)
{
dqrng::xoshiro256plus lrng(rng); // make thread local copy of rng
lrng.jump(omp_get_thread_num() + 1); // advance rng by 1 ... ncores jumps
int iter = 0;
#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
iter = i * n + j;
// p can be a function of i and j
boost::random::binomial_distribution<int> dist_binomial(1,p);
auto gen_bernoulli = std::bind(dist_binomial, std::ref(lrng));
boost::random::normal_distribution<int> dist_normal1(2.0,1.0);
auto gen_normal1 = std::bind(dist_normal1, std::ref(lrng));
boost::random::normal_distribution<int> dist_normal2(4.0,3.0);
auto gen_normal2 = std::bind(dist_normal2, std::ref(lrng));
out(iter,0) = gen_bernoulli();
out(iter,1) = gen_normal1();
out(iter,2) = gen_normal2();
}
}
}
// ok to use rng here
return out;
}
/*** R
parallel_random_matrix(5, 5, 4, 0.75)
*/
When I try to run it Rstudio crashes. However, when I change the code like follows it does work:
// [[Rcpp::depends(dqrng, BH, RcppArmadillo)]]
#include <RcppArmadillo.h>
#include <boost/random/binomial_distribution.hpp>
#include <xoshiro.h>
#include <dqrng_distribution.h>
// [[Rcpp::plugins(openmp)]]
#include <omp.h>
// [[Rcpp::plugins(cpp11)]]
// [[Rcpp::export]]
arma::mat parallel_random_matrix(int n, int m, int ncores, double p=0.5) {
dqrng::xoshiro256plus rng(42);
arma::mat out(n*m,3);
// ok to use rng here
#pragma omp parallel num_threads(ncores)
{
dqrng::xoshiro256plus lrng(rng); // make thread local copy of rng
lrng.jump(omp_get_thread_num() + 1); // advance rng by 1 ... ncores jumps
int iter = 0;
#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
iter = i * n + j;
// p can be a function of i and j
boost::random::binomial_distribution<int> dist_binomial(1,p);
auto gen_bernoulli = std::bind(dist_binomial, std::ref(lrng));
boost::random::normal_distribution<int> dist_normal1(2.0,1.0);
auto gen_normal1 = std::bind(dist_normal1, std::ref(lrng));
boost::random::normal_distribution<int> dist_normal2(4.0,3.0);
auto gen_normal2 = std::bind(dist_normal2, std::ref(lrng));
out(iter,0) = gen_bernoulli();
out(iter,1) = 2.0;//gen_normal1();
out(iter,2) = 3.0;//gen_normal2();
}
}
}
// ok to use rng here
return out;
}
/*** R
parallel_random_matrix(5, 5, 4, 0.75)
*/
What am I doing wrong?
Here lies the problem:
boost::random::normal_distribution<int> dist_normal1(2.0,1.0);
^^^
This distribution is meant for real types, not integral types, c.f. https://www.boost.org/doc/libs/1_69_0/doc/html/boost/random/normal_distribution.html. Correct would be
boost::random::normal_distribution<double> dist_normal1(2.0,1.0);

Why is this triggering a breakpoint?

I have looked extensively for the problem in this code, but I can't seem to figure out what tragic error I made and why it is triggering a breakpoint.
(After 3 or 4 inputs, it triggers and I don't know why it doesn't trigger at the start or what is causing it)
#include <conio.h> // For function getch()
#include <cstdlib> // For several general-purpose functions
#include <fstream> // For file handling
#include <iomanip> // For formatted output
#include <iostream> // For cin, cout, and system
#include <string> // For string data type
using namespace std; // So "std::cout" may be abbreviated to "cout", for example.
string convertDecToBin(int dec)
{
int *arrayHex, arraySize = 0;
arrayHex = new int[];
string s = " ";
int r = dec;
for (int i = 0; r != 0; i++)
{
arrayHex[i] = r % 2;
r = r / 2;
arraySize++;
}
for (int j = 0; j < arraySize; j++)
{
s = s + to_string(arrayHex[arraySize - 1 - j]);
}
delete[] arrayHex;
return s;
}
string convertDecToOct(int dec)
{
int *arrayHex, arraySize = 0;
arrayHex = new int[];
string s = " ";
int r = dec;
for (int i = 0; r != 0; i++)
{
arrayHex[i] = r % 8;
r = r / 8;
arraySize++;
}
for (int j = 0; j < arraySize; j++)
{
s = s + to_string(arrayHex[arraySize - 1 - j]);
}
delete[] arrayHex;
return s;
}
int main()
{
int input = 0;
while (input != -1)
{
cout << "\nEnter a decimal number (-1 to exit loop): ";
cin >> input;
if (input != -1)
{
cout << "Your decimal number in binary expansion: " << convertDecToBin(input);
cout << "\nYour decimal number in octal ecpression: " << convertDecToOct(input);
}
}
cout << "\n\nPress any key to exit. . .";
_getch();
return 0;
}
arrayHex = new int[] is your problem - C\C++ does not support dynamic sizing arrays. You need to specify a size for the array to allocation, otherwise you'll get memory block overruns.

OpenMP and MPI hybrid dynamic scheduling

As the number of threads increase, the count which is "temp" decreases..
When I sent the number of threads as "1" it gives an correct answer but as the number of threads increases, running time shorter but gives wrong answer
#include <stdio.h>
#include <mpi.h>
#include <complex.h>
#include <time.h>
#include <omp.h>
#define MAXITERS 1000
// globals
int count = 0;
int nptsside;
float side2;
float side4;
int temp = 0;
int inset(double complex c) {
int iters;
float rl,im;
double complex z = c;
for (iters = 0; iters < MAXITERS; iters++) {
z = z*z + c;
rl = creal(z);
im = cimag(z);
if (rl*rl + im*im > 4) return 0;
}
return 1;
}
int main(int argc, char **argv)
{
nptsside = atoi(argv[1]);
side2 = nptsside / 2.0;
side4 = nptsside / 4.0;
//struct timespec bgn,nd;
//clock_gettime(CLOCK_REALTIME, &bgn);
int x,y; float xv,yv;
double complex z;
int i;
int mystart, myend;
int nrows;
int nprocs, mype;
int data;
MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &mype);
nrows = nptsside/nprocs;
printf("%d\n", nprocs);
mystart = mype*nrows;
myend = mystart + nrows - 1;
#pragma omp parallel shared(mystart, myend, temp)
{
int nth = omp_get_num_threads();
printf("%d\n", nth);
#ifdef STATIC
#pragma omp for reduction(+:temp) schedule(static)
#elif defined DYNAMIC
#pragma omp for reduction(+:temp) schedule(dynamic)
#elif defined GUIDED
#pragma omp for reduction(+:temp) schedule(guided)
#endif
for (x=mystart; x<=myend; x++) {
for ( y=0; y<nptsside; y++) {
xv = (x - side2) / side4;
yv = (y - side2) / side4;
z = xv + yv*I;
if (inset(z)) {
temp++;
}
}
}
}
if(mype==0) {
count += temp;
printf("%d\n", temp);
for (i = 1; i < nprocs; i++) {
MPI_Recv(&temp, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &status);
count += temp;
printf("%d\n", temp);
}
}
else{
MPI_Send(&temp, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
MPI_Finalize();
if(mype==0) {
printf("%d\n", count);
}
//clock_gettime(CLOCK_REALTIME, &nd);
//printf("%f\n",timediff(bgn,nd));
}
You are not defining any private variables for when you enter the OpenMP loop.
First off, you must always declare your loop counter for your OpenMP loop (as well as any loop counters for nested loops inside your OpenMP loop) private.
Secondly, you have three variables (xv, yv, and z) that each depend on your iterations in these loops. Thus, each thread needs to have its own private copy of these variables as well. Changing your parallel statement to
#pragma omp parallel shared(mystart, myend, temp) private(x, y, xv, yv, z)
should fix your OpenMP problems.
Seeing as you say that setting your number of threads to 1 yields the correct answer, I have not looked at your MPI code.
EDIT: OK I lied, I briefly looked into your MPI code now. Instead of all of your sends and receives, you should be writing a single reduce. This collective will be much faster than the blocking communication you set up currently.
MPI_Reduce(&temp, &count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

How to solve http://www.spoj.com/problems/MST1/ in n is 10^9

Using Bottom to up DP approach, I am able to solve the problem How to solve http://www.spoj.com/problems/MST1/ upto 10^8.
If input is very large n upto 10^9. I will not be able to create lookup table for upto 10^9. So what will be better approach to solve the problem ?
Is there any heuristic solution ?
#include <iostream>
#include <climits>
#include <algorithm>
using namespace std;
int main()
{
const int N_MAX = 20000001;
int *DP = new int[N_MAX];
DP[1] = 0;
for (int i = 2; i < N_MAX; i++) {
int minimum = DP[i - 1];
if (i % 3 == 0) minimum = min(minimum, DP[i/3]);
if (i % 2 == 0) minimum = min(minimum, DP[i/2]);
DP[i] = minimum + 1;
}
int T, N; cin >> T;
int c = 1;
while (T--) {
cin >> N;
cout << "Case " << c++ << ": " << DP[N] << endl;
}
delete[] DP;
}

Broken pipes in C -- pipe(), fork(), exec() program

I need to write a simple program: There will be a Parent and a few programs [children] (started via execl in Parent). Children communicate to one another in this way: Child I sens to Parent number J, Parent sends a message (something like -- "there is a message to you") to J, J send to Parent number K etc. etc.
And there is a problem -- my program (tested by strace command) tries to send a message to child and there comes the broken pipe error.
I will be grateful if somebody looks through the code and tells me what's wrong:
Here is the code:
/**
* Arbiter zabawy w Losia
*
wersja: Alfa 3b
poczÄ…tek edycji 25.01.2009
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "err.h"
pid_t pid;
FILE *a;
int main ()
{
// my N players
int N;
N = 10;
//write -- writing from parent to child
//read -- reading from child
int rurka_write[N+1][2];
int rurka_read[N+1][2];
//initiation of N players
int i;
for(i = 1; i <= N; i++)
{
//tworze lacza
if (pipe(rurka_write[i]) == -1)
printf("wystapil blad przy rurce %d\n", i);
if (pipe(rurka_read[i]) == -1)
printf("wystapil blad przy rurce %d\n", i);
}
for(i = 1; i <= N; i++)
{
switch(pid = fork())
{
case -1:
printf("wystapil blad przy forkowaniu");
case 0:
printf("potomek numer %d\n", i);
if (close(rurka_write[i][1]) == -1)
printf("zle zamykanie");
if (close(rurka_read[i][0]) == -1)
printf("zle zamykanie");
//closing useless descriptors
int j;
for(j = 1; j <= N; j++)
{
if (j != i)
{
close(rurka_read[j][0]);
close(rurka_read[j][1]);
close(rurka_write[j][0]);
close(rurka_write[j][1]);
}
}
char str_N[20];
char str_i[20];
char str_0[20];
char str_1[20];
sprintf(str_N, "%d", N);
sprintf(str_i, "%d", i);
sprintf(str_0, "%d", rurka_write[i][0]);
sprintf(str_1, "%d", rurka_read[i][1]);
printf("%d Executing execl\n", i);
execl("./ucz", str_N, str_i, str_0, str_1, NULL);
printf("execl executed\n");
// execv("./ucz", str_N, str_i, str_0, str_1, NULL);
//exit(0);
default:
//closing useless pipes
if (close(rurka_read[i][1]) == -1)
printf("zle zamykanie rurki do czytania z potomkna\n");
if (close(rurka_write[i][0]) == -1)
printf("zle zamykanie rurki do pisania do potomka\n");
} //end of switch
} //end of for
//if I am in parent, I'm starting the game
if (pid != 0)
// delay(100);
{
printf("PLAY\n");
int l = 1;
while(l > 0)
{
printf("sending to player %d\n", l);
a = fdopen(rurka_write[l][1], "w");
printf("sending: Wake up");
fprintf(a, "Wake up\n");
printf("flushing");
fflush(a);
char k[20];
printf("reading");
read(rurka_read[l][0], k, 20);
l = k;
}
}
}
Besides the fact that you do not end your cases with a break (as noted by strager), the main problem is the statement l = k;. Note that k is a char[20] and, when assigned to a int, you don't assign any of the contents of k to l. Instead l will contain (the value of) the pointer to the array. You will have to do something different here to get the value that is in the array; what exactly depends on that ucz sends back.
After fixing this and making my own ucz, the program seems to work without any problem. Of course, it might also be that there is another problem in your version of ucz.
About ./ucz -- it takes 4 parameters -- 1st -- number of players, 2nd -- player number, 3rd -- number of descriptor to read from parent, 4th -- number of descriptor to write to parent.
Adding exit(0), return(0), break after exec (or in the end of "case: 0") doesn't help.

Resources