Matrix multiplication with threads and semaphore - multithreading

I have to implement matrix multiplication with threads using semaphore. The problem is I don't understand how semaphore works and my result matrix is filled with zeros and program hangs. Could you explain me what I'm doing wrong?
Here's my code:
class Program
{
private static Semaphore semaphore = new Semaphore(0, 1);
static void Main(string[] args)
{
int size;
int[,] a = null, b = null, c = null;
bool isNumeric;
do
{
Console.Write("Insert size: ");
isNumeric = int.TryParse(Console.ReadLine(), out size);
}
while (!isNumeric || size < 2);
Console.WriteLine();
SquareMatrix.Init(size, ref a);
SquareMatrix.Init(size, ref b);
SquareMatrix.Init(size, ref c);
SquareMatrix.Fill(a);
SquareMatrix.Fill(b);
Console.WriteLine("Matrix 1:\n");
SquareMatrix.Display(a);
Console.WriteLine("Matrix 2:\n");
SquareMatrix.Display(b);
Console.WriteLine();
DateTime start = DateTime.Now;
for (int i = 0; i < size; i++)
{
Thread thread = new Thread(() => Multiply(i, size, a, b, c));
thread.Name = i.ToString();
thread.Start();
}
DateTime stop = DateTime.Now;
Console.WriteLine("Result (" + (stop - start).TotalMilliseconds + " ms):\n");
SquareMatrix.Display(c);
Console.ReadLine();
}
public static void Multiply(int i, int size, int[,] a, int[,] b, int[,] c)
{
semaphore.WaitOne();
for (int j = 0; j < size; j++)
{
c[i, j] = 0;
for (int k = 0; k < size; k++)
{
c[i, j] += a[i, k] * b[k, j];
}
}
semaphore.Release();
}
}
And SquareMatrix code:
static class SquareMatrix
{
private static readonly Random random = new Random();
public static void Init(int size, ref int[,] array)
{
array = new int[size, size];
}
public static void Fill(int[,] array)
{
for (int i = 0; i < array.GetLength(0); i++)
{
for (int j = 0; j < array.GetLength(0); j++)
{
array[i, j] = random.Next(-10, 10);
}
}
}
public static void Display(int[,] array)
{
string result = "";
for (int i = 0; i < array.GetLength(0); i++)
{
for (int j = 0; j < array.GetLength(0); j++)
result += array[i, j] + " ";
result += "\n";
}
Console.Write(result + "\n");
}
}

Related

Python 3 c extension module free invalid pointer

I wrote this C extension module which calculates Fibonacci numbers using fast matrix multiplication.
#include <Python.h>
struct Matrix {
PyObject *m[2][2];
};
static struct Matrix matrix_mult(struct Matrix mat1, const struct Matrix mat2)
{
struct Matrix matrix;
PyObject *mults[8];
for (int i = 0; i < 8; i++) {
mults[i] = PyNumber_Multiply(mat1.m[i/4][i%2], mat2.m[i%2][(i/2)&1]);
}
for (int i = 0; i < 4; i++) {
matrix.m[i/2][i%2] = PyNumber_Add(mults[2*i], mults[2*i+1]);
}
for (int i = 0; i < 8; i++) {
Py_DECREF(mults[i]);
}
return matrix;
}
static void matrix_free(struct Matrix *matrix)
{
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
Py_DECREF(matrix->m[i][j]);
}
}
}
static struct Matrix matrix_pow(struct Matrix matrix, int n)
{
struct Matrix result = {{
{PyLong_FromLong(1L), PyLong_FromLong(0L)},
{PyLong_FromLong(0L), PyLong_FromLong(1L)}
}};
struct Matrix result_old;
struct Matrix matrix_old;
while (n > 0) {
if (n % 2 == 0) {
n /= 2;
matrix_old = matrix;
matrix = matrix_mult(matrix_old, matrix_old);
matrix_free(&matrix_old);
} else {
//n--;
n /= 2;
result_old = result;
matrix_old = matrix;
result = matrix_mult(result_old, matrix);
matrix = matrix_mult(matrix_old, matrix_old);
matrix_free(&result_old);
matrix_free(&matrix_old);
}
}
return result;
}
static PyObject *fib_mat(PyObject *self, PyObject *args)
{
long long int n;
if(!PyArg_ParseTuple(args, "L", &n))
return NULL;
struct Matrix fib_matrix = {{
{PyLong_FromLong(0L), PyLong_FromLong(1L)},
{PyLong_FromLong(1L), PyLong_FromLong(1L)}
}};
struct Matrix result = matrix_pow(fib_matrix, n + 1);
PyObject *inswer = result.m[0][0];
Py_INCREF(inswer);
matrix_free(&result);
matrix_free(&fib_matrix);
return inswer;
}
static PyMethodDef func_table[] = {
{ "fib_mat", fib_mat, METH_VARARGS, "Calculates fib number" },
{ NULL, NULL, 0, NULL }
};
static struct PyModuleDef fib_module = {
PyModuleDef_HEAD_INIT,
"fib_module",
"fibonacci Module",
-1,
func_table
};
PyMODINIT_FUNC PyInit_fib_module(void)
{
return PyModule_Create(&fib_module);
}
The problem is when I run the following Python 3 code using the module:
import fib_module
i = 0
while i < 10:
fib_module.fib_mat(i)
i += 1
it returns an the error listed below:
free(): invalid pointer
Aborted (core dumped)
It appears to be caused by something inside the C extension module but there is no explicit free call.

Exception thrown: read access violation. this->pt_mat was 0x1110112

I got this error while using a double pointer in a class function to set coefficient and using it in the main.
The class function is used in a stand-alone function to set the coefficient using a class type pointer.
The class code is:
class Matrix
{
private:
int size_m, size_n;
double **pt_mat;
public:
Matrix() {};
Matrix(int m, int n)
{
pt_mat = new double *[m];
for (int i = 0; i < m; i++)
{
pt_mat[i] = new double[n];
}
}
void set_size(int m, int n)
{
size_m = m;
size_n = n;
}
void set_coef(int i, int j, double x)
{
pt_mat[i][j] = x;
}
void get_size(int *pt_m, int *pt_n)
{
*pt_m = size_m;
*pt_n = size_n;
}
double get_coef(int i, int j)
{
return pt_mat[i][j];
}
void print(ofstream &fout)
{
fout << size_m << " " << size_n << endl;
int i, j;
for (i = 0; i < size_m; i++)
{
for (j = 0; j < size_n; j++)
{
fout << pt_mat[i][j] << " ";
}
fout << endl;
}
}
void max_min(int *pt_imax, int *pt_jmax, double *pt_max, int *pt_imin, int *pt_jmin, double *pt_min)
{
double max, min;
int i, j, imax = 0, imin = 0, jmax = 0, jmin = 0;
max = pt_mat[0][0];
for (i = 0; i < size_m; i++)
{
for (j = 0; j < size_n; j++)
{
if (pt_mat[i][j] > max)
{
max = pt_mat[i][j];
imax = i;
jmax = j;
}
}
}
*pt_max = max;
*pt_imax = imax;
*pt_jmax = jmax;
min = pt_mat[0][0];
for (i = 0; i < size_m; i++)
{
for (j = 0; j < size_n; j++)
{
if (pt_mat[i][j] < min)
{
min = pt_mat[i][j];
imin = i;
jmin = j;
}
}
}
*pt_min = min;
*pt_imin = imin;
*pt_jmin = jmin;
}
};
The stand-alone function:
void mat_add(Matrix a, Matrix b, Matrix *pt_c)
{
Matrix c;
pt_c = &c;
int a_m, a_n, *p_am = &a_m, *p_an = &a_n;
a.get_size(p_am, p_an);
c.set_size(a_m, a_n);
int m, n, *pt_m = &m, *pt_n = &n;
double coef;
a.get_size(pt_m, pt_n);
int i, j;
for (i = 0; i < m; i++)
for (j = 0; j < n; j++)
{
coef = a.get_coef(i, j) + b.get_coef(i, j);
c.set_coef(i, j, coef);
}
}
please help me in this regard if you can. I have no syntax error showing.enter code here

How to use multiple threads with shared data inside a loop in Vala?

I was wondering how I could take advantage of my cores to make loops run in parallel. Let's say I want to initialise all members of a big array to 1.
To check if the initialisation was done correctly, at the end, I add all the values of the array, but I get a zero. How can I make threads share the data?
Here's my attempt:
class Worker {
public uint64[] arr;
public uint64 start;
public uint64 end;
public int val;
public Worker (ref uint64[] arr, uint64 start, uint64 end, int val) {
this.arr = arr;
this.start = start;
this.end = end;
this.val = val;
}
}
void initialize (Worker p) {
for (var i = p.start; i <= p.end; i++) {
p.arr[i] = p.val;
}
}
int main (string[] args) {
var num_proc = (int) get_num_processors ();
stdout.printf ("Using %d threads\n", num_proc);
uint64 n = 50000000;
var arr = new uint64[n];
ulong microseconds;
double seconds;
var timer = new Timer ();
try {
var threads = new ThreadPool<Worker>.with_owned_data ((ThreadPoolFunc<Worker>) initialize, num_proc, true);
for (var i = 0; i < num_proc; i++) {
uint64 start = i * (n / num_proc);
uint64 end = (i + 1) * (n / num_proc) - 1;
if (i == num_proc - 1) end += n % num_proc;
print (#"Thread $(i + 1): start: $start, end: $end (amount: $(end - start + 1))\n");
threads.add (new Worker (ref arr, start, end, 1));
}
} catch (ThreadError e) {
stderr.printf ("%s\n", e.message);
}
uint64 sum = 0;
for (uint64 i = 0; i < n; i++) {
sum += arr[i];
}
stdout.printf ("sum: %llu\n", sum);
timer.stop ();
seconds = timer.elapsed (out microseconds);
stdout.printf ("Time elapsed: %.3f seconds.\n", seconds);
return 0;
}
The array is copied here:
public Worker (ref uint64[] arr, uint64 start, uint64 end, int val) {
this.arr = arr;
this.start = start;
this.end = end;
this.val = val;
}
An assignment is creating a copy here. You can use an unowned variable to fix this:
class Worker {
// The array is not owned by the worker!
public unowned uint64[] arr;
public uint64 start;
public uint64 end;
public int val;
public Worker (ref uint64[] arr, uint64 start, uint64 end, int val) {
this.arr = arr;
this.start = start;
this.end = end;
this.val = val;
}
}

how to parallel or multithreading 2 function in C++?

I'm a beginner at parallel programming. I have 2 function in my code that I want run this 2 function in parallel (multiThread). can you help me?
func 1:
void Navigation::test_end(Graph::Node *node, dtPolyRef endRef, const float *endPos, int endIdPos)
{
int k=0;
//std::ofstream fout("v4.txt", std::ios_base::app | std::ios_base::out);
auto it2 = myMap2.equal_range(endRef);
for (auto it = it2.first; it != it2.second; ++it) {
int m_npolys = 0;
int n=0;
dtPolyRef m_polys[MAX_POLYS];
int j = it->second.size();
for(int i=0;i<j-1;i++){
if(it->second[i]>0){
m_polys[m_npolys++] =it->second[i];
// fout <<it->second[i]<<" ";
}
currentGraph.AddIntraEdge(node->idNode,endIdPos, node->edges[k].idPos, it->second[j-1], m_polys ,m_npolys); //n=m_npolys
k++;
}
}
==========
func 2:
void Navigation::test_start(Graph::Node *node, dtPolyRef startRef, const float *startPos, int startIdPos)
{
int k=0;
//std::ofstream fout("v4.txt", std::ios_base::app | std::ios_base::out);
auto it2 = myMap2.equal_range(startRef);
for (auto it = it2.first; it != it2.second; ++it) {
int m_npolys = 0;
int n=0;
dtPolyRef m_polys[MAX_POLYS];
int j = it->second.size();
for(int i=0;i<j-1;i++){
if(it->second[i]>0){
m_polys[m_npolys++] =it->second[i];
// fout <<it->second[i]<<" ";
}
currentGraph.AddIntraEdge(node->idNode,startIdPos, node->edges[k].idPos, it->second[j-1], m_polys ,m_npolys); //n=m_npolys
k++;
}
}
now i want to run this 2 function in parallel mode in the main function :
test_start(sNode,startRef,startPos,startIdPos);
test_end(eNode,endRef,endPos, endIdPos);

Find the word in the stream?

Given an infinite stream of characters and a list L of strings, create a function that calls an external API when a word in L is recognized during the processing of the stream.
Example:
L = ["ok","test","one","try","trying"]
stream = a,b,c,o,k,d,e,f,t,r,y,i,n,g.............
The call to external API will happen when 'k' is encountered, again when the 'y' is encountered, and again at 'g'.
My idea:
Create trie out of the list and navigate the nodes as you read from stream in linear time. But there would be a bug if you just do simple trie search.
Assume you have words "abxyz" and "xyw" and your input is "abxyw".In this case you can't recognize "xyw" with trie.
So search should be modified as below:
let's take above use case "abxyw". We start the search and we find we have all the element till 'x'. Moment you get 'x' you have two options:
Check if the current element is equal to the head of trie and if it is equal to head of trie then call recursive search.
Continue till the end of current word. In this case for your given input it will return false but for the recursive search we started in point 1, it will return true.
Below is my modified search but I think it has bugs and can be improved. Any suggestions?
#define SIZE 26
struct tri{
int complete;
struct tri *child[SIZE];
};
void insert(char *c, struct tri **t)
{
struct tri *current = *t;
while(*c != '\0')
{
int i;
int letter = *c - 'a';
if(current->child[letter] == NULL) {
current->child[letter] = malloc(sizeof(*current));
memset(current->child[letter], 0, sizeof(struct tri));
}
current = current->child[letter];
c++;
}
current->complete = 1;
}
struct tri *t;
int flag = 0;
int found(char *c, struct tri *tt)
{
struct tri *current = tt;
if (current == NULL)
return 0;
while(*c != '\0')
{
int i;
int letter = *c - 'a';
/* if this is the first char then recurse from begining*/
if (t->child[letter] != NULL)
flag = found(c+1, t->child[letter]);
if (flag == 1)
return 1;
if(!flag && current->child[letter] == NULL) {
return 0;
}
current = current->child[letter];
c++;
}
return current->complete;
}
int main()
{
int i;
t = malloc(sizeof(*t));
t->complete = 0;
memset(t, 0, sizeof(struct tri));
insert("weathez", &t);
insert("eather", &t);
insert("weather", &t);
(1 ==found("weather", t))?printf("found\n"):printf("not found\n");
return 0;
}
What you want to do is exactly what Aho-Corasick algorithm does.
You can take a look at my Aho-Corasick implementation. It's contest-oriented, so maybe not focused on readability but I think it's quite clear:
typedef vector<int> VI;
struct Node {
int size;
Node *fail, *output;
VI id;
map<char, Node*> next;
};
typedef pair<Node*, Node*> P;
typedef map<char, Node*> MCP;
Node* root;
inline void init() {
root = new Node;
root->size = 0;
root->output = root->fail = NULL;
}
Node* add(string& s, int u, int c = 0, Node* p = root) {
if (p == NULL) {
p = new Node;
p->size = c;
p->fail = p->output = NULL;
}
if (c == s.size()) p->id.push_back(u);
else {
if (not p->next.count(s[c])) p->next[s[c]] = NULL;
p->next[s[c]] = add(s, u, c + 1, p->next[s[c]]);
}
return p;
}
void fill_fail_output() {
queue<pair<char, P> > Q;
for (MCP::iterator it=root->next.begin();
it!=root->next.end();++it)
Q.push(pair<char, P> (it->first, P(root, it->second)));
while (not Q.empty()) {
Node *pare = Q.front().second.first;
Node *fill = Q.front().second.second;
char c = Q.front().first; Q.pop();
while (pare != root && !pare->fail->next.count(c))
pare=pare->fail;
if (pare == root) fill->fail = root;
else fill->fail = pare->fail->next[c];
if (fill->fail->id.size() != 0)
fill->output = fill->fail;
else fill->output = fill->fail->output;
for (MCP::iterator it=fill->next.begin();
it!=fill->next.end();++it)
Q.push(pair<char,P>(it->first,P(fill,it->second)));
}
}
void match(int c, VI& id) {
for (int i = 0; i < id.size(); ++i) {
cout << "Matching of pattern " << id[i];
cout << " ended at " << c << endl;
}
}
void search(string& s) {
int i = 0, j = 0;
Node *p = root, *q;
while (j < s.size()) {
while (p->next.count(s[j])) {
p = p->next[s[j++]];
if (p->id.size() != 0) match(j - 1, p->id);
q = p->output;
while (q != NULL) {
match(j - 1, q->id);
q = q->output;
}
}
if (p != root) {
p = p->fail;
i = j - p->size;
}
else i = ++j;
}
}
void erase(Node* p = root) {
for (MCP::iterator it = p->next.begin();
it != p->next.end(); ++it)
erase(it->second);
delete p;
}
int main() {
init();
int n;
cin >> n;
for (int i = 0; i < n; ++i) {
string s;
cin >> s;
add(s, i);
}
fill_fail_output();
string text;
cin >> text;
search(text);
erase(root);
}

Resources