Are these two code different - multithreading

foreach $thr (1..5)
{
$threads[$thr]=threads->create("worker");
}
and
foreach (1..5)
{
push #threads,threads->create("worker");
}
the latter works well, well the former gives warning.
#!/usr/bin/perl
use strict;
use threads;
use threads::shared;
use Thread::Queue;
my $queue = Thread::Queue->new();
my #threads;
my $thr;
#----------------------------------------create
#send work first,and then creat threads, the first thread work earlier.
$queue->enqueue(1..10000);
foreach (1..5)
{
push #threads,threads->create("worker");
}
$queue->end();
sub worker
{
while (my #DataElement = $queue->dequeue(100))
{
my $tid = threads->tid();
#open (my $out,">>$tid.txt") or die $!;
print "Threads ID:$tid\t#DataElement\n";
#print $out "Threads ID:$tid\t#DataElement\n";
#close $out;
}
}
#----------------------------------------cut
my $thr_num=1;
my $i;
while ($thr_num)
{
$i++;
foreach (#threads) #store threads, TRUE even if joined.
{
$thr_num = threads->list();
print "threads total: $thr_num\n";
if ($_->is_running())
{
sleep 1; #wait
next;
}
if ($_->is_joinable())
{
$_->join();
}
sleep 1;# wait
}
print $i,"\n";
}
this is the whole code. and the warning is can't call method "is_running" on an undefined value at threadqueue2(1).plx.line42. Perl exited with active threads.

No. You will end up with different data structures. As you can see from this simplified version of your code.
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
my #threads;
foreach my $thr (1 .. 5) {
$threads[$thr] = 'A Thread';
}
say Dumper \#threads;
#threads = ();
foreach (1 .. 5) {
push #threads, 'A Thread';
}
say Dumper \#threads;
The output is:
$VAR1 = [
undef,
'A Thread',
'A Thread',
'A Thread',
'A Thread',
'A Thread'
];
$VAR1 = [
'A Thread',
'A Thread',
'A Thread',
'A Thread',
'A Thread'
];
In your first example, you begin populating the array at element 1, so the first element (which has an index of 0) contains undef.

Related

Perl hand Module to threads

i am trying to pass a subroutine from an self written module to threads using the following code.
This is my first time using threads so I'm kinda not familiar with it.
Main Script (shortend)
#!/usr/bin/perl -w
use strict;
use threads;
use lib 'PATH TO LIB';
use goldstandard;
my $delete_raw_files = 0;
my $outfolder = /PATH/;
my %folder = goldstandard -> create_folder($outfolder,$delete_raw_files);
&tagging if $tagging == 1;
sub tagging{
my %hash = goldstandard -> tagging_hash(\%folder);
my #threads;
foreach(keys %hash){
if($_ =~ m/mate/){
my $arguments = "goldstandard -> mate_tagging($hash{$_}{raw},$hash{$_}{temp},$hash{$_}{tagged},$mate_anna,$mate_model)";
push(#threads,$arguments);
}
if($_ =~ m/morpheus/){
my $arguments = "goldstandard -> morpheus_tagging($hash{$_}{source},$hash{$_}{tagged},$morpheus_stemlib,$morpheus_cruncher)";
push(#threads,$arguments)
}
}
foreach(#threads){
my $thread = threads->create($_);
$thread ->join();
}
}
Module
package goldstandard;
use strict;
use warnings;
sub mate_tagging{
my $Referenz = shift;
my $input = shift;
my $output_temp_dir = shift;
my $output_mate_human = shift;
my $anna = shift;
my $model = shift;
opendir(DIR,"$input");
my #dir = readdir(DIR);
my $anzahl = #dir;
foreach(#dir){
unless($_ =~ m/^\./){
my $name = $_;
my $path = $input . $_;
my $out_temp = $output_temp_dir . $name;
my $out_mate_human_final = $output_mate_human . $name;
qx(java -Xmx10G -classpath $anna is2.tag.Tagger -model $model -test $path -out $out_temp);
open(OUT, "> $out_mate_human_final");
open(TEMP, "< $out_temp");
my $output_text;
while(<TEMP>){
unless($_ =~ m/^\s+$/){
if ($_ =~ m/^\d+\t(.*?)\t_\t_\t_\t(.*?)\t_\t/) {
my $tags = $2;
my $words = $1;
print OUT "$words\t$tags\n";
}
}
}
}
}
}
sub morpheus_tagging{
my $Referenz = shift;
my $input = shift;
my $output = shift;
my $stemlib = shift;
my $cruncher = shift;
opendir(DIR,"$input");
my #dir = readdir(DIR);
foreach(#dir){
unless($_ =~ m/^\./){
my $name = $_;
my $path = $input . $_;
my $out = $output . $name;
qx(env MORPHLIB='$stemlib' '$cruncher' < '$path' > '$out');
}
}
}
1;
Executing this code gets me
Thread 1 terminated abnormally: Undefined subroutine &main::goldstandard -> morpheus_tagging(...) called at ... line 43.
I guess eather the way I am calling the treads or the way I am providing the arguments are wrong. I Hope some can help me with that? I Also found something on safe and unsafe modules bum I'm not sure is this is realy the problem.
I guess eather the way I am calling the treads or the way I am providing the arguments are wrong. I Hope some can help me with that? I Also found something on safe and unsafe modules bum I'm not sure is this is realy the problem.Thanks in advance
You must pass the name of a sub or a reference to a sub, plus arguments, to threads->create. So you need something like
my $method_ref = $invoker->can($method_name);
threads->create($method_ref, $invoker, #args);
That said, passing arguments to threads->create has issues that can be avoided by using a closure.
threads->create(sub { $invoker->$method_name(#args) })
The above can be written more simply as follows:
async { $invoker->$method_name(#args) }
This gets us the following:
sub tagging {
my %hash = goldstandard->tagging_hash(\%folder);
my #jobs;
for (keys %hash) {
if (/mate/) {
push #jobs, [ 'goldstandard', 'mate_tagging',
$hash{$_}{raw},
$hash{$_}{temp},
$hash{$_}{tagged},
$mate_anna,
$mate_model,
];
}
if (/morpheus/) {
push #jobs, [ 'goldstandard', 'morpheus_tagging',
$hash{$_}{source},
$hash{$_}{tagged},
$morpheus_stemlib,
$morpheus_cruncher,
];
}
}
my #threads;
for my $job (#jobs) {
my ($invoker, $method_name, #args) = #$job;
push #threads, async { $invoker->$method_name(#args) };
}
$_->join for #threads;
}
or just
sub tagging {
my %hash = goldstandard->tagging_hash(\%folder);
my #threads;
for (keys %hash) {
if (/mate/) {
push #threads, async {
goldstandard->mate_tagging(
$hash{$_}{raw},
$hash{$_}{temp},
$hash{$_}{tagged},
$mate_anna,
$mate_model,
);
};
}
if (/morpheus/) {
push #threads, async {
goldstandard->morpheus_tagging(
$hash{$_}{source},
$hash{$_}{tagged},
$morpheus_stemlib,
$morpheus_cruncher,
);
};
}
}
$_->join for #threads;
}
Notes that I delayed the calls to join until after all the threads are created. Your way made it so only one thread would run at a time.
But what we have isn't great. We have no way of limiting how many threads are active at a time, and we (expensively) create many threads instead of reusing them. We can use a worker pool to solve both of these problems.
use constant NUM_WORKERS => 5;
use Thread::Queue 3.01 qw( );
my $q;
sub tagging {
my %hash = goldstandard->tagging_hash(\%folder);
my #threads;
for (keys %hash) {
if (/mate/) {
$q->enqueue(sub {
goldstandard->mate_tagging(
$hash{$_}{raw},
$hash{$_}{temp},
$hash{$_}{tagged},
$mate_anna,
$mate_model,
);
});
}
if (/morpheus/) {
$q->enqueue(sub {
goldstandard->morpheus_tagging(
$hash{$_}{source},
$hash{$_}{tagged},
$morpheus_stemlib,
$morpheus_cruncher,
);
});
}
}
}
{
$q = Thread::Queue->new();
for (1..NUM_WORKERS) {
async {
while ( my $job = $q->dequeue() ) {
$job->();
}
};
}
... call tagging and whatever ...
$q->end();
$_->join() for threads->list();
}

error code is 'Thread 1 terminated abnormally: Invalid value for shared scalar at'

This is my code.
the code has some problem about hash shared.
use strict;
use warnings;
use threads;
use threads::shared;
my %db;
share(%db);
my #threads;
sub test{
my $db_ref = $_[0];
my #arr = ('a','b');
push #{$db_ref->{'key'}}, \#arr;
}
foreach(1..2){
my $t = threads->new(
sub {
test(\%db);
}
);
push(#threads,$t);
}
foreach (#threads) {
$_->join;
}
error code.
Thread 1 terminated abnormally: Invalid value for shared scalar at test1.pl line 13.
Thread 2 terminated abnormally: Invalid value for shared scalar at test1.pl line 13.
I waana using threads::shared.
But I don`t know what is problem.
help me plz~
You can only place references to shared objects into shared vars. #arr isn't shared, and neither is the array onto which you push a reference to #arr.
Replace
my #arr = ('a','b');
push #{$db_ref->{'key'}}, \#arr;
with
my #arr :shared = ('a','b');
lock %$db_ref;
# We can't use autovivification as we need a shared array.
$db_ref->{'key'} = shared_clone([]);
push #{$db_ref->{'key'}}, \#arr;
I changed code.
But can not save all data in hash(%db). Next code is check code.
use strict;
use warnings;
use threads;
use threads::shared;
my %db;
share(%db);
my #threads;
sub test{
my $db_ref = $_[0];
my #arr :shared = ('a','b');
lock %$db_ref;
$db_ref->{'key'} = shared_clone([]);
push #{$db_ref->{'key'}}, \#arr;
}
foreach(1..5){
my $t = threads->new(
sub {
test(\%db);
}
);
push(#threads,$t);
}
foreach (#threads) {
$_->join;
}
while(my ($key, $val) = each %db){
print "$key => $val\n";
foreach my $value (#$val) {
foreach (#$value) {
print $_, " ";
}
print "\n";
}
}
Only one data(a,b) in %db.
We must one more data in %db.

Sharing a thread variable without making it global (Perl)

I'm trying to write a simple script that uses threads and shares a variable, but I don't want to make this variable global to the whole script. Below is a simplified example.
use strict;
use warnings;
use threads;
use threads::shared;
my $val:shared;
# Create threads
for my $i (1 .. 5) {
threads->create(\&do_something, $i);
}
# Wait for all threads to complete
map { $_->join(); } threads->list();
# $val is global to the script so this line will work!
print "VAL IS: $val\n";
sub do_something {
my $i = shift;
print "Doing something with thread $i!\n";
{
lock $val;
$val = "SOMETHING IS $i";
print "$val\n\n";
}
}
Output:
Doing something with thread 1!
SOMETHING IS 1
Doing something with thread 2!
SOMETHING IS 2
Doing something with thread 3!
SOMETHING IS 3
Doing something with thread 4!
SOMETHING IS 4
Doing something with thread 5!
SOMETHING IS 5
VAL IS: SOMETHING IS 5
How can I get this effect without making $val accessible to the whole script? In other words, how can I make it so attempting to print VAL IS: $val will fail, but the variable will still be successfully shared by the threads?
I can't define it like this:
# Create threads
for my $i (1 .. 5) {
my $val:shared;
threads->create(\&do_something, $i);
}
Or I will get:
Global symbol "$val" requires explicit package
What is the right way to lexically scope a shared variable?
Pass a reference to it as an argument.
sub do_something {
my ($id, $lock_ref) = #_;
print("$id: Started\n");
{
lock $$lock_ref;
print("$id: Exclusive\n");
sleep(1);
}
print("$id: done.\n");
}
{
my $lock :shared;
for my $id (1..5) {
async { do_something($id, \$lock); };
}
}
Or scope it so only the worker subs can see it.
{
my $lock :shared;
sub do_something {
my ($id) = #_;
print("$id: Started\n");
{
lock $lock;
print("$id: Exclusive\n");
sleep(1);
}
print("$id: done.\n");
}
}
for my $id (1..5) {
async { do_something($id); };
}
You can limit the scope of shared variable (make sure that perl sees shared variable before thread creation),
# ..
{
my $val:shared;
sub do_something {
my $i = shift;
print "Doing something with thread $i!\n";
{
lock $val;
$val = "SOMETHING IS $i";
print "$val\n\n";
}
}
}
# Create threads
for my $i (1 .. 5) {
threads->create(\&do_something, $i);
}
# ...

How to pause and resume a multithread perl script?

I have written the perl script to pause and resume.When the user enters Ctrl+c it has to pause and on pressing c it should resume. But is not working properly as expected. Can anyone help me on this what mistake i am making:
use strict;
use threads;
use threads::shared;
use Thread::Suspend;
use Lens;
$SIG{'INT'} = 'Pause';
#$| = 1;
print chr(7);
my $nthreads = 64;
my #thrs;
for(1..$nthreads)
{
print "START $_ \n";
my ($thr) = threads->create(\&worker, $_);
push #thrs ,$thr;
}
$_->join for #thrs;
exit;
sub worker
{
my $id = shift;
my $tmp;
my $lens = Lens->new("172.16.1.65:2000");
die "cannot create object" unless defined $lens;
die "cannot connect to XRay at " unless defined $lens->open("172.16.1.65:2000");
for(1..100000)
{
print "Thread $id \n";
}
print "$id>LOAD EXIT\n";
}
sub Pause
{
sleep(1);
print "\nCaught ^C\n";
print "Press \"c\" to continue, \"e\" to exit: ";
$_->suspend() for #thrs;
while (1)
{
my $input = lc(getc());
chomp ($input);
if ($input eq 'c') {
#clock($hour,$min,$sec);
$_->resume() for #thrs;
return;
}
elsif ($input eq 'e') {
exit 1;
}
}
}
Well, you haven't been too specific as to how it's "not working properly". But I would suggest looking at using Thread::Semaphore for a 'suspend' mechanism.
I would also suggest not using signal and instead doing something like:
#!/usr/bin/perl
use strict;
use warnings;
use threads;
use Thread::Semaphore;
use Term::ReadKey;
my $nthreads = 64;
my $thread_semaphore = Thread::Semaphore->new($nthreads);
sub worker {
for ( 1 .. 10 ) {
$thread_semaphore->down();
print threads->self->tid(), "\n";
sleep 1;
$thread_semaphore->up();
}
}
for ( 1 .. $nthreads ) {
threads->create( \&worker );
}
my $keypress;
ReadMode 4;
while ( threads->list(threads::running) ) {
while ( not defined( $keypress = ReadKey(-1) )
and threads->list(threads::running) )
{
print "Waiting\nRunning:". threads->list(threads::running) . "\n";
sleep 1;
}
print "Got $keypress\n";
if ( $keypress eq "p" ) {
print "Pausing...";
$thread_semaphore -> down_force($nthreads);
print "All paused\n";
}
if ( $keypress eq "c" ) {
print "Resuming...";
$thread_semaphore -> up ( $nthreads );
}
}
ReadMode 0;
foreach my $thr ( threads->list ) {
$thr->join();
}
It'll 'suspend' by setting the semaphores to zero (or negative) and relies on the threads checking if they should be stopping here or not.
I think the root of your problem though, will probably be signal propagation - your signal handler is global across your threads. You might find configuring $SIG{'INT'} for your threads separately will yield better results. (E.g. set the signal handler to 'IGNORE' at the start of your code, and set specific ones in the thread/main once the threads have been spawned).

Perl Multithread Program

I am new in Perl. I want to write a Perl script using thread.I have few files say 20 files and want to process those files using 5 threads in 4 batches. I am printing the thread no. After completing one batch ,the thread no must start with 1 for the next batch. But instead of that its creating 20 threads.please help. my code is as follows:
#!/usr/bin/perl -w
use strict;
use warnings;
use threads;
use threads::shared;
my $INPUT_DIR="/home/Documents/myscript/IMPORTLDIF/";
opendir(DIR, $INPUT_DIR) ;
my #files = grep { /^InputFile/ } readdir DIR;
my $count = #files;
#print "Total Files: $count \n";
my #threads;
my $noofthread = 5;
my $nooffiles = $count;
my $noofbatch = $nooffiles / $noofthread;
#print "No of batch: $noofbatch \n";
my $fileIndex = 0;
my $batch = 1;
while ($fileIndex < $nooffiles) {
print "Batch: $batch \n";
for (my $i=0; $i < $noofthread && $fileIndex < $nooffiles ; $i++) {
my $t = threads->new(\&doOperation, $files[$fileIndex], $i)->join;
push(#threads, $t);
$fileIndex++;
print "FileIndex: $fileIndex \n";
}
$batch++;
}
sub doOperation () {
my $ithread = threads->tid() ;
print "Thread Index : [id=$ithread]\n" ;
foreach my $item (#_){
my $filename = $item;
print "Filename name: $filename \n";
}
Edited program using thread queue:
#!/usr/bin/perl -w
# This is compiled with threading support
use strict;
use warnings;
use threads;
use Thread::Queue;
my $q = Thread::Queue->new(); # A new empty queue
# Worker thread
my $INPUT_DIR="/home/Documents/myscript/IMPORTLDIF/";
opendir(DIR, $INPUT_DIR) or die "Cannot opendir: $!";
my #thrs = threads->create(\&doOperation ) for 1..5;#for 5 threads
#my #files = `ls -1 /home/Documents/myscript/IMPORTLDIF/`;
my #files = grep { /^Input/ } readdir DIR or die "File not present present. \n";
chomp(#files);
#add files to queue
foreach my $f (#files){
# Send work to the thread
$q->enqueue($f);
print "Pending items: " + $q->pending()."\n";
}
$q->enqueue('_DONE_') for #thrs;
$_->join() for #thrs;
sub doOperation () {
my $ithread = threads->tid() ;
while (my $filename = $q->dequeue()) {
# Do work on $item
return 1 if $filename eq '_DONE_';
print "[id=$ithread]\t$filename\n";
}
return 1;
}
You are spawning a thread and then waiting for it to complete before spawning the next, each thread handling one file. That is why you see as many threads as you have files.
my $t = threads->new(\&doOperation, $files[$fileIndex], $i)->join;
^^^^--- This will block
Instead try something like this:
....
# split the workload into N batches
#
while (my #batch = splice(#files, 0, $batch_size)) {
push #threads, threads->new(\&doOperation, #batch);
}
# now wait for all workers to finish
#
for my $thr (#threads) {
$thr->join;
}
As an aside, Thread::Queue and Thread-Pool might imply better designs for the work you want to do.
You could use Paralel:Queue and create 4 thread and pass them items that they could work on.
To fork or not to fork?
use strict;
use warnings;
use threads;
use Thread::Queue;
my $q = Thread::Queue->new(); # A new empty queue
# Worker thread
my #thrs;
push #thrs, threads->create(\&doOperation ) for 1..5;#for 5 threads
my #files = `ls -1 /tmp/`;chomp(#files);
#add files to queue
foreach my $f (#files){
# Send work to the thread
$q->enqueue($f);
print "Pending items: "$q->pending()."\n";
}
$q->enqueue('_DONE_') for #thrs;
$_->join() for threads->list();
sub doOperation () {
my $ithread = threads->tid() ;
while (my $filename = $q->dequeue()) {
# Do work on $item
return 1 if $filename eq '_DONE_';
print "[id=$ithread]\t$filename\n";
}
return 1;
}

Resources