Thread::queue return nothing - multithreading

I working on below script in which I split file content in #file_splitted and trying to apply Thread::Queue to speed up the process. But the $result returns nothing at the end. Can you please check what happening?
my $NUM_WORKERS = 5;
my $q = Thread::Queue->new();
sub worker {
my ($job) = #_;
print "#_ \n################\n";
my ($sub_name, #args) = #$job;
my $sub_ref = \&Subroutine;
$sub_ref->(#args);
}
{
my $q = Thread::Queue->new();
my #workers;
for (1..$NUM_WORKERS) {
push #workers, async {
while (my $job = $q->dequeue()) {
worker($job);
# print "$job \n";
}
};
}
$q->enqueue($_) for #file_splitted;
$q->end();
for my $t(#workers){
(my #getit)= $t->join();
my $tmp = join '', #getit;
$result .= $tmp;
print "$result\n";
}
}

This here is your current code, but tidied up a bit, and commented:
my $NUM_WORKERS = 5;
my $q = Thread::Queue->new();
{
# ok, so here we create a new queue for some reason that shadows the outer $q
my $q = Thread::Queue->new();
my #workers;
for (1 .. $NUM_WORKERS) {
push #workers, async {
while (my $job = $q->dequeue()) {
my ($sub_name, #args) = #$job; # so the $job is an arrayref
Subroutine(#args); # what is "Subroutine"?
}
# this worker does not explicitly return any values
};
}
# what is #file_splitted? Does it contain arrayrefs?
$q->enqueue($_) for #file_splitted;
$q->end();
for my $t (#workers){
# where is $result declared? And why are you using a return value when you
# don't explicitly return anything from your threads?
$result .= join '', $t->join;
print "$result\n";
}
}
The problem is that you aren't actually returning anything useful from your threads – note that I removed the worker subroutine above because it doesn't add anything to this discussion, and probably confused you.
Quite likely, you will want to create another queue from which the threads can return results:
my $job_queue = Thread::Queue->new;
my $result_queue = Thread::Queue->new;
my #workers;
for (1 .. $NUM_WORKERS) {
push #workers, async {
while(defined(my $job = $job_queue->dequeue)) {
my $result = Subroutine($job); # or something like this
$result_queue->enqueue($result);
}
$result_queue->enqueue(undef);
};
}
$job_queue->enqueue(#jobs);
$job_queue->end;
my $waiting = $NUM_WORKERS;
my #results;
while ($waiting) {
if (defined(my $result = $result_queue->dequeue)) {
# do something with the results
push #results, $result;
}
else {
$waiting--;
}
}
$_->join for #workers;
If you only want to collect all results at the end, you could do something like this instead:
my $job_queue = Thread::Queue->new;
my $result_queue = Thread::Queue->new;
my #workers;
for (1 .. $NUM_WORKERS) {
push #workers, async {
while(defined(my $job = $job_queue->dequeue)) {
my $result = Subroutine($job); # or something like this
$result_queue->enqueue($result);
}
};
}
$job_queue->enqueue(#jobs);
$job_queue->end;
$_->join for #workers;
my #results = $result_queue->dequeue($result_queue->pending);

Related

Perl: multithreaded server with Coro and AnyEvent

I am a newbie in Perl, so in educational purposes I am developing multithreaded server using AnyEvent and Coro. The client sends to server list of directory paths and server responses with listing of these directories.
I am using tcp_server and AnyEvent::Handle for connections handling, and for each client I want server to check thread pool (which is actually pool of coros) for free coro to handle request. When handling request is finished, I want coro to wait for another client instead of finishing.
However, it seems like at the end of handle_request sub, coro is destroyed and not avaliable anymore.
#!/usr/bin/perl
use strict;
use v5.18;
use AnyEvent;
use AnyEvent::Socket qw(tcp_server);
use AnyEvent::Handle;
use Coro;
use Class::Struct;
print("Server is running...\n");
# dirs function
sub print_dirs {
my $dir_list = $_[0];
my #dirs = split(" ", $dir_list);
my $result = "";
for my $dir (#dirs) {
if (opendir my $dirent, $dir) {
my #files = readdir $dirent;
closedir $dirent;
$result = $result . "\nContents of $dir:\r\n" . join("\r\n", #files) . "\r\n";
} else {
$result = $result . "Failed to open $dir: $!\r\n";
}
}
return $result;
}
# thread struct
struct clt_thread => {
id => '$',
thread => '$',
is_busy => '$',
args => '$',
client => '$',
clt_key => '$'
};
my $threads_num = 16;
my $thread_id = 0;
my #pool = ();
# handling request
my $cv = AE::cv;
my %client = ();
sub handle_request {
my $thread_id = shift;
my $thread;
foreach my $thr (#pool) {
if ($thr->id == $thread_id) { $thread = $thr; }
}
my $self = $thread->client;
my $client_key = $thread->clt_key;
my $dir_list = $thread->args;
if ($thread->client != '') {
say "Directories read: " . $dir_list . "\n";
my #clients = keys %client;
for my $key (grep {$_ ne $client_key} #clients) {
my $response = print_dirs($dir_list);
$client{$key}->push_write("$response");
$self->push_shutdown;
delete $client{$client_key};
delete $client{$self};
}
}
$thread->is_busy(0);
Coro::cede();
}
# threads creation
for my $i (0..$threads_num) {
my $coro = new Coro(\&handle_request, $thread_id);
my $thread = clt_thread->new(id => $thread_id, thread => $coro, is_busy => 0, args => '', client => '', clt_key => '');
push #pool, $thread;
$thread_id = $thread_id+1;
}
# tcp server creation - main part
tcp_server '127.0.0.1', 8015, sub {
my ($fh, $host, $port) = #_;
my $client_key = "$host:$port";
my $hdl = AnyEvent::Handle->new(
fh => $fh,
poll => 'r',
on_read => sub {
my ($self) = #_;
foreach my $thr (#pool) {
if (!($thr->is_busy)) {
$thr->client($self);
$thr->args($self->rbuf);
$thr->clt_key($client_key);
$thr->is_busy(1);
$thr->thread->ready();
return;
}
}
},
on_error => sub {
say "Something went wrong: $!\n";
},
);
$client{$client_key} = $hdl;
$client{$hdl} = $hdl;
};
$cv->recv;
I have already tried using infinite loop inside handle_request, but this way everything stops working at all. Do you have any ideas how to fix that? I suppose using Coro::AnyEvent to integrate coroutines into event loop might be solution. Can it be helpful in my case?
Thans for your help.
The thread exits when handle_request exits, so you want to wrap the body of handle_request in an infinite loop.
You also want to use Coro::schedule; instead of Coro::cede; to wait for ->ready to be called again before continuing.
That first loop in handle_request can be reduced to my $thread = $pool[$thread_id];.
Untested fix:
sub handle_request {
my ($thread_id) = #_;
my $thread = $pool[$thread_id];
while (1) {
my $self = $thread->client;
my $client_key = $thread->clt_key;
my $dir_list = $thread->args;
...
$thread->is_busy(0);
Coro::schedule();
}
}
That said, the following is the approach I'd use:
use Coro;
use Coro::Channel;
use constant NUM_WORKERS => 16;
sub worker {
my ($job) = #_;
my $self = $job->client;
my $client_key = $job->clt_key;
my $dir_list = $job->args;
...
}
{
my $q = Coro::Channel->new();
my #threads =
map {
async {
while ( my $job = $q->get() ) {
eval { worker($job); 1 }
or warn $#;
}
}
}
1..NUM_WORKERS;
...
on_read => sub {
my ($self) = #_;
$q->put({
client => $self,
clt_key => $client_key,
args => $self->rbuf,
});
}
...
$cv->recv;
$q->shutdown;
$_->join for #threads;
}
This is the same approach I'd use with real threads (using Thread::Queue instead of Coro::Channel).

Limiting thread numbers to run at a time

Consider that I have around 100 subroutines which I have to run using threads.How can I limit all threads so that only 10 threads will run at a time? Can u give me a sample code.
Here is the sample code where i need to implement it
use threads;
my ($thr1) = threads->create(\&sub1,$parameter);
my ($thr2) = threads->create(\&sub2,$parameter);
...
my ($thr100) = threads->create(\&sub100,$parameter);
my $result;
for my $t(#threads){
#print "$t\n";
(my #getit)= $t->join();
my $tmp = join '', #getit;
$result .= $tmp;
}
print "$result\n";
Or Do you have any other method for it. Each subroutine will do different task.
use threads;
use Thread::Queue 3.01 qw( );
my $NUM_WORKERS = 10;
sub worker {
my ($job) = #_;
my ($sub_name, #args) = #$job;
my $sub_ref = \&$sub_name;
$sub_ref->(#args);
}
{
my $q = Thread::Queue->new();
my #workers;
for (1..$NUM_WORKERS) {
push #workers, async {
while (my $job = $q->dequeue()) {
worker($job);
}
};
}
$q->enqueue($_)
for
[ sub1 => ( #args ) ],
[ sub2 => ( #args ) ];
$q->end();
$_->join() for #workers;
}

Segmentation fault on merging threads (Perl)

I had some working code that I've tried to multithread using the tutorial on dreamincode: http://www.dreamincode.net/forums/topic/255487-multithreading-in-perl/
The example code there seems to work fine, but I can't for the life of me work out why mine isn't. From putting in debug messages it seems to get all the way to the end of the subroutine with all of the threads, and then sit there for a while before hitting a segmentation fault and dumping the core. That being said I've also not managed to find the core dump files anywhere (Ubuntu 13.10).
If anyone has any suggested reading, or can see the error in the rather messy code below I'd be eternally grateful.
#!/usr/bin/env perl
use Email::Valid;
use LWP::Simple;
use XML::LibXML;
use Text::Trim;
use threads;
use DB_File;
use Getopt::Long;
my $sourcefile = "thislevel.csv";
my $startOffset = 0;
my $chunk = 10000;
my $num_threads = 8;
$result = GetOptions ("start=i" => \$startOffset, # numeric
"chunk=i" => \$chunk, # numeric
"file=s" => \$sourcefile, # string
"threads=i" => \$num_threads, #numeric
"verbose" => \$verbose); # flag
$tie = tie(#filedata, "DB_File", $sourcefile, O_RDWR, 0666, $DB_RECNO)
or die "Cannot open file $sourcefile: $!\n";
my $filenumlines = $tie->length;
if ($filenumlines>$startOffset + $chunk){
$numlines = $startOffset + $chunk;
} else {
$numlines = $filenumlines;
}
open (emails, '>>emails.csv');
open (errorfile, '>>errors.csv');
open (nxtlvl, '>>nextlevel.csv');
open (donefile, '>>donelines.csv');
my $line = '';
my $found = false;
my $linenum=0;
my #threads = initThreads();
foreach(#threads){
$_ = threads->create(\&do_search);
}
foreach(#threads){
$_->join();
}
close nxtlvl;
close emails;
close errorfile;
close donefile;
sub initThreads{
# An array to place our threads in
my #initThreads;
for(my $i = 1;$i<=$num_threads;$i++){
push(#initThreads,$i);
}
return #initThreads;
}
sub do_search{
my $id = threads->tid();
my $linenum=$startOffset-1+$id;
my $parser = XML::LibXML->new();
$parser->set_options({ recover => 2,
validation => 0,
suppress_errors => 1,
suppress_warnings => 1,
pedantic_parser => 0,
load_ext_dtd => 0, });
while ($linenum < $numlines) {
$found = false;
#full_line = split ',', $filedata[$linenum-1];
$line = trim(#full_line[1]);
$this_url = trim(#full_line[2]);
print "Thread $id Scanning $linenum of $filenumlines\: ";
printf "%.3f\%\n", 100 * $linenum / $filenumlines;
my $content = get trim($this_url);
if (!defined($content)) {
print errorfile "$this_url, no content\n";
}elsif (length($content)<100) {
print errorfile "$this_url, short\n";
}else {
my $doc = $parser->load_html(string => $content);
if(defined($doc)){
for my $anchor ( $doc->findnodes("//a[\#href]") )
{
$is_email = substr $anchor->getAttribute("href") ,7;
if(Email::Valid->address($is_email)) {
printf emails "%s, %s\n", $line, $is_email;
$found = true;
} else{
$link = $anchor->getAttribute("href");
if (substr lc(trim($link)),0,4 eq "http"){
printf nxtlvl "%s, %s\n", $line, $link;
} else {
printf nxtlvl "%s, %s/%s\n", $line, $line, $link;
}
}
}
}
if ($found=false){
my #lines = split '\n',$content;
foreach my $cline (#lines){
my #words = split ' ',$cline;
foreach my $word (#words) {
my #subwords = split '"',$word ;
foreach my $subword (#subwords) {
if(Email::Valid->address($subword)) {
printf emails "%s, %s\n", $line, $subword;
}
}
}
}
}
}
printf donefile "%s\n",$linenum;
$linenum = $linenum + $num_threads;
}
threads->exit();
}
In addition to sundry coding errors that mean my code should never ever be used as an example for other visitors, DB_File is not a thread-safe module.
Annoyingly, and perhaps misleadingly, it works absolutely as it should do right up until you close the threads that have been successfully accessing the file throughout your code.

Migrating threads to forks

There is forks::shared that should be drop in replacement for threads::shared, but how should I replace Thread::Queue?
I would like to convert following code to forks, if it is possible?
sub Qfac {
use threads;
use threads::shared;
use Thread::Queue;
my ($worker, $arrid, $arg) = #_;
$arrid ||= [];
$arg ||= {};
my %tr;
my %h :shared;
my %qe = map { $_ => Thread::Queue->new() } #$arrid;
for my $id (#$arrid) {
my $q = $qe{$id};
$tr{$id} = threads->create($arg, sub{
my $me = { id => $id };
while (my $item = $q->dequeue()) {
$me->{item} = $item;
eval { $worker->($me, \%h) };
$me->{err} = $# if $#;
my $temp = threads::shared::shared_clone($me);
{
lock (%h);
$h{$id} = $temp;
}
}
});
$tr{$id}->detach();
}
##
return sub {
my $act = shift;
if ($act eq "getshared") { return #_ ? #h{#_} : \%h }
elsif ($act eq "enqueue") { $_->enqueue(#_) for values %qe }
elsif ($act eq "getqe") { return \%qe }
elsif ($act eq "gettr") { return \%tr }
elsif ($act eq "getssize") { return threads->get_stack_size() }
elsif ($act eq "setssize") { return threads->set_stack_size(#_) }
else { die "unknown method" }
};
}
my $worker = sub {
my ($me) = #_;
my $id = $me->{id};
# $me->{foo} = "bar";
};
my $qf = Qfac($worker, [ 0 .. 10 ]);
# Send work to the thread
$qf->("enqueue", "do_something");
# ...
my $shared = $qf->("getshared");
use forks; (and its ::shared) is a drop-in replacement for use threads; (and its ::shared), and Thread::Queue uses thread::shared, so Thread::Queue will continue working just fine.
use if $ARGV[0], "forks";
use threads; # No effect under "use forks;"
use Thread::Queue;
my $q = Thread::Queue->new();
print "$$\n";
async {
print "$$\n";
print "$_\n" while $_ = $q->dequeue();
};
$q->enqueue($_) for 1..4;
$q->end();
$_->join() for threads->list();
$ perl x.pl 0
6047
6047
1
2
3
4
$ perl x.pl 1
6054
6056
1
2
3
4
(You'll need to add use forks::shared; with older versions for forks.)

How can I rewrite this multiple OR condition so that it uses a Perl "for" loop?

I have a hard-coded if loops that looks like this:
my $tp = 0;
my $fp = 0;
if ( $myhash{$pred}
|| $myhash{$pred1}
|| $myhash{$pred2}
|| $myhash{$pred3}
|| $myhash{$pred4}
|| $myhash{$pred5}
|| $myhash{$pred6} )
{
$tp++;
}
else {
$fp++;
}
How can I do that in for loops?
I tried the following but it give different result in total of fp and tp:
my $tp=0;
my $fp=0;
my #allpreds = ($pred,$pred1,$pred2,$pred3,$pred4,$pred5);
foreach my $allpred ( #allpreds ) {
if ( $myhash{$allpred} ) {
$tp++;
}
}
if ( !myhash{$pred} ) {
$fp++;
}
my $tp=0;
my $fp=0;
my $count=0;
my #allpreds = ($pred,$pred1,$pred2,$pred3,$pred4,$pred5);
foreach my $allpred ( #allpreds ) {
if ( $myhash{$allpred} ) {
$count++;
}
}
if ($count>0) { $tp=1; } else { $fp=1; }
This is somewhat more compact than some answers, but fully equivalent.
my $tp = 0;
my $fp = 0;
foreach my $allpred ($pred, $pred1, $pred2, $pred3, $pred4, $pred5)
{
$tp++, last if ($myhash{$allpred});
}
$fp = !$tp;
It is not clear that the variable $fp earns its keep when !$tp is almost as simple to write.
Test code - change the settings on the RHS of the fat commas to change the behaviour of the test.
use strict;
use warnings;
my $pred = "";
my $pred1 = "1";
my $pred2 = "2 2";
my $pred3 = "3 3 3";
my $pred4 = "4 4 4 4";
my $pred5 = "5 5 5 5 5";
my %myhash = ( $pred1 => 0, $pred2 => 0, $pred3 => 0, $pred4 => 0, $pred5 => 1 );
my $tp = 0;
my $fp = 0;
foreach my $allpred ($pred, $pred1, $pred2, $pred3, $pred4, $pred5)
{
$tp++, last if ($myhash{$allpred});
}
$fp = !$tp;
printf "%d:%d\n", $tp, $fp;
grep will return a true value when any one or more of its inputs returns true for it's statement, basically allowing it to act like a chain of ||'ed statements, except that it won't short circuit.
my #allpreds = ($pred, $pred1, $pred2, $pred3, $pred4, $pred5, $pred6);
if (grep { $myhash{$_} } #allpreds) {
$tp++;
}
else {
$fp++;
}
Using grep this way can be a bit confusing so a more explicit way is to use List::MoreUtils any
use List::MoreUtils qw(any);
my #allpreds = ($pred, $pred1, $pred2, $pred3, $pred4, $pred5, $pred6);
if (any { $myhash{$_} } #allpreds) {
$tp++;
}
else {
$fp++;
}

Resources