I have around 100 subroutines that I have to run using threads. How can I limit them so that only 10 threads run at a time? Can you give me some sample code?
Here is the sample code where I need to implement it:
use threads;
my ($thr1) = threads->create(\&sub1, $parameter);
my ($thr2) = threads->create(\&sub2, $parameter);
...
my ($thr100) = threads->create(\&sub100, $parameter);
my $result;
for my $t (@threads) {
    #print "$t\n";
    my (@getit) = $t->join();
    my $tmp = join '', @getit;
    $result .= $tmp;
}
print "$result\n";
Or do you have any other method for it? Each subroutine will do a different task.
use threads;
use Thread::Queue 3.01 qw( );

my $NUM_WORKERS = 10;

sub worker {
    my ($job) = @_;
    my ($sub_name, @args) = @$job;
    my $sub_ref = \&$sub_name;
    $sub_ref->(@args);
}

{
    my $q = Thread::Queue->new();

    my @workers;
    for (1..$NUM_WORKERS) {
        push @workers, async {
            while (my $job = $q->dequeue()) {
                worker($job);
            }
        };
    }

    $q->enqueue($_)
        for
            [ sub1 => ( @args ) ],
            [ sub2 => ( @args ) ];

    $q->end();
    $_->join() for @workers;
}
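For the hundred subroutines in the question, the jobs can be enqueued in a loop instead of being listed one by one. A minimal sketch, assuming the subs really are named sub1 through sub100 and each takes the same single $parameter:

# Hypothetical: queue all 100 jobs by name; assumes sub1..sub100 exist in the
# current package and each takes the same $parameter.
$q->enqueue([ "sub$_" => $parameter ]) for 1 .. 100;

Collecting the subs' return values into $result would additionally need a second queue for results, following the same $result_queue pattern shown further down.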
I am a newbie in Perl, so for educational purposes I am developing a multithreaded server using AnyEvent and Coro. The client sends the server a list of directory paths and the server responds with a listing of those directories.
I am using tcp_server and AnyEvent::Handle for connection handling, and for each client I want the server to check the thread pool (which is actually a pool of coros) for a free coro to handle the request. When handling of the request is finished, I want the coro to wait for another client instead of finishing.
However, it seems that at the end of the handle_request sub the coro is destroyed and is not available anymore.
#!/usr/bin/perl
use strict;
use v5.18;
use AnyEvent;
use AnyEvent::Socket qw(tcp_server);
use AnyEvent::Handle;
use Coro;
use Class::Struct;

print("Server is running...\n");

# dirs function
sub print_dirs {
    my $dir_list = $_[0];
    my @dirs = split(" ", $dir_list);
    my $result = "";
    for my $dir (@dirs) {
        if (opendir my $dirent, $dir) {
            my @files = readdir $dirent;
            closedir $dirent;
            $result = $result . "\nContents of $dir:\r\n" . join("\r\n", @files) . "\r\n";
        } else {
            $result = $result . "Failed to open $dir: $!\r\n";
        }
    }
    return $result;
}

# thread struct
struct clt_thread => {
    id      => '$',
    thread  => '$',
    is_busy => '$',
    args    => '$',
    client  => '$',
    clt_key => '$'
};

my $threads_num = 16;
my $thread_id = 0;
my @pool = ();

# handling request
my $cv = AE::cv;
my %client = ();

sub handle_request {
    my $thread_id = shift;
    my $thread;
    foreach my $thr (@pool) {
        if ($thr->id == $thread_id) { $thread = $thr; }
    }
    my $self = $thread->client;
    my $client_key = $thread->clt_key;
    my $dir_list = $thread->args;
    if ($thread->client != '') {
        say "Directories read: " . $dir_list . "\n";
        my @clients = keys %client;
        for my $key (grep { $_ ne $client_key } @clients) {
            my $response = print_dirs($dir_list);
            $client{$key}->push_write("$response");
            $self->push_shutdown;
            delete $client{$client_key};
            delete $client{$self};
        }
    }
    $thread->is_busy(0);
    Coro::cede();
}

# threads creation
for my $i (0..$threads_num) {
    my $coro = new Coro(\&handle_request, $thread_id);
    my $thread = clt_thread->new(id => $thread_id, thread => $coro, is_busy => 0, args => '', client => '', clt_key => '');
    push @pool, $thread;
    $thread_id = $thread_id + 1;
}

# tcp server creation - main part
tcp_server '127.0.0.1', 8015, sub {
    my ($fh, $host, $port) = @_;
    my $client_key = "$host:$port";
    my $hdl = AnyEvent::Handle->new(
        fh => $fh,
        poll => 'r',
        on_read => sub {
            my ($self) = @_;
            foreach my $thr (@pool) {
                if (!($thr->is_busy)) {
                    $thr->client($self);
                    $thr->args($self->rbuf);
                    $thr->clt_key($client_key);
                    $thr->is_busy(1);
                    $thr->thread->ready();
                    return;
                }
            }
        },
        on_error => sub {
            say "Something went wrong: $!\n";
        },
    );
    $client{$client_key} = $hdl;
    $client{$hdl} = $hdl;
};

$cv->recv;
I have already tried using an infinite loop inside handle_request, but then everything stops working altogether. Do you have any ideas how to fix that? I suppose using Coro::AnyEvent to integrate the coroutines into the event loop might be a solution. Could it be helpful in my case?
Thanks for your help.
The thread exits when handle_request exits, so you want to wrap the body of handle_request in an infinite loop.
You also want to use Coro::schedule; instead of Coro::cede; to wait for ->ready to be called again before continuing.
That first loop in handle_request can be reduced to my $thread = $pool[$thread_id];.
Untested fix:
sub handle_request {
    my ($thread_id) = @_;
    my $thread = $pool[$thread_id];
    while (1) {
        my $self = $thread->client;
        my $client_key = $thread->clt_key;
        my $dir_list = $thread->args;
        ...
        $thread->is_busy(0);
        Coro::schedule();
    }
}
That said, the following is the approach I'd use:
use Coro;
use Coro::Channel;

use constant NUM_WORKERS => 16;

sub worker {
    my ($job) = @_;
    my $self       = $job->{client};
    my $client_key = $job->{clt_key};
    my $dir_list   = $job->{args};
    ...
}
{
    my $q = Coro::Channel->new();

    my @threads =
        map {
            async {
                while ( my $job = $q->get() ) {
                    eval { worker($job); 1 }
                        or warn $@;
                }
            }
        }
        1..NUM_WORKERS;

    ...

    on_read => sub {
        my ($self) = @_;
        $q->put({
            client  => $self,
            clt_key => $client_key,
            args    => $self->rbuf,
        });
    }

    ...

    $cv->recv;

    $q->shutdown;
    $_->join for @threads;
}
This is the same approach I'd use with real threads (using Thread::Queue instead of Coro::Channel).
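A minimal sketch of that thread-based variant, assuming the same worker($job) routine as above; with real threads the job passed through the queue has to be shareable data, so plain strings or small hashrefs of strings would stand in for the handle object used in the Coro version:

use threads;
use Thread::Queue;

use constant NUM_WORKERS => 16;

my $q = Thread::Queue->new();

my @threads = map {
    async {
        # Each worker pulls jobs until the queue is ended.
        while ( defined( my $job = $q->dequeue() ) ) {
            eval { worker($job); 1 }
                or warn $@;
        }
    }
} 1 .. NUM_WORKERS;

# ... enqueue jobs where the Coro version called $q->put(...), e.g.
# $q->enqueue({ clt_key => $client_key, args => $rbuf });

$q->end();
$_->join() for @threads;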
What is the way to say "wait 10 seconds for the input; if no input is recognized, print something" in Perl?
IO::Select and can_read with a timeout.
#!/usr/bin/env perl

use strict;
use warnings;
use IO::Select;

my $select = IO::Select->new();
$select->add( \*STDIN );

my $input = "NONE";
if ( $select->can_read(10) ) {
    $input = <STDIN>;
}
print "Got input of $input\n";
You can use alarm(),
use strict;
use warnings;

sub timeout {
    my ($f, $sec) = @_;
    return eval {
        local $SIG{ALRM} = sub { die };
        alarm($sec);
        $f->();
        alarm(0);
        1;
    };
}

my $text;
my $ok = timeout(sub { $text = <STDIN>; }, 10);
if ($ok) {
    print "input: $text";
}
else {
    print "timeout occurred\n";
}
I am working on the script below, in which I split the file content into @file_splitted and try to apply Thread::Queue to speed up the process. But $result contains nothing at the end. Can you please check what is happening?
my $NUM_WORKERS = 5;
my $q = Thread::Queue->new();

sub worker {
    my ($job) = @_;
    print "@_ \n################\n";
    my ($sub_name, @args) = @$job;
    my $sub_ref = \&Subroutine;
    $sub_ref->(@args);
}

{
    my $q = Thread::Queue->new();

    my @workers;
    for (1..$NUM_WORKERS) {
        push @workers, async {
            while (my $job = $q->dequeue()) {
                worker($job);
                # print "$job \n";
            }
        };
    }

    $q->enqueue($_) for @file_splitted;
    $q->end();

    for my $t (@workers) {
        my (@getit) = $t->join();
        my $tmp = join '', @getit;
        $result .= $tmp;
        print "$result\n";
    }
}
Here is your current code, tidied up a bit and commented:
my $NUM_WORKERS = 5;
my $q = Thread::Queue->new();

{
    # ok, so here we create a new queue for some reason that shadows the outer $q
    my $q = Thread::Queue->new();

    my @workers;
    for (1 .. $NUM_WORKERS) {
        push @workers, async {
            while (my $job = $q->dequeue()) {
                my ($sub_name, @args) = @$job;  # so the $job is an arrayref
                Subroutine(@args);              # what is "Subroutine"?
            }
            # this worker does not explicitly return any values
        };
    }

    # what is @file_splitted? Does it contain arrayrefs?
    $q->enqueue($_) for @file_splitted;
    $q->end();

    for my $t (@workers) {
        # where is $result declared? And why are you using a return value when you
        # don't explicitly return anything from your threads?
        $result .= join '', $t->join;
        print "$result\n";
    }
}
The problem is that you aren't actually returning anything useful from your threads – note that I removed the worker subroutine above because it doesn't add anything to this discussion, and probably confused you.
Quite likely, you will want to create another queue from which the threads can return results:
my $job_queue    = Thread::Queue->new;
my $result_queue = Thread::Queue->new;

my @workers;
for (1 .. $NUM_WORKERS) {
    push @workers, async {
        while (defined(my $job = $job_queue->dequeue)) {
            my $result = Subroutine($job);  # or something like this
            $result_queue->enqueue($result);
        }
        $result_queue->enqueue(undef);
    };
}

$job_queue->enqueue(@jobs);
$job_queue->end;

my $waiting = $NUM_WORKERS;
my @results;
while ($waiting) {
    if (defined(my $result = $result_queue->dequeue)) {
        # do something with the results
        push @results, $result;
    }
    else {
        $waiting--;
    }
}

$_->join for @workers;
If you only want to collect all results at the end, you could do something like this instead:
my $job_queue    = Thread::Queue->new;
my $result_queue = Thread::Queue->new;

my @workers;
for (1 .. $NUM_WORKERS) {
    push @workers, async {
        while (defined(my $job = $job_queue->dequeue)) {
            my $result = Subroutine($job);  # or something like this
            $result_queue->enqueue($result);
        }
    };
}

$job_queue->enqueue(@jobs);
$job_queue->end;
$_->join for @workers;

my @results = $result_queue->dequeue($result_queue->pending);
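One caveat when reassembling file content this way: with several workers the results arrive in completion order, not in the order the chunks were enqueued. A sketch of one way to preserve the original order, assuming Subroutine returns one string per chunk, is to tag every job with its index (the [index, string] pairing here is purely illustrative):

# Hypothetical ordering fix: pair each chunk with its position so the output
# can be reassembled in the original order after the workers finish.
my @jobs = map { [ $_, $file_splitted[$_] ] } 0 .. $#file_splitted;

# Inside the worker loop, instead of enqueueing the bare result:
#   my ($idx, $chunk) = @$job;
#   $result_queue->enqueue([ $idx, Subroutine($chunk) ]);

# After collecting @results (each element is an [index, string] pair):
my @ordered;
$ordered[ $_->[0] ] = $_->[1] for @results;
my $result = join '', @ordered;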
I had some working code that I've tried to multithread using the tutorial on dreamincode: http://www.dreamincode.net/forums/topic/255487-multithreading-in-perl/
The example code there seems to work fine, but I can't for the life of me work out why mine doesn't. From putting in debug messages it seems to get all the way to the end of the subroutine with all of the threads, and then sit there for a while before hitting a segmentation fault and dumping core. That being said, I've also not managed to find the core dump files anywhere (Ubuntu 13.10).
If anyone has any suggested reading, or can see the error in the rather messy code below, I'd be eternally grateful.
#!/usr/bin/env perl

use Email::Valid;
use LWP::Simple;
use XML::LibXML;
use Text::Trim;
use threads;
use DB_File;
use Getopt::Long;

my $sourcefile = "thislevel.csv";
my $startOffset = 0;
my $chunk = 10000;
my $num_threads = 8;

$result = GetOptions ("start=i"   => \$startOffset,  # numeric
                      "chunk=i"   => \$chunk,        # numeric
                      "file=s"    => \$sourcefile,   # string
                      "threads=i" => \$num_threads,  # numeric
                      "verbose"   => \$verbose);     # flag

$tie = tie(@filedata, "DB_File", $sourcefile, O_RDWR, 0666, $DB_RECNO)
    or die "Cannot open file $sourcefile: $!\n";
my $filenumlines = $tie->length;

if ($filenumlines > $startOffset + $chunk){
    $numlines = $startOffset + $chunk;
} else {
    $numlines = $filenumlines;
}

open (emails, '>>emails.csv');
open (errorfile, '>>errors.csv');
open (nxtlvl, '>>nextlevel.csv');
open (donefile, '>>donelines.csv');

my $line = '';
my $found = false;
my $linenum = 0;

my @threads = initThreads();

foreach(@threads){
    $_ = threads->create(\&do_search);
}
foreach(@threads){
    $_->join();
}

close nxtlvl;
close emails;
close errorfile;
close donefile;

sub initThreads{
    # An array to place our threads in
    my @initThreads;
    for(my $i = 1; $i <= $num_threads; $i++){
        push(@initThreads, $i);
    }
    return @initThreads;
}

sub do_search{
    my $id = threads->tid();
    my $linenum = $startOffset - 1 + $id;
    my $parser = XML::LibXML->new();
    $parser->set_options({ recover => 2,
                           validation => 0,
                           suppress_errors => 1,
                           suppress_warnings => 1,
                           pedantic_parser => 0,
                           load_ext_dtd => 0, });
    while ($linenum < $numlines) {
        $found = false;
        @full_line = split ',', $filedata[$linenum-1];
        $line = trim(@full_line[1]);
        $this_url = trim(@full_line[2]);
        print "Thread $id Scanning $linenum of $filenumlines\: ";
        printf "%.3f\%\n", 100 * $linenum / $filenumlines;
        my $content = get trim($this_url);
        if (!defined($content)) {
            print errorfile "$this_url, no content\n";
        } elsif (length($content) < 100) {
            print errorfile "$this_url, short\n";
        } else {
            my $doc = $parser->load_html(string => $content);
            if(defined($doc)){
                for my $anchor ( $doc->findnodes("//a[\@href]") )
                {
                    $is_email = substr $anchor->getAttribute("href"), 7;
                    if(Email::Valid->address($is_email)) {
                        printf emails "%s, %s\n", $line, $is_email;
                        $found = true;
                    } else{
                        $link = $anchor->getAttribute("href");
                        if (substr lc(trim($link)), 0, 4 eq "http"){
                            printf nxtlvl "%s, %s\n", $line, $link;
                        } else {
                            printf nxtlvl "%s, %s/%s\n", $line, $line, $link;
                        }
                    }
                }
            }
            if ($found=false){
                my @lines = split '\n', $content;
                foreach my $cline (@lines){
                    my @words = split ' ', $cline;
                    foreach my $word (@words) {
                        my @subwords = split '"', $word;
                        foreach my $subword (@subwords) {
                            if(Email::Valid->address($subword)) {
                                printf emails "%s, %s\n", $line, $subword;
                            }
                        }
                    }
                }
            }
        }
        printf donefile "%s\n", $linenum;
        $linenum = $linenum + $num_threads;
    }
    threads->exit();
}
In addition to sundry coding errors that mean my code should never ever be used as an example for other visitors, DB_File is not a thread-safe module.
Annoyingly, and perhaps misleadingly, it works absolutely as it should do right up until you close the threads that have been successfully accessing the file throughout your code.
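One way around that, sketched here under the assumption that a single reader thread keeps up with the workers, is to leave the DB_File tie entirely in the main thread and feed the lines to the workers through a Thread::Queue, so no worker ever touches the tied array:

use threads;
use Thread::Queue;

my $q = Thread::Queue->new();

# Workers never see the tie; they only receive plain [linenum, line] pairs.
my @workers = map {
    threads->create(sub {
        while ( defined( my $row = $q->dequeue() ) ) {
            my ($linenum, $csv_line) = @$row;
            # ... split the line and fetch/parse the URL as do_search() does ...
        }
    });
} 1 .. $num_threads;

# Only the main thread reads the tied @filedata, mirroring the original
# $filedata[$linenum - 1] indexing.
for my $linenum ( $startOffset .. $numlines - 1 ) {
    $q->enqueue([ $linenum, $filedata[$linenum - 1] ]);
}

$q->end();
$_->join() for @workers;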
There is forks::shared, which should be a drop-in replacement for threads::shared, but how should I replace Thread::Queue?
I would like to convert the following code to forks, if that is possible:
sub Qfac {
    use threads;
    use threads::shared;
    use Thread::Queue;

    my ($worker, $arrid, $arg) = @_;
    $arrid ||= [];
    $arg   ||= {};

    my %tr;
    my %h :shared;
    my %qe = map { $_ => Thread::Queue->new() } @$arrid;

    for my $id (@$arrid) {
        my $q = $qe{$id};
        $tr{$id} = threads->create($arg, sub{
            my $me = { id => $id };
            while (my $item = $q->dequeue()) {
                $me->{item} = $item;
                eval { $worker->($me, \%h) };
                $me->{err} = $@ if $@;
                my $temp = threads::shared::shared_clone($me);
                {
                    lock (%h);
                    $h{$id} = $temp;
                }
            }
        });
        $tr{$id}->detach();
    }

    ##
    return sub {
        my $act = shift;
        if    ($act eq "getshared") { return @_ ? @h{@_} : \%h }
        elsif ($act eq "enqueue")   { $_->enqueue(@_) for values %qe }
        elsif ($act eq "getqe")     { return \%qe }
        elsif ($act eq "gettr")     { return \%tr }
        elsif ($act eq "getssize")  { return threads->get_stack_size() }
        elsif ($act eq "setssize")  { return threads->set_stack_size(@_) }
        else  { die "unknown method" }
    };
}

my $worker = sub {
    my ($me) = @_;
    my $id = $me->{id};
    # $me->{foo} = "bar";
};

my $qf = Qfac($worker, [ 0 .. 10 ]);

# Send work to the thread
$qf->("enqueue", "do_something");

# ...

my $shared = $qf->("getshared");
use forks; (and its ::shared) is a drop-in replacement for use threads; (and its ::shared), and Thread::Queue uses threads::shared, so Thread::Queue will continue working just fine.
use if $ARGV[0], "forks";
use threads;       # No effect under "use forks;"
use Thread::Queue;

my $q = Thread::Queue->new();

print "$$\n";

async {
    print "$$\n";
    print "$_\n" while $_ = $q->dequeue();
};

$q->enqueue($_) for 1..4;
$q->end();

$_->join() for threads->list();
$ perl x.pl 0
6047
6047
1
2
3
4
$ perl x.pl 1
6054
6056
1
2
3
4
(You'll need to add use forks::shared; with older versions of forks.)
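With one of those older versions, the conditional loading at the top of the test script would gain a matching line, roughly like this (untested sketch; the forks::shared line is the only addition):

use if $ARGV[0], "forks";
use if $ARGV[0], "forks::shared";   # only needed with older versions of forks
use threads;       # No effect under "use forks;"
use Thread::Queue;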