I'm trying to implement a syscall which allows me to get the number of threads for the current process. I am new to the Linux kernel, and so my understanding of it is limited.
Currently, I am trying to iterate through all the task_structs, and compare their thread group leader's PID with the current thread group leader's PID:
// ...
int nthreads = 0;
struct task_struct *task_it;
for_each_process(task_it) {
if (task_it->group_leader->pid == current->group_leader->pid) {
nthreads++;
}
}
// ...
However, this doesn't seem to be working (a quick test spawning some pthreads is still giving 1. What about the group_leader is common to all threads in the same process?
The problem with your code is that what the kernel calls a PID (the pid field of task_struct) is what userspace calls a TID (ie. it's what's returned by sys_gettid() and is unique per thread). What userspace calls a PID is called a TGID in the kernel (for "task group ID") - that's what the sys_getpid() syscall returns.
You don't need to actually check the TGID, though - just comparing the struct task_struct * pointers is enough:
if (task_it->group_leader == current->group_leader) {
By the way, you could just iterate over the thread_group list that current is a member of (with while_each_thread()), then you wouldn't need any test at all. Or even better, just use get_nr_threads(current).
Note that all methods that loop over the task lists need to be wrapped in rcu_read_lock(); / rcu_read_unlock(); to be correct.
This chunk of code is a good demonstration.
The following C program creates a list of all processes in the process
table of a node and shows in one column the number of threads for any
single process. Using this tool, it was possible to identify that the
network daemon created a new thread anytime a network problem
occurred. A severe network problem was responsible for the logon
problems.
#include "sys/param.h"
#include "sys/pstat.h"
int main ( void )
{
struct pst_status * psa = NULL;
struct pst_status * prc = NULL;
struct pst_dynamic psd;
long nproc = 0;
long thsum = 0;
long i;
if ( pstat_getdynamic(&psd, sizeof(psd), 1, 0) == -1 )
(void)perror("pstat_getdynamic failed");
// Get the number of active processes from pst_dynamic
nproc = psd.psd_activeprocs;
psa = (struct pst_status *)malloc(nproc * sizeof(struct pst_status));
// Read the info about the active processes into the array 'psa'
if ( pstat_getproc(psa, sizeof(struct pst_status), nproc, 0) == -1 )
(void)perror("pstat_getproc failed");
(void)printf("\n\n------------------------------------------------------------------------------");
(void)printf("\n %5s | %5s |%7s| %5s | %s", "PID", "UID", "Threads", "RSS", "Command");
(void)printf("\n------------------------------------------------------------------------------");
// Report the process info as required
prc = (struct pst_status *)psa;
for (i=0; i < nproc; i++)
{
(void)printf("\n %5ld | ", prc->pst_pid);
(void)printf("%5ld | ", prc->pst_uid);
(void)printf("%5ld | ", prc->pst_nlwps);
(void)printf("%5ld | ", prc->pst_rssize);
(void)printf("%s ", prc->pst_cmd);
thsum += prc->pst_nlwps;
++prc;
}
(void)printf("\n\n*** %ld processes, %ld threads running\n\n", nproc, thsum);
(void)free(psa);
(void)exit(0);
}
Found here:
http://h21007.www2.hp.com/portal/site/dspp/menuitem.863c3e4cbcdc3f3515b49c108973a801?ciid=060818f70fe0211018f70fe02110275d6e10RCRD
Here's another link using task_struct:
http://tuxthink.blogspot.com/2011/03/using-foreachprocess-in-proc-entry.html
Related
I want to know the CPU utilization of a process and all the child processes, for a fixed period of time, in Linux.
To be more specific, here is my use-case:
There is a process which waits for a request from the user to execute the programs. To execute the programs, this process invokes child processes (maximum limit of 5 at a time) & each of this child process executes 1 of these submitted programs (let's say user submitted 15 programs at once). So, if user submits 15 programs, then 3 batches of 5 child processes each will run. Child processes are killed as soon as they finish their execution of the program.
I want to know about % CPU Utilization for the parent process and all its child process during the execution of those 15 programs.
Is there any simple way to do this using top or another command? (Or any tool i should attach to the parent process.)
You can find this information in /proc/PID/stat where PID is your parent process's process ID. Assuming that the parent process waits for its children then the total CPU usage can be calculated from utime, stime, cutime and cstime:
utime %lu
Amount of time that this process has been scheduled in user mode,
measured in clock ticks (divide by sysconf(_SC_CLK_TCK). This includes
guest time, guest_time (time spent running a virtual CPU, see below),
so that applications that are not aware of the guest time field do not
lose that time from their calculations.
stime %lu
Amount of time that this process has been scheduled in kernel mode,
measured in clock ticks (divide by sysconf(_SC_CLK_TCK).
cutime %ld
Amount of time that this process's waited-for children have been
scheduled in user mode, measured in clock ticks (divide by
sysconf(_SC_CLK_TCK). (See also times(2).) This includes guest time,
cguest_time (time spent running a virtual CPU, see below).
cstime %ld
Amount of time that this process's waited-for children have been
scheduled in kernel mode, measured in clock ticks (divide by
sysconf(_SC_CLK_TCK).
See proc(5) manpage for details.
And of course you can do it in hardcore-way using good old C
find_cpu.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define MAX_CHILDREN 100
/**
* System command execution output
* #param <char> command - system command to execute
* #returb <char> execution output
*/
char *system_output (const char *command)
{
FILE *pipe;
static char out[1000];
pipe = popen (command, "r");
fgets (out, sizeof(out), pipe);
pclose (pipe);
return out;
}
/**
* Finding all process's children
* #param <Int> - process ID
* #param <Int> - array of childs
*/
void find_children (int pid, int children[])
{
char empty_command[] = "/bin/ps h -o pid --ppid ";
char pid_string[5];
snprintf(pid_string, 5, "%d", pid);
char *command = (char*) malloc(strlen(empty_command) + strlen(pid_string) + 1);
sprintf(command, "%s%s", empty_command, pid_string);
FILE *fp = popen(command, "r");
int child_pid, i = 1;
while (fscanf(fp, "%i", &child_pid) != EOF)
{
children[i] = child_pid;
i++;
}
}
/**
* Parsign `ps` command output
* #param <char> out - ps command output
* #return <int> cpu utilization
*/
float parse_cpu_utilization (const char *out)
{
float cpu;
sscanf (out, "%f", &cpu);
return cpu;
}
int main(void)
{
unsigned pid = 1;
// getting array with process children
int process_children[MAX_CHILDREN] = { 0 };
process_children[0] = pid; // parent PID as first element
find_children(pid, process_children);
// calculating summary processor utilization
unsigned i;
float common_cpu_usage = 0.0;
for (i = 0; i < sizeof(process_children)/sizeof(int); ++i)
{
if (process_children[i] > 0)
{
char *command = (char*)malloc(1000);
sprintf (command, "/bin/ps -p %i -o 'pcpu' --no-headers", process_children[i]);
common_cpu_usage += parse_cpu_utilization(system_output(command));
}
}
printf("%f\n", common_cpu_usage);
return 0;
}
Compile:
gcc -Wall -pedantic --std=gnu99 find_cpu.c
Enjoy!
Might not be the exact command. But you can do something like below to get cpu usage of various process and add it.
#ps -C sendmail,firefox -o pcpu= | awk '{s+=$1} END {print s}'
/proc/[pid]/stat Status information about the process. This is used by ps and made into human readable form.
Another way is to use cgroups and use cpuacct.
http://www.kernel.org/doc/Documentation/cgroups/cpuacct.txt
https://access.redhat.com/knowledge/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Resource_Management_Guide/sec-cpuacct.html
Here's one-liner to compute total CPU for all processes. You can adjust it by passing column filter into top output:
top -b -d 5 -n 2 | awk '$1 == "PID" {block_num++; next} block_num == 2 {sum += $9;} END {print sum}'
Look at this sample code:
void OutputElement(int e, int delay)
{
this_thread::sleep_for(chrono::milliseconds(100 * delay));
cout << e << '\n';
}
void SleepSort(int v[], uint n)
{
for (uint i = 0 ; i < n ; ++i)
{
thread t(OutputElement, v[i], v[i]);
t.detach();
}
}
It starts n new threads and each one sleeps for some time before outputting a value and finishing. What's the correct/best/recommended way of waiting for all threads to finish in this case? I know how to work around this but I want to know what's the recommended multithreading tool/design that I should use in this situation (e.g. condition_variable, mutex etc...)?
And now for the slightly dissenting answer. And I do mean slightly because I mostly agree with the other answer and the comments that say "don't detach, instead join."
First imagine that there is no join(). And that you have to communicate among your threads with a mutex and condition_variable. This really isn't that hard nor complicated. And it allows an arbitrarily rich communication, which can be anything you want, as long as it is only communicated while the mutex is locked.
Now a very common idiom for such communication would simply be a state that says "I'm done". Child threads would set it, and the parent thread would wait on the condition_variable until the child said "I'm done." This idiom would in fact be so common as to deserve a convenience function that encapsulated the mutex, condition_variable and state.
join() is precisely this convenience function.
But imho one has to be careful. When one says: "Never detach, always join," that could be interpreted as: Never make your thread communication more complicated than "I'm done."
For a more complex interaction between parent thread and child thread, consider the case where a parent thread launches several child threads to go out and independently search for the solution to a problem. When the problem is first found by any thread, that gets communicated to the parent, and the parent can then take that solution, and tell all the other threads that they don't need to search any more.
For example:
#include <chrono>
#include <iostream>
#include <iterator>
#include <random>
#include <thread>
#include <vector>
void OneSearch(int id, std::shared_ptr<std::mutex> mut,
std::shared_ptr<std::condition_variable> cv,
int& state, int& solution)
{
std::random_device seed;
// std::mt19937_64 eng{seed()};
std::mt19937_64 eng{static_cast<unsigned>(id)};
std::uniform_int_distribution<> dist(0, 100000000);
int test = 0;
while (true)
{
for (int i = 0; i < 100000000; ++i)
{
++test;
if (dist(eng) == 999)
{
std::unique_lock<std::mutex> lk(*mut);
if (state == -1)
{
state = id;
solution = test;
cv->notify_one();
}
return;
}
}
std::unique_lock<std::mutex> lk(*mut);
if (state != -1)
return;
}
}
auto findSolution(int n)
{
std::vector<std::thread> threads;
auto mut = std::make_shared<std::mutex>();
auto cv = std::make_shared<std::condition_variable>();
int state = -1;
int solution = -1;
std::unique_lock<std::mutex> lk(*mut);
for (uint i = 0 ; i < n ; ++i)
threads.push_back(std::thread(OneSearch, i, mut, cv,
std::ref(state), std::ref(solution)));
while (state == -1)
cv->wait(lk);
lk.unlock();
for (auto& t : threads)
t.join();
return std::make_pair(state, solution);
}
int
main()
{
auto p = findSolution(5);
std::cout << '{' << p.first << ", " << p.second << "}\n";
}
Above I've created a "dummy problem" where a thread searches for how many times it needs to query a URNG until it comes up with the number 999. The parent thread puts 5 child threads to work on it. The child threads work for awhile, and then every once in a while, look up and see if any other thread has found the solution yet. If so, they quit, else they keep working. The main thread waits until solution is found, and then joins with all the child threads.
For me, using the bash time facility, this outputs:
$ time a.out
{3, 30235588}
real 0m4.884s
user 0m16.792s
sys 0m0.017s
But what if instead of joining with all the threads, it detached those threads that had not yet found a solution. This might look like:
for (unsigned i = 0; i < n; ++i)
{
if (i == state)
threads[i].join();
else
threads[i].detach();
}
(in place of the t.join() loop from above). For me this now runs in 1.8 seconds, instead of the 4.9 seconds above. I.e. the child threads are not checking with each other that often, and so main just detaches the working threads and lets the OS bring them down. This is safe for this example because the child threads own everything they are touching. Nothing gets destructed out from under them.
One final iteration can be realized by noticing that even the thread that finds the solution doesn't need to be joined with. All of the threads could be detached. The code is actually much simpler:
auto findSolution(int n)
{
auto mut = std::make_shared<std::mutex>();
auto cv = std::make_shared<std::condition_variable>();
int state = -1;
int solution = -1;
std::unique_lock<std::mutex> lk(*mut);
for (uint i = 0 ; i < n ; ++i)
std::thread(OneSearch, i, mut, cv,
std::ref(state), std::ref(solution)).detach();
while (state == -1)
cv->wait(lk);
return std::make_pair(state, solution);
}
And the performance remains at about 1.8 seconds.
There is still (sort of) an effective join with the solution-finding thread here. But it is accomplished with the condition_variable::wait instead of with join.
thread::join() is a convenience function for the very common idiom that your parent/child thread communication protocol is simply "I'm done." Prefer thread::join() in this common case as it is easier to read, and easier to write.
However don't unnecessarily constrain yourself to such a simple parent/child communication protocol. And don't be afraid to build your own richer protocol when the task at hand needs it. And in this case, thread::detach() will often make more sense. thread::detach() doesn't necessarily imply a fire-and-forget thread. It can simply mean that your communication protocol is more complex than "I'm done."
Don't detach, but instead join:
std::vector<std::thread> ts;
for (unsigned int i = 0; i != n; ++i)
ts.emplace_back(OutputElement, v[i], v[i]);
for (auto & t : threads)
t.join();
I am trying to create a number of threads (representing persons), in a for loop, and display the person id, which is passed as an argument, together with the thread id. The person id is displayed as exepected, but the thread id is always the same.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
void* travelers(void* arg) {
int* person_id = (int*) arg;
printf("\nPerson %d was created, TID = %d", *person_id, pthread_self());
}
int main(int argc, char** argv)
{
int i;
pthread_t th[1000];
for (i=0; i < 10; i++) {
if ((pthread_create(&th[i], NULL, travelers, &i)) != 0) {
perror("Could not create threads");
exit(2);
}
else {
// Join thread
pthread_join(th[i], NULL);
}
}
printf("\n");
return 0;
}
The output I get is something like this:
Person 0 was created, TID = 881035008
Person 1 was created, TID = 881035008
Person 2 was created, TID = 881035008
Person 3 was created, TID = 881035008
Person 4 was created, TID = 881035008
Person 5 was created, TID = 881035008
Person 6 was created, TID = 881035008
Person 7 was created, TID = 881035008
Person 8 was created, TID = 881035008
Person 9 was created, TID = 881035008
What am I doing wrong?
Since only one of the created threads runs at a time, every new one gets the same ID as the one that finished before, i.e. the IDs are simply reused. Try creating threads in a loop and then joining them in a second loop.
However, you will then have to take care that each thread independently reads the content of i, which will give you different headaches. I'd pass the index as context argument, and then cast it to an int inside the thread function.
It does that, because it re-uses thread-ids. The thread id is only unique among all running threads, but not for threads running at different times; look what your for-loop does essentially:
for (i = 0 to 10) {
start a thread;
wait for termination of the thread;
}
So the program has only one thread running at any given time, one thread is only started after the previous started thread has terminated (with pthread_join ()). To make them run at the same time, use two for loops:
for (i = 0 to 10) {
start thread i;
}
for (i = 0 to 10) {
wait until thread i is finished;
}
Then you will likely get different thread-ids. (I.e. you will get different thread-ids, but if the printf-function will write them out differently depends on your specific implementation/architecture, in particular if thread_t is essentially an int or not. It might be a long int, for example).
if ((pthread_create(&th[i], NULL, travelers, &i)) != 0)
If the thread is successfully created it returns 0. If != 0 will return false and you will execute the pthread_join. You are effectively creating one thread repeatedly.
I have many shared data between threads in my server. If I use one pthread_rwlock, the multithreading stoppid. I use arrays of rwlocks:
#define DIR_LOCK_COUNT 32
pthread_rwlock_t dir_mutex[DIR_LOCK_COUNT];
...
# into main thread initialize pthread_rwlock
for(i=0; i < DIR_LOCK_COUNT; i++){
if(pthread_rwlock_init(&dir_mutex[i], NULL) != 0) {
syslog(LOG_ERR, "can't initialize rwlock %i", i);
return ERR;
}
}
...
# in the worker thread
int num = user_id % DIR_LOCK_COUNT;
pthread_rwlock_rdlock(&dir_mutex[num]);
struct dir *dir_trash = dict_search((dict*)user->dirs, &dir_trash_id);
pthread_rwlock_unlock(&dir_mutex[num]);
I have arrays of 35K users and 16 thread pool. Can I use dimension of pthread_rwlock is 1024 or more?
The Linux / glibc NPTL implementation of pthread_rwlock() doesn't use any per-lock in-kernel resources. If there's enough memory for the pthread_rwlock_t in the first place, then you can create the lock - pthread_rwlock_init() never fails in the NPTL implementation.
I want to know the CPU utilization of a process and all the child processes, for a fixed period of time, in Linux.
To be more specific, here is my use-case:
There is a process which waits for a request from the user to execute the programs. To execute the programs, this process invokes child processes (maximum limit of 5 at a time) & each of this child process executes 1 of these submitted programs (let's say user submitted 15 programs at once). So, if user submits 15 programs, then 3 batches of 5 child processes each will run. Child processes are killed as soon as they finish their execution of the program.
I want to know about % CPU Utilization for the parent process and all its child process during the execution of those 15 programs.
Is there any simple way to do this using top or another command? (Or any tool i should attach to the parent process.)
You can find this information in /proc/PID/stat where PID is your parent process's process ID. Assuming that the parent process waits for its children then the total CPU usage can be calculated from utime, stime, cutime and cstime:
utime %lu
Amount of time that this process has been scheduled in user mode,
measured in clock ticks (divide by sysconf(_SC_CLK_TCK). This includes
guest time, guest_time (time spent running a virtual CPU, see below),
so that applications that are not aware of the guest time field do not
lose that time from their calculations.
stime %lu
Amount of time that this process has been scheduled in kernel mode,
measured in clock ticks (divide by sysconf(_SC_CLK_TCK).
cutime %ld
Amount of time that this process's waited-for children have been
scheduled in user mode, measured in clock ticks (divide by
sysconf(_SC_CLK_TCK). (See also times(2).) This includes guest time,
cguest_time (time spent running a virtual CPU, see below).
cstime %ld
Amount of time that this process's waited-for children have been
scheduled in kernel mode, measured in clock ticks (divide by
sysconf(_SC_CLK_TCK).
See proc(5) manpage for details.
And of course you can do it in hardcore-way using good old C
find_cpu.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define MAX_CHILDREN 100
/**
* System command execution output
* #param <char> command - system command to execute
* #returb <char> execution output
*/
char *system_output (const char *command)
{
FILE *pipe;
static char out[1000];
pipe = popen (command, "r");
fgets (out, sizeof(out), pipe);
pclose (pipe);
return out;
}
/**
* Finding all process's children
* #param <Int> - process ID
* #param <Int> - array of childs
*/
void find_children (int pid, int children[])
{
char empty_command[] = "/bin/ps h -o pid --ppid ";
char pid_string[5];
snprintf(pid_string, 5, "%d", pid);
char *command = (char*) malloc(strlen(empty_command) + strlen(pid_string) + 1);
sprintf(command, "%s%s", empty_command, pid_string);
FILE *fp = popen(command, "r");
int child_pid, i = 1;
while (fscanf(fp, "%i", &child_pid) != EOF)
{
children[i] = child_pid;
i++;
}
}
/**
* Parsign `ps` command output
* #param <char> out - ps command output
* #return <int> cpu utilization
*/
float parse_cpu_utilization (const char *out)
{
float cpu;
sscanf (out, "%f", &cpu);
return cpu;
}
int main(void)
{
unsigned pid = 1;
// getting array with process children
int process_children[MAX_CHILDREN] = { 0 };
process_children[0] = pid; // parent PID as first element
find_children(pid, process_children);
// calculating summary processor utilization
unsigned i;
float common_cpu_usage = 0.0;
for (i = 0; i < sizeof(process_children)/sizeof(int); ++i)
{
if (process_children[i] > 0)
{
char *command = (char*)malloc(1000);
sprintf (command, "/bin/ps -p %i -o 'pcpu' --no-headers", process_children[i]);
common_cpu_usage += parse_cpu_utilization(system_output(command));
}
}
printf("%f\n", common_cpu_usage);
return 0;
}
Compile:
gcc -Wall -pedantic --std=gnu99 find_cpu.c
Enjoy!
Might not be the exact command. But you can do something like below to get cpu usage of various process and add it.
#ps -C sendmail,firefox -o pcpu= | awk '{s+=$1} END {print s}'
/proc/[pid]/stat Status information about the process. This is used by ps and made into human readable form.
Another way is to use cgroups and use cpuacct.
http://www.kernel.org/doc/Documentation/cgroups/cpuacct.txt
https://access.redhat.com/knowledge/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Resource_Management_Guide/sec-cpuacct.html
Here's one-liner to compute total CPU for all processes. You can adjust it by passing column filter into top output:
top -b -d 5 -n 2 | awk '$1 == "PID" {block_num++; next} block_num == 2 {sum += $9;} END {print sum}'