I want to use SystemTap to extract details from my Linux production server. My SystemTap script is:
global bt;
global quit = 0

probe begin {
    printf("start profiling...\n")
}

probe timer.profile {
    if (pid() == target()) {
        if (!quit) {
            bt[backtrace(), ubacktrace()] <<< 1
        } else {
            foreach ([sys, usr] in bt- limit 1000) {
                print_stack(sys)
                print_ustack(usr)
                printf("\t%d\n", @count(bt[sys, usr]))
            }
            exit()
        }
    }
}

probe timer.s(20) {
    quit = 1
}
When I run this script with the command
sudo stap --ldd -d $program_name --all-modules \
-D MAXMAPENTRIES=10240 -D MAXACTION=20000 -D MAXTRACE=40 \
-D MAXSTRINGLEN=4096 -D MAXBACKTRACE=40 -x $program_pid \
profile.stp --vp 00001 > profile.out
it fails and prints the following error:
ERROR: error allocating hash
ERROR: global variable 'bt' allocation failed
WARNING: /usr/bin/staprun exited with status: 1
My production server's memory info is:
total used free shared buffers cached
Mem: 16008 15639 368 0 80 3090
-/+ buffers/cache: 12468 3539
I think that should be enough, because my test server has only 2 GB of memory and the same SystemTap script runs well there.
Unfortunately, this is intended behavior; see my discussion here: https://sourceware.org/ml/systemtap/2015-q1/msg00033.html
The problem is that SystemTap allocates associative arrays all at once (to prevent allocation failures later) and on a per-CPU basis (to avoid locking), which means that bt will require (2 * MAXSTRINGLEN + sizeof(statistic)) * MAXMAPENTRIES * NR_CPU ≈ 2 GB if NR_CPU == 128.
Reduce MAXSTRINGLEN (which is set to 4k in your case) or the size of the bt array:
global bt[128];
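For example, a lower-footprint invocation might look like the one below. This is only a sketch: the rest of your options are kept unchanged, and the values 2048 and 512 are arbitrary starting points you would need to tune (a smaller MAXSTRINGLEN can truncate very long stack strings):
sudo stap --ldd -d $program_name --all-modules \
    -D MAXMAPENTRIES=2048 -D MAXACTION=20000 -D MAXTRACE=40 \
    -D MAXSTRINGLEN=512 -D MAXBACKTRACE=40 -x $program_pid \
    profile.stp --vp 00001 > profile.out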
I sent this question a few months ago to the slurm-dev list, but it is still unsolved.
The problem is this: after changing the job size by hand as the FAQ describes, I wanted to do it programmatically using the API.
Everything seems to work fine up to the step of updating the environment variables.
When I launch the application this is what I get:
$ salloc -N1 mpiexec -n 1 ./jobExpansion
salloc: Granted job allocation 559
srun: error: Only allocated 1 nodes asked for 4
In squeue I can see that the allocation has changed, but srun cannot see the changes.
I continued debugging, and if I execute:
$ salloc -N1
$ export SLURM_NODELIST=n04,n06,n00,n01
$ export SLURM_NNODES=4
$ mpiexec -n 1 ./jobExpansion
It worked.
I don't want to overwhelm you with the complete code, but in case it helps, here are the parts that do the resizing:
slurm_init_job_desc_msg(&job);
job.user_id = getuid();
job.min_nodes = hostsToExpand;
job.dependency = (char *) malloc(sizeof (char)*20);
sprintf(job.dependency, (char *) "expand:%s", pID);
//$ salloc -N4 --dependency=expand:$SLURM_JOBID
slurm_alloc_msg_ptr = slurm_allocate_resources_blocking(&job, 0, NULL);
//$ scontrol update jobid=$SLURM_JOBID NumNodes=0
slurm_init_job_desc_msg(&job_update);
job_update.job_id = slurm_alloc_msg_ptr->job_id;
job_update.min_nodes = 0;
slurm_update_job(&job_update);
//exit
slurm_kill_job(slurm_alloc_msg_ptr->job_id, 9, 0);
//$ scontrol update jobid=$SLURM_JOBID NumNodes=ALL
slurm_init_job_desc_msg(&job_update);
job_update.job_id = procID;
job_update.min_nodes = INFINITE;
slurm_update_job(&job_update);
Everything points to the environment variables, but I am not sure how to update them properly.
Thank you.
EDITED
If somebody would like to test what I have described, here is the repository:
git clone https://siserte@bitbucket.org/siserte/slurm-job-expansion-test.git
I have been walking around this problem for a long time: cgroups just don't work when the config file is reloaded (the mount hangs), and I have to reboot each time for changes to take effect.
These are my steps:
(1.)Fresh start of OS.
(2.)cgsnapshot -s
# Configuration file generated by cgsnapshot
mount {
cpuset = /sys/fs/cgroup/cpuset;
cpu = /sys/fs/cgroup/cpu;
cpuacct = /sys/fs/cgroup/cpuacct;
memory = /sys/fs/cgroup/memory;
devices = /sys/fs/cgroup/devices;
freezer = /sys/fs/cgroup/freezer;
net_cls = /sys/fs/cgroup/net_cls;
blkio = /sys/fs/cgroup/blkio;
perf_event = /sys/fs/cgroup/perf_event;
}
(3.)cgclear
(4.)cgsnapshot -s
# Configuration file generated by cgsnapshot
(5.)cgconfigparser -l /etc/cgconfig.conf
(6.)cgsnapshot -s
mount {
cpu = /cgroup/cpu_mem_blkio;
cpuacct = /cgroup/cpu_mem_blkio;
memory = /cgroup/cpu_mem_blkio;
blkio = /cgroup/cpu_mem_blkio;
}
group hello1 {
...
group hello2 {
...
(7.)bash script /etc/rc.d/rc.cgred start
Now everything is working, but when I do this (with the same config):
(8.)cgclear
(9.)cgconfigparser -l /etc/cgconfig.conf
It hangs forever; when I use strace, it stops at:
mount("cgroup", "/cgroup/cpu_mem_blkio", "cgroup", 0,
"cpu,cpuacct,blkio,memory") = ? ERESTARTNOINTR (To be restarted)
Could someone point out what's wrong?
How can I add a new group without rebooting?
Is this normal behavior of cgroups?
I even tried adding this patch from here:
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1909,7 +1909,7 @@ static void cgroup_kill_sb(struct super_block *sb)
*
* And don't kill the default root.
*/
- if (css_has_online_children(&root->cgrp.self) ||
+ if (!list_empty(&root->cgrp.self.children) ||
root == &cgrp_dfl_root)
cgroup_put(&root->cgrp);
else
Still testing, but it looks the same.
It looks like the right way of doing it is to set everything up from the command line.
mount -t cgroup -o cpu,memory,blkio,cpuacct cpu_mem_blkio /cgroup/cpu_mem_blkio
mkdir /cgroup/cpu_mem_blkio/hello1
mkdir /cgroup/cpu_mem_blkio/hello2
echo 200 > /cgroup/cpu_mem_blkio/hello1/cpu.shares
echo 200M > /cgroup/cpu_mem_blkio/hello1/memory.limit_in_bytes
echo 400M > /cgroup/cpu_mem_blkio/hello1/memory.memsw.limit_in_bytes
echo 100 > /cgroup/cpu_mem_blkio/hello1/blkio.weight
...
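Adding another group later does not need a reboot or cgclear; it is just another directory under the same mount. A minimal sketch, using a hypothetical group name hello3 (the libcgroup commands assume the same cpu,cpuacct,memory,blkio hierarchy as above):
mkdir /cgroup/cpu_mem_blkio/hello3
echo 200 > /cgroup/cpu_mem_blkio/hello3/cpu.shares
echo 200M > /cgroup/cpu_mem_blkio/hello3/memory.limit_in_bytes
# or, with the libcgroup helpers:
cgcreate -g cpu,cpuacct,memory,blkio:hello3
# an empty group can be removed again without touching the rest of the hierarchy:
cgdelete -g cpu,cpuacct,memory,blkio:hello3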
Is there any option in perf to look at the processes running on a particular CPU/core, and what percentage of that core is taken by each process?
Reference links would be helpful.
perf is intended for profiling, which is not a good fit for your case. You may try sampling /proc/sched_debug (if it is compiled into your kernel). For example, you may check which process is currently running on each CPU:
egrep '^R|cpu#' /proc/sched_debug
cpu#0, 917.276 MHz
R egrep 2614 37730.177313 ...
cpu#1, 917.276 MHz
R bash 2023 218715.010833 ...
Using its PID as a key, you may check how much CPU time (in milliseconds) it has consumed:
grep se.sum_exec_runtime /proc/2023/sched
se.sum_exec_runtime : 279346.058986
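Since sum_exec_runtime is cumulative, a rough per-second percentage needs two samples. A small sketch, reusing PID 2023 from the example above:
t1=$(awk '/se.sum_exec_runtime/ { print $3 }' /proc/2023/sched)
sleep 1
t2=$(awk '/se.sum_exec_runtime/ { print $3 }' /proc/2023/sched)
# the counter is in milliseconds, so the delta over one second divided by 10 is a percentage
echo "$t1 $t2" | awk '{ printf "%.1f%% CPU over the last second\n", ($2 - $1) / 10 }'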
However, as @BrenoLeitão mentioned, SystemTap is better suited to your task. Here is a script:
global cputimes;
global cmdline;
global oncpu;
global NS_PER_SEC = 1000000000;

probe scheduler.cpu_on {
    oncpu[pid()] = local_clock_ns();
}

probe scheduler.cpu_off {
    if (oncpu[pid()] == 0)
        next;
    cmdline[pid()] = cmdline_str();
    cputimes[pid(), cpu()] <<< local_clock_ns() - oncpu[pid()];
    delete oncpu[pid()];
}

probe timer.s(1) {
    printf("%6s %3s %6s %s\n", "PID", "CPU", "PCT", "CMDLINE");
    foreach ([pid+, cpu] in cputimes) {
        cpupct = @sum(cputimes[pid, cpu]) * 10000 / NS_PER_SEC;
        printf("%6d %3d %3d.%02d %s\n", pid, cpu,
               cpupct / 100, cpupct % 100, cmdline[pid]);
    }
    delete cputimes;
}
It traces the moments when a process starts running on a CPU and when it stops running there (due to migration or sleeping) by attaching to the scheduler.cpu_on and scheduler.cpu_off probes. The second probe calculates the time difference between these events and saves it into the cputimes aggregation, along with the process's command-line arguments.
timer.s(1) fires once per second; it walks over the aggregation and calculates percentages. Here is sample output from CentOS 7 with bash running an infinite loop:
     0   0 100.16
    30   1   0.00
    51   0   0.00
   380   0   0.02 /usr/bin/python -Es /usr/sbin/tuned -l -P
  2016   0   0.08 sshd: root@pts/0 "" "" "" ""
  2023   1 100.11 -bash
  2630   0   0.04 /usr/libexec/systemtap/stapio -R stap_3020c9e7ba76838179be68cd2390a10c_2630 -F3
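If you save the script as, say, percpu.stp (the file name is arbitrary), it runs with root privileges and keeps printing a table every second until you interrupt it:
sudo stap percpu.stp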
I understand that perf is not the proper way to do it, although you can limit perf to particular CPUs, using perf record -C <cpulist> or even perf stat -C <cpulist>.
The closest you are going to get is the context-switch event, but this is not going to give you the application names at all.
I think you are going to need something more powerful, such as SystemTap.
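That said, if a sample-based approximation is enough, restricting perf to one core and grouping the report by command comes close. A rough sketch (it samples CPU 2 for ten seconds; the percentages are shares of the samples taken on that core, not exact CPU time):
perf record -a -C 2 sleep 10
perf report --sort comm --stdio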
I have a RHEL (Red Hat Enterprise Linux) v6.5 (Santiago) server. On this server, if I run df --help, there is a list of options available. I am interested in the option --total.
However, I also have an older version of RHEL (v5.5), which has no --total option.
My question is, I have a command like this:
df -h --total | grep total | awk 'NR==1{print$2}+NR==1{print$3}+NR==1{print$4}+NR==1{print$5}'
which gives the following output:
62G
39G
21G
66%
where
62G is the total size of the disk,
39G is used,
21G is remaining, and
66% is the total usage.
The above command works fine on RHEL v6.5, but it fails on RHEL v5.5 since that version's df has no --total option.
When I run the same command on RHEL v5.5, I get the error below:
df: unrecognized option `--total'
Try `df --help' for more information.
So is there a command that can give me the output in the following way:
Total Disk Space
Used Space
Remaining Disk space
Usage %
Ex:
62G
39G
21G
66%
You'll have to do the calculation work yourself.
Something like this awk script should work.
$ cat dftotal.awk
BEGIN {
    map[0] = "K"
    map[1] = "M"
    map[2] = "G"
    map[3] = "T"
}

# turn a value in 1K blocks into a human-readable string
function fmt(val, c) {
    c = 0
    while (val > 1024) {
        c++
        val = val / 1024
    }
    return val map[c]
}

# skip the header line, then sum the size, used and available columns
NR > 1 {
    for (i = 2; i < 5; i++) {
        sum[i] += $i
    }
}

END {
    print fmt(sum[2]) ORS fmt(sum[3]) ORS fmt(sum[4])
    print ((sum[3] / sum[2]) * 100) "%"
}
$ df -P | awk -f dftotal.awk
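Note that df -P with no arguments sums every mounted filesystem, including tmpfs and other pseudo-filesystems; if you only want local, real disks you can filter the input, for example:
$ df -P -l -x tmpfs | awk -f dftotal.awk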
I am using a bash script to generate mobility files (setdest) in ns2 for various seeds, but I am running into a troublesome segmentation fault. Any help would be appreciated. The setdest.cc has been modified, so it is not the standard ns2 file.
I will walk you through the problem.
This shell script produces the segmentation fault:
#! /bin/sh
setdest="/root/ns-allinone-2.1b9a/ns-2.1b9a/indep-utils/cmu-scen-gen/setdest/setdest_mesh_seed_mod"
let nn="70"     #Number of nodes in the simulation
let time="900"  #Simulation time
let x="1000"    #Horizontal dimensions
let y="1000"    #Vertical dimensions
for speed in 5
do
    for pause in 10
    do
        for seed in 1 5
        do
            echo -e "\n"
            echo Seed = $seed Speed = $speed Pause Time = $pause
            chmod 700 $setdest
            $setdest -n $nn -p $pause -s $speed -t $time -x $x -y $y -l 1 -m 50 > scen-mesh-n$nn-seed$seed-p$pause-s$speed-t$time-x$x-y$y
        done
    done
done
The error is:
scengen_mesh: line 21: 14144 Segmentation fault $setdest -n $nn -p $pause -s $speed -t $time -x $x -y $y -l 1 -m 50 >scen-mesh-n$nn-seed$seed-p$pause-s$speed-t$time-x$x-y$y
Line 21 is the last line of the shell script (done).
The strange thing is that if I run the same setdest command in the terminal, there is no problem, e.g.:
$setdest -n 70 -p 10 -s 5 -t 900 -x 1000 -y 1000 -l 1 -m 50
I have worked out where the problem is exactly: it is the argument -l. If I remove that argument in the shell script, there is no problem. Now I will walk you through the modified setdest.cc where this argument comes from.
The modified setdest uses a text file, initpos, to read the XY coordinates of static nodes for a wireless mesh topology. The relevant lines of code are:
FILE *fp_loc;
int locinit;

fp_loc = fopen("initpos", "r");

while ((ch = getopt(argc, argv, "r:m:l:n:p:s:t:x:y:i:o")) != EOF) {
    switch (ch) {
    case 'l':
        locinit = atoi(optarg);
        break;
    default:
        usage(argv);
        exit(1);

if (locinit)
    fscanf(fp_loc, "%lf %lf", &position.X, &position.Y);
if (position.X == -1 && position.Y == -1) {
    position.X = uniform() * MAXX;
    position.Y = uniform() * MAXY;
}
What I don't get is:
In the shell script:
- option -l with the value 0 gives no error,
- but with any other value (I mostly used 1) it gives this segmentation fault.
In the terminal:
- no segmentation fault with any value, 0 or 1.
It surely has something to do with the shell script. I am baffled about what is going wrong, and where!
Your help will be highly appreciated.
Cheers