How to fix too many open files error when aggregating billions of records - linux
I got the following error
opening file "/workspace/mongo/data/_tmp/extsort.63355": errno:24 Too many open files
How could I fix this error ?
Is it because the number of opened files is already 63355?
2015-05-02T08:01:40.490+0000 I COMMAND [conn1] command sandbox.$cmd command: listCollections { listCollections: 1.0 } keyUpdates:0 writeConflicts:0 numYields:0 reslen:411 locks:{} 169ms
2015-05-02T15:01:02.060+0000 I - [conn2] Assertion: 16818:error opening file "/workspace/mongo/data/_tmp/extsort.63355": errno:24 Too many open files
2015-05-02T15:01:02.235+0000 I CONTROL [conn2]
0xf4d299 0xeeda71 0xed2d3f 0xed2dec 0xb3f453 0xb3c88c 0xb3d2dd 0xb3dfe2 0xb499c5 0xb49136 0xb7e3e6 0x987165 0x9d8b04 0x9d9aed 0x9da7fb 0xb9e956 0xab4d20 0x80e75d 0xf00e6b 0x7fe38e8b4182 0x7fe38d37c47d
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B4D299"},{"b":"400000","o":"AEDA71"},{"b":"400000","o":"AD2D3F"},{"b":"400000","o":"AD2DEC"},{"b":"400000","o":"73F453"},{"b":"400000","o":"73C88C"},{"b":"400000","o":"73D2DD"},{"b":"400000","o":"73DFE2"},{"b":"400000","o":"7499C5"},{"b":"400000","o":"749136"},{"b":"400000","o":"77E3E6"},{"b":"400000","o":"587165"},{"b":"400000","o":"5D8B04"},{"b":"400000","o":"5D9AED"},{"b":"400000","o":"5DA7FB"},{"b":"400000","o":"79E956"},{"b":"400000","o":"6B4D20"},{"b":"400000","o":"40E75D"},{"b":"400000","o":"B00E6B"},{"b":"7FE38E8AC000","o":"8182"},{"b":"7FE38D282000","o":"FA47D"}],"processInfo":{ "mongodbVersion" : "3.0.1", "gitVersion" : "534b5a3f9d10f00cd27737fbcd951032248b5952", "uname" : { "sysname" : "Linux", "release" : "3.13.0-44-generic", "version" : "#73-Ubuntu SMP Tue Dec 16 00:22:43 UTC 2014", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "C35E766AD226FC0C16CB0C3885EC3B59E288A3F2" }, { "b" : "7FFF448FE000", "elfType" : 3, "buildId" : "9D77366C6409A9EA266179080FA7C779EEA8A958" }, { "b" : "7FE38E8AC000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "9318E8AF0BFBE444731BB0461202EF57F7C39542" }, { "b" : "7FE38E64E000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "FF43D0947510134A8A494063A3C1CF3CEBB27791" }, { "b" : "7FE38E273000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "B927879B878D90DD9FF4B15B00E7799AA8E0272F" }, { "b" : "7FE38E06B000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "92FCF41EFE012D6186E31A59AD05BDBB487769AB" }, { "b" : "7FE38DE67000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "C1AE4CB7195D337A77A3C689051DABAA3980CA0C" }, { "b" : "7FE38DB63000", "path" : "/usr/lib/x86_64-linux-gnu/libstdc++.so.6", "elfType" : 3, "buildId" : "19EFDDAB11B3BF5C71570078C59F91CF6592CE9E" }, { "b" : "7FE38D85D000", "path" : 
"/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "1D76B71E905CB867B27CEF230FCB20F01A3178F5" }, { "b" : "7FE38D647000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "8D0AA71411580EE6C08809695C3984769F25725B" }, { "b" : "7FE38D282000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "30C94DC66A1FE95180C3D68D2B89E576D5AE213C" }, { "b" : "7FE38EACA000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "9F00581AB3C73E3AEA35995A0C50D24D59A01D47" } ] }}
mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xf4d299]
mongod(_ZN5mongo10logContextEPKc+0xE1) [0xeeda71]
mongod(_ZN5mongo11msgassertedEiPKc+0xAF) [0xed2d3f]
mongod(+0xAD2DEC) [0xed2dec]
mongod(_ZN5mongo16SortedFileWriterINS_5ValueES1_EC1ERKNS_11SortOptionsERKSt4pairINS1_25SorterDeserializeSettingsES7_E+0x5D3) [0xb3f453]
mongod(_ZN5mongo19DocumentSourceGroup5spillEv+0x1BC) [0xb3c88c]
mongod(_ZN5mongo19DocumentSourceGroup8populateEv+0x46D) [0xb3d2dd]
mongod(_ZN5mongo19DocumentSourceGroup7getNextEv+0x292) [0xb3dfe2]
mongod(_ZN5mongo21DocumentSourceProject7getNextEv+0x45) [0xb499c5]
mongod(_ZN5mongo17DocumentSourceOut7getNextEv+0xD6) [0xb49136]
mongod(_ZN5mongo8Pipeline3runERNS_14BSONObjBuilderE+0xA6) [0xb7e3e6]
mongod(_ZN5mongo15PipelineCommand3runEPNS_16OperationContextERKSsRNS_7BSONObjEiRSsRNS_14BSONObjBuilderEb+0x7A5) [0x987165]
mongod(_ZN5mongo12_execCommandEPNS_16OperationContextEPNS_7CommandERKSsRNS_7BSONObjEiRSsRNS_14BSONObjBuilderEb+0x34) [0x9d8b04]
mongod(_ZN5mongo7Command11execCommandEPNS_16OperationContextEPS0_iPKcRNS_7BSONObjERNS_14BSONObjBuilderEb+0xC7D) [0x9d9aed]
mongod(_ZN5mongo12_runCommandsEPNS_16OperationContextEPKcRNS_7BSONObjERNS_11_BufBuilderINS_16TrivialAllocatorEEERNS_14BSONObjBuilderEbi+0x28B) [0x9da7fb]
mongod(_ZN5mongo8runQueryEPNS_16OperationContextERNS_7MessageERNS_12QueryMessageERKNS_15NamespaceStringERNS_5CurOpES3_+0x746) [0xb9e956]
mongod(_ZN5mongo16assembleResponseEPNS_16OperationContextERNS_7MessageERNS_10DbResponseERKNS_11HostAndPortE+0xB10) [0xab4d20]
mongod(_ZN5mongo16MyMessageHandler7processERNS_7MessageEPNS_21AbstractMessagingPortEPNS_9LastErrorE+0xDD) [0x80e75d]
mongod(_ZN5mongo17PortMessageServer17handleIncomingMsgEPv+0x34B) [0xf00e6b]
libpthread.so.0(+0x8182) [0x7fe38e8b4182]
libc.so.6(clone+0x6D) [0x7fe38d37c47d]
----- END BACKTRACE -----
2015-05-02T15:02:07.753+0000 I COMMAND [conn2] CMD: drop sandbox.tmp.agg_out.1
UPDATE
I typed ulimit -n unlimited on the console,
and modified the /etc/security/limits.conf with the following setting
* soft nofile unlimited
* hard nofile unlimited
* soft nproc unlimited
* hard nproc unlimited
check it by ulimit -a
health# ulimit -a
-t: cpu time (seconds) unlimited
-f: file size (blocks) unlimited
-d: data seg size (kbytes) unlimited
-s: stack size (kbytes) 8192
-c: core file size (blocks) 0
-m: resident set size (kbytes) unlimited
-u: processes unlimited
-n: file descriptors 4096
-l: locked-in-memory size (kbytes) 64
-v: address space (kbytes) unlimited
-x: file locks unlimited
-i: pending signals 31538
-q: bytes in POSIX msg queues 819200
-e: max nice 0
-r: max rt priority 0
-N 15: unlimited
health# ulimit -Sn
4096
health# ulimit -Hn
4096
Is my system's setting already unlimited on open files?
There is no clean answer for this, as you are doing something very heavy, but a workaround is available.
ulimit is a command in Unix/Linux which allows you to set system limits for various properties.
In your case you need to increase the maximum number of open files, or make it unlimited to be on the safe side (this is also recommended by MongoDB).
ulimit -n <large value in your case 1000000>
or
sysctl -w fs.file-max=1000000
and
/etc/security/limits.conf or /etc/sysctl.conf:
change
fs.file-max = 1000000
I found that it was necessary to change the system-wide settings (using ulimit as suggested by Nachiket Kate; another great description for Ubuntu may be found here) as well as the mongodb settings (as documented here).
For the sake of explanation, I'll summarize the commands I performed to get a handle on things (I'll reference the links again where they belong in the discussion).
Determine if the maximum number of file descriptors as enforced by the kernel are sufficient (the amount was sufficient)?
$ cat /proc/sys/fs/file-max
6569231
In my case this was not the problem. Checking the ulimit settings for the mongodb user revealed that the number of file descriptors was a paltry 1024:
$ sudo -H -u mongodb bash -c 'ulimit -a'
...
open files (-n) 1024
...
These values could be changed for all users by increasing the soft (user can modify them) and hard limits (I set mine quite high):
$ sudo su
$ echo -e "* hard\tnofile\t1000000\n* soft\tnofile\t990000" >> /etc/security/limits.conf
This may also be done on a per-user basis by replacing the * with the username. Although this worked on a per-user basis, restarting the mongo daemon resulted in the number of file descriptors returning to 1024. It was necessary to follow the advice here regarding the PAM session:
$ for file in /etc/pam.d/common-session*; do
echo 'session required pam_limits.so' >> $file
done
To test that the settings have been applied, I created a wee python script (placed in /tmp/file_descriptor_test.py):
#!/usr/bin/env python
# Stress test: open many files simultaneously and hold them open, to verify
# that the process's file-descriptor limit (ulimit -n / systemd LimitNOFILE)
# is actually in effect.  Hitting the limit raises IOError/OSError with
# errno 24 (EMFILE: "Too many open files").


def open_many(n, name_template='/tmp/__%08d'):
    # Open `n` files for writing and return the list of open file objects.
    # The handles are deliberately kept open (never closed): each one pins a
    # file descriptor, which is exactly what exercises the limit.
    handles = []
    for i in range(n):
        handles.append(open(name_template % i, 'w'))
    return handles


if __name__ == '__main__':
    count = 990000
    # Original script used range(1, n), opening n-1 files while claiming n;
    # open_many(n) opens exactly n, so the reported count is accurate.
    fd_list = open_many(count)
    # print() with a single argument is valid in both Python 2 and Python 3.
    print('opened %d fds' % len(fd_list))
Running this as the mongodb user revealed that all was well system-wise:
sudo -H -u mongodb bash -c '/tmp/file_descriptor_test.py'
Traceback (most recent call last):
File "/tmp/fd.py", line 8, in <module>
IOError: [Errno 24] Too many open files: '/tmp/__00989998'
The files in /tmp/ may be deleted using
sudo find /tmp -type f -name '__*' -delete
as you'll be unable to list them properly (so rm doesn't work).
However, when running the offending mongo process, I still encountered the same Too many open files error. This led me to believe that the problem also lay with mongo (and led me, finally and embarrassingly, to the excellent documentation). Editing the /etc/systemd/system/multi-user.target.wants/mongodb-01.service file and adding the following lines beneath the [Service] directive
# (file size)
LimitFSIZE=infinity
# (cpu time)
LimitCPU=infinity
# (virtual memory size)
LimitAS=infinity
# (open files)
LimitNOFILE=990000
# (processes/threads)
LimitNPROC=495000
finally resolved the issue (remember to reload systemd and restart the service with sudo systemctl daemon-reload && systemctl restart mongodb-01.service). You can monitor the progress of the mongo process (mine was a temporary space-hungry aggregate) via
$ while true; do echo $(find /var/lib/mongodb_01/_tmp/ | wc -l); sleep 1; done
Related
MMLS (Sleuth Kit) not working in some situations using DCFLDD
I am experiencing some issues when using mmls command after having created an image with dcfldd/guymager in some particular situations. Usually this approach seems to be working fine to create physical images of devices, but with some USBs (working fine and undamaged) I manage to create the .dd disk image file, but then it won't be opened by mmls, nor fsstat. fls does open the file system structure, but it seems like it won't show me any unallocated files just as if this was a logical image. This is the command run to create a disk image using dcfldd: sudo dcfldd if=/dev/sda hash=sha256 hashlog=usb.sha256hash of=./usb.dd bs=512 conv=noerror,sync,notrunc Also, this is the output of usb.info, generated by guymager: GUYMAGER ACQUISITION INFO FILE ============================== Guymager ======== Version : 0.8.13-1 Version timestamp : 2022-05-11-00.00.00 UTC Compiled with : gcc 12.1.1 20220507 (Red Hat 12.1.1-1) libewf version : 20140812 (not used as Guymager is configured to use its own EWF module) libguytools version: 2.0.2 Host name : lucafedora Domain name : (none) System : Linux lucafedora 6.1.7-100.fc36.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Jan 18 18:37:43 UTC 2023 x86_64 Device information ================== Command executed: bash -c "search="`basename /dev/sda`: H..t P.......d A..a de.....d" && dmesg | grep -A3 "$search" || echo "No kernel HPA messages for /dev/sda"" Information returned: ---------------------------------------------------------------------------------------------------- No kernel HPA messages for /dev/sda Command executed: bash -c "smartctl -s on /dev/sda ; smartctl -a /dev/sda" Information returned: ---------------------------------------------------------------------------------------------------- /usr/bin/bash: line 1: smartctl: command not found /usr/bin/bash: line 1: smartctl: command not found Command executed: bash -c "hdparm -I /dev/sda" Information returned: 
---------------------------------------------------------------------------------------------------- /usr/bin/bash: line 1: hdparm: command not found Command executed: bash -c "CIDFILE=/sys/block/$(basename /dev/sda)/device/cid; echo -n "CID: " ; if [ -e $CIDFILE ] ; then cat $CIDFILE ; else echo "not available" ; fi " Information returned: ---------------------------------------------------------------------------------------------------- CID: not available Hidden areas: unknown Acquisition =========== Linux device : /dev/sda Device size : 8053063680 (8.1GB) Format : Linux dd raw image - file extension is .dd Image path and file name: /home/HOMEDIR/case_usb/usb.dd Info path and file name: /home/HOMEDIR/case_usb/usb.info Hash calculation : SHA-256 Source verification : on Image verification : on No bad sectors encountered during acquisition. No bad sectors encountered during verification. State: Finished successfully MD5 hash : -- MD5 hash verified source : -- MD5 hash verified image : -- SHA1 hash : -- SHA1 hash verified source : -- SHA1 hash verified image : -- SHA256 hash : 7285a8b0a2b472a8f120c4ca4308a94a3aaa3e308a1dd86e3670041b07c27e76 SHA256 hash verified source: 7285a8b0a2b472a8f120c4ca4308a94a3aaa3e308a1dd86e3670041b07c27e76 SHA256 hash verified image : 7285a8b0a2b472a8f120c4ca4308a94a3aaa3e308a1dd86e3670041b07c27e76 Source verification OK. The device delivered the same data during acquisition and verification. Image verification OK. The image contains exactely the data that was written. 
Acquisition started : 2023-01-28 12:27:07 (ISO format YYYY-MM-DD HH:MM:SS) Verification started: 2023-01-28 12:30:11 Ended : 2023-01-28 12:35:24 (0 hours, 8 minutes and 16 seconds) Acquisition speed : 41.97 MByte/s (0 hours, 3 minutes and 3 seconds) Verification speed : 24.62 MByte/s (0 hours, 5 minutes and 12 seconds) Generated image files and their MD5 hashes ========================================== No MD5 hashes available (configuration parameter CalcImageFileMD5 is off) MD5 Image file n/a usb.dd Worth to mention that when mmls is run against usb.dd it produces no output whatsoever. I have to forcefully add -v option for it to spit out this kind of information: tsk_img_open: Type: 0 NumImg: 1 Img1: usb.dd aff_open: Error determining type of file: usb.dd aff_open: Success Error opening vmdk file Error checking file signature for vhd file tsk_img_findFiles: usb.dd found tsk_img_findFiles: 1 total segments found raw_open: segment: 0 size: 8053063680 max offset: 8053063680 path: usb.dd dos_load_prim: Table Sector: 0 raw_read: byte offset: 0 len: 65536 raw_read: found in image 0 relative offset: 0 len: 65536 raw_read_segment: opening file into slot 0: usb.dd dos_load_prim_table: Testing FAT/NTFS conditions dos_load_prim_table: MSDOS OEM name exists bsd_load_table: Table Sector: 1 gpt_load_table: Sector: 1 gpt_open: Trying other sector sizes gpt_open: Trying sector size: 512 gpt_load_table: Sector: 1 gpt_open: Trying sector size: 1024 gpt_load_table: Sector: 1 gpt_open: Trying sector size: 2048 gpt_load_table: Sector: 1 gpt_open: Trying sector size: 4096 gpt_load_table: Sector: 1 gpt_open: Trying sector size: 8192 gpt_load_table: Sector: 1 gpt_open: Trying secondary table gpt_load_table: Sector: 15728639 raw_read: byte offset: 8053063168 len: 512 raw_read: found in image 0 relative offset: 8053063168 len: 512 gpt_open: Trying secondary table sector size: 512 gpt_load_table: Sector: 15728639 gpt_open: Trying secondary table sector size: 1024 gpt_load_table: Sector: 
7864319 raw_read: byte offset: 8053062656 len: 1024 raw_read: found in image 0 relative offset: 8053062656 len: 1024 gpt_open: Trying secondary table sector size: 2048 gpt_load_table: Sector: 3932159 raw_read: byte offset: 8053061632 len: 2048 raw_read: found in image 0 relative offset: 8053061632 len: 2048 gpt_open: Trying secondary table sector size: 4096 gpt_load_table: Sector: 1966079 raw_read: byte offset: 8053059584 len: 4096 raw_read: found in image 0 relative offset: 8053059584 len: 4096 gpt_open: Trying secondary table sector size: 8192 gpt_load_table: Sector: 983039 raw_read: byte offset: 8053055488 len: 8192 raw_read: found in image 0 relative offset: 8053055488 len: 8192 sun_load_table: Trying sector: 0 sun_load_table: Trying sector: 1 mac_load_table: Sector: 1 mac_load: Missing initial magic value mac_open: Trying 4096-byte sector size instead of 512-byte mac_load_table: Sector: 1 mac_load: Missing initial magic value
Recover informations from CSV files with my awk script
I have this CSV files : Monday,linux,6,0.2 Tuesday,linux,0.25,0.2 Wednesday,linux,64,3 I create a little script that allow me to recover the informations from my csv and to place them like this : Day : Monday OS : Linux RAM : 6 CPU1 : 0.2 My script is : #!/bin/bash awk -F'[ ,;|.]' 'FNR==0{next} FNR>1 { print "DAY : " $1; print "OS :\n " $2 print "RAM :\n " $3 print "CPU1 :\n " $4 }' mycsvfile.csv But the result is : DAY : Tuesday OS : linux RAM : 0 CPU1 : 25 DAY : Wednesday OS : linux RAM : 64 CPU1 Or I want : DAY : Monday OS : linux RAM : 0.2 CPU 1 : 1 DAY : Tuesday OS : linux RAM : 0.25 CPU 1 : 0.2 DAY : Wednesday OS : linux RAM : 64 CPU 1 : 3 Can you tell me why my script doesn't works and why floats are not taken into account ? Thank you !
Added tab and newline to same awk as Cyrus posted. awk -F ',' '{ print "DAY :",$1 print "OS :",$2 print "RAM :",$3 print "CPU1 :",$4"\n" }' OFS='\t' file DAY : Monday OS : linux RAM : 6 CPU1 : 0.2 DAY : Tuesday OS : linux RAM : 0.25 CPU1 : 0.2 DAY : Wednesday OS : linux RAM : 64 CPU1 : 3 A more generic solution: awk -F, 'BEGIN {split("DAY OS RAM CPU", header, " ")}{for (i=1;i<=4;i++) print header[i]":\t",$i;print ""}' t DAY: Monday OS: linux RAM: 6 CPU: 0.2 DAY: Tuesday OS: linux RAM: 0.25 CPU: 0.2 DAY: Wednesday OS: linux RAM: 64 CPU: 3 More readable: awk -F, ' BEGIN {split("DAY OS RAM CPU", header, " ")} { for (i=1;i<=4;i++) print header[i]":\t",$i; print "" }' file
linux bash cut one row which starts with a certain string
Good day, im using linux bash commands to extract certain data of each sip account and put them next to each other. i have an array called $peers that i put all 1000 sips into and now i need to for loop through them to set every sip to its useragent. what i have so far is #! /bin/bash peers="$(asterisk -rx "sip show peers" | cut -f1 -d" " | cut -f1 -d"/" "=")" "= " asterisk -rx "sip show peer " $peer | cut -f2 -d"Useragent" for peer in $peers do echo $peers done #echo $peers I need to extract a row from a collection of rows that starts with "Useragent" I start by running asterisk -rx "sip show peer 101" and that gives me the result below * Name : 101 Description : Secret : <Set> MD5Secret : <Not set> Remote Secret: <Not set> Context : outgoing Record On feature : automon Record Off feature : automon Subscr.Cont. : <Not set> Language : Tonezone : <Not set> AMA flags : Unknown Transfer mode: open CallingPres : Presentation Allowed, Not Screened Callgroup : Pickupgroup : Named Callgr : Nam. Pickupgr: MOH Suggest : Mailbox : VM Extension : asterisk LastMsgsSent : 0/0 Call limit : 0 Max forwards : 0 Dynamic : Yes Callerid : "" <> MaxCallBR : 384 kbps Expire : 23 Insecure : no Force rport : Yes Symmetric RTP: Yes ACL : No DirectMedACL : No T.38 support : No T.38 EC mode : Unknown T.38 MaxDtgrm: -1 DirectMedia : Yes PromiscRedir : No User=Phone : No Video Support: No Text Support : No Ign SDP ver : No Trust RPID : No Send RPID : No Subscriptions: Yes Overlap dial : Yes DTMFmode : rfc2833 Timer T1 : 500 Timer B : 32000 ToHost : Addr->IP : xxx.xxx.xxx.xxx:5060 Defaddr->IP : (null) Prim.Transp. : UDP Allowed.Trsp : UDP Def. Username: 101 SIP Options : (none) Codecs : (gsm|ulaw|alaw|g729|g722) Codec Order : (gsm:20,g722:20,g729:20,ulaw:20,alaw:20) Auto-Framing : No Status : OK (9 ms) Useragent : UniFi VoIP Phone 4.6.6.489 Reg. 
Contact : sip:101#xxx.xxx.xxx.xxx:5060;ob Qualify Freq : 60000 ms Keepalive : 0 ms Sess-Timers : Accept Sess-Refresh : uas Sess-Expires : 1800 secs Min-Sess : 90 secs RTP Engine : asterisk Parkinglot : Use Reason : No Encryption : No Now i need to cut this part Useragent : UniFi VoIP Phone 4.6.6.489 and display it as 101 : UniFi VoIP Phone 4.6.6.489 any help would be much appreciated Thank you. that top answer worked perfectly. this is my solution now. peer="$(asterisk -rx "sip show peers" | cut -f1 -d" " | cut -f1 -d"/" )" for peer in $peers do output= "$(asterisk -rx "sip show peer $peers" | sed -nE '/Useragent/ s/^[^:]+/101 /p')" echo $output done But is is still giving issue, my problem is the loop of the variables
With sed: ... | sed -nE '/Useragent/ s/^[^:]+/101 /p' /Useragent/ matches line(s) with Useragent it s/^[^:]+/101 substitutes the portion from start till : (exclusive) with 101
systemtap global variable allocation failed
I want to use systemtap for extracting details of my linux production server. my systemtap script is global bt; global quit = 0 probe begin { printf("start profiling...\n") } probe timer.profile { if (pid() == target()) { if (!quit) { bt[backtrace(), ubacktrace()] <<< 1 } else { foreach ([sys, usr] in bt- limit 1000) { print_stack(sys) print_ustack(usr) printf("\t%d\n", #count(bt[sys, usr])) } exit() } } } probe timer.s(20) { quit = 1 } When I start run this script with command sudo stap --ldd -d $program_name --all-modules \ -D MAXMAPENTRIES=10240 -D MAXACTION=20000 -D MAXTRACE=40 \ -D MAXSTRINGLEN=4096 -D MAXBACKTRACE=40 -x $program_pid \ profile.stp --vp 00001 > profile.out It fails, and prints following error: ERROR: error allocating hash ERROR: global variable 'bt' allocation failed WARNING: /usr/bin/staprun exited with status: 1 my production server memory info is total used free shared buffers cached Mem: 16008 15639 368 0 80 3090 -/+ buffers/cache: 12468 3539 I think it is enough, because in my test server, there is only 2G memory, and the systemtap script runs well for another server
Unfortunately, this is intended behavior, see my discussion here: https://sourceware.org/ml/systemtap/2015-q1/msg00033.html The problem is that SystemTap allocates associative arrays at once (to prevent allocation failures in future) and on per-cpu basis (to prevent locking), which means that bt will require (2 * MAXSTRINGLEN + sizeof(statistic)) * MAXMAPENTRIES * NR_CPU =~ 2 Gb if NR_CPU == 128. Reduce MAXSTRINGLEN (which is set to 4k in your case) or size of bt array: global bt[128];
Jiffies not zero on boot
After reboot the jiffies are not initialized to zero, but instead to some high value (near the wrap-around). For example (immediately after reboot): cat /proc/timer_list | grep jiffies .idle_jiffies : 4294902561 .last_jiffies : 4294902561 .next_jiffies : 4294902623 jiffies: 4294902561 .idle_jiffies : 4294902561 .last_jiffies : 4294902561 .next_jiffies : 4294902568 jiffies: 4294902561 .idle_jiffies : 4294902561 .last_jiffies : 4294902561 .next_jiffies : 4294902679 jiffies: 4294902561 .idle_jiffies : 4294902561 .last_jiffies : 4294902561 .next_jiffies : 4294902607 What am I missing? I am running Ubuntu 12.04 LTS, 3.13.0-36-generic. Thanks!
At boot, jiffies is not initialized to zero; it is initialized to the INITIAL_JIFFIES constant. You can see this constant in the Linux kernel headers: $ cd your_path_to_linux_kernel_headers $ grep INITIAL_JIFFIES linux/jiffies.h #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) It was introduced by this patch to help detect problems related to that very overflow. Sorry for the long wait for an answer. :)