Using Go to create a new child process with syscall.CLONE_NEWUSER: why is the uid "nobody" inside the new user namespace? - linux

exe := exec.Command("/proc/self/exe", "init")
exe.SysProcAttr = &syscall.SysProcAttr{
    Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER |
        syscall.CLONE_NEWPID | syscall.CLONE_NEWIPC | syscall.CLONE_NEWNET,
    // mapping the uid and gid (I can't figure out the mapping rules)
    UidMappings: []syscall.SysProcIDMap{
        {
            ContainerID: 0,
            HostID:      1000000,
            Size:        65536,
        },
    },
    GidMappings: []syscall.SysProcIDMap{
        {
            ContainerID: 0,
            HostID:      100000,
            Size:        65536,
        },
    },
}
When I run it with exe.Start(), the prompt I get in the terminal is "nobody@xx:/home/xx$".
I read the code of runc; it uses the same approach to implement user-namespace isolation, but it ends up with the uid correctly set to 'root'. What am I doing wrong?
Here is the system info of my new process, taken from another bash session.
root@xx:/home/xx# cat /proc/159624/uid_map
0 1000000 65536
root@xx:/home/xx# cat /etc/subuid
xx:100000:65536
os: ubuntu20.04
go version:1.18.3 linux/amd64
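For context, here is a minimal, self-contained sketch of the re-exec pattern the snippet above is taken from (the clone flags and mapping values are the asker's; the "init" child branch and the error handling are assumptions added for illustration):

package main

import (
    "fmt"
    "os"
    "os/exec"
    "syscall"
)

func main() {
    if len(os.Args) > 1 && os.Args[1] == "init" {
        // Child branch: this code runs re-executed inside the new namespaces.
        fmt.Printf("in child: uid=%d gid=%d\n", os.Getuid(), os.Getgid())
        return
    }

    exe := exec.Command("/proc/self/exe", "init")
    exe.Stdin, exe.Stdout, exe.Stderr = os.Stdin, os.Stdout, os.Stderr
    exe.SysProcAttr = &syscall.SysProcAttr{
        Cloneflags: syscall.CLONE_NEWUTS | syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER |
            syscall.CLONE_NEWPID | syscall.CLONE_NEWIPC | syscall.CLONE_NEWNET,
        UidMappings: []syscall.SysProcIDMap{{ContainerID: 0, HostID: 1000000, Size: 65536}},
        GidMappings: []syscall.SysProcIDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
    }
    if err := exe.Run(); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
}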

Related

Sharp fails on resize with zsh: killed

I have a small node script to resize images for a website:
const sharp = require('sharp')
const fsp = require('fs/promises')
const path = require('path')

const FILES = [
  '/home/xx/68/ea/68ea8c14c2b655c50cbc560b9f3a5af620882670.jpeg',
  '/home/xx/4e/7f/4e7f94103480d8ff231b17310c598470dc10489c.jpeg',
]
const SIZES = [ 300, 500, 800, 1024, 2048 ]
const FORMATS = [ 'jpeg' ]
const dir = '/home/xx/media/temp/'

async function generatePreviews(pathName, name) {
  const image = sharp(pathName)
  const metadatas = await image.metadata()
  console.log('format:', metadatas.format)
  console.log('width:', metadatas.width)
  console.log('height:', metadatas.height)
  console.log('weight:', await fsp.stat(pathName))
  for (const size of SIZES) {
    for (const format of FORMATS) {
      console.log('start create preview', name, format, size)
      try {
        await image.clone()
          .resize(size, size, { fit: 'inside' })
          .toFile(path.join(dir, `preview_${name}_${size}.${format}`))
        console.log('Preview created', name, format, size)
      } catch (e) {
        console.log('An error occurred')
        console.log(e)
      }
    }
  }
}

async function testPreviews() {
  console.log('start generating previews')
  for (let i = 0; i < FILES.length; i++) {
    await generatePreviews(FILES[i], i)
  }
  console.log('All previews generated')
}

testPreviews()
It works perfectly on my desktop computer (old i5 760 from 2010), but on my server, it fails almost silently when generating the largest previews for the largest file…
Here is the output on the server:
% node ./src/scripts/test_sharp.js
start generating previews
format: jpeg
width: 683
height: 1024
weight: Stats {
dev: 51713,
mode: 33188,
nlink: 1,
uid: 1001,
gid: 1001,
rdev: 0,
blksize: 4096,
ino: 658937,
size: 473209,
blocks: 928,
atimeMs: 1654021875500.3074,
mtimeMs: 1467568028343.9807,
ctimeMs: 1645954831869.6965,
birthtimeMs: 1645954825541.6052,
atime: 2022-05-31T18:31:15.500Z,
mtime: 2016-07-03T17:47:08.344Z,
ctime: 2022-02-27T09:40:31.870Z,
birthtime: 2022-02-27T09:40:25.542Z
}
start create preview 0 jpeg 300
Preview created 0 jpeg 300
start create preview 0 jpeg 500
Preview created 0 jpeg 500
start create preview 0 jpeg 800
Preview created 0 jpeg 800
start create preview 0 jpeg 1024
Preview created 0 jpeg 1024
start create preview 0 jpeg 2048
Preview created 0 jpeg 2048
format: jpeg
width: 3840
height: 5760
weight: Stats {
dev: 51713,
mode: 33188,
nlink: 1,
uid: 1001,
gid: 1001,
rdev: 0,
blksize: 4096,
ino: 658752,
size: 2272667,
blocks: 4440,
atimeMs: 1654021876468.3223,
mtimeMs: 1468007124640.3523,
ctimeMs: 1645954169924.1448,
birthtimeMs: 1645954151475.8787,
atime: 2022-05-31T18:31:16.468Z,
mtime: 2016-07-08T19:45:24.640Z,
ctime: 2022-02-27T09:29:29.924Z,
birthtime: 2022-02-27T09:29:11.476Z
}
start create preview 1 jpeg 300
Preview created 1 jpeg 300
start create preview 1 jpeg 500
Preview created 1 jpeg 500
start create preview 1 jpeg 800
zsh: killed node ./src/scripts/test_sharp.js
It fails at 800px side preview generation for the largest file (5760 x 3840px).
The catch block doesn't trigger…
Server is a 1 CPU VPS with 1GB of RAM (hosted at gandi.net).
% node --version
v16.15.0
% yarn list --pattern sharp 1 Jun 04:06:40
yarn list v1.22.18
└─ sharp@0.30.6
Done in 1.49s.
Any help would be appreciated :)

Impossible to activate HugePage on AKS nodes

Hi dear Stackoverflow community,
I'm struggling to activate HugePages on an AKS cluster.
I noticed that I first have to configure a node pool with HugePage support.
The only official Azure HugePage doc is about transparentHugePage (https://learn.microsoft.com/en-us/azure/aks/custom-node-configuration), but I don't know if it's sufficient...
Then I know that I also have to configure the pod.
I wanted to rely on this (https://kubernetes.io/docs/tasks/manage-hugepages/scheduling-hugepages/), but since step 2) isn't working...
Despite everything I've done, I could not make it work.
If I follow the Microsoft documentation, my node pool spawns with this configuration:
"kubeletConfig": {
"allowedUnsafeSysctls": null,
"cpuCfsQuota": null,
"cpuCfsQuotaPeriod": null,
"cpuManagerPolicy": null,
"failSwapOn": false,
"imageGcHighThreshold": null,
"imageGcLowThreshold": null,
"topologyManagerPolicy": null
},
"linuxOsConfig": {
"swapFileSizeMb": null,
"sysctls": {
"fsAioMaxNr": null,
"fsFileMax": null,
"fsInotifyMaxUserWatches": null,
"fsNrOpen": null,
"kernelThreadsMax": null,
"netCoreNetdevMaxBacklog": null,
"netCoreOptmemMax": null,
"netCoreRmemMax": null,
"netCoreSomaxconn": null,
"netCoreWmemMax": null,
"netIpv4IpLocalPortRange": "32000 60000",
"netIpv4NeighDefaultGcThresh1": null,
"netIpv4NeighDefaultGcThresh2": null,
"netIpv4NeighDefaultGcThresh3": null,
"netIpv4TcpFinTimeout": null,
"netIpv4TcpKeepaliveProbes": null,
"netIpv4TcpKeepaliveTime": null,
"netIpv4TcpMaxSynBacklog": null,
"netIpv4TcpMaxTwBuckets": null,
"netIpv4TcpRmem": null,
"netIpv4TcpTwReuse": null,
"netIpv4TcpWmem": null,
"netIpv4TcpkeepaliveIntvl": null,
"netNetfilterNfConntrackBuckets": null,
"netNetfilterNfConntrackMax": null,
"vmMaxMapCount": null,
"vmSwappiness": null,
"vmVfsCachePressure": null
},
"transparentHugePageDefrag": "defer+madvise",
"transparentHugePageEnabled": "madvise"
But my node still looks like this:
# kubectl describe nodes aks-deadpoolhp-31863567-vmss000000|grep hugepage
Capacity:
attachable-volumes-azure-disk: 16
cpu: 8
ephemeral-storage: 129901008Ki
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 32940620Ki
pods: 110
Allocatable:
attachable-volumes-azure-disk: 16
cpu: 7820m
ephemeral-storage: 119716768775
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 28440140Ki
pods: 110
My kube version is 1.16.15
I also saw that I should enable a feature gate like this: --feature-gates=HugePages=true (https://dev.to/dannypsnl/hugepages-on-kubernetes-5e7p), but I don't know how to do that in AKS... Anyway, since my node is not showing any HugePage availability, I'm not sure it's useful for now.
I even tried to recreate the AKS cluster with a --kubeconfig, but everything remains the same: I cannot use HugePages...
Please, I need your help again, I'm completely lost with this AKS service...
Install kubectl-node-shell on your laptop
curl -LO https://github.com/kvaps/kubectl-node-shell/raw/master/kubectl-node_shell
chmod +x ./kubectl-node_shell
sudo mv ./kubectl-node_shell /usr/local/bin/kubectl-node_shell
Get the nodes you want to get inside:
kubectl get pod <YOUR_POD> -o custom-columns=CONTAINER:.spec.nodeName -n <YOUR_NAMESPACE>
If the node is NONE, that means your pod is in a pending state. Pick a random node instead:
kubectl get pod -n <YOUR_NAMESPACE>
Get inside your node:
kubectl node-shell <NODE>
Configure Hugepage:
mkdir -p /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
echo 1024 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
Restart kubelet (still in the node, yes):
systemctl restart kubelet
Exit from node-shell by C-d (Ctrl + d)
Check HugePages are ON (i.e. the values must not be 0):
kubectl describe node <NODE>|grep -i -e "capacity" -e "allocatable" -e "huge"
Either check that your pod is no longer in a pending state, or launch your helm install / kubectl apply now!
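As an optional sanity check (a rough sketch, not part of the original steps), a small Go program run on the node or in a privileged pod can confirm that the reserved 2 MiB pages are actually allocatable via mmap with MAP_HUGETLB; it assumes the golang.org/x/sys/unix package:

package main

import (
    "fmt"

    "golang.org/x/sys/unix"
)

func main() {
    const size = 2 << 20 // one 2 MiB huge page
    // With MAP_HUGETLB, mmap fails with ENOMEM if no huge pages are reserved.
    mem, err := unix.Mmap(-1, 0, size,
        unix.PROT_READ|unix.PROT_WRITE,
        unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_HUGETLB)
    if err != nil {
        fmt.Println("huge page allocation failed:", err)
        return
    }
    defer unix.Munmap(mem)
    mem[0] = 1 // touch the page so it is actually faulted in
    fmt.Println("allocated one 2 MiB huge page")
}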

Linux User NameSpaces

I am experimenting with user namespaces using Go on Linux. The thing I cannot figure out is that although I am setting the uid and gid mappings when creating the namespace, the process still identifies as the nobody user when I launch the binary using sudo, but when I launch it as the normal user everything works fine. For reference, please see my code below.
...
cmd := exec.Command("/bin/sh")
cmd.Stdout = os.Stdout
cmd.Stdin = os.Stdin
cmd.Stderr = os.Stderr
cmd.SysProcAttr = &syscall.SysProcAttr{
    Cloneflags: syscall.CLONE_NEWUSER,
    UidMappings: []syscall.SysProcIDMap{
        {
            ContainerID: 0,
            HostID:      1000,
            Size:        1,
        },
    },
    GidMappings: []syscall.SysProcIDMap{
        {
            ContainerID: 0,
            HostID:      1000,
            Size:        1,
        },
    },
}
cmd.Run()
....
...
From the host I can confirm that indeed the user and group mappings were successful. The current pid is 87751
sudo cat /proc/87751/uid_map
0 1000 1
sudo cat /proc/87751/gid_map
0 1000 1
But when I run the binary after building it:
go build -o user_n
sudo ./user_n
sh-5.0$ whoami
nobody
sh-5.0$ id
uid=65534(nobody) gid=65534(nobody) groups=65534(nobody) context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
But when I run the binary using the normal user it works as expected
./user_n
sh-5.0# whoami
root
sh-5.0# id
uid=0(root) gid=0(root) groups=0(root),65534(nobody) context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
While running the binary using the normal user is an option I would like to know why running using sudo does not give the expected results. Any pointers will be greatly appreciated.
More info
Fedora 31
Kernel 5.3.11-100.fc29.x86_64
go version go1.14.3 linux/amd64
In the first case, you are running as the root user (through sudo), for which there is no mapping specified in the child user namespace. Hence the resulting "nobody" id.
In the second case, you run the program as user id 1000, for which the mapping says: 1000 becomes root in the child user namespace. Hence the resulting "root" id.
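To make the explanation concrete, here is a minimal sketch (an illustration, not the asker's code) in which the host side of the mapping is taken from the real uid/gid of whoever invokes the program, so the invoking user maps to root inside the namespace whether the binary is started with sudo (real uid 0) or as uid 1000:

package main

import (
    "os"
    "os/exec"
    "syscall"
)

func main() {
    cmd := exec.Command("/bin/sh")
    cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
    cmd.SysProcAttr = &syscall.SysProcAttr{
        Cloneflags: syscall.CLONE_NEWUSER,
        // Map whatever uid/gid actually runs this process to root inside
        // the new user namespace (0 under sudo, 1000 otherwise).
        UidMappings: []syscall.SysProcIDMap{
            {ContainerID: 0, HostID: os.Getuid(), Size: 1},
        },
        GidMappings: []syscall.SysProcIDMap{
            {ContainerID: 0, HostID: os.Getgid(), Size: 1},
        },
    }
    if err := cmd.Run(); err != nil {
        os.Stderr.WriteString(err.Error() + "\n")
        os.Exit(1)
    }
}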

noUiSlider: remove decimal?

How do I remove the decimal digits from the linked output?
I am using this code:
$("#slider_01").noUiSlider({
start: [2000, 24000],
connect: true,
step: 0.01,
range: {
'min': 0,
'max': 28500
},
format: wNumb({
decimals: false,
thousand: ',',
prefix: '$ ',
})
});
$('#slider_01').Link('lower').to($('#value-lower_1'));
$('#slider_01').Link('upper').to($('#value-upper_1'));
I didn't have access to the wNumb library in the environment I was working with.
I had a look under the hood of the library and this also works:
$("#slider_01").noUiSlider({
...
format: {
to: (v) => parseFloat(v).toFixed(0),
from: (v) => parseFloat(v).toFixed(0)
}
});
decimals: false is invalid; use decimals: 0. Also, you are setting formatting for the .val() method. Use it like this:
$('#slider_01').Link('lower').to($('#value-lower_1'), null, wNumb({
  decimals: 0,
  thousand: ',',
  prefix: '$ ',
}));
Change the step from 0.01 to 1.
I know it's a very old question, but I did not want to include another library (wNumb) just to remove the decimal in one place. Here is my solution without using wNumb:
var slider = document.getElementById('prcsldr');
noUiSlider.create(slider, {
  start: [10000],
  range: {
    min: 1000,
    max: 50000
  },
  step: 1000,
  format: {
    to: (v) => v | 0,
    from: (v) => v | 0
  }
});

What is the analogue of an NDIS filter in linux?

I am working on an as close to real-time system as possible in linux and need to send about 600-800 bytes in a TCP packet as soon as I receive a specific packet.
For the best possible latencies I want this packet to be sent directly from the kernel, instead of the received packet going all the way up to userspace and the application and the reply then making its way back down.
If I were on Windows, I'd have written an NDIS filter with which I would cache the packet to be sent and the matching parameters, so that it would check each received packet and, on a match, fire the pre-cached packet onto the network without passing the received packet up to the higher layers.
So my question is what is the closest analogue of an NDIS filter on linux?
I have read about netfilter and perhaps that is what I would use, but I do not know if it is the best way possible.
What else could I do to achieve lowest-possible latencies?
My current purely userspace code gives me about 80-100 microseconds on an Intel Xeon 3.7 GHz processor running Ubuntu 10.04 on a 2.6.3x kernel.
You can use the iptables target NFLOG to copy packets out to userspace or NFQUEUE to allow userspace to mangle them. This interaction happens over netlink, but you can use libraries such as libnetfilter_log and libnetfilter_queue which wrap around it.
There is a similar mechanism in the Linux kernel called BPF (Berkeley Packet Filter).
You register a BPF filter with the kernel from your application; packets matching the filter are captured and forwarded to the registered hook function.
Below is an example I found on the internet (https://gist.github.com/939154).
Basically you have to open a packet capture fd, bind a BPF filter program to it, and then select() on the fd to receive matching packets:
int
set_filter(int fd)
{
    struct bpf_program fcode = {0};
    /* dump ssh packets only */
    struct bpf_insn insns[] = {
        BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
        BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_IP, 0, 10),
        BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 23),
        BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, 8),
        BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 20),
        BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x1fff, 6, 0),
        BPF_STMT(BPF_LDX+BPF_B+BPF_MSH, 14),
        BPF_STMT(BPF_LD+BPF_H+BPF_IND, 14),
        BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 22, 2, 0),
        BPF_STMT(BPF_LD+BPF_H+BPF_IND, 16),
        BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 22, 0, 1),
        BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
        BPF_STMT(BPF_RET+BPF_K, 0),
    };

    /* Set the filter */
    fcode.bf_len = sizeof(insns) / sizeof(struct bpf_insn);
    fcode.bf_insns = &insns[0];

    if (ioctl(fd, BIOCSETF, &fcode) < 0)
        return -1;

    return 0;
}
The bpf_insn array looks terrible. However, you don't need to write it by hand: you can use tcpdump to auto-generate these instructions.
For example:
sudo tcpdump 'tcp[13]=18' -i eth0 -dd
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 8, 0x00000800 },
{ 0x30, 0, 0, 0x00000017 },
{ 0x15, 0, 6, 0x00000006 },
{ 0x28, 0, 0, 0x00000014 },
{ 0x45, 4, 0, 0x00001fff },
{ 0xb1, 0, 0, 0x0000000e },
{ 0x50, 0, 0, 0x0000001b },
{ 0x15, 0, 1, 0x00000012 },
{ 0x6, 0, 0, 0x00000060 },
{ 0x6, 0, 0, 0x00000000 },
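Since the rest of this page is Go-oriented, here is a rough sketch of the same classic-BPF idea from Go (an illustration, not taken from the answer above), assuming the golang.org/x/net/bpf and golang.org/x/sys/unix packages and CAP_NET_RAW: assemble a small filter that matches only IPv4 TCP frames and attach it to an AF_PACKET socket with SO_ATTACH_FILTER.

package main

import (
    "log"

    "golang.org/x/net/bpf"
    "golang.org/x/sys/unix"
)

// htons converts a short to network byte order for the AF_PACKET protocol field.
func htons(v uint16) uint16 { return v<<8 | v>>8 }

func main() {
    // Classic BPF program: accept only IPv4 (EtherType 0x0800) TCP (protocol 6) frames.
    // Offsets assume a plain Ethernet header with no VLAN tag.
    prog := []bpf.Instruction{
        bpf.LoadAbsolute{Off: 12, Size: 2},                         // EtherType
        bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x0800, SkipFalse: 3}, // not IPv4 -> drop
        bpf.LoadAbsolute{Off: 23, Size: 1},                         // IP protocol
        bpf.JumpIf{Cond: bpf.JumpEqual, Val: 6, SkipFalse: 1},      // not TCP -> drop
        bpf.RetConstant{Val: 0xffff},                               // accept
        bpf.RetConstant{Val: 0},                                    // drop
    }
    raw, err := bpf.Assemble(prog)
    if err != nil {
        log.Fatal(err)
    }

    fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
    if err != nil {
        log.Fatal(err)
    }
    defer unix.Close(fd)

    // Convert to the kernel's sock_filter layout and attach with SO_ATTACH_FILTER.
    filter := make([]unix.SockFilter, len(raw))
    for i, ins := range raw {
        filter[i] = unix.SockFilter{Code: ins.Op, Jt: ins.Jt, Jf: ins.Jf, K: ins.K}
    }
    fprog := unix.SockFprog{Len: uint16(len(filter)), Filter: &filter[0]}
    if err := unix.SetsockoptSockFprog(fd, unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, &fprog); err != nil {
        log.Fatal(err)
    }

    buf := make([]byte, 65536)
    for {
        n, _, err := unix.Recvfrom(fd, buf, 0)
        if err != nil {
            log.Fatal(err)
        }
        log.Printf("matched frame, %d bytes", n)
    }
}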
