Reason for SIGSEGV - linux

I have a sample application that I created in order to understand and experiment with the ELF binary format.
When I run it, it crashes after receiving SIGSEGV.
After attaching it with gdb and then running, I see that it crashes at the following line
(gdb) x/i 0x08054697
=> 0x8054697: mov %edx,0x80f8f5c
But, the destination address of this instruction is a valid address and this memory is mapped as writable.
(gdb) p/x *0x80f8f5c
$3 = 0x0
(gdb) si
Program received signal SIGSEGV, Segmentation fault.
0x08054697 in ?? ()
I am trying to understand why does this process receive SIGSEGV? What other things should I look for in order to figure out the reason.
Here is the output of readelf showing regions of virtual memory that are mapped.
Elf file type is EXEC (Executable file)
Entry point 0x8048e08
There are 13 program headers, starting at offset 52
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000034 0x08047034 0x08047034 0x002a4 0x002a4 R E 0x1
INTERP 0x0001d4 0x080471d4 0x080471d4 0x00013 0x00013 R 0x1
[Requesting program interpreter: /lib/ld-linux.so.2]
DYNAMIC 0x0001e7 0x080471e7 0x080471e7 0x00060 0x00060 RW 0x1
LOAD 0x000000 0x08047000 0x08047000 0x01000 0x01000 R E 0x1
LOAD 0x001000 0x08048000 0x08048000 0xae948 0xae948 R E 0x1000
LOAD 0x0b06dc 0x080f86dc 0x080f86dc 0x015f8 0x07730 RW 0x1000
LOAD 0x0c52b8 0x081002b8 0x081002b8 0x00400 0x00400 R E 0x1
LOAD 0x0c56b8 0x081006b8 0x081006b8 0x00400 0x00400 R E 0x1
LOAD 0x0c5ab8 0x08100ab8 0x08100ab8 0x00400 0x00400 R E 0x1
NOTE 0x0010f4 0x080480f4 0x080480f4 0x00044 0x00044 R 0x4
TLS 0x0b06dc 0x080f86dc 0x080f86dc 0x00010 0x00030 R 0x4
GNU_STACK 0x001000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4
GNU_RELRO 0x0b06dc 0x080f86dc 0x080f86dc 0x00924 0x00924 R 0x1
Relevant instructions from the binary are
0x805467d: mov 0x64(%esp),%edx
0x8054681: mov 0x68(%esp),%ecx
0x8054685: mov %eax,0x80f9a44
0x805468a: lea 0x4(%ecx,%edx,4),%eax
0x805468e: mov 0x78(%esp),%edx
0x8054692: mov %eax,0x80ff1c8
==> 0x8054697: mov %edx,0x80f8f5c
0x805469d: lea 0x0(%esi),%esi
Is there a way in gdb to figure out if the address is mapped as readonly or not?
What could be the reason for this Segmentation fault?
C Code
/*
ECHOSERV.C
==========
(c) Paul Griffiths, 1999
Email: mail#paulgriffiths.net
Simple TCP/IP echo server.
*/
#include <sys/socket.h> /* socket definitions */
#include <sys/types.h> /* socket types */
#include <arpa/inet.h> /* inet (3) functions */
#include <unistd.h> /* misc. UNIX functions */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "helper.h"
#define LOG_FILE "test_disk.txt"
/* Global constants */
#define ECHO_PORT (20002)
#define MAX_LINE (1000)
/*
 * Simple TCP echo server.
 *
 * Accepts one connection at a time, reads a single line from it, echoes the
 * line back to the client, writes it to LOG_FILE and prints it on stdout,
 * then closes the connection and waits for the next one.
 *
 * argv[1] (optional) is the port to listen on; ECHO_PORT is used when no
 * argument is given.  Any setup failure terminates the process with
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
    int list_s;                  /* listening socket */
    int conn_s;                  /* connection socket */
    unsigned short port;         /* port number, host byte order */
    struct sockaddr_in servaddr; /* socket address structure */
    char buffer[MAX_LINE];       /* character buffer */
    char *endptr;                /* for strtol() */
    int file_fd;                 /* log file descriptor */

    /* O_CREAT requires the mode argument; without it the permissions of a
       newly created file are taken from whatever happens to be on the
       stack. */
    file_fd = open(LOG_FILE, O_WRONLY|O_CREAT, 0644);
    if ( file_fd < 0 ) {
        fprintf(stderr, "ECHOSERV: Error opening log file.\n");
        exit(EXIT_FAILURE);
    }

    /* Get port number from the command line, and
       set to default port if no arguments were supplied */
    if ( argc == 2 ) {
        long requested = strtol(argv[1], &endptr, 0);

        /* Reject empty input, trailing garbage and values that do not fit
           in a 16-bit port number. */
        if ( endptr == argv[1] || *endptr || requested < 0 || requested > 65535 ) {
            fprintf(stderr, "ECHOSERV: Invalid port number.\n");
            exit(EXIT_FAILURE);
        }
        port = (unsigned short) requested;
    }
    else if ( argc < 2 ) {
        port = ECHO_PORT;
    }
    else {
        fprintf(stderr, "ECHOSERV: Invalid arguments.\n");
        exit(EXIT_FAILURE);
    }

    /* Create the listening socket */
    if ( (list_s = socket(AF_INET, SOCK_STREAM, 0)) < 0 ) {
        fprintf(stderr, "ECHOSERV: Error creating listening socket.\n");
        exit(EXIT_FAILURE);
    }

    /* Set all bytes in socket address structure to
       zero, and fill in the relevant data members */
    memset(&servaddr, 0, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
    servaddr.sin_port = htons(port);

    /* Bind our socket address to the
       listening socket, and call listen() */
    if ( bind(list_s, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0 ) {
        fprintf(stderr, "ECHOSERV: Error calling bind()\n");
        exit(EXIT_FAILURE);
    }
    if ( listen(list_s, LISTENQ) < 0 ) {
        fprintf(stderr, "ECHOSERV: Error calling listen()\n");
        exit(EXIT_FAILURE);
    }

    /* Enter an infinite loop to respond
       to client requests and echo input */
    while ( 1 ) {
        /* Wait for a connection, then accept() it */
        if ( (conn_s = accept(list_s, NULL, NULL) ) < 0 ) {
            fprintf(stderr, "ECHOSERV: Error calling accept()\n");
            exit(EXIT_FAILURE);
        }

        /* Retrieve an input line from the connected socket and echo it.
           Only touch the buffer when Readline() reports data: on EOF or
           error its contents are not guaranteed to be a terminated
           string, so strlen() on it would be unsafe. */
        if ( Readline(conn_s, buffer, MAX_LINE-1) > 0 ) {
            Writeline(conn_s, buffer, strlen(buffer));
            Writeline(file_fd, buffer, strlen(buffer));
            printf("%s\n", buffer);
        }

        /* Close the connected socket */
        if ( close(conn_s) < 0 ) {
            fprintf(stderr, "ECHOSERV: Error calling close()\n");
            exit(EXIT_FAILURE);
        }
    }
}
/*
HELPER.C
========
(c) Paul Griffiths, 1999
Email: mail#paulgriffiths.net
Implementation of sockets helper functions.
Many of these functions are adapted from, inspired by, or
otherwise shamelessly plagiarised from "Unix Network
Programming", W Richard Stevens (Prentice Hall).
*/
#include "helper.h"
#include <sys/socket.h>
#include <unistd.h>
#include <errno.h>
/*
 * Read a line from a socket, one byte at a time.
 *
 * Stores at most maxlen-1 bytes in the buffer at vptr, stopping after a
 * '\n' (which is kept), at end-of-file, or when the buffer is full.  The
 * result is always NUL-terminated when maxlen > 0, including on EOF and on
 * error, so callers can safely treat the buffer as a string.
 *
 * Returns the number of bytes stored (0 means EOF before any data arrived,
 * or maxlen == 0), or -1 if read() fails with an error other than EINTR.
 */
ssize_t Readline(int sockd, void *vptr, size_t maxlen) {
    char *buffer = (char *)vptr;
    size_t stored = 0;

    if ( maxlen == 0 )
        return 0;                   /* no room even for the terminator */

    while ( stored + 1 < maxlen ) {
        char c;
        ssize_t rc = read(sockd, &c, 1);

        if ( rc == 1 ) {
            buffer[stored++] = c;
            if ( c == '\n' )
                break;
        }
        else if ( rc == 0 ) {
            break;                  /* EOF: return what we have */
        }
        else if ( errno != EINTR ) {
            buffer[stored] = 0;
            return -1;
        }
        /* EINTR: retry the read without consuming a buffer slot */
    }

    buffer[stored] = 0;
    return (ssize_t)stored;
}
/*
 * Write a line to a socket.
 *
 * Keeps calling write() until all n bytes at vptr have been written,
 * restarting after EINTR.
 *
 * Returns n on success, or -1 if write() fails with another error or makes
 * no progress.
 */
ssize_t Writeline(int sockd, const void *vptr, size_t n) {
    const char *cursor = (const char *)vptr;
    size_t remaining = n;

    while ( remaining > 0 ) {
        ssize_t nwritten = write(sockd, cursor, remaining);

        if ( nwritten < 0 ) {
            if ( errno == EINTR )
                continue;           /* interrupted before writing: retry */
            return -1;
        }
        if ( nwritten == 0 ) {
            /* errno is only meaningful when write() returned -1; looking
               at it here could turn a stale EINTR into an endless loop. */
            return -1;
        }

        remaining -= (size_t)nwritten;
        cursor += nwritten;
    }
    return (ssize_t)n;
}

But, the destination address of this instruction is a valid address and this memory is mapped as writable.
No, it's not (or the instruction wouldn't have caused a SIGSEGV).
The destination 0x80f8f5c is "covered" by this LOAD segment:
LOAD 0x0b06dc 0x080f86dc 0x080f86dc 0x015f8 0x07730 RW 0x1000
but also by this:
GNU_RELRO 0x0b06dc 0x080f86dc 0x080f86dc 0x00924 0x00924 R 0x1
the GNU_RELRO asks the runtime loader to make this part of address space read-only after the loader has performed the relocation (which is exactly what it did, and what triggered your crash).
Is there a way in gdb to figure out if the address is mapped as readonly or not?
You can ask gdb with info proc map, or just look in /proc/<pid>/maps. Either way you'll discover that the memory is mapped read-only.

Related

How I can choose network device in socket ()?

My plan is to write a user-based bandwidth control for Internet connection. To do this, I first want to write a network cable emulator.
My Linux box has three network devices:
Eth0 for normal connection to the network.
Eth1 and Eth2 are the endings of the emulated network cable.
So, all my program has to do is take every network packet arriving on the input of eth1 and put it on the output of eth2, and take every network packet arriving on the input of eth2 and put it on the output of eth1.
With s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), I can access every network packet. I hope that eth1 and eth2 are in promiscuous mode.
But, how do I choose the network device? Later, I want to differentiate on the IP source-address that is in each packet, and how long this packet must wait, before I put it to the other network device.
With socket () you cannot choose your network device. This is done with sendto () and recvfrom (). As argument 5 of recvfrom () and sendto () you pass a sockaddr struct (here a struct sockaddr_ll). It has a field sll_ifindex with which you choose your network card. In my environment the index is 3 for enp3s0 and 4 for enp3s1.
See the cable-emulator code below. I tested it for 20 minutes and it worked without problems.
// network cable emulator
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/ether.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
// Packs the six bytes starting at pmac into a single integer, with the first
// byte ending up in the most significant of the 48 used bits.
signed long long llmac (unsigned char* pmac)
{
    signed long long packed = 0;
    signed long shift = 40;
    for (signed long idx = 0; idx < 6; idx++)
    {
        signed long long octet = pmac[idx];
        packed |= octet << shift;
        shift -= 8;
    }
    return packed;
}
// Fixed-capacity list of distinct MAC addresses, each stored in the packed
// integer form produced by llmac().
struct macliste
{
    signed long long anz;          // number of entries currently in use
    signed long long mac[100000];  // packed addresses, valid for [0, anz)
    macliste ();
    void add (unsigned char* pmac);
    signed long neu (unsigned char* pmac);
};

// Starts with an empty list.
macliste::macliste ()
{
    anz= 0;
}

// Appends the address at pmac unless it is already present.
// When the table is full the address is dropped instead of being written
// past the end of mac[].
void macliste::add (unsigned char* pmac)
{
    const signed long long capacity= static_cast<signed long long> (sizeof (mac) / sizeof (mac[0]));
    if (anz >= capacity)
        return;
    if (neu (pmac))
    {
        mac[anz]= llmac (pmac);
        anz++;
    }
}

// Returns 1 when the address at pmac is not in the list yet ("neu" = new),
// 0 when it is already stored.
signed long macliste::neu (unsigned char* pmac)
{
    const signed long long smac= llmac (pmac);
    for (signed long lauf= 0; lauf < anz; lauf++)
        if (mac[lauf] == smac)
            return 0;
    return 1;
}
int main ()
{
int sockemp, socksenleft, socksenright;
signed long result;
unsigned char transferpuffer[10000];
struct sockaddr_ll sockaddrleft;
struct sockaddr_ll sockaddrright;
struct sockaddr_ll sockaddrboth;
signed long sockaddrsize= sizeof (sockaddrboth);
char ifnameleft[IFNAMSIZ] = "enp3s0";
char ifnameright[IFNAMSIZ] = "enp3s1";
struct ifreq ifindexleft;
struct ifreq ifindexright;
int indexleft;
int indexright;
// Socket öffnen
sockemp= socket (AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
printf ("sockemp: %5d\n", sockemp);
// Socket öffnen
socksenleft= socket (AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
printf ("socksenleft: %5d\n", socksenleft);
// Socket öffnen
socksenright= socket (AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
printf ("socksenright: %5d\n", socksenright);
printf ("\n");
/* den Index des Interfaces ermitteln */
printf ("Interfacename left: %s\n", ifnameleft);
printf ("Interfacename right: %s\n", ifnameright);
printf ("\n");
memset (&ifindexleft, 0, sizeof (struct ifreq));
memset (&ifindexright, 0, sizeof (struct ifreq));
strncpy (ifindexleft.ifr_name, ifnameleft, IFNAMSIZ);
strncpy (ifindexright.ifr_name, ifnameright, IFNAMSIZ);
result= ioctl (socksenleft, SIOCGIFINDEX, &ifindexleft);
result= ioctl (socksenright, SIOCGIFINDEX, &ifindexright);
indexleft= ifindexleft.ifr_ifindex;
indexright= ifindexright.ifr_ifindex;
printf ("interfaceresult: %5ld indexleft %5d\n", result, indexleft);
printf ("interfaceresult: %5ld indexright %5d\n", result, indexright);
printf ("\n");
/* Socketadresse vorbereiten */
memset (&sockaddrleft, 0, sizeof(struct sockaddr_ll));
sockaddrleft.sll_family = PF_PACKET; /* RAW communication */
sockaddrleft.sll_hatype = ARPHRD_ETHER; /* Ethernet */
sockaddrleft.sll_pkttype = PACKET_OTHERHOST; /* Ziel ist ein anderer Rechner */
sockaddrleft.sll_ifindex = indexleft; /* Interface-Index */
sockaddrleft.sll_halen = ETH_ALEN; /* Address length*/
/* Socketadresse vorbereiten */
memset (&sockaddrright, 0, sizeof(struct sockaddr_ll));
sockaddrright.sll_family = PF_PACKET; /* RAW communication */
sockaddrright.sll_hatype = ARPHRD_ETHER; /* Ethernet */
sockaddrright.sll_pkttype = PACKET_OTHERHOST; /* Ziel ist ein anderer Rechner */
sockaddrright.sll_ifindex = indexright; /* Interface-Index */
sockaddrright.sll_halen = ETH_ALEN; /* Address length*/
/* Socketadresse vorbereiten */
memset (&sockaddrboth, 0, sizeof(struct sockaddr_ll));
sockaddrboth.sll_family = PF_PACKET; /* RAW communication */
sockaddrboth.sll_hatype = ARPHRD_ETHER; /* Ethernet */
sockaddrboth.sll_pkttype = PACKET_OTHERHOST; /* Ziel ist ein anderer Rechner */
sockaddrboth.sll_halen = ETH_ALEN; /* Address length*/
// Schnittstellen in den Promiskuitätsmodus schalten
struct ifreq ifr;
int raw_socket;
char deviceleft[100]= "enp3s0";
char deviceright[100]= "enp3s1";
memset (&ifr, 0, sizeof (struct ifreq));
// Open A Raw Socket Deviceleft
if ((raw_socket = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 1)
{
printf ("ERROR: Could not open socket, Got #?\n");
exit (1);
}
/* Set the device to use */
strcpy (ifr.ifr_name, deviceleft);
/* Get the current flags that the device might have */
if (ioctl (raw_socket, SIOCGIFFLAGS, &ifr) == -1)
{
perror ("Error: Could not retrive the flags from the device.\n");
exit (1);
}
/* Set the old flags plus the IFF_PROMISC flag */
ifr.ifr_flags |= IFF_PROMISC;
if (ioctl (raw_socket, SIOCSIFFLAGS, &ifr) == -1)
{
perror ("Error: Could not set flag IFF_PROMISC");
exit (1);
}
printf ("Entering promiscuous mode\n");
/* Configure the device */
if (ioctl (raw_socket, SIOCGIFINDEX, &ifr) < 0)
{
perror ("Error: Error getting the device index.\n");
exit (1);
}
// Open A Raw Socket Deviceright
if ((raw_socket = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 1)
{
printf ("ERROR: Could not open socket, Got #?\n");
exit (1);
}
/* Set the device to use */
strcpy (ifr.ifr_name, deviceright);
/* Get the current flags that the device might have */
if (ioctl (raw_socket, SIOCGIFFLAGS, &ifr) == -1)
{
perror ("Error: Could not retrive the flags from the device.\n");
exit (1);
}
/* Set the old flags plus the IFF_PROMISC flag */
ifr.ifr_flags |= IFF_PROMISC;
if (ioctl (raw_socket, SIOCSIFFLAGS, &ifr) == -1)
{
perror ("Error: Could not set flag IFF_PROMISC");
exit (1);
}
printf ("Entering promiscuous mode\n");
/* Configure the device */
if (ioctl (raw_socket, SIOCGIFINDEX, &ifr) < 0)
{
perror ("Error: Error getting the device index.\n");
exit (1);
}
// Pakete transferieren
getchar ();
signed long packetcounter= 0;
macliste emplisteleft;
macliste emplisteright;
while (1)
{
result= recvfrom (sockemp, transferpuffer, 10000, 0, (struct sockaddr*) (&sockaddrboth), (socklen_t*)&sockaddrsize);
if ((sockaddrboth.sll_ifindex != indexleft) && (sockaddrboth.sll_ifindex != indexright))
continue;
packetcounter++;
if ((sockaddrboth.sll_ifindex == indexleft) && (emplisteright.neu (transferpuffer + 6)))
{
emplisteleft.add (transferpuffer + 6); // Ethernet II Paket Quelladresse
result= sendto (socksenright, transferpuffer, result, 0, (struct sockaddr*) &sockaddrright, sizeof (struct sockaddr_ll));
printf ("sendresult: %5ld %2d\n", result, indexright);
}
if ((sockaddrboth.sll_ifindex == indexright) && (emplisteleft.neu (transferpuffer + 6)))
{
emplisteright.add (transferpuffer + 6); // Ethernet II Paket Quelladresse
result= sendto (socksenleft, transferpuffer, result, 0, (struct sockaddr*) &sockaddrleft, sizeof (struct sockaddr_ll));
printf ("sendresult: %5ld %2d\n", result, indexleft);
}
printf ("Packet %10ld empresult: %5ld %2d %20llx %10lld %10lld\n", packetcounter, result, sockaddrboth.sll_ifindex, llmac (transferpuffer + 6), emplisteleft.anz, emplisteright.anz);
}
return 0;
}
compile with gcc 9.3.0 under ubuntu 20.04 server:
g++ transferraw.cc -std=c++17 -Wall -Wextra -Wconversion -pedantic-errors -O2 -o transferraw

Linux Netlink Socket Communication Crashes VM

I have written a kernel module and userspace program such that the kernel module sends netlink multicast messages, and the userspace program reads these messages and prints them out. The kernel module and userspace program are available here (https://github.com/akshayknarayan/netlink-test) and replicated below. The code was adapted from this post: Multicast from kernel to user space via Netlink in C
If line 69 of the userspace program (the call to usleep) is commented out, then everything works; once the kernel module is loaded, it repeatedly multicasts messages and the userspace program prints them out.
However, if line 69 of the userspace program is uncommented, within a second of loading the kernel module, my VM hangs and becomes unresponsive.
Why is this the case? How can I prevent the kernel from hanging?
Linux ubuntu-xenial 4.4.0-75-generic #96-Ubuntu SMP Thu Apr 20 09:56:33 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
Userspace program:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <unistd.h>
/* Multicast group, consistent in both kernel prog and user prog. */
#define MYMGRP 22
/*
 * Open a NETLINK_USERSOCK socket, bind it to this process' pid and join
 * multicast group MYMGRP.
 *
 * Returns the socket descriptor on success.  On failure a short message is
 * printed, the socket (if any) is closed and a negative value is returned.
 */
int nl_open(void) {
    struct sockaddr_nl addr;
    int group = MYMGRP;
    int sock;

    sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
    if (sock < 0) {
        printf("sock < 0.\n");
        return sock;
    }

    memset((void *) &addr, 0, sizeof(addr));
    addr.nl_family = AF_NETLINK;
    addr.nl_pid = getpid();
    if (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        printf("bind < 0.\n");
        close(sock);    /* do not leak the descriptor on failure */
        return -1;
    }

    /* 270 is the numeric value of SOL_NETLINK; presumably spelled out
       because some libc headers do not define the macro. */
    if (setsockopt(sock, 270, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group)) < 0) {
        printf("setsockopt < 0\n");
        close(sock);
        return -1;
    }
    return sock;
}
void nl_recv(int sock) {
struct sockaddr_nl nladdr;
struct msghdr msg;
struct iovec iov;
char buffer[65536];
int ret;
iov.iov_base = (void *) buffer;
iov.iov_len = sizeof(buffer);
msg.msg_name = (void *) &(nladdr);
msg.msg_namelen = sizeof(nladdr);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
ret = recvmsg(sock, &msg, 0);
if (ret < 0)
printf("ret < 0.\n");
else
printf("Received message payload: %s\n", (char*) NLMSG_DATA((struct nlmsghdr *) &buffer));
}
/*
 * Open the netlink socket, then print incoming messages forever, pausing
 * 5000 microseconds after each one.  Returns the negative value from
 * nl_open() if the socket cannot be set up.
 */
int main(int argc, char *argv[]) {
    const int nls = nl_open();

    if (nls < 0)
        return nls;

    for (;;) {
        nl_recv(nls);
        usleep(5000);
    }

    return 0;
}
Kernel module:
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <net/sock.h>
#define MYMGRP 22
struct sock *nl_sk = NULL;
static struct timer_list timer;
void nl_send_msg(unsigned long data) {
struct sk_buff *skb_out;
struct nlmsghdr *nlh;
int res;
char *msg = "hello from kernel!\n";
int msg_size = strlen(msg);
skb_out = nlmsg_new(
NLMSG_ALIGN(msg_size), // #payload: size of the message payload
GFP_KERNEL // #flags: the type of memory to allocate.
);
if (!skb_out) {
printk(KERN_ERR "Failed to allocate new skb\n");
return;
}
nlh = nlmsg_put(
skb_out, // #skb: socket buffer to store message in
0, // #portid: netlink PORTID of requesting application
0, // #seq: sequence number of message
NLMSG_DONE, // #type: message type
msg_size, // #payload: length of message payload
0 // #flags: message flags
);
memcpy(nlmsg_data(nlh), msg, msg_size+1);
res = nlmsg_multicast(
nl_sk, // #sk: netlink socket to spread messages to
skb_out, // #skb: netlink message as socket buffer
0, // #portid: own netlink portid to avoid sending to yourself
MYMGRP, // #group: multicast group id
GFP_KERNEL // #flags: allocation flags
);
if (res < 0) {
printk(KERN_INFO "Error while sending to user: %d\n", res);
} else {
mod_timer(&timer, jiffies + msecs_to_jiffies(1));
}
}
/*
 * Module entry point: create the kernel-side NETLINK_USERSOCK socket, set
 * up the periodic timer and send the first message immediately.
 *
 * Returns 0 on success or -ENOMEM when the netlink socket cannot be
 * created.
 */
static int __init nl_init(void) {
    struct netlink_kernel_cfg cfg = {};

    printk(KERN_INFO "init NL\n");
    nl_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK, &cfg);
    if (!nl_sk) {
        printk(KERN_ALERT "Error creating socket.\n");
        /* Use a named errno instead of the bare value -10. */
        return -ENOMEM;
    }

    init_timer(&timer);
    timer.function = nl_send_msg;
    timer.expires = jiffies + 1000;
    timer.data = 0;
    add_timer(&timer);

    /* First send; on success nl_send_msg() re-arms the timer itself. */
    nl_send_msg(0);
    return 0;
}
/*
 * Module exit point: stop the timer, then release the netlink socket.
 * The timer callback uses nl_sk, so the timer is shut down first.
 */
static void __exit nl_exit(void) {
printk(KERN_INFO "exit NL\n");
/* Stop the timer before the socket its callback uses goes away. */
del_timer_sync(&timer);
netlink_kernel_release(nl_sk);
}
/* Register the entry and exit points and declare the module license. */
module_init(nl_init);
module_exit(nl_exit);
MODULE_LICENSE("GPL");
For posterity: I believe the problem was the allocation in nlmsg_new, which should not occur inside an interrupt handler (the timer handler, nl_send_msg), as explained here.
Without the sleep, I believe nlmsg_new does not need to sleep when allocating, so the requirement that an interrupt handler not sleep is not violated. However, if the userspace process lags behind the kernel, it is possible for the kernel to sleep during the allocation, causing a hang.

xenomai xddp with std::thread

I need to perform RT to NRT communication on a Xenomai machine.
I'm actually able to compile and run the example proposed here
But if try to replace the pthreads with std::thread, as follows:
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <malloc.h>
#include <pthread.h>
#include <fcntl.h>
#include <errno.h>
#include <rtdk.h>
#include <rtdm/rtipc.h>
#include <iostream>
#include <thread>
#define XDDP_PORT_LABEL "xddp-demo"
/* Strings that realtime_thread2 sends one after another, wrapping around
   at the end of the table. */
static const char *msg[] = {
"Surfing With The Alien",
"Lords of Karma",
"Banana Mango",
"Psycho Monkey",
"Luminous Flesh Giants",
"Moroccan Sunset",
"Satch Boogie",
"Flying In A Blue Dream",
"Ride",
"Summer Song",
"Speed Of Light",
"Crystal Planet",
"Raspberry Jam Delta-V",
"Champagne?",
"Clouds Race Across The Sky",
"Engines Of Creation"
};
/* Print reason together with the current errno description via perror(),
   then terminate the process with EXIT_FAILURE.  Does not return. */
static void fail(const char *reason) {
perror(reason);
exit(EXIT_FAILURE);
}
/*
 * Real-time receiver: binds an XDDP datagram socket labelled
 * XDDP_PORT_LABEL and prints every datagram that arrives on it.
 * Any socket error terminates the process.
 */
void realtime_thread1() {
    struct rtipc_port_label plabel;
    struct sockaddr_ipc saddr;
    char buf[128];

    /*
     * Get a datagram socket to bind to the RT endpoint. Each
     * endpoint is represented by a port number within the XDDP
     * protocol namespace.
     */
    const int s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_XDDP);
    if (s < 0)
        fail("socket");

    /*
     * Set a port label. This name will be registered when
     * binding, in addition to the port number (if given).
     */
    strcpy(plabel.label, XDDP_PORT_LABEL);
    if (setsockopt(s, SOL_XDDP, XDDP_LABEL, &plabel, sizeof(plabel)))
        fail("setsockopt");

    /*
     * Bind the socket to the port, to set up a proxy that channels
     * traffic to/from the Linux domain. The label lets peers locate
     * the port by name: once the socket is bound, the matching
     * pseudo-device appears as
     * /proc/xenomai/registry/rtipc/xddp/<XDDP_PORT_LABEL> in the
     * Linux domain.
     *
     * saddr.sipc_port specifies the port number to use. If -1 is
     * passed, the XDDP driver will auto-select an idle port.
     */
    memset(&saddr, 0, sizeof(saddr));
    saddr.sipc_family = AF_RTIPC;
    saddr.sipc_port = -1;
    if (bind(s, (struct sockaddr *)&saddr, sizeof(saddr)))
        fail("bind");

    while (true) {
        /* Get packets relayed by the regular thread */
        const int ret = recvfrom(s, buf, sizeof(buf), 0, NULL, 0);
        if (ret <= 0)
            fail("recvfrom");
        rt_printf("%s: \"%.*s\" relayed by peer\n", __FUNCTION__, ret, buf);
    }
}
/*
 * Real-time sender: connects an XDDP datagram socket to the port labelled
 * XDDP_PORT_LABEL and sends the strings from msg[] in rotation, one every
 * 500 ms.  Any socket error terminates the process.
 */
void realtime_thread2() {
    struct rtipc_port_label plabel;
    struct sockaddr_ipc saddr;
    struct timeval tv;
    struct timespec ts;
    socklen_t addrlen;
    int ret, len, n = 0;

    const int s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_XDDP);
    if (s < 0)
        fail("socket");

    /*
     * Set the socket timeout; it will apply when attempting to
     * connect to a labeled port, and to recvfrom() calls. The
     * following setup tells the XDDP driver to wait for at most
     * one second until a socket is bound to a port using the same
     * label, or return with a timeout error.
     */
    tv.tv_sec = 1;
    tv.tv_usec = 0;
    if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
        fail("setsockopt");

    /*
     * Set a port label. This name will be used to find the peer
     * when connecting, instead of the port number.
     */
    strcpy(plabel.label, XDDP_PORT_LABEL);
    if (setsockopt(s, SOL_XDDP, XDDP_LABEL, &plabel, sizeof(plabel)))
        fail("setsockopt");

    memset(&saddr, 0, sizeof(saddr));
    saddr.sipc_family = AF_RTIPC;
    saddr.sipc_port = -1; /* Tell XDDP to search by label. */
    if (connect(s, (struct sockaddr *)&saddr, sizeof(saddr)))
        fail("connect");

    /*
     * The labelled port is now our default destination, but its
     * actual port number is still unknown. Use getpeername() to
     * retrieve it.
     */
    addrlen = sizeof(saddr);
    ret = getpeername(s, (struct sockaddr *)&saddr, &addrlen);
    if (ret || addrlen != sizeof(saddr))
        fail("getpeername");
    rt_printf("%s: NRT peer is reading from /dev/rtp%d\n",
              __FUNCTION__, saddr.sipc_port);

    /*
     * We run in full real-time mode (i.e. primary mode), so we
     * have to let the system breathe between two iterations.
     * The pause length never changes, so it is set up once.
     */
    ts.tv_sec = 0;
    ts.tv_nsec = 500000000; /* 500 ms */

    while (true) {
        len = strlen(msg[n]);
        /*
         * Send a datagram to the NRT endpoint via the proxy.
         * We may pass a NULL destination address, since the
         * socket was successfully assigned the proper default
         * address via connect(2).
         */
        ret = sendto(s, msg[n], len, 0, NULL, 0);
        if (ret != len)
            fail("sendto");
        rt_printf("%s: sent %d bytes, \"%.*s\"\n",
                  __FUNCTION__, ret, ret, msg[n]);

        /* Advance to the next string, wrapping at the end of the table. */
        n = (n + 1) % (sizeof(msg) / sizeof(msg[0]));

        clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL);
    }
}
/*
 * Non-real-time relay: opens the pseudo-device registered under
 * XDDP_PORT_LABEL, then forever reads a message from it and writes the
 * same bytes back.  Any failure terminates the process via fail().
 */
void regular_thread() {
    char buf[128], *devname;
    int fd, ret, open_errno;

    if (asprintf(&devname,
                 "/proc/xenomai/registry/rtipc/xddp/%s",
                 XDDP_PORT_LABEL) < 0)
        fail("asprintf");

    fd = open(devname, O_RDWR);
    /* Remember why open() failed: the stream output and free() below may
       overwrite errno before fail() gets to report it. */
    open_errno = errno;
    std::cout << "File descriptor regular thread: " << fd << std::endl;
    free(devname);
    if (fd < 0) {
        errno = open_errno;
        fail("open");
    }

    for (;;) {
        /* Get the next message from realtime_thread2. */
        ret = read(fd, buf, sizeof(buf));
        if (ret <= 0)
            fail("read");
        /* Relay the message to realtime_thread1. */
        ret = write(fd, buf, ret);
        if (ret <= 0)
            fail("write");
    }
}
/* Start the two real-time threads and the regular relay thread, then wait
   for them.  argc and argv are not used. */
int main(int argc, char **argv) {
std::thread rt1(realtime_thread1);
std::thread rt2(realtime_thread2);
std::thread regth(regular_thread);
/* None of the thread functions returns normally (each ends in an endless
   loop or exits the process), so these joins block. */
rt1.join();
rt2.join();
regth.join();
return 0;
}
I get a failure at the open function, which returns -1.
The reason why I want to use std::thread is that I need to implement the code in C++, and pthread does not understand member function. And I don't want to use global variables.
Also, I need to run the same code on both an RT and an NRT machine (the latter without the Xenomai libraries), and I've already prepared the code for that (with conveniently placed #ifdef directives).
I've figured it out.
I need to wrap my binary with the posix skin of Xenomai.
In CMAKE:
# Ask xeno-config for the compile flags of the POSIX skin.
set(xeno_cflags_params "--skin=posix" "--cflags")
execute_process(
    COMMAND xeno-config ${xeno_cflags_params}
    OUTPUT_VARIABLE xeno_cflags
    OUTPUT_STRIP_TRAILING_WHITESPACE)

# Ask xeno-config for the link flags of the POSIX skin.
set(xeno_ldflags_params "--skin=posix" "--ldflags")
execute_process(
    COMMAND xeno-config ${xeno_ldflags_params}
    OUTPUT_VARIABLE xeno_ldflags
    OUTPUT_STRIP_TRAILING_WHITESPACE)

# Compiler and linker options.
# The C flags extend CMAKE_C_FLAGS; building them from CMAKE_CXX_FLAGS would
# drop the existing C flags and leak C++-only options into C compilation.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${xeno_cflags}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${xeno_cflags}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${xeno_ldflags}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${xeno_ldflags}")

Implementing DNAT in OUTPUT chain using netfilter framework

I am trying to implement DNAT in OUTPUT chain but the packets do not arrive to the target destination. What I am trying to do, is e.g. if the message is sent to 192.168.56.17, I change it to 192.168.56.1 and this ip is in my network, so it should be sent.
The code is:
#include <linux/module.h> //needed for every module
#include <linux/kernel.h>
#include <linux/types.h> //u_int && co
#include <linux/skbuff.h> //struct sk_buff
#include <linux/in.h> //basic internet shiat
#include <linux/ip.h> //protocol headers
#include <linux/tcp.h>
#include <linux/netfilter.h> //need this for register_
#include <linux/netfilter_ipv4.h> //..
#include <linux/netdevice.h> //struct net_device
#define NR_OF_VLANS 3
#define MAX_UNIQUE_ADDRS 16
#define IFACE0 "vboxnet0"
#define IFACE1 "eth0"
#define IFACE2 "eth0.1"
#define INIT_ADDR 0x0038A8C0
MODULE_AUTHOR("tomak");
MODULE_DESCRIPTION("dnat");
static struct nf_hook_ops NF_hook_out;
/*
 * Rewrite the destination address of an outgoing IPv4 header.
 *
 * The offset of the destination from INIT_ADDR is reduced modulo
 * MAX_UNIQUE_ADDRS and the result becomes the new destination.  The header
 * checksum is recomputed afterwards because it covers the destination
 * address.
 *
 * Returns NF_ACCEPT when the original offset was already below
 * MAX_UNIQUE_ADDRS, NF_REPEAT otherwise.
 *
 * NOTE(review): TCP and UDP checksums include the destination address in
 * their pseudo-header, and the packet's route was chosen for the old
 * destination; neither is updated here - confirm whether that is needed.
 */
static int change_ip_out(struct iphdr* iph)
{
    /* examine last byte of dest ip addr */
    __be32 daddr = iph->daddr;
    /* Note: big endian => we need first byte of the structure */
    __be32 offset = daddr - INIT_ADDR;
    __u32 uOffset = be32_to_cpu(offset);
    __u32 uRemainder = uOffset % MAX_UNIQUE_ADDRS;
    __be32 remainder = cpu_to_be32(uRemainder);
    int division = (int)(uOffset / MAX_UNIQUE_ADDRS);

    /* change ip */
    iph->daddr = INIT_ADDR + remainder;

    /* The checksum field must be zero while the new sum is computed. */
    iph->check = 0;
    iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

    printk(KERN_INFO "OUT changed daddr to %pI4\n", &(iph->daddr));
    if (division == 0) {
        return NF_ACCEPT;
    }
    return NF_REPEAT;
}
/*
 * Netfilter hook for locally generated packets.
 *
 * Compares the packet's destination with each of the
 * MAX_UNIQUE_ADDRS * NR_OF_VLANS consecutive addresses starting at
 * INIT_ADDR.  On a match the packet is logged and handed to
 * change_ip_out(), whose verdict is returned; all other packets get
 * NF_ACCEPT.
 *
 * NOTE(review): skpp is declared as struct sk_buff ** but is passed to
 * ip_hdr() unchanged - verify against the hook prototype of the target
 * kernel version.
 */
u_int hook_fcn_out( u_int hooknum, /* the hook number (linux/netfilter_ipv4.h) */
struct sk_buff **skpp, /* socket buffer handle (linux/skbuff.h) */
const struct net_device *in, /* only valid for received packets */
const struct net_device *out, /* only valid for outgoing packets (linux/netdevice.h) */
int (*okfn)(struct sk_buff *)) /* continuation supplied by netfilter */
{
    const int candidates = MAX_UNIQUE_ADDRS * NR_OF_VLANS;
    struct iphdr* iph = ip_hdr(skpp); /* getting ip header */
    __be32 probe = INIT_ADDR;         /* address currently compared */
    int idx;

    /* Walk the candidate addresses; each step adds one to the last octet. */
    for (idx = 0; idx < candidates; idx++, probe += 0x01000000) {
        if (iph->daddr != probe)
            continue;

        printk(KERN_INFO "OUT detected message to address %pI4\n", &(iph->daddr));
        printk(KERN_INFO "OUT message detected to interface %s\n", out->name);
        return change_ip_out(iph);
    }

    return NF_ACCEPT;
}
/*
 * Module entry point: register hook_fcn_out on the IPv4 LOCAL_OUT hook at
 * destination-NAT priority.
 *
 * Returns the result of nf_register_hook(), so that a failed registration
 * makes the module load fail instead of leaving a module without a hook.
 */
int init_module(void)
{
    printk(KERN_DEBUG "nat_up\n");
    NF_hook_out.hook = hook_fcn_out;
    NF_hook_out.hooknum = NF_INET_LOCAL_OUT;
    NF_hook_out.pf = PF_INET;
    NF_hook_out.priority = NF_IP_PRI_NAT_DST;
    /* register hook function */
    return nf_register_hook(&NF_hook_out);
}
/* Module exit point: log the shutdown and unregister the output hook. */
void cleanup_module(void)
{
printk(KERN_DEBUG "nat_down\n");
nf_unregister_hook(&NF_hook_out);
}
Does anyone have any idea where the problem might be? My guess was the priority, but even NF_INET_PRI_FIRST did not work.
Thanks a lot for your comments and help.
Tomas

What's the max file mapping size in 64bits machine

I'm new to 64-bits architecture. Could you tell me what's MAX file size supported by file mapping in 64 bits linux machine. I want to open more than 20GB files by file mapping, is it available?
I write a sample code. But it causes Bus Error when I get the value of the pointer in GBSIZE offset:
unsigned char* pCur = pBegin + GBSIZE;
//pBegin is the pointer returned by mmap
printf("%c",*pCur);
BTW, printf("%c",*pBegin ); works fine. and my address sizes : 38 bits physical, 48 bits virtual
Here is the full code:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
//#define FILEPATH "smallfile"
#define FILEPATH "bigfile"
#define GBSIZE (1024L*1024L*1024L)
#define TBSIZE (1024L*GBSIZE)
#define NUMSIZE (20L * GBSIZE)
//#define NUMSIZE (10)
#define FILESIZE (NUMINTS * sizeof(int))
/*
 * Map FILEPATH read-only and print the byte at offset GBSIZE.
 *
 * At most NUMSIZE bytes are mapped, and never more than the file actually
 * contains: touching a page of a mapping that lies beyond the end of the
 * underlying file raises SIGBUS.  The byte is only read when the offset is
 * inside the mapped range.
 *
 * argc and argv are not used.
 */
int main(int argc, char *argv[])
{
    int fd;
    struct stat st;
    size_t map_len;
    unsigned char *pBegin;

    (void)argc;
    (void)argv;

    fd = open(FILEPATH, O_RDONLY);
    if (fd == -1) {
        perror("Error opening file for reading");
        exit(EXIT_FAILURE);
    }

    if (fstat(fd, &st) == -1) {
        perror("Error getting the file size");
        close(fd);
        exit(EXIT_FAILURE);
    }
    if (st.st_size <= 0) {
        fprintf(stderr, "Nothing to map: file is empty\n");
        close(fd);
        exit(EXIT_FAILURE);
    }

    /* Clamp the mapping to the real file size. */
    map_len = (st.st_size < (off_t)NUMSIZE) ? (size_t)st.st_size : (size_t)NUMSIZE;

    pBegin = mmap(0, map_len, PROT_READ, MAP_SHARED, fd, 0);
    if (pBegin == MAP_FAILED) {
        /* Report first: close() could overwrite errno. */
        perror("Error mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    if ((size_t)GBSIZE < map_len) {
        printf("%c", pBegin[GBSIZE]);
    } else {
        fprintf(stderr, "Offset %ld is beyond the %zu mapped bytes\n",
                (long)GBSIZE, map_len);
    }

    if (munmap(pBegin, map_len) == -1) {
        perror("Error un-mmapping the file");
    }
    close(fd);
    return 0;
}
Although pointers are 64-bit wide, most processors do not actually support virtual addresses using the full 64 bits. To see what size virtual addresses your processor supports, look in /proc/cpuinfo (48 bits is typical).
grep "address sizes" /proc/cpuinfo
Additionally, half of the virtual address space is used by the kernel and not available to userspace - leaving 47 bits in the current Linux implementation.
However, even taking this into account, you will still have plenty of room for a 20GB file. 47 bits in theory means a virtual address space of 128TB.
From the mmap(2) man page:
void *mmap(void *addr, size_t length, int prot, int flags,
int fd, off_t offset);
length is a size_t, which on 64-bit machines is 64 bits in length. Therefore yes, you can theoretically map a 20GB file.
64-bit addresses allow for many orders of magnitude more than 20 GB.
(This answer was originally edited into the question by OP)
You have requested a 20GB map onto a file which was only 50MB in size.
As described by the mmap man page, mmap succeeds when you request the length too big, however it will give SIGBUS or SIGSEGV when you actually try to read beyond the end of the underlying file.
I agree with MarkR: you are dereferencing an invalid address.
// A bug in these lines.
unsigned char* pCur = pBegin + GBSIZE;
printf("%c",*pCur);
unsigned char* pEnd = pBegin + NUMSIZE;
unsigned char* pLast = pEnd - 1;
unsigned char* pCur = pLast;
I modified your code to use HUGE TLB flags as the following.
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
#define KSIZE 1024L
#define MSIZE (1024L*1024L)
#define GSIZE (1024L*1024L*1024L)
#define TSIZE (1024L*GSIZE)
#define INIT_MEM 0
// Fail on my MacBook Pro (Retina, 13-inch, Early 2015)
// Darwin Kernel Version 16.5.0:x86_64
// #define NUMSIZE (16L * TSIZE)
// mmap ok; init: got killed; signal 9
// #define NUMSIZE (8L * TSIZE)
// Got killed signal 9
// #define NUMSIZE (1L * TSIZE)
// OK
// #define NUMSIZE (200L * GSIZE)
// OK
#define NUMSIZE (20L * GSIZE)
typedef unsigned long long ETYPE;
#define MEMSIZE (NUMSIZE*sizeof(ETYPE))
#define PGSIZE (16*KSIZE)
/* Store the element's own address, converted to ETYPE, in the element
   that ptr points to. */
void init(ETYPE* ptr) {
*ptr = (ETYPE)ptr;
}
/*
 * Check that the element at ptr holds its own address, as written by
 * init().
 *
 * Prints an "OK" line to stdout and returns 0 on a match; prints an
 * "ERROR" line to stderr and returns -1 otherwise.
 */
int verify(ETYPE* ptr) {
    if (*ptr != (ETYPE)ptr) {
        /* %p requires a void * argument */
        fprintf(stderr, "ERROR: 0x%016llx != %p.\n", *ptr, (void *)ptr);
        return -1;
    }
    fprintf(stdout, "OK: 0x%016llx = %p.\n", *ptr, (void *)ptr);
    return 0;
}
/*
 * Create an anonymous shared mapping of MEMSIZE bytes using the huge-page
 * flags, write to and verify its first and last element, then unmap it.
 *
 * When INIT_MEM is non-zero, one element per PGSIZE bytes is initialised
 * across the whole mapping first.  argc and argv are not used.
 */
int main(int argc, char *argv[])
{
    ETYPE *pBegin;
    const int flags = MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB;

    (void)argc;
    (void)argv;

    /* The quotient has type size_t, so print it with %zu. */
    printf("mmap memory size:%zu GB\n", (size_t)(MEMSIZE/GSIZE));
    pBegin = (ETYPE*) mmap(0, MEMSIZE, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (pBegin == MAP_FAILED) {
        perror("Error mmapping the file");
        exit(EXIT_FAILURE);
    }

#if INIT_MEM
    {
        ETYPE* pEnd = pBegin + NUMSIZE;
        ETYPE* pCur = pBegin;
        /* Initialising every element is slow, so step by PGSIZE bytes. */
        while (pCur < pEnd) {
            init(pCur);
            pCur += (PGSIZE/sizeof(ETYPE));
        }
    }
#endif

    init(&pBegin[0]);
    init(&pBegin[NUMSIZE-1]);
    verify(&pBegin[0]);
    verify(&pBegin[NUMSIZE-1]);

    if (munmap(pBegin, MEMSIZE) == -1) {
        perror("Error un-mmapping the file");
    }
    return 0;
}

Resources