How to use Fast Open Mechanism with MultipathTCP? - linux

I would like to use Fast Open Mechanism with MultipathTCP.
Are there any options to do that?
I tried this:
int main(int argc, char *argv[])
{
...
struct msghdr msgh;
memset(&msgh, 0, sizeof(msgh));
struct cmsghdr *cmsg;
unsigned char buffer[1] = "X";
int size = 3;
struct sockaddr_in dst;
memset(&dst,0,sizeof(dst));
inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
dst.sin_family = AF_INET;
dst.sin_port = htons(PORT);
/* Construct control information */
struct iovec msgiov = {};
struct unp_in_pktinfo {
struct in_addr ipi_addr; /* destination IPv4 address */
int ipi_ifindex; /* received interface index */
};
msgh.msg_name = &dst;
msgh.msg_namelen = sizeof(struct sockaddr_in);
msgiov.iov_base = buffer;
msgiov.iov_len = size;
msgh.msg_iov = &msgiov;
msgh.msg_iovlen = 1;
unsigned char control_buf[CMSG_LEN(sizeof(struct unp_in_pktinfo))] = {};
msgh.msg_control = &control_buf;
msgh.msg_controllen = CMSG_LEN(sizeof(struct unp_in_pktinfo));
cmsg = CMSG_FIRSTHDR(&msgh);
cmsg->cmsg_level = IPPROTO_IP;
cmsg->cmsg_type = IP_PKTINFO;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
memcpy((struct in_addr *)CMSG_DATA(cmsg), &(dst.sin_addr),
sizeof(dst.sin_addr));
cmsg = (struct cmsghdr *)((caddr_t) cmsg + CMSG_ALIGN(cmsg->cmsg_len));
sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
ret = sendmsg(sock_fd, &msgh, MSG_FASTOPEN);
close(sock_fd);
return EXIT_SUCCESS;
}
It seems to be correct, but It doesn't work.
Could you please help me to find a solution?
Thank you!

There is existing approach to do that:
You need to apply the patch or use the kernel functionality (if exists). I applied patch from mptcp fastopen in linux kernel.
Instead of using msghdr, there is another solution posted here: mptcp fastopen in linux kernel. It is about using "sendto" fucntion with MSG_FASTOPEN:
ret = sendto(sock_fd, sendline, strlen(sendline), MSG_FASTOPEN,(struct sockaddr *) &daddr, sizeof(daddr));
Also the is another option (alternative) described in mptcp fastopen in linux kernel:
setsockopt(sock_fd, SOL_TCP, TCP_FASTOPEN_CONNECT, &enable, sizeof(enable));

Related

Why is the network latency so high in Linux socket hardware time stamping?

I am trying to obtain the packet traveling latency in hardware timestamp between 2 computers connected by 1 Ethernet cable. However the result for obtained delay is 11.5 micro-seconds, which is much higher than my expectation like several nano-seconds.
Design
The way I tried to obtain the latency is demonstrated in the following graph as delay = ((t3 - t0) - (t2 - t1)) / 2
client server
| ping |
t0|-------------->|t1
| |
| pong |
t3|<--------------|t2
| |
v v
Here t0/t2 are Linux hardware sending time recorded by NIC, and t1/t3 are hardware receiving timestamp. The detailed information about linux socket hardware time can be found here: https://docs.kernel.org/networking/timestamping.html. As my understand, t0/t2 are time that NIC transmit the first byte to wire, t1/t3 are time that NIC receive the first byte from wire. Thus the delay that I calculated is purely the propagation delay which should be within several nanoseconds. In addition to prove my assumption, I tried to change the packet size in 256/512/1400 bytes, the results have no change and are all in 11.5 microseconds. Is my understand correct on those hardware timestamp and delay computation?
Implementation
The 2 computers I am using are Dell OptiPlex-7090 on Ubuntu 20.04, the NIC version is Ethernet Connection (14) I219-LM.
The completed code for testing can be found here: https://github.com/ChuanyuXue/Hardware-RTT. It records both hardware timestamp and software timestamp.
utils.c:
void die(char *s)
{
perror(s);
exit(1);
}
int setup_receiver(int fd, int port)
{
struct sockaddr_in addr;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = htons(port);
if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
{
die("bind()");
}
int val = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) == -1)
{
die("setsockopt() HW receiving");
}
int enable = 1;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &enable, sizeof(enable)) < 0)
{
die("setsockopt() SW receiving");
}
return 0;
}
int setup_sender(int fd)
{
int timestamp_flags = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &timestamp_flags, sizeof(timestamp_flags)) < 0)
{
die("setsockopt() HW sending");
}
int enable = 1;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &enable, sizeof(enable)))
{
die("setsockopt() SW receiving");
}
}
int setup_adapter(int fd, char *dev_name)
{
struct hwtstamp_config hwts_config;
struct ifreq ifr;
memset(&hwts_config, 0, sizeof(hwts_config));
hwts_config.tx_type = HWTSTAMP_TX_ON;
hwts_config.rx_filter = HWTSTAMP_FILTER_ALL;
memset(&ifr, 0, sizeof(ifr));
snprintf(ifr.ifr_name, IFNAMSIZ, "%s", dev_name);
ifr.ifr_data = (void *)&hwts_config;
if (ioctl(fd, SIOCSHWTSTAMP, &ifr) == -1)
{
die("ioctl()");
}
}
void send_single(int fd, char *address, int port)
{
/*
Send one message
*/
struct sockaddr_in si_server;
memset(&si_server, 0, sizeof(si_server));
si_server.sin_family = AF_INET;
si_server.sin_port = htons(port);
if (inet_aton(address, &si_server.sin_addr) == 0)
{
die("inet_aton()");
}
char buffer[BUFFER_LEN];
struct iovec iov = (struct iovec){.iov_base = buffer, .iov_len = BUFFER_LEN};
struct msghdr msg = (struct msghdr){.msg_name = &si_server,
.msg_namelen = sizeof si_server,
.msg_iov = &iov,
.msg_iovlen = 1};
ssize_t send_len = sendmsg(fd, &msg, 0);
if (send_len < 0)
{
printf("[!] Error sendmsg()");
}
// -------------- obtain the loopback packet
char data[BUFFER_LEN], control[BUFFER_LEN];
struct iovec entry;
struct sockaddr_in from_addr;
int res;
memset(&msg, 0, sizeof(msg));
msg.msg_iov = &entry;
msg.msg_iovlen = 1;
entry.iov_base = data;
entry.iov_len = sizeof(data);
msg.msg_name = (caddr_t)&from_addr;
msg.msg_namelen = sizeof(from_addr);
msg.msg_control = &control;
msg.msg_controllen = sizeof(control);
// wait until get the loopback
while (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
{
}
// encode the returned packet
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msg, cmsg))
{
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_TIMESTAMPING)
{
struct timespec *ts =
(struct timespec *)CMSG_DATA(cmsg);
printf("HD-SEND TIMESTAMP %ld.%09ld\n", ts[2].tv_sec, ts[2].tv_nsec);
}
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SO_TIMESTAMPNS)
{
struct timespec *ts =
(struct timespec *)CMSG_DATA(cmsg);
printf("SW-SEND TIMESTAMP %ld.%09ld\n", ts->tv_sec, ts->tv_nsec);
}
}
}
void recv_single(int fd)
{
char data[BUFFER_LEN], ctrl[BUFFER_LEN];
struct msghdr msg;
struct iovec iov;
ssize_t len;
struct cmsghdr *cmsg;
memset(&msg, 0, sizeof(msg));
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = ctrl;
msg.msg_controllen = sizeof(ctrl);
iov.iov_base = data;
iov.iov_len = sizeof(data);
struct timespec start;
if (recvmsg(fd, &msg, 0) < 0)
{
printf("[!] Error recvmsg()");
}
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msg, cmsg))
{
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_TIMESTAMPING)
{
struct timespec *ts =
(struct timespec *)CMSG_DATA(cmsg);
printf("HW-RECV TIMESTAMP %ld.%09ld\n", ts[2].tv_sec, ts[2].tv_nsec);
}
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_TIMESTAMPNS)
{
struct timespec *ts =
(struct timespec *)CMSG_DATA(cmsg);
printf("SW-RECV TIMESTAMP %ld.%09ld\n", ts->tv_sec, ts->tv_nsec);
}
}
}
client.c
int main(int argc, char *argv[])
{
const char *address = "192.168.0.23";
const int port = 54321;
int fd_in = socket(AF_INET, SOCK_DGRAM, 0);
int fd_out = socket(AF_INET, SOCK_DGRAM, 0);
setup_adapter(fd_in, "eth0");
setup_adapter(fd_out, "eth0");
setup_sender(fd_out);
setup_receiver(fd_in, port);
int count = 0;
while (1)
{
printf("[ ---- Iter-%5d ----------------------------- ]\n", count++);
send_single(fd_out, (char *)address, port);
recv_single(fd_in);
usleep(10000);
}
}
server.c
int main(int argc, char *argv[])
{
const char *address = "192.168.0.22";
const int port = 54321;
int fd_in = socket(AF_INET, SOCK_DGRAM, 0);
int fd_out = socket(AF_INET, SOCK_DGRAM, 0);
setup_adapter(fd_in, "eth0");
setup_adapter(fd_out, "eth0");
setup_sender(fd_out);
setup_receiver(fd_in, port);
int count = 0;
while (1)
{
printf("[ ---- Iter-%5d ----------------------------- ]\n", count++);
recv_single(fd_in);
usleep(10000);
send_single(fd_out, (char *)address, port);
}
}
From the results, I can see the hardware timestamp is much more stable and has very low jitter compared with software timestamp(I am sorry my account is not allowed to embed picture into question):
https://github.com/ChuanyuXue/Hardware-RTT/blob/main/0301_exp_hwtime/01.png
https://github.com/ChuanyuXue/Hardware-RTT/blob/main/0301_exp_hwtime/02.png
https://github.com/ChuanyuXue/Hardware-RTT/blob/main/0301_exp_hwtime/03.png
https://github.com/ChuanyuXue/Hardware-RTT/blob/main/0301_exp_hwtime/04.png
https://github.com/ChuanyuXue/Hardware-RTT/blob/main/0301_exp_hwtime/06.png
Above results make me feel the timestamp I obtained is correct as the HW latency has smaller delay and very low jitter, but there are some issues I neglect when I compute delay = ((t3 - t0) - (t2 - t1)) / 2. Is there anyone who can let me know why or test my code on other environment?

Use Netfilter to write kernel module to modify source IP error, computer crash

I want to write a kernel module that uses Netfilter to modify the source ip to "100.100.100.100" whiche packet the Destination IP is "192.68.4.103" and the protocol is TCP.
I write a .c file but when i install this module ,it will cause the computer crash. How should I rewrite it?
The system is ubuntu16.04 . Here is the code I wrote:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include<linux/inet.h>
#include<net/ip.h>
#include<net/tcp.h>
#include <linux/netdevice.h>
#include <linux/inet.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("LINHOS");
#define IF_NAME "eno1" //Network name which is got by command "ifconfig"
struct nf_hook_ops nfho;
static unsigned int checkIP(
unsigned int hooknum,
struct sk_buff *__skb,
const struct net_device *in,
const struct net_device *out,
int(*okfn)(struct sk_buff *)){
struct sk_buff *skb;
struct net_device *dev;
struct iphdr *iph;
struct tcphdr *tcph;
int tot_len;
unsigned int iph_len;
int tcph_len;
int ret;
skb = __skb;
if (skb == NULL)
return NF_ACCEPT;
iph = ip_hdr(skb);
if (iph == NULL)
return NF_ACCEPT;
tot_len = ntohs(iph->tot_len);
if (iph->daddr =="192.68.4.103") { //locla ip
iph_len = ip_hdrlen(skb);
skb_pull(skb, iph_len);
skb_reset_transport_header(skb);
if (iph->protocol == IPPROTO_TCP) {
tcph = tcp_hdr(skb);
tcph_len = tcp_hdrlen(skb);
iph->saddr = in_aton("100.100.100.100");
dev = dev_get_by_name(&init_net, IF_NAME);
tcph->check = 0;
skb->csum = csum_partial((unsigned char *) tcph,
tot_len - iph_len,
0);
tcph->check = csum_tcpudp_magic(iph->saddr,
iph->daddr,
ntohs(iph->tot_len) - iph_len,
iph->protocol,
skb->csum);
iph->check = 0;
iph->check = ip_fast_csum(iph, iph->ihl);
skb->ip_summed = CHECKSUM_NONE;
skb->pkt_type = PACKET_HOST;
skb->dev = dev;
skb_push(skb, iph_len);
skb_push(skb, ETH_ALEN);
ret = dev_queue_xmit(skb);
if (ret < 0) {
printk(KERN_ERR "dev_queue_xmit() error!\n");
return NF_DROP;
}
return NF_STOLEN;
}
skb_push(skb, iph_len);
skb_reset_transport_header(skb);
}
return NF_ACCEPT;
}
static int __initfilter_init(void){
printk("----------------checkIP is OK!--------------");
nfho.hook = checkIP;
nfho.pf = AF_INET;
nfho.hooknum = NF_INET_PRE_ROUTING;
nfho.priority = NF_IP_PRI_FIRST;
int ret = nf_register_hook(&nfho);
if (ret < 0) {
printk(KERN_ERR "can't modify skb hook!");
return ret;
}
return 0;
}
static void filter_exit(void) {
nf_unregister_hook(&nfho);
}
module_init(filter_init);
module_exit(filter_exit);
Could anyone explain to me what I am doing wrong?

skbuff packet sent with zero payload

Using Netfilter's hooks (NF_INET_PRE_ROUTING and NF_INET_POST_ROUTING) I implemented a kernel module that monitors all incoming and outgoing packets for a given host. By looking at the skuffs I can classify packets and identify those that I am interested in. Each time I detect such packets I want to generate my own packet and send it out in the network (note: I am not copying/cloning the original packet but instead create one from "scratch" and send it out using dev_queue_xmit. The only similarity with the original packet is that my packet goes towards the same destination, but has different port/protocol etc.).
The problem is that my generated packets are being sent out with empty payload. Packets successfully reach the destination, the size of the payload is correct, but its content is all set to zeros. Two observations: (1) The function that I am using to construct skbuffs and send out has been tested before and seemed to work. (2) The problem appear only when I classify outgoing packets and attempt to send my own along the way, seemingly the same code works fine when I classify packets using NF_INET_PRE_ROUTING.
Please see the code below:
static struct nf_hook_ops nfout;
static int __init init_main(void) {
nfout.hook = hook_func_out;
nfout.hooknum = NF_INET_POST_ROUTING;
nfout.pf = PF_INET;
nfout.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfout);
printk(KERN_INFO "Loading Postrouting hook\n");
return 0;
}
static unsigned int hook_func_out(const struct nf_hook_ops *ops,
struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, int (*okfn)(struct sk_buff *)) {
if (skb is the packet that I am looking for, omitting details...) {
generate_send_packet(skb);
}
return NF_ACCEPT;
}
void generate_send_packet(struct sk_buff *target_skb) {
static struct tcphdr * tcph;
static struct iphdr * iph;
static struct ethhdr * ethh;
struct sk_buff * skb1;
iph = ip_hdr(target_skb);
tcph = tcp_hdr(target_skb);
ethh = eth_hdr(target_skb);
int payload = 123456789;
skb1 = construct_udp_skb(dev,
dev->dev_addr, mac,
iph->saddr, iph->daddr,
ntohs(tcph->source), dst_port,
(unsigned char *)&payload, sizeof(int)
);
if (dev_queue_xmit(skb1) != NET_XMIT_SUCCESS) {
printk(KERN_INFO "Sending failed\n");
}
}
struct sk_buff* construct_udp_skb(struct net_device *dev,
unsigned char * src_mac, unsigned char * dst_mac,
uint32_t src_ip, uint32_t dst_ip,
uint32_t src_port, uint32_t dst_port,
uint32_t ttl, uint32_t tcp_seq,
unsigned char * usr_data, uint16_t usr_data_len) {
static struct ethhdr *ethh;
static struct iphdr *iph;
struct udphdr * udph;
static struct sk_buff *skb;
static uint16_t header_len = 300;
unsigned char * p_usr_data;
int udplen;
skb = alloc_skb(1000, GFP_KERNEL);
skb_reserve(skb, header_len);
//-----------------------------------------------
udph = (struct udphdr*) skb_push(skb, sizeof(struct udphdr));
iph = (struct iphdr*) skb_push(skb, sizeof(struct iphdr));
ethh = (struct ethhdr*) skb_push(skb, sizeof(struct ethhdr));
memset(udph, 0 , sizeof(struct udphdr));
memset(iph, 0 , sizeof(struct iphdr));
skb_set_mac_header(skb, 0);
skb_set_network_header(skb, sizeof(struct ethhdr));
skb_set_transport_header(skb, sizeof(struct ethhdr) + sizeof(struct iphdr));
//ETH -------------------------------------------
memcpy(ethh->h_source, src_mac, 6);
memcpy(ethh->h_dest, dst_mac, 6);
ethh->h_proto = htons(ETH_P_IP);
//IP --------------------------------------------
iph->ihl = 5;
iph->version = 4;
iph->ttl = 128;
iph->tos = 0;
iph->protocol = IPPROTO_UDP;
iph->saddr = src_ip;
iph->daddr = dst_ip;
iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
iph->id = htons(222);
iph->frag_off = 0;
iph->tot_len = htons(sizeof(struct iphdr) + sizeof(struct udphdr) + usr_data_len );
ip_send_check(iph);
//UDP--------------------------------------------
udph->source = htons(src_port);
udph->dest = htons(dst_port);
skb->dev = dev;
skb->protocol = IPPROTO_UDP;
skb->priority = 0;
skb->pkt_type = PACKET_OUTGOING;
p_usr_data = skb_put(skb, usr_data_len);
printk(KERN_INFO "Sending [%i]\n", *(int*)usr_data);
skb->csum = csum_and_copy_from_user(usr_data, p_usr_data, usr_data_len, 0, &err);
udplen = sizeof(struct udphdr) + usr_data_len;
udph->len = htons(udplen);
udph->check = udp_v4_check(udplen,
iph->saddr, iph->daddr,
0);
return skb;
}
What could be the possible reason why dev_queue_xmit would nullify the payload in the skbuff?
Thanks!
I assume that the result of debug print message for printk(KERN_INFO "Sending [%i]\n", (int)usr_data) is as expected. But what about the skb->data? Can you try to print it after csum_and_copy_from_user() to see whether it is empty or not? It seems very likely that you will see the zero payload at this point already.

How to add a new custom layer 4 protocol (a new Raw socket) in linux kernel?

i am trying adding my own customized layer 4 protocol in linux (ubuntu 14.04) - IPPROTO_MYPROTO as a loadable kernel module. I have done all necessary steps to register the protocol. Here i am sharing my code.
When i am trying to send a mesage from user space program using sendmsg(), i expect the corresponding fn myproto_sendmsg() registered via struct proto structure should be called in kernel space. But what i am observing is that though the myproto_sendmsg() in kernel space is not being called, yet destination machine is receiving the correct data. surprise ! surprise !. Is the default udp sendmsg() fn kicking in here which is like uninvited guest doing his work.
Here, sendmsg() call in user space returns as many bytes as send. Hence, fn returns success.
User space program :
void forwardUDP( int destination_node ,char sendString[] )
{
struct msghdr msg;
destination_node = destination_node % N; //destination node to which data is to be forwaded
int sock, rc;
struct sockaddr_in server_addr;
struct iovec iov;
struct hostent *host; //hostent predefined structure use to store info about host
host = (struct hostent *) gethostbyname(node[destination_node].ip_address);//gethostbyname returns a pointer to hostent
if ((sock = socket(AF_INET, SOCK_RAW, 5)) == -1)
{
perror("socket");
exit(1);
}
//destination address structure
server_addr.sin_family = AF_INET;
server_addr.sin_port = htons(node[destination_node].udpportno);
server_addr.sin_addr = *((struct in_addr *)host->h_addr); //host->h_addr gives address of host
bzero(&(server_addr.sin_zero),8);
/* fill the messsage structure*/
memset(&msg, 0 , sizeof(struct msghdr));
memset(&iov, 0, sizeof(struct iovec));
msg.msg_name = (void *)&server_addr;
msg.msg_namelen = sizeof(struct sockaddr_in);
printf("sendString = %s\n", sendString);
iov.iov_base = (void *)sendString;
iov.iov_len = strlen(sendString);
msg.msg_iov = &iov;
printf("len = %d\n", strlen(sendString));
#if 1
msg.msg_iovlen = 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
#endif
//sendto(sock, sendString, strlen(sendString), 0,(struct sockaddr *)&server_addr, sizeof(struct sockaddr));
**rc = sendmsg(sock, &msg, 0);**
printf("rc = %d\n", rc);
//sendto() function shall send a message through a connectionless-mode socket.
printf("\nFORWARD REQUEST : '%s' has been forwarded to node ---->%d\n",sendString,destination_node);
//close(sock);
}
Kernel Module
/* Define the handler which will recieve all ingress packets for protocol = IPPROTO_MYPROTO
defined in net/protocol.h
*/
/* Resgiter the call backs for pkt reception */
static const struct net_protocol myproto_protocol = {
.handler = myproto_rcv,
.err_handler = myproto_err,
.no_policy = 1,
.netns_ok = 1,
};
static struct inet_protosw myproto_protosw;
int
myproto_rcv(struct sk_buff *skb){
printk(KERN_INFO "myproto_rcv is called\n");
return 0;
}
int
myproto_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len){
printk(KERN_INFO "myproto_sendmsg() is called\n");
return 0;
}
void myproto_lib_close(struct sock *sk, long timeout){
printk(KERN_INFO "close is called\n");
return;
}
int
myproto_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg,
size_t len, int noblock, int flags,
int *addr_len){
printk(KERN_INFO "myproto_recvmsg() is called.\n");
printk(KERN_INFO "iocb = 0x%x,\nsk = 0x%x,\nmsg = 0x%x,\nlen = %d,\nnoblock = %d,\nflags = %d,\naddr_len = 0x%x", iocb, sk, msg, len, noblock, flags, addr_len);
return 0;
}
/* Socket structure for Custom protocol, see struct udp_sock for example*/
struct myproto_sock{
struct inet_sock inet; // should be first member
__u16 len;
};
void
myproto_lib_hash(struct sock *sk){
printk(KERN_INFO "myproto_lib_hash() is called");
}
/* Define the **struct proto** structure for the Custom protocol defined in
net/sock.h */
struct proto myproto_prot = {
.name = "MYPROTO",
.owner = THIS_MODULE,
.close = myproto_lib_close,
.sendmsg = myproto_sendmsg,
.hash = myproto_lib_hash,
.recvmsg = myproto_recvmsg,
.obj_size = sizeof(struct myproto_sock),
.slab_flags = SLAB_DESTROY_BY_RCU,
};
int init_module(void);
void cleanup_module(void);
int
init_module(void)
{
int rc = 0;
rc = proto_register(&myproto_prot, 1);
if(rc == 0){
printk(KERN_INFO "Protocol registration is successful\n");
}
else{
printk(KERN_INFO "Protocol registration is failed\n");
cleanup_module();
return rc;
}
rc = inet_add_protocol(&myproto_protocol, IPPROTO_MYPROTO);
if(rc == 0){
printk(KERN_INFO "Protocol insertion in inet_protos[] is successful\n");
}
else{
printk(KERN_INFO "Protocol insertion in inet_protos[] is failed\n");
cleanup_module();
return rc;
}
memset(&myproto_protosw, 0 ,sizeof(myproto_protosw));
myproto_protosw.type = SOCK_RAW;
myproto_protosw.protocol = IPPROTO_MYPROTO;
myproto_protosw.prot = &myproto_prot;
extern const struct proto_ops inet_dgram_ops; // defined in ipv4/af_inet.c
myproto_protosw.ops = &inet_dgram_ops;
myproto_protosw.flags = INET_PROTOSW_REUSE;
inet_register_protosw(&myproto_protosw);
return 0;
}
void cleanup_module(void)
{
int rc = 0;
rc = inet_del_protocol(&myproto_protocol, IPPROTO_MYPROTO);
if(rc == 0)
printk(KERN_INFO "Protocol removed successful\n");
else
printk(KERN_INFO "Protocol removal failed\n");
proto_unregister(&myproto_prot);
printk(KERN_INFO "Module cleaned up\n");
return;
}

create SOCK_RAW socket just for sending data without any recvform()

If I create a socket whose type is SOCK_RAW only to send some data without receiving any data, is there any problem when kernel continue to receive network packets and copy its datagram to somebuffer (of application?). In other words, after the somebuffer is filled what will happened? error or ignore?
I don't know how to prevent kernel from delivering the copy of datagram to my application.
Reference http://sock-raw.org/papers/sock_raw 0x4 raw_input
After the IP layer processes
a new incoming IP datagram, it calls ip_local_deliver_finish() kernel function
which is responsibe for calling a registered transport protocol handler by
inspecting the protocol field of the IP header (remember from above). However
before it delivers the datagram to the handler, it checks every time if an
application has created a raw socket with the same protocol number. If there
is one or more such applications, it makes a copy of the datagram and delivers
it to them as well.
You can use shutdown(2) in order to shutdown reception part of the socket.
See shutdown man page
EDIT : I found that shutdown only works on connected (ie TCP) sockets.
With Raw socket, there are 2 possibilities :
Receive data into a temporary buffer (with recv) and discard them (perhaps in an other thread)
If I remember well, when the socket buffer is full, incoming data are automatically discarded (and data in the buffer aren't modified), so you can set the socket reception buffer size to 0 (and increase it later if needed).
Here's how to set reception buffer size to 0 :
int opt = 0;
setsockopt(sock_fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt));
TEST
/**
* #file raw_print_pkt.c
* #brief
* #author Airead Fan <fgh1987168#gmail.com>
* #date 2012/08/22 12:35:22
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
int main(int argc, char *argv[])
{
int s;
ssize_t rn; /* receive number */
struct sockaddr_in saddr;
char packet[4096];
int count;
if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_TCP)) < 0) {
perror("error:");
exit(EXIT_FAILURE);
}
memset(packet, 0, sizeof(packet));
socklen_t *len = (socklen_t *)sizeof(saddr);
int fromlen = sizeof(saddr);
int opt = 0;
count = 0;
while(1) {
if ((rn = recvfrom(s, (char *)&packet, sizeof(packet), 0,
(struct sockaddr *)&saddr, &fromlen)) < 0)
perror("packet receive error:");
if (rn == 0) {
printf("the peer has performed an orderly shutdown\n");
break;
}
printf("[%d] rn = %lu \n", count++, rn);
if (count == 16) {
if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt)) < 0) {
perror("setsocketopt failed");
} else {
fprintf(stdout, "setsocketopt successful\n");
}
// int shutdown(int sockfd, int how);
/* if (shutdown(s, SHUT_RD) < 0) {
* perror("shutdown failed");
* } */
}
}
return 0;
}
TEST 2 (same includes):
int main(int argc, char *argv[])
{
int s;
ssize_t rn; /* receive number */
char packet[4096];
int count;
if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_TCP)) < 0) {
perror("error:");
exit(EXIT_FAILURE);
}
memset(packet, 0, sizeof(packet));
int opt = 0;
count = 0;
//Set recv buffer size
if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt)) < 0) {
perror("setsocketopt failed");
} else {
fprintf(stdout, "setsocketopt successful\n");
}
//10 seconds countdown
int i = 10;
while(i > 0)
{
printf("\r%d ", i);
fflush(stdout);
i--;
sleep(1);
}
printf("\n");
while(1) {
if ((rn = recv(s, (char *)&packet, sizeof(packet), 0)) <= 0)
perror("packet receive error:");
printf("[%d] rn = %lu \n", count++, rn);
}
return 0;
}
Here's how to proceed with test 2 :
First of all, set the buffer size to 4096 (or bigger if you have a lot of traffic on your network). Compile and launch. During the 10 seconds before starting receiving data, send a lot of data to the socket. After the 10 seconds, the program will receive everything you sent during the countdown.
After that, set the buffer size to 0. Proceed as previously. After the 10 seconds, the program won't receive the data you sent during the countdown. But if you send data while it's in recvfrom, it will read them normally.
I don't really understand what you want! if you want just to inject some packets, it's simple:
#include<netinet/tcp.h> /* TCP header */
#include<netinet/ip.h> /* IP header */
/* Checksum compute function */
/* source : http://www.winpcap.org/pipermail/winpcap-users/2007-July/001984.html */
unsigned short checksum(unsigned short *buffer, int size)
{
unsigned long cksum=0;
while(size >1)
{
cksum+=*buffer++;
size -=sizeof(unsigned short);
}
if(size)
cksum += *(UCHAR*)buffer;
cksum = (cksum >> 16) + (cksum & 0xffff);
cksum += (cksum >>16);
return (unsigned short)(~cksum);
}
int main (int argc, char **argv)
{
char packet_buffer[BUFFER_SIZE];
struct sockaddr_in sin;
struct iphdr *ip_header; /* IP header */
struct tcphdr *tcp_header; /* TCP header */
int flag = 1;
/* Creating RAW socket */
int raw_socket = socket (PF_INET, SOCK_RAW, IPPROTO_TCP);
ip_header = (struct iphdr *) packet_buffer;
tcp_header = (struct tcphdr *) (packet_buffer + sizeof (struct ip));
sin.sin_family = AF_INET;
sin.sin_port = htons(PORT_NUMBER);
sin.sin_addr.s_addr = inet_addr (IP_ADDRESS);
/* Zeroing the bbuffer */
memset (packet_buffer, 0, BUFFER_SIZE);
/* Construct your IP Header */
ip_header->ihl = 5;
ip_header->version = 4;
ip_header->tos = 0;
ip_header->tot_len = sizeof (struct ip) + sizeof (struct tcphdr);
ip_header->id = htonl(CHOOSE_PACKET_ID);
ip_header->frag_off = 0;
ip_header->ttl = 255;
ip_header->protocol = 6; /* TCP. Change to 17 if you want UDP */
ip_header->check = 0;
ip_header->saddr = inet_addr (SOURCE_IP_ADDRESS_TO_SPOOF);
ip_header->daddr = sin.sin_addr.s_addr;
/* Construct your TCP Header */
tcp_header->source = htons (SOURCE);
tcp_header->dest = htons(DEST);
tcp_header->seq = random();
tcp_header->ack_seq = 0;
tcp_header->doff = 0;
tcp_header->syn = 1;
tcp_header->window = htonl(65535);
tcp_header->check = 0;
tcp_header->urg_ptr = 0;
/* IP Checksum */
ip_header->check = checksum((unsigned short *) packet_buffer, ip_header->tot_len >> 1);
if (setsockopt(raw_socket, IPPROTO_IP, IP_HDRINCL, &flag, sizeof(flag)) < 0)
{
/* ERROR handling */
}
while (1)
{
/* Send the packet */
if (sendto(raw_socket, packet_buffer, ip_header->tot_len, 0, (struct sockaddr *) &sin, sizeof (sin)) < 0)
{
/* ERROR handling */
}
/* The rest of your need */
}
return 0;
}

Resources