PCI driver to fetch MAC address - linux

I was trying to write a pci driver which can display the MAC address of my Ethernet card.
I am running Ubuntu in a VM, and my Ethernet card is an Intel one, as follows:
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
I was able to get the data sheet of the same from Intel website and as per data sheet it says IO address are mapped to Bar 2 (Refer to pg 87) and MAC can be read using RAL/RAH register which are at offset RAL (05400h + 8*n; R/W) and RAH (05404h + 8n; R/W)
2 18h IO Register Base Address (bits 31:2) 0b mem
Based on this information, I wrote a small PCI driver, but I always get the MAC as fff, and when I debugged further I saw that the io_base address is always zero.
Below is the code
1 /*
2 Program to find a device on the PCI sub-system
3 */
4 #define VENDOR_ID 0x8086
5 #define DEVICE_ID 0x100e
6
7 #include <linux/kernel.h>
8 #include <linux/module.h>
9 #include <linux/stddef.h>
10 #include <linux/pci.h>
11 #include <linux/init.h>
12 #include <linux/cdev.h>
13 #include <linux/device.h>
14 #include <asm/io.h>
15
16 #define LOG(string...) printk(KERN_INFO string)
17
18 #define CDEV_MAJOR 227
19 #define CDEV_MINOR 0
20
21
22 MODULE_LICENSE("GPL");
23
24 struct pci_dev *pci_dev;
25 unsigned long mmio_addr;
26 unsigned long reg_len;
27 unsigned long *base_addr;
28
29 int device_probe(struct pci_dev *dev, const struct pci_device_id *id);
30 void device_remove(struct pci_dev *dev);
31
32 struct pci_device_id pci_device_id_DevicePCI[] =
33 {
34 {VENDOR_ID, DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
35 };
36
37 struct pci_driver pci_driver_DevicePCI =
38 {
39 name: "MyPCIDevice",
40 id_table: pci_device_id_DevicePCI,
41 probe: device_probe,
42 remove: device_remove
43 };
44
45
46 int init_module(void)
47 {
48 //struct pci_dev *pdev = NULL;
49 int ret = 0;
50
51 pci_register_driver(&pci_driver_DevicePCI);
52
53 return ret;
54 }
55
56 void cleanup_module(void)
57 {
58 pci_unregister_driver(&pci_driver_DevicePCI);
59
60 }
61
62 #define REGISTER_OFFSET 0x05400
64 int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
65 {
66 int ret;
67 int bar = 2; // Bar to be reserved
68 unsigned long io_base = 0;
69 unsigned long mem_len = 0;
70 unsigned int register_data = 0;
71
72 LOG("Device probed");
73
74 /* Reserve the access to PCI device */
75 ret = pci_request_region(dev, bar, "my_pci");
76 if (ret) {
77 printk(KERN_ERR "request region failed :%d\n", ret);
78 return ret;
79 }
80
81 ret = pci_enable_device(dev);
82 if (ret < 0 ) LOG("Failed while enabling ... ");
83
84 io_base = pci_resource_start(dev, bar);
85 mem_len = pci_resource_len(dev, bar);
86
87 request_region(io_base, mem_len, "my_pci");
88 register_data = inw(io_base + REGISTER_OFFSET);
89 printk(KERN_INFO "IO base = %lx", io_base);
90 printk(KERN_INFO "MAC = %x", register_data);
91
92 return ret;
93 }
94
95 void device_remove(struct pci_dev *dev)
96 {
97 pci_release_regions(dev);
98 pci_disable_device(dev);
99 }
100
lspci -x output of my card
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
00: 86 80 0e 10 07 00 30 02 02 00 00 02 00 40 00 00
10: 00 00 82 f0 00 00 00 00 41 d2 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 1e 00
30: 00 00 00 00 dc 00 00 00 00 00 00 00 09 01 ff 00
Can any one let me know what am i doing wrong?

I've modified your code and commented on changes. I have removed all of your existing comments to avoid confusion, and have only modified your probe function.
/* We need a place to store a logical address for unmapping later */
static void* logical_address;
int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int ret;
int bar_mask; /* BAR mask (this variable) and the integer BAR */
int requested_bar = 2; /* (this variable) are not the same thing, so give them */
/* separate variables */
resource_size_t io_base = 0; /* use kernel macros instead of built-in datatypes */
resource_size_t mem_len = 0;
unsigned int register_data = 0;
LOG("Device probed");
/* add this call to get the correct BAR mask */
bar_mask = pci_select_bars(dev, 0);
/* switched order - enable device before requesting memory */
ret = pci_enable_device(dev);
if (ret < 0 ) LOG("Failed while enabling ... ");
/* for this call, we want to pass the BAR mask, NOT the integer bar we want */
ret = pci_request_region(dev, bar_mask, "my_pci");
if (ret) {
printk(KERN_ERR "request region failed :%d\n", ret);
return ret;
}
/* it is in THESE calls that we request a specific BAR */
io_base = pci_resource_start(dev, requested_bar);
mem_len = pci_resource_len(dev, requested_bar);
/* you don't need to request anything again, so get rid of this line: */
/* request_region(io_base, mem_len, "my_pci"); */
/* you're missing an important step: we need to translate the IO address
* to a kernel logical address that we can actually use. Add a call to
* ioremap()
*/
logical_address = ioremap(io_base, mem_len);
/* we need to use the logical address returned by ioremap(), not the physical
* address returned by resource_start
*/
register_data = inw(logical_address + REGISTER_OFFSET);
printk(KERN_INFO "IO base = %lx", io_base);
printk(KERN_INFO "MAC = %x", register_data);
return ret;
}
You will need to add a corresponding call to iounmap() in your device_remove() routine. Take a look at the Intel e1000 driver source code (the in-tree driver for the 82540EM) for some good examples.

Related

How to convert hex_dump of packets, which were captured in kernel module, to pcap file?

I am writing a kernel module on Linux (Xubuntu x64). The version of the kernel is 5.4.0-52-generic. My kernel module is capturing traffic from an interface and printing it in hex:
Nov 10 14:04:34 ubuntu kernel: [404009.566887] Packet hex dump:
Nov 10 14:04:34 ubuntu kernel: [404009.566889] 000000 00 00 00 00 00 00 00 00 00 00 00 00 08 00 45 00
Nov 10 14:04:34 ubuntu kernel: [404009.566899] 000010 00 54 49 4C 40 00 40 01 A7 EF C0 A8 64 0E C0 A8
Nov 10 14:04:34 ubuntu kernel: [404009.566907] 000020 64 0E 08 00 9E FE 00 03 00 08 72 0E AB 5F 00 00
Nov 10 14:04:34 ubuntu kernel: [404009.566914] 000030 00 00 7B B5 01 00 00 00 00 00 10 11 12 13 14 15
Nov 10 14:04:34 ubuntu kernel: [404009.566922] 000040 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25
Nov 10 14:04:34 ubuntu kernel: [404009.566929] 000050 26 27 28 29
This output I've got using this command under root: tail -f /var/log/kern.log
The whole problem is that I need to save this output as pcap-file. I know that there is text2pcap but its library (libpcap) is user-mode only so I can't use it in kernel module (or maybe not? Correct me if I'm wrong).
Is it possible to use text2pcap in kernel module? Otherwise, How can I save an output as pcap file while being in kernel module?
Source code:
#include <linux/module.h> // included for all kernel modules
#include <linux/kernel.h> // included for KERN_INFO
#include <linux/init.h> // included for __init and __exit macros
#include <linux/skbuff.h> // included for struct sk_buff
#include <linux/if_packet.h> // include for packet info
#include <linux/ip.h> // include for ip_hdr
#include <linux/netdevice.h> // include for dev_add/remove_pack
#include <linux/if_ether.h> // include for ETH_P_ALL
#include <linux/unistd.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tester");
MODULE_DESCRIPTION("Sample linux kernel module program to capture all network packets");
struct packet_type ji_proto;
void pkt_hex_dump(struct sk_buff *skb)
{
size_t len;
int rowsize = 16;
int i, l, linelen, remaining;
int li = 0;
uint8_t *data, ch;
printk("Packet hex dump:\n");
data = (uint8_t *) skb_mac_header(skb);
if (skb_is_nonlinear(skb)) {
len = skb->data_len;
} else {
len = skb->len;
}
remaining = len;
for (i = 0; i < len; i += rowsize) {
printk("%06d\t", li);
linelen = min(remaining, rowsize);
remaining -= rowsize;
for (l = 0; l < linelen; l++) {
ch = data[l];
printk(KERN_CONT "%02X ", (uint32_t) ch);
}
data += linelen;
li += 10;
printk(KERN_CONT "\n");
}
}
/*
 * ji_packet_rcv() - packet_type handler: log the packet class and hex-dump it.
 * @skb:      received buffer; the reference handed to this handler is
 *            released before returning
 * @dev:      device the packet arrived on (unused)
 * @pt:       packet_type the handler was registered with (unused)
 * @orig_dev: original receiving device (unused)
 *
 * Always returns 0.
 */
int ji_packet_rcv (struct sk_buff *skb, struct net_device *dev,struct packet_type *pt, struct net_device *orig_dev)
{
    printk(KERN_INFO "New packet captured.\n");

    /* linux/if_packet.h : Packet types */
    // #define PACKET_HOST 0 /* To us */
    // #define PACKET_BROADCAST 1 /* To all */
    // #define PACKET_MULTICAST 2 /* To group */
    // #define PACKET_OTHERHOST 3 /* To someone else */
    // #define PACKET_OUTGOING 4 /* Outgoing of any type */
    // #define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */
    // #define PACKET_USER 6 /* To user space */
    // #define PACKET_KERNEL 7 /* To kernel space */
    /* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */
    // #define PACKET_FASTROUTE 6 /* Fastrouted frame */
    switch (skb->pkt_type) {
    case PACKET_HOST:
        printk(KERN_INFO "PACKET to us − ");
        break;
    case PACKET_BROADCAST:
        printk(KERN_INFO "PACKET to all − ");
        break;
    case PACKET_MULTICAST:
        printk(KERN_INFO "PACKET to group − ");
        break;
    case PACKET_OTHERHOST:
        printk(KERN_INFO "PACKET to someone else − ");
        break;
    case PACKET_OUTGOING:
        printk(KERN_INFO "PACKET outgoing − ");
        break;
    case PACKET_LOOPBACK:
        printk(KERN_INFO "PACKET LOOPBACK − ");
        break;
    case PACKET_FASTROUTE:
        printk(KERN_INFO "PACKET FASTROUTE − ");
        break;
    default:
        /* other packet types are dumped without a class label */
        break;
    }

    pkt_hex_dump(skb);

    /* the packet was processed, not dropped, so release it with
     * consume_skb() rather than kfree_skb() to keep drop tracing accurate
     */
    consume_skb(skb);
    return 0;
}
/*
 * ji_init() - module entry point: register an IPv4 packet handler on enp0s3.
 *
 * Returns 0 on success, or -ENODEV when the interface does not exist.
 */
static int __init ji_init(void)
{
    /* See the <linux/if_ether.h>
    When protocol is set to htons(ETH_P_ALL), then all protocols are received.
    All incoming packets of that protocol type will be passed to the packet
    socket before they are passed to the protocols implemented in the kernel. */
    /* Few examples */
    //ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
    //ETH_P_IP 0x0800 /* Internet Protocol packet */
    //ETH_P_ARP 0x0806 /* Address Resolution packet */
    //ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */
    //ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
    //ETH_P_802_2 0x0004 /* 802.2 frames */
    //ETH_P_SNAP 0x0005 /* Internal only */
    ji_proto.type = htons(ETH_P_IP);

    /* NULL is a wildcard */
    //ji_proto.dev = NULL;
    /* dev_get_by_name() returns NULL when the interface is missing; without
     * this check the handler would silently become a wildcard for every
     * device. The reference it takes has to be dropped with dev_put() once
     * the handler has been removed.
     */
    ji_proto.dev = dev_get_by_name (&init_net, "enp0s3");
    if (!ji_proto.dev) {
        printk(KERN_ERR "Interface enp0s3 not found.\n");
        return -ENODEV;
    }
    ji_proto.func = ji_packet_rcv;

    /* Packet sockets are used to receive or send raw packets at the device
    driver (OSI Layer 2) level. They allow the user to implement
    protocol modules in user space on top of the physical layer. */
    /* Add a protocol handler to the networking stack.
    The passed packet_type is linked into kernel lists and may not be freed until
    it has been removed from the kernel lists. */
    dev_add_pack (&ji_proto);
    printk(KERN_INFO "Module insertion completed successfully!\n");
    return 0; // Non-zero return means that the module couldn't be loaded.
}
/*
 * ji_cleanup() - module exit: unregister the packet handler and release the
 * network-device reference held in ji_proto.dev.
 */
static void __exit ji_cleanup(void)
{
    dev_remove_pack(&ji_proto);

    /* dev_get_by_name() took a reference on the device; without dev_put()
     * the interface could never be unregistered. ji_proto.dev may be NULL
     * if the lookup found nothing, hence the guard.
     */
    if (ji_proto.dev) {
        dev_put(ji_proto.dev);
        ji_proto.dev = NULL;
    }

    printk(KERN_INFO "Cleaning up module....\n");
}
module_init(ji_init);
module_exit(ji_cleanup);
The problem was solved using call_usermodehelper() to call user-mode text2pcap with arguments as if text2pcap was called using terminal.
Is it possible to use text2pcap in kernel module?
Not without putting it and the code it uses to write a pcap file (which isn't from libpcap, it's from a small library that's part of Wireshark, also used by dumpcap to write pcap and pcapng files) into the kernel.
How can I save an output as pcap file while being in kernel module?
You could write your own code to open a file and write to it in the kernel module; "Writing to a file from the Kernel" talks about that.
It also says
A "preferred" technique would be to pass the parameters in via IOCTLs and implement a read() function in your module. Then reading the dump from the module and writing into the file from userspace.
so you might want to consider that; the userspace code could just use libpcap to write the file.

BPF verifier rejects code: "invalid bpf_context access"

I'm trying to write a simple socket filter eBPF program that can access the socket buffer data.
#include <linux/bpf.h>
#include <linux/if_ether.h>
#define SEC(NAME) __attribute__((section(NAME), used))
/*
 * Socket-filter program: returns 1 when a complete Ethernet header fits
 * between skb->data and skb->data_end, and 0 otherwise.
 */
SEC("socket_filter")
int myprog(struct __sk_buff *skb)
{
    void *pkt_start = (void *)(long)skb->data;
    void *pkt_end = (void *)(long)skb->data_end;
    struct ethhdr *hdr = pkt_start;

    /* bounds check: one past the header must not exceed the end of data */
    if ((void *)(hdr + 1) <= pkt_end)
        return 1;
    return 0;
}
And I'm compiling using clang:
clang -I./ -I/usr/include/x86_64-linux-gnu/asm \
-I/usr/include/x86_64-linux-gnu/ -O2 -target bpf -c test.c -o test.elf
However when I try to load the program I get the following verifier error:
invalid bpf_context access off=80 size=4
My understanding of this error is that it should be thrown when you try to access context data that hasn't been checked to be within data_end, however my code does do that:
Here is the instructions for my program
0000000000000000 packet_counter:
0: 61 12 50 00 00 00 00 00 r2 = *(u32 *)(r1 + 80)
1: 61 11 4c 00 00 00 00 00 r1 = *(u32 *)(r1 + 76)
2: 07 01 00 00 0e 00 00 00 r1 += 14
3: b7 00 00 00 01 00 00 00 r0 = 1
4: 3d 12 01 00 00 00 00 00 if r2 >= r1 goto +1 <LBB0_2>
5: b7 00 00 00 00 00 00 00 r0 = 0
which would imply that the error is being caused by reading the pointer to data_end? However it only happens if I don't try to check the bounds later.
This is because your BPF program is a “socket filter”, and that such programs are not allowed to do direct packet access (see sk_filter_is_valid_access(), where we return false on trying to read skb->data or skb->data_end for example). I do not know the specific reason why it is not available, although I suspect this would be a security precaution as socket filter programs may be available to unprivileged users.
Your program loads just fine as a TC classifier, for example (bpftool prog load foo.o /sys/fs/bpf/foo type classifier -- By the way thanks for the standalone working reproducer, much appreciated!).
If you want to access data for a socket filter, you can still use the bpf_skb_load_bytes() (or bpf_skb_store_bytes()) helper, which automatically does the check on length. Something like this:
#include <linux/bpf.h>
#define SEC(NAME) __attribute__((section(NAME), used))
/*
 * Helper stub for BPF_FUNC_skb_load_bytes. The helper copies len bytes from
 * the packet at the given offset into the buffer and returns 0 on success
 * or a negative error, so its return type is an integer, not a pointer.
 */
static long (*bpf_skb_load_bytes)(const struct __sk_buff *, __u32,
                                  void *, __u32) =
    (void *) BPF_FUNC_skb_load_bytes;
/*
 * Socket-filter program: loads the first four packet bytes through the
 * bpf_skb_load_bytes() helper. Returns 0 when the load fails or the loaded
 * word equals 3, and 1 otherwise.
 */
SEC("socket_filter")
int myprog(struct __sk_buff *skb)
{
    __u32 first_word;

    /* the helper performs the length check that direct access would need */
    if (bpf_skb_load_bytes(skb, 0, &first_word, sizeof(first_word)) != 0)
        return 0;

    return (first_word == 3) ? 0 : 1;
}
Regarding your last comment:
However it only happens if I don't try to check the bounds later.
I suspect clang compiles out the assignments for data and data_end if you do not use them in your code, so they are no longer present and no longer a problem for the verifier.

recvfrom(2) receives UDP broadcast twice, but tcpdump(8) receives it only once

Summary: I want to receive packets from a single interface, but setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, iface, 1 + strlen(iface)) doesn't play well with recvfrom and shows all packets on all interfaces. tcpdump works well, however.
I have a strong feeling that there's something wrong with the receiver program, but I haven't been able to figure it out.
I'm working with a Netronome Agilio CX SmartNIC. The two ports on the NIC are connected together with one cable, and the port on the motherboard are connected to the wall (so I can SSH into it). The board-loaded NIC is eth0 in the OS, while the SmartNIC presents two interfaces as enp1s0np0 and enp1s0np1.
Because the two interfaces on the SmartNIC has no associated IP addresses, I have to send broadcast to one port so it arrives at the other port. For now, I send to enp1s0np0 and expect it from enp1s0np1.
I have also deployed a XDP offload program that modifies part of the packet so I can know whether the packet arrives at enp1s0np1. The program changes the string at position 28~35 to another string (of the form !......!).
The problem I am having is, I wrote a receiver program myself, and it receives two packets for every packet I send - the first is the original, while the second is the XDP-modified packet. However, tcpdump only receives the modified packet (expected behavior).
I am unsure why my program is getting it twice - I don't think it should be able to see the unmodified packet.
This is the packet sender program. It reads 32 double-precision floating point numbers and packs them into a 256-byte block, and prepends the block with 16 bytes of "magic numbers".
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include "config.h"
#include "util.h"
typedef unsigned char byte;
void sanity_check(void);
/*
 * Sender: reads 32 doubles from stdin, packs them behind a 16-byte header
 * (MAGIC followed by zeros) into a 272-byte block and broadcasts the block
 * as one UDP datagram to port 6666 through interface enp1s0np0.
 *
 * Returns 0 on success; every failure exits through errorexit() or exit(1).
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    sanity_check();

    int sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0)
        errorexit("socket");

    char iface[16] = "enp1s0np0";
    if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, iface, 1 + strlen(iface)))
        errorexit("setsockopt");

    /* Socket option names are plain numbers, not bit flags: ORing two of
     * them selects some unrelated option, so set each one separately. */
    int enable = 1;
    if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &enable, sizeof enable))
        errorexit("setsockopt");
    if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof enable))
        errorexit("setsockopt");

    struct sockaddr_in target_addr = {
        .sin_family = AF_INET,
        .sin_addr.s_addr = htonl(INADDR_BROADCAST),
        .sin_port = htons(6666)
    };

    byte buf[272];
    // Prepare data
    {
        unsigned long magic = MAGIC;
        memcpy(buf + 0, &magic, sizeof magic);
        unsigned long zero = 0UL;
        memcpy(buf + 8, &zero, sizeof zero);
        double data;
        for (int i = 0; i < 32; i++) {
            /* a failed conversion would otherwise send an indeterminate value */
            if (scanf(" %lf", &data) != 1) {
                fprintf(stderr, "Expected 32 numbers on stdin\n");
                exit(1);
            }
            memcpy(buf + 16 + 8 * i, &data, sizeof data);
        }
    }

    /* sendto() returns the number of bytes sent, or -1 on error */
    ssize_t sent = sendto(sock, buf, sizeof(buf), 0, (struct sockaddr*)&target_addr, sizeof target_addr);
    if (sent < 0)
        errorexit("send");
    printf("%zd bytes sent.\n", sent);

    // if (shutdown(sock, SHUT_RDWR))
    if (close(sock))
        errorexit("close");
    return 0;
}
/* Exits with status 1 unless both the real and the effective user id are 0. */
void sanity_check(void) {
    if (getuid() == 0 && geteuid() == 0)
        return;

    fprintf(stderr, "Need root to proceed\n");
    exit(1);
}
This is the receiver program. In fact, it's receiving every single packet that comes into the machine, with most of them being SSH data. I had to add checks for the magic number or it just spams the terminal. I guess it just failed to listen to the specific interface. (Check is if (buf[28] != '!' || buf[35] != '!') continue;)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include "config.h"
#include "util.h"
typedef unsigned char byte;
void sanity_check(void);
/*
 * Receiver: opens a packet socket bound to interface enp1s0np1 and hex-dumps
 * every received packet that carries the '!'...'!' marker at bytes 28..35.
 *
 * Returns 0 when recvfrom() reports a zero-length read; every failure exits
 * through errorexit().
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    sanity_check();

    /* Protocol 0 means the socket receives nothing until bind() supplies a
     * protocol, so no packets from other interfaces are queued in between. */
    int sock = socket(AF_PACKET, SOCK_DGRAM, 0);
    if (sock < 0)
        errorexit("socket");

    char iface[16] = "enp1s0np1";
    unsigned int ifindex = if_nametoindex(iface);
    if (ifindex == 0)
        errorexit("if_nametoindex");

    /* SO_BINDTODEVICE is not supported for packet sockets; bind(2) with a
     * sockaddr_ll is what restricts the socket to a single interface. */
    struct sockaddr_ll bind_addr = {
        .sll_family = AF_PACKET,
        .sll_protocol = htons(ETH_P_ALL),
        .sll_ifindex = (int)ifindex
    };
    if (bind(sock, (struct sockaddr*)&bind_addr, sizeof bind_addr))
        errorexit("bind");

    size_t bufsize = 8192;
    byte *buf = malloc(bufsize);
    if (buf == NULL)
        errorexit("malloc");

    printf("Preparing to receive... ");
    fflush(stdout);
    while (1) {
        /* packet sockets report a sockaddr_ll; the length is a value-result
         * argument and has to be reset before every call */
        struct sockaddr_ll from;
        socklen_t from_len = sizeof from;
        ssize_t received = recvfrom(sock, buf, bufsize, 0, (struct sockaddr *)&from, &from_len);
        if (received < 0)
            errorexit("receive");
        if (received == 0)
            break;
        /* Magic check; shorter packets would compare stale buffer bytes */
        if (received < 36 || buf[28] != '!' || buf[35] != '!')
            continue;
        printf("%zd bytes received.\n", received);
        hexdump(buf, (int)received);
    }

    if (close(sock))
        errorexit("close");
    free(buf);
    return 0;
}
/* Exits with status 1 unless both the real and the effective user id are 0. */
void sanity_check(void) {
    if (getuid() == 0 && geteuid() == 0)
        return;

    fprintf(stderr, "Need root to proceed\n");
    exit(1);
}
The file util.c (link to Gist) contains two utility functions (errorexit which is just a wrapper of perror and exit, and a badly hand-crafted hexdump function for displaying) and is irrelevant here.
The constant MAGIC is defined as
#define MAGIC 0x216C7174786A7A21UL // string "!zjxtql!"
Here's the console output of my program (recv.c compiled into recv) and the tcpdump command, with irrelevant data truncated. Before both programs are killed, only one packet is sent from the sender program. The special thing to note is the data at position 28 (was originally !zjxtql!, should be modified to !wjfskb! by the XDP offload program).
$ sudo ./recv
Preparing to receive...
300 bytes received.
00000000 45 00 01 2C 4C 1A 40 00 40 11 B4 7F 72 D6 C6 51 |E..,L.#.#...r..Q|
00000010 FF FF FF FF B0 9F 1A 0A 01 18 3A 51 21 7A 6A 78 |..........:Q!zjx|
00000020 74 71 6C 21 00 00 00 00 00 00 00 00 29 5C 8F C2 |tql!........)\..|
00000120 14 AE F7 3F AE 47 E1 7A 14 AE F7 3F |...?.G.z...?|
0000012C
300 bytes received.
00000000 45 00 01 2C 4C 1A 40 00 40 11 B4 7F 72 D6 C6 51 |E..,L.#.#...r..Q|
00000010 FF FF FF FF B0 9F 1A 0A 01 18 C0 9B 21 77 6A 66 |............!wjf|
00000020 73 6B 62 21 00 00 00 00 00 00 00 00 29 5C 8F C2 |skb!........)\..|
00000120 14 AE F7 3F AE 47 E1 7A 14 AE F7 3F |...?.G.z...?|
0000012C
^C
$ sudo tcpdump -vv -X -i enp1s0np1 port 6666
tcpdump: listening on enp1s0np1, link-type EN10MB (Ethernet), capture size 262144 bytes
04:53:52.819657 IP (tos 0x0, ttl 64, id 8595, offset 0, flags [DF], proto UDP (17), length 300)
agilio1415.47585 > 255.255.255.255.ircu-2: [bad udp cksum 0xb759 -> 0xc274!] UDP, length 272
0x0000: 4500 012c 2193 4000 4011 df06 72d6 c651 E..,!.#.#...r..Q
0x0010: ffff ffff b9e1 1a0a 0118 b759 2177 6a66 ...........Y!wjf
0x0020: 736b 6221 0000 0000 0000 0000 295c 8fc2 skb!........)\..
0x0120: 14ae f73f ae47 e17a 14ae f73f ...?.G.z...?
^C
I have tried straceing tcpdump and trying its job with setsockopt:
sudo strace -e setsockopt tcpdump -vv -X -i enp1s0np1 port 6666
which gives
setsockopt(3, SOL_PACKET, PACKET_ADD_MEMBERSHIP, {mr_ifindex=if_nametoindex("enp1s0np1"), mr_type=PACKET_MR_PROMISC, mr_alen=0, mr_address=}, 16) = 0
setsockopt(3, SOL_PACKET, PACKET_AUXDATA, [1], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_VERSION, [1], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RESERVE, [4], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RX_RING, 0x7ffe9d8d1510, 28) = 0
setsockopt(7, SOL_SOCKET, SO_RCVBUF, [8388608], 4) = 0
setsockopt(7, SOL_SOCKET, SO_SNDBUF, [8388608], 4) = 0
setsockopt(3, SOL_SOCKET, SO_ATTACH_FILTER, {len=1, filter=0x7fa5289de000}, 16) = 0
setsockopt(3, SOL_SOCKET, SO_ATTACH_FILTER, {len=24, filter=0x56025631f280}, 16) = 0
tcpdump: listening on enp1s0np1, link-type EN10MB (Ethernet), capture size 262144 bytes
because I don't understand the others, I mimicked only the first call to setsockopt of tcpdump:
struct packet_mreq mreq = {
.mr_ifindex = if_nametoindex(iface),
.mr_type = PACKET_MR_PROMISC,
.mr_alen = 0
};
if (setsockopt(sock, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)))
errorexit("setsockopt");
The above code is a replacement for the setsockopt(SO_BINDTODEVICE) call in the receiver program, but I haven't observed any difference (still all packets from all interfaces are caught).
It looks like all that I'm missing is a bind(2). Unfortunately, SO_BINDTODEVICE doesn't work with AF_PACKET, so bind(2) is the only solution.
The code isn't complex:
struct sockaddr_ll sll = {
.sll_family = AF_PACKET,
.sll_ifindex = if_nametoindex(iface),
.sll_protocol = htons(3) // 3 = ETH_P_ALL
};
if (bind(sock, (struct sockaddr*)&sll, sizeof sll))
errorexit("sock");
From the same socket.7 page:
SO_BINDTODEVICE
... Note that this works only for some socket types, particularly AF_INET sockets. It is not supported for packet sockets (use normal bind(2) there).
Hmmm, guess I should've read the manual more thoroughly.

Why my spi test C code get this result?

At the bottom is the spi test code (spitest.c) I used, and when running it on my linux kit, I got this result:
root#abcd-kit:/system # ./spitest
open device: /dev/spidev0.0
set spi mode: 0
set bits per word: 8
set max speed: 2000000 Hz (2 MHz)
the received data is below:
00 00 00 00 30 30
30 0A 00 00 00 00
00 00 00 00 2F 73
the received data is below:
00 00 00 00 30 30
30 0A 00 00 00 00
00 00 00 00 2F 73
...
dmesg output:
<7>[ 1254.714088] usif-spi e1100000.usif1: Pushing msg a8085ed0
<6>[ 1254.714367] SPI XFER :ae81c700 , Length : 18
<6>[ 1254.714404] TX Buf :a6207000 , TX DMA : (null)
<6>[ 1254.714425] RX Buf :92bf5000 , RX DMA : (null)
<6>[ 1254.714445] CS change:0, bits/w :8, delay : 0 us, speed : 2000000 Hz
<7>[ 1254.714471] TX--->:31 a5 bb 00 00 bb fc 76 80 84 1e 00 5c 29 7d 77
<7>[ 1254.714491] TX--->:44 b9
<7>[ 1254.714511] RX--->:00 00 00 00 30 30 30 0a 00 00 00 00 00 00 00 00
<7>[ 1254.714534] RX--->:2f 73
<7>[ 1254.714558] usif-spi e1100000.usif1: Msg a8085ed0 completed with status 0
<7>[ 1255.725936] usif-spi e1100000.usif1: Pushing msg a8085ed0
<6>[ 1255.726472] SPI XFER :ae81cc40 , Length : 18
<6>[ 1255.726604] TX Buf :a6207000 , TX DMA : (null)
<6>[ 1255.726656] RX Buf :92bf5000 , RX DMA : (null)
<6>[ 1255.726706] CS change:0, bits/w :8, delay : 0 us, speed : 2000000 Hz
<7>[ 1255.726773] TX--->:31 a5 bb 00 00 bb fc 76 94 29 7d 77 5c 29 7d 77
<7>[ 1255.726829] TX--->:44 b9
<7>[ 1255.726875] RX--->:00 00 00 00 30 30 30 0a 00 00 00 00 00 00 00 00
<7>[ 1255.726925] RX--->:2f 73
The biggest problem is that I cannot get a correct result from the MISO pin (reads are wrong, although writes work correctly). Whatever I do, e.g. connecting MISO to ground or to 1.8 V, I always get this kind of result. The first 5 bytes are always zero (I think this is because the tx buffer has a size of 5 and the transfer is half duplex), followed by random data, even though I used memset() to zero the rx buffer before each SPI transfer. If I stop the program and run it again, the data changes, but it is still random.
How could I read correct data from miso pin?
Thanks!
spitest.c
/*
* SPI testing utility (using spidev driver)
*
* Copyright (c) 2007 MontaVista Software, Inc.
* Copyright (c) 2007 Anton Vorontsov <avorontsov#ru.mvista.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* Cross-compile with cross-gcc -I/path/to/cross-kernel/include
*/
#include <stdint.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "spidev.h"
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/*
 * Report a fatal error and terminate: passes s to perror() and then calls
 * abort(). Does not return.
 */
static void pabort(const char *s)
{
perror(s);
abort();
}
static const char *device = "/dev/spidev0.0";
static uint8_t mode;
static uint8_t bits = 8;
static uint32_t speed = 2000000;
static uint16_t delay;
#define LENGTH 18
static void transfer(int fd)
{
int ret, i;
uint8_t tx[5] = {0x31, 0xa5, 0xbb};
uint8_t rx[LENGTH] = {0, };
struct spi_ioc_transfer tr = {
.tx_buf = (unsigned long)tx,
.rx_buf = (unsigned long)rx,
.len = LENGTH,
.delay_usecs = delay,
.speed_hz = speed,
.bits_per_word = bits, //important, bits = 8 means byte transfer is possible
};
memset(rx, 0, LENGTH);
ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
if (ret < 1)
pabort("can't send spi message\n");
printf("the received data is below:\n");
for (ret = 0; ret < LENGTH; ret++) { //print the received data, by Tom Xue
if (!(ret % 6))
puts("");
printf("%.2X ", rx[ret]);
}
puts("");
}
/*
 * Open the spidev node, write and read back mode, bits per word and maximum
 * speed, print the resulting settings, then run transfer() once per second.
 * The loop has no exit condition, so the statements after it are not reached.
 */
int main(int argc, char *argv[])
{
    int ret = 0;
    int fd;

    (void)argc;
    (void)argv;

    /* pabort() goes through perror(), which appends ": <error>\n" itself,
     * so the prefixes below carry no newline of their own */
    fd = open(device, O_RDWR);
    if (fd < 0)
        pabort("can't open device");

    /*
     * spi mode
     */
    ret = ioctl(fd, SPI_IOC_WR_MODE, &mode);
    if (ret == -1)
        pabort("can't set spi mode");
    ret = ioctl(fd, SPI_IOC_RD_MODE, &mode);
    if (ret == -1)
        pabort("can't get spi mode");

    /*
     * bits per word
     */
    ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
    if (ret == -1)
        pabort("can't set bits per word");
    ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
    if (ret == -1)
        pabort("can't get bits per word");

    /*
     * max speed hz
     */
    ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
    if (ret == -1)
        pabort("can't set max speed hz");
    ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
    if (ret == -1)
        pabort("can't get max speed hz");

    printf("open device: %s\n", device);
    printf("set spi mode: %d\n", mode);
    printf("set bits per word: %d\n", bits);
    /* speed is a uint32_t, so it is printed as unsigned */
    printf("set max speed: %u Hz (%u MHz)\n", (unsigned)speed, (unsigned)(speed / 1000000));

    while (1) {
        transfer(fd);
        sleep(1);
    }

    close(fd);
    return ret;
}
More:
My CPU is Intel Sofia-3gr, I guess its spec is not publicly released. I see the Tx data from my oscilloscope, and confirmed that TX is right.
I can also printk the pinmux/pinctrl setting (use ioremap and ioread32), it is also right. I say it right also because that I can see how to set it as SPI from those dts reference files, I just follow them.
Key:
I just find that the SPI TX interrupt is pending each time a spi transfer starts, but no SPI RX interrupt pending. Hence the spi driver code will not read the RX data at all. As the reason, I don't know.

Using gdb to analyze kmemleak scan result

I have read this link Analyzing kmemleak result, but it seems not give the final answer.
After scanning the memory leak by kmemleak. I can derive the result of unreferenced object's address, say 0xffff880060bb3060 [in the update].
and I use the following gdb command to see the location of this address.
Note: I found the .text address in /sys/module/hello/sections/.text, and its value is 0xffffffffa038c000.
sudo insmod hello.ko
gdb hello.ko /proc/kcore
add-symbol-file hello.ko 0xffffffffa038c000
list *0xffff880060bb3060
However the list command show nothing.
I have used these steps to debug of kernel oops message, so I'm sure the command I type is correct.
So how can I use gdb to analyze the result given by kmemleak?
Update
I think I should also provide my sample code hello.c:
#include <linux/module.h>
#include <linux/list.h>
#include <linux/slab.h>
/* One list element: an integer payload plus the links that chain it into a list. */
struct num {
struct list_head node; /* list linkage used by list_add() */
int number; /* value stored in this element */
};
/*
 * hello_init() - allocate five list nodes, print their numbers, and return
 * without freeing them, so the allocations remain as a sample leak.
 *
 * Returns 0 on success, or -ENOMEM when an allocation fails (the nodes
 * allocated up to that point are freed first).
 */
static int __init hello_init(void)
{
    LIST_HEAD(my_list);
    struct num *tmp;
    struct num *next;
    int i;

    for (i = 0; i < 5; i++) {
        tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); //I expect this line will be output as memory leak
        /* kmalloc() can fail; dereferencing NULL here would oops */
        if (!tmp)
            goto err_free;
        tmp->number = i;
        list_add(&tmp->node, &my_list);
    }

    list_for_each_entry(tmp, &my_list, node)
        printk("%d\n", tmp->number);

    /* the nodes are deliberately not freed on the success path */
    return 0;

err_free:
    list_for_each_entry_safe(tmp, next, &my_list, node) {
        list_del(&tmp->node);
        kfree(tmp);
    }
    return -ENOMEM;
}
/* Module exit handler: only logs a message; it frees nothing. */
static void hello_exit(void)
{
printk(KERN_ALERT "Good, haha\n");
}
module_init(hello_init);
module_exit(hello_exit);
And the following is the scan result:
unreferenced object 0xffff880060bb3060 (size 32):
comm "swapper/0", pid 1, jiffies 4294895149 (age 802.568s)
hex dump (first 32 bytes):
00 01 10 00 00 00 ad de 00 02 20 00 00 00 ad de .......... .....
00 28 b1 57 00 88 ff ff 01 4f 0a 56 00 00 00 00 .(.W.....O.V....
backtrace:
[<ffffffff8170ea7e>] kmemleak_alloc+0x4e/0xc0
[<ffffffff811a5de8>] kmem_cache_alloc_trace+0xb8/0x160
[<ffffffff810b0ed6>] pm_vt_switch_required+0x76/0xb0
[<ffffffff813c1e61>] register_framebuffer+0x1d1/0x370
[<ffffffff813db3fc>] vesafb_probe+0x5bc/0xa70
[<ffffffff814870cc>] platform_drv_probe+0x3c/0x70
[<ffffffff81484d74>] driver_probe_device+0x94/0x3d0
[<ffffffff8148517b>] __driver_attach+0x8b/0x90
[<ffffffff81482913>] bus_for_each_dev+0x73/0xb0
[<ffffffff814847ae>] driver_attach+0x1e/0x20
[<ffffffff814842f8>] bus_add_driver+0x208/0x2e0
[<ffffffff81485844>] driver_register+0x64/0xf0
[<ffffffff81486eca>] __platform_driver_register+0x4a/0x50
[<ffffffff81d923f1>] vesafb_driver_init+0x12/0x14
[<ffffffff810020f2>] do_one_initcall+0xf2/0x1b0
[<ffffffff81d50f24>] kernel_init_freeable+0x144/0x1cc

Resources