linux kernel ip_options_build() function - linux

Below is the ip_options_build() in linux kernel 3.4, line 51 and 52:
51 if (opt->srr)
52 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
I understand that the two lines say, if source routing option is present, copy the destination address to the end of the option, that suggests that iph[opt->srr+1] is the length of the source routing option, but I don't get it why?
31/*
32 * Write options to IP header, record destination address to
33 * source route option, address of outgoing interface
34 * (we should already know it, so that this function is allowed be
35 * called only after routing decision) and timestamp,
36 * if we originate this datagram.
37 *
38 * daddr is real destination address, next hop is recorded in IP header.
39 * saddr is address of outgoing interface.
40 */
41
42void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
43 __be32 daddr, struct rtable *rt, int is_frag)
44{
45 unsigned char *iph = skb_network_header(skb);
46
47 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
48 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
49 opt = &(IPCB(skb)->opt);
50
51 if (opt->srr)
52 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
53
54 if (!is_frag) {
55 if (opt->rr_needaddr)
56 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
57 if (opt->ts_needaddr)
58 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
59 if (opt->ts_needtime) {
60 struct timespec tv;
61 __be32 midtime;
62 getnstimeofday(&tv);
63 midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC
+ tv.tv_nsec / NSEC_PER_MSEC);
64 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
65 }
66 return;
67 }
68 if (opt->rr) {
69 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
70 opt->rr = 0;
71 opt->rr_needaddr = 0;
72 }
73 if (opt->ts) {
74 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
75 opt->ts = 0;
76 opt->ts_needaddr = opt->ts_needtime = 0;
77 }
78}

If I remember correctly, iph + opt->srr is basically the address of the first byte of the srr option. The format of the option itself is as follows:
TYPE (1 byte) | LENGTH (1 byte) | OFFSET (1 byte) | ... and then some addresses 4 bytes each
The LENGTH "field" specifies the length in bytes of the entire option, so that's why iph[opt->srr+1] is the length of the option.

Related

Where and When Linux Kernel Setup GDT?

I have some doubt regarding GDT in linux. I try to get GDT info in kernel space (Ring0), and my test code called in system call context. In the test code, I try to print ss register (Segment Selector), and get ss segment descriptor by GDTR and ss-segment-selector.
77 void printGDTInfo(void) {
78 struct desc_ptr pgdt, *pss_desc;
79 unsigned long ssr;
80 struct desc_struct *ss_desc;
81
82 // Get GDTR
83 native_store_gdt(&pgdt);
84 unsigned long gdt_addr = pgdt.address;
85 unsigned long gdt_size = pgdt.size;
86 printk("[GDT] Addr:%lu |Size:%lu\n", gdt_addr, gdt_size);
87
88 // Get SS Register
89 asm("mov %%ss, %%eax"
90 :"=a"(ssr));
91 printk("SSR In Kernel:%lu\n", ssr);
92 unsigned long desc_index = ssr >> 3; // SHIFT for Descriptor Index
93 printk("SSR Shift:%lu\n", desc_index);
94 ss_desc = (struct desc_struct*)(gdt_addr + desc_index * sizeof(struct desc_struct));
95 printk("SSR:Base0:%lu, Base1:%lu,Base2:%lu\n", ss_desc->base0, ss_desc->base1, ss_desc->base2);
96 }
What confused me most is the "base" fields in ss-descriptor are all zero (line95 print). I try to print __USER_DS segment descriptor, the "base" fields are also zero.
Is that true? All the segment in Linux use same Base Address(zero)?
I want to check the GDT initialization in Linux Source Code but I am not sure where and when Linux setup GDT?
I find codes in "arch/x86/kernel/cpu/common.c"like this, the second parameter(zero) of GDT_ENTRY_INIT is zero, which means the base0/base1/base2 fields in segment descriptor are all zero.
125 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
126 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
127 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
If that is true, all the segment has same base address(zero). As a result, same virtual address in Ring0 and Ring1 will mapping to the same linear address?
I am appreciate for your help :)

Porting package to NodeJS and having trouble with Buffer.alloc()

I'm porting a game server to NodeJS. The problem is that I'm not familiar at all with packets and building them out. After reading through the NodeJS docs, I think that I've structured my response the way that the client expects, but I'm the client doesn't seem to respond well to what my server is sending.
Hoping someone can confirm that it's something in my server response that doesn't match the documentation requirements..
The client expects the following response from the TCP socket:
Packet Build
BYTE[1] cmd (0xA8)
BYTE[2] total length of this packet
BYTE[1] System Info Flag (0x5D)
BYTE[2] # of servers
(Repeat as needed for each server)
BYTE[2] server index (0-based)
BYTE[32] server name
BYTE percent full
BYTE timezone
BYTE[4] server IP to ping
Here is my NodeJS interpretation of the docs.
/** Build response header */
const length = 45
serverResponse = Buffer.alloc(length)
serverResponse.fill(0xA8, 0)
serverResponse.fill(Buffer.alloc(2, length), 1)
// Last fill had a buffer size of 2, so our next offset considers that
serverResponse.fill(0x5d,3)
serverResponse.fill(Buffer.alloc(2, 1),4)
/** Build response server list */
serverResponse.fill(Buffer.alloc(2, 0),5) /* 2 Bytes (server index, 0-based) */
// Last fill had a buffer size of 2, so our next offset considers that
serverResponse.fill(
Buffer.alloc(32, Buffer.from('Heres your server')),
7) /* 32 bytes (Server name) */
serverResponse.fill(Buffer.alloc(1, 9), 39) /* 1 Bytes (% Full) */
/**
* Trying -12 - 12 range divided by (60 * 60)). #see
* https://github.com/Sphereserver/Source/blob/0be2bc1d2e16659239460495b9819eb8dcfd39ed/src/graysvr/CServRef.cpp#L42
*/
serverResponse.fill(Buffer.alloc(1, -5 / (60 *60)),40) /* 1 Bytes (Timezone */
serverResponse.fill(Buffer.from([0,0,0,0]), 41) /** IP Address */
This outputs:
<Buffer a8 2d 2d 5d 01 00 00 48 65 72 65 73 20 79 6f 75 72 20 73 65 72 76 65 72 48 65 72 65 73 20 79 6f 75 72 20 73 65 72 76 09 00 00 00 00 00>
Edit: I've discovered the following that may help me along.
NodeJS equivalents
/**
* BYTE 8-bit unsigned buf.writeUInt8()
* SBYTE 8-bit signed buf.writeInt8()
* BOOL 8-bit boolean (0x00=False, 0xFF=True) buf.fill(0x00) || buf.fill(0xFF)
* CHAR 8-bit single ASCII character buf.from('Text', 'ascii') - Make 8-bit?
* UNI 16-bit single unicode character buf.from('A', 'utf16le') - Correct?
* SHORT 16-bit signed buf.writeInt16BE() - #see https://www.reddit.com/r/node/comments/9hob2u/buffer_endianness_little_endian_or_big_endian_how/
* USHORT 16-bit unsigned buf.writeUInt16BE() - #see https://www.reddit.com/r/node/comments/9hob2u/buffer_endianness_little_endian_or_big_endian_how/
* INT 32-bit signed buf.writeInt32BE - #see https://www.reddit.com/r/node/comments/9hob2u/buffer_endianness_little_endian_or_big_endian_how/
* UINT 32-bit unsigned buf.writeUInt32BE - #see https://www.reddit.com/r/node/comments/9hob2u/buffer_endianness_little_endian_or_big_endian_how/
*/

Why my spi test C code get this result?

At the bottom is the spi test code (spitest.c) I used, and when running it on my linux kit, I got this result:
root#abcd-kit:/system # ./spitest
open device: /dev/spidev0.0
set spi mode: 0
set bits per word: 8
set max speed: 2000000 Hz (2 MHz)
the received data is below:
00 00 00 00 30 30
30 0A 00 00 00 00
00 00 00 00 2F 73
the received data is below:
00 00 00 00 30 30
30 0A 00 00 00 00
00 00 00 00 2F 73
...
dmesg output:
<7>[ 1254.714088] usif-spi e1100000.usif1: Pushing msg a8085ed0
<6>[ 1254.714367] SPI XFER :ae81c700 , Length : 18
<6>[ 1254.714404] TX Buf :a6207000 , TX DMA : (null)
<6>[ 1254.714425] RX Buf :92bf5000 , RX DMA : (null)
<6>[ 1254.714445] CS change:0, bits/w :8, delay : 0 us, speed : 2000000 Hz
<7>[ 1254.714471] TX--->:31 a5 bb 00 00 bb fc 76 80 84 1e 00 5c 29 7d 77
<7>[ 1254.714491] TX--->:44 b9
<7>[ 1254.714511] RX--->:00 00 00 00 30 30 30 0a 00 00 00 00 00 00 00 00
<7>[ 1254.714534] RX--->:2f 73
<7>[ 1254.714558] usif-spi e1100000.usif1: Msg a8085ed0 completed with status 0
<7>[ 1255.725936] usif-spi e1100000.usif1: Pushing msg a8085ed0
<6>[ 1255.726472] SPI XFER :ae81cc40 , Length : 18
<6>[ 1255.726604] TX Buf :a6207000 , TX DMA : (null)
<6>[ 1255.726656] RX Buf :92bf5000 , RX DMA : (null)
<6>[ 1255.726706] CS change:0, bits/w :8, delay : 0 us, speed : 2000000 Hz
<7>[ 1255.726773] TX--->:31 a5 bb 00 00 bb fc 76 94 29 7d 77 5c 29 7d 77
<7>[ 1255.726829] TX--->:44 b9
<7>[ 1255.726875] RX--->:00 00 00 00 30 30 30 0a 00 00 00 00 00 00 00 00
<7>[ 1255.726925] RX--->:2f 73
And the biggest problem is that I cannot get correct result from miso pin (read is wrong, can do write correctly). Whatever I do, e.g. connect miso to ground or 1.8V, it always give this kind of result. The fisrt 5 data are always zero (I think it is because tx buffer has size of 5 and it is half duplex), and then followed random data, even that I used memset() to set rx buffer data to be zero before each spi transfer. And if I stop the program and run it again, the data changed but they are still random.
How could I read correct data from miso pin?
Thanks!
spitest.c
/*
* SPI testing utility (using spidev driver)
*
* Copyright (c) 2007 MontaVista Software, Inc.
* Copyright (c) 2007 Anton Vorontsov <avorontsov#ru.mvista.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* Cross-compile with cross-gcc -I/path/to/cross-kernel/include
*/
#include <stdint.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "spidev.h"
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
static void pabort(const char *s)
{
perror(s);
abort();
}
static const char *device = "/dev/spidev0.0";
static uint8_t mode;
static uint8_t bits = 8;
static uint32_t speed = 2000000;
static uint16_t delay;
#define LENGTH 18
static void transfer(int fd)
{
int ret, i;
uint8_t tx[5] = {0x31, 0xa5, 0xbb};
uint8_t rx[LENGTH] = {0, };
struct spi_ioc_transfer tr = {
.tx_buf = (unsigned long)tx,
.rx_buf = (unsigned long)rx,
.len = LENGTH,
.delay_usecs = delay,
.speed_hz = speed,
.bits_per_word = bits, //important, bits = 8 means byte transfer is possible
};
memset(rx, 0, LENGTH);
ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
if (ret < 1)
pabort("can't send spi message\n");
printf("the received data is below:\n");
for (ret = 0; ret < LENGTH; ret++) { //print the received data, by Tom Xue
if (!(ret % 6))
puts("");
printf("%.2X ", rx[ret]);
}
puts("");
}
int main(int argc, char *argv[])
{
int ret = 0;
int fd;
unsigned char rd_buf[32];
fd = open(device, O_RDWR);
if (fd < 0)
pabort("can't open device\n");
/*
* * spi mode
* */
ret = ioctl(fd, SPI_IOC_WR_MODE, &mode);
if (ret == -1)
pabort("can't set spi mode\n");
ret = ioctl(fd, SPI_IOC_RD_MODE, &mode);
if (ret == -1)
pabort("can't get spi mode\n");
/*
* * bits per word
* */
ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
if (ret == -1)
pabort("can't set bits per word\n");
ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
if (ret == -1)
pabort("can't get bits per word\n");
/*
* * max speed hz
* */
ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
if (ret == -1)
pabort("can't set max speed hz\n");
ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
if (ret == -1)
pabort("can't get max speed hz\n");
printf("open device: %s\n", device);
printf("set spi mode: %d\n", mode);
printf("set bits per word: %d\n", bits);
printf("set max speed: %d Hz (%d MHz)\n", speed, speed/1000000);
while(1){
transfer(fd);
//read(fd, rd_buf, 4);
//printf("rd_buf = %s, %d, %d, %d, %d\n", rd_buf, rd_buf[0], rd_buf[1], rd_buf[2], rd_buf[3]);
//memset(rd_buf, 0, 10);
sleep(1);
}
close(fd);
return ret;
}
More:
My CPU is Intel Sofia-3gr, I guess its spec is not publicly released. I see the Tx data from my oscilloscope, and confirmed that TX is right.
I can also printk the pinmux/pinctrl setting (use ioremap and ioread32), it is also right. I say it right also because that I can see how to set it as SPI from those dts reference files, I just follow them.
Key:
I just find that the SPI TX interrupt is pending each time a spi transfer starts, but no SPI RX interrupt pending. Hence the spi driver code will not read the RX data at all. As the reason, I don't know.

Deadlock in MCS lock implementation

Hardware:
Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64
atomics.hpp
1 #ifndef ATOMIC_UTILS_H
2 #define ATOMIC_UTILS_H
3
4 #include
5
6 #define BARRIER() __asm__ volatile ( "": : :"memory" )
7
8 #define CPU_RELAX() __asm__ volatile( "pause\n\t": : :"memory" )
9
10 #define STORE_FENCE() __asm__ volatile("mfence" ::: "memory");
11
12 class AtomicUtils
13 {
14 public:
15
16 /**
17 * check if the value at addr is equal to oldval, if so replace it with newva l
18 * and return the oldval
19 */
20 inline static size_t compareAndExchange( volatile size_t* addr, size_t oldval , size_t newval )
21 {
22 size_t ret;
23 __asm__ volatile( "lock cmpxchgq %2, %1\n\t"
24 :"=a"(ret), "+m"(*addr)
25 : "r"(newval), "0"(oldval)
26 : "memory" );
27 return ret;
28 }
29
30 /**
31 * Atomically stores x into addr and returns the previous
32 * stored in addr
33 */
34 inline static size_t loadAndStore( size_t x, volatile size_t* addr )
36 {
37 size_t ret;
38 __asm__ volatile( "lock xchgq %1, %0\n\t"
39 : "+m"(*addr), "=r"(ret)
40 : "1"(x) );
41 return ret;
42 }
43
44 };
45
46 #endif
mcs.hpp
1 #ifndef MCS_LOCK_H
2 #define MCS_LOCK_H
3
4 #include "atomics.hpp"
5 #include
6
7 class MCSLock
8 {
9 struct mcs_lock_t
10 {
11 mcs_lock_t():next(0), locked(false){}
12 struct mcs_lock_t* next;
13 bool locked;
14 };
15
16 public:
17 typedef struct mcs_lock_t mcs_lock;
18
19 private:
20 mcs_lock** tail;
21 static boost::thread_specific_ptr tls_node;
22
23 public:
24 MCSLock( mcs_lock** lock_tail ):tail( lock_tail )
25 {
26 if( tls_node.get() == 0 )
27 tls_node.reset( new mcs_lock() );
28 }
29
30 void lock()
31 {
32 mcs_lock* thread_node = tls_node.get();
33 thread_node->next = 0;
34 thread_node->locked = true;
35
36 volatile mcs_lock* pred = reinterpret_cast(
37 AtomicUtils::loadAndStore(
38 reinterpret_cast( thread_node ),
39 reinterpret_cast( tail )
40 )
41 );
42 if( pred != 0 )
43 {
44 pred->next = *tail;
45
46 STORE_FENCE();
47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard )
48
49 // Spin on a local variable. Someone unlock me plz !!
50 while( thread_node->locked )
51 CPU_RELAX();
52
53 }
54 }
55
56 void unlock()
57 {
58 mcs_lock* thread_node = tls_node.get();
59 if( thread_node->next == 0 )
60 {
61 // If false, then we a new thread has request for lock. Now release t he lock for the new thread
62 if(
63 AtomicUtils::compareAndExchange(
64 reinterpret_cast( tail ),
65 reinterpret_cast( thread_node ),
66 0
67 ) == reinterpret_cast( thread_node ) 68 )
69 {
70 return;
71 }
72
73 while( thread_node->next == 0 )
74 CPU_RELAX();
75 }
76
77 thread_node->next->locked = false;
78 }
79 };
80
81 boost::thread_specific_ptr MCSLock::tls_node;
82 #endif
mcs_test.cpp
1 #include "mcs.hpp"
2 #include <iostream>
3 #include <pthread.h>
4 #include <vector>
5 #define NUM_THREADS 16
6 #define NUM_ITERATIONS 100
7
8 std::vector<int> elements;
9 MCSLock::mcs_lock *tail = 0;
10
11 void* thread_run( void* data )
12 {
13 MCSLock lock( &tail );
14 for( int i = 0; i < NUM_ITERATIONS; ++i )
15 {
16 lock.lock();
17 elements.push_back( i );
18 lock.unlock();
19 }
20
21 return 0;
22 }
23
24 int main()
25 {
26 pthread_t threads[ NUM_THREADS ];
27 elements.reserve( NUM_THREADS * NUM_ITERATIONS );
28
29 {
30 for( int i = 0; i < NUM_THREADS; ++i )
31 pthread_create( &threads[i], NULL, thread_run, NULL );
32
33 for( int i = 0; i < NUM_THREADS; ++i )
34 pthread_join( threads[i], NULL );
35
36 std::cout <<"\nExiting main thread: " << std::endl;
37 }
38 }
The above code is compiled using clang
Problem:
I see that 1 or 2 threads are stuck in lock() in line 50. Except the main threads, the threads which are stuck in lock() there are no other threads alive. This means that when the other threads invoke unlock() they somehow don't set the locked = false for other variables and exit.
Any pointers on debugging this please ?
Stuck on this for many hours and no clues.
Doesn't clang have builtins for these inline-asm blocks (like gcc's __sync_val_compare_and_swap)? Why re-invent the wheel?
Second, I'd really think about adding the memory clobber to loadAndStore. You need to make sure that any writes the compiler is holding in registers gets flushed to memory before doing the xchgq. Similarly it will prevent gcc from optimizing memory reads to before the xchgq. Either would be bad.
Third, I'd examine the asm output for your while loops (thread_node->locked and thread_node->next). Since these variables are not volatile, gcc may optimize this to only perform the read once.
These may not solve your problem, but that's where I'd start.

PCI driver to fetch MAC address

I was trying to write a pci driver which can display the MAC address of my Ethernet card.
Running a Ubuntu on VM and my Ethernet card is Intel one as follows
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
I was able to get the data sheet of the same from Intel website and as per data sheet it says IO address are mapped to Bar 2 (Refer to pg 87) and MAC can be read using RAL/RAH register which are at offset RAL (05400h + 8*n; R/W) and RAH (05404h + 8n; R/W)
2 18h IO Register Base Address (bits 31:2) 0b mem
Based on this information, i wrote a small PCI driver but i always get the MAC as fff and when i debugged further, i see io_base address is always zero.
Below is the code
1 /*
2 Program to find a device on the PCI sub-system
3 */
4 #define VENDOR_ID 0x8086
5 #define DEVICE_ID 0x100e
6
7 #include <linux/kernel.h>
8 #include <linux/module.h>
9 #include <linux/stddef.h>
10 #include <linux/pci.h>
11 #include <linux/init.h>
12 #include <linux/cdev.h>
13 #include <linux/device.h>
14 #include <asm/io.h>
15
16 #define LOG(string...) printk(KERN_INFO string)
17
18 #define CDEV_MAJOR 227
19 #define CDEV_MINOR 0
20
21
22 MODULE_LICENSE("GPL");
23
24 struct pci_dev *pci_dev;
25 unsigned long mmio_addr;
26 unsigned long reg_len;
27 unsigned long *base_addr;
28
29 int device_probe(struct pci_dev *dev, const struct pci_device_id *id);
30 void device_remove(struct pci_dev *dev);
31
32 struct pci_device_id pci_device_id_DevicePCI[] =
33 {
34 {VENDOR_ID, DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
35 };
36
37 struct pci_driver pci_driver_DevicePCI =
38 {
39 name: "MyPCIDevice",
40 id_table: pci_device_id_DevicePCI,
41 probe: device_probe,
42 remove: device_remove
43 };
44
45
46 int init_module(void)
47 {
48 //struct pci_dev *pdev = NULL;
49 int ret = 0;
50
51 pci_register_driver(&pci_driver_DevicePCI);
52
53 return ret;
54 }
55
56 void cleanup_module(void)
57 {
58 pci_unregister_driver(&pci_driver_DevicePCI);
59
60 }
61
62 #define REGISTER_OFFSET 0x05400
64 int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
65 {
66 int ret;
67 int bar = 2; // Bar to be reserved
68 unsigned long io_base = 0;
69 unsigned long mem_len = 0;
70 unsigned int register_data = 0;
71
72 LOG("Device probed");
73
74 /* Reserve the access to PCI device */
75 ret = pci_request_region(dev, bar, "my_pci");
76 if (ret) {
77 printk(KERN_ERR "request region failed :%d\n", ret);
78 return ret;
79 }
80
81 ret = pci_enable_device(dev);
82 if (ret < 0 ) LOG("Failed while enabling ... ");
83
84 io_base = pci_resource_start(dev, bar);
85 mem_len = pci_resource_len(dev, bar);
86
87 request_region(io_base, mem_len, "my_pci");
88 register_data = inw(io_base + REGISTER_OFFSET);
89 printk(KERN_INFO "IO base = %lx", io_base);
90 printk(KERN_INFO "MAC = %x", register_data);
91
92 return ret;
93 }
94
95 void device_remove(struct pci_dev *dev)
96 {
97 pci_release_regions(dev);
98 pci_disable_device(dev);
99 }
100
lspci -x output of my card
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
00: 86 80 0e 10 07 00 30 02 02 00 00 02 00 40 00 00
10: 00 00 82 f0 00 00 00 00 41 d2 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 1e 00
30: 00 00 00 00 dc 00 00 00 00 00 00 00 09 01 ff 00
Can any one let me know what am i doing wrong?
I've modified your code and commented on changes. I have removed all of your existing comments to avoid confusion, and have only modified your probe function.
/* We need a place to store a logical address for unmapping later */
static void* logical_address;
int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int ret;
int bar_mask; /* BAR mask (this variable) and the integer BAR */
int requested_bar = 2; /* (this variable) are not the same thing, so give them */
/* separate variables */
resource_size_t io_base = 0; /* use kernel macros instead of built-in datatypes */
resource_size_t mem_len = 0;
unsigned int register_data = 0;
LOG("Device probed");
/* add this call to get the correct BAR mask */
bar_mask = pci_select_bars(dev, 0);
/* switched order - enable device before requesting memory */
ret = pci_enable_device(dev);
if (ret < 0 ) LOG("Failed while enabling ... ");
/* for this call, we want to pass the BAR mask, NOT the integer bar we want */
ret = pci_request_region(dev, bar_mask, "my_pci");
if (ret) {
printk(KERN_ERR "request region failed :%d\n", ret);
return ret;
}
/* it is in THESE calls that we request a specific BAR */
io_base = pci_resource_start(dev, requested_bar);
mem_len = pci_resource_len(dev, requested_bar);
/* you don't need to request anything again, so get rid of this line: */
/* request_region(io_base, mem_len, "my_pci"); */
/* you're missing an important step: we need to translate the IO address
* to a kernel logical address that we can actually use. Add a call to
* ioremap()
*/
logical_address = ioremap(io_base, mem_len);
/* we need to use the logical address returned by ioremap(), not the physical
* address returned by resource_start
*/
register_data = inw(logical_address + REGISTER_OFFSET);
printk(KERN_INFO "IO base = %lx", io_base);
printk(KERN_INFO "MAC = %x", register_data);
return ret;
}
You will need to add a corresponding call to iounmap() in your device_remove() routine. Take a look at the Intel E100E driver source code for some good examples.

Resources