Linux kernel socket programming:sendmsg function msg address can not access - linux

I want to send a message with socket->ops->sendmsg() in kernel module. One of the arguments of func sendmsg struct msghdr has a pointer msg_iov to the send buffer.
But except NULL , whatever buffer address I assign to the msg_iov the sendmsg() will return a EFAULT error to me. That means the address I assigned to the pointer can not be accessed.
So please help me and thank you very much.
P.S: here is part of my code. I omitted the irrelevant codes.
struct iovec vec;
char *buff = (char *)kmalloc(7, GFP_KERNEL);
unsigned long user_addr=0;
size_t count = 16;
buff[0] = 'H';
buff[1] = 'e';
buff[2] = 'l';
buff[3] = 'l';
buff[4] = 'o';
buff[5] = '\n';
buff[6] = '\0';
down_write(&current->mm->mmap_sem);
user_addr = do_mmap_pgoff(NULL, 0, count, PROT_READ|PROT_WRITE,\
MAP_PRIVATE|MAP_ANONYMOUS, 0);
up_write(&current->mm->mmap_sem);
__copy_to_user((void*)user_addr, (void*)buff, 7);
vec.iov_base = (void*)user_addr;
vec.iov_len = strlen( (char*)user_addr );
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_flags = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
error = NewSock->ops->sendmsg(&kiocb,NewSock, &msg, 7);
do_munmap( &current->mm, user_addr, strlen( (char*) user_addr));

You can't call this function with data that lives in the kernel's address space:
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
/* ... */
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
/* ... */
if ((err = skb_add_data(skb, from, copy)) != 0)
goto do_fault;
static inline int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
/* ... */
__wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy),
copy, 0, &err);
#define csum_and_copy_from_user csum_partial_copy_from_user
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
/* ... */
if (!likely(access_ok(VERIFY_READ, src, len)))
goto out_err;
/* ... */
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
access_ok on x86 checks for userspace pointers:
/**
* access_ok: - Checks if a user space pointer is valid
* #type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
* %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
* to write to a block, it is always safe to read from it.
* #addr: User space pointer to start of block to check
* #size: Size of block to check
*
* Context: User context only. This function may sleep.
*
* Checks if a pointer to a block of memory in user space is valid.
*
* Returns true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*
* Note that, depending on architecture, this function probably just
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
The comment on __range_not_ok() looks similar:
/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
*
* This is equivalent to the following test:
* (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
*
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
While I followed the x86-specific code paths whenever architecture-specific code was involved, I expect other architectures to enforce this behavior to the best of their respective abilities.
It looks like you cannot call sendmsg() on in-kernel struct iovec memory.

Related

GDB: Displaying incorrect values in struct

I'm trying to implement the malloc function and it looks like that gdb is giving me some weird values from this struct:
struct MemoryBlock {
struct MemoryBlock * next;
size_t size;
signed char is_free;
} startBlock;
And that's the function where I'm debugging it with gdb:
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block; // HERE'S the breakpoint
}
So here's the issue (I'm at the breakpoint return ret_block):
If I want to see what kind of values are inside of the ret_block pointer, than I'm getting this:
(gdb) p (struct MemoryBlock) ret_block
$26 = {next = 0x555555559000, size = 140737488347680, is_free = -53 '\313'}
size is fine, because if I convert it into the decimal system than I'm getting 3 as expected. (the argument size from the function is currently 3)
But I'm surprised that next and is_free aren't 0 since the last three lines should set both to 0.
So I looked up what is in the memory:
As you can see each value is correctly stored in my heap. But why am I getting these values if I do p (struct MemoryBlock) ret_block?
If you need the whole code
#include <unistd.h>
#include <stdio.h>
/* ============
* Structs
* ============ */
struct MemoryBlock {
struct MemoryBlock * next;
size_t size;
signed char is_free;
} startBlock;
/* ==============
* Functions
* ============== */
struct MemoryBlock * create_new_block(size_t size);
void * malloc(size_t size);
/* ==================
* Main Programm
* ================== */
int main()
{
char * buffer;
char * b2;
unsigned short index;
// The start of my heap :D
startBlock.is_free = 0;
startBlock.size = 0;
buffer = malloc(3);
b2 = malloc(3);
// ----- ERROR -----
if (buffer == NULL || b2 == NULL)
return 1;
// ----- ERROR -----
// fill the buffers with random stuff
for (index=0; index<2; index++) {
buffer[index] = 'a';
b2[index] = 'b';
}
buffer[index] = '\0';
b2[index] = '\0';
puts(buffer);
puts(b2);
return 0;
}
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block;
}
void * malloc (size_t size)
{
struct MemoryBlock * ret_block;
struct MemoryBlock * prev_block;
prev_block = &startBlock;
ret_block = startBlock.next;
// go through the linked lists and look if you can find a suitable block
while (ret_block != NULL && (ret_block->size < size || !ret_block->is_free))
{
prev_block = ret_block;
ret_block = ret_block->next;
}
// couldn't find a suitable block => create a new one
if (ret_block == NULL) {
ret_block = create_new_block(size);
if (ret_block == NULL)
return NULL;
}
prev_block->next = ret_block;
ret_block->is_free = 0;
return ret_block;
}
Ok, one of my friends told me my issue... The casting was wrong! Here's the solution:
(gdb) p * ret_block
$57 = {next = 0x0, size = 3, is_free = 0 '\000'}
A star was enough to get the desired output...

Question in Book Linux device driver 3rd about interrupt handler for circular buffer

I am reading LLDR3, having a question on P.271 in section "Implementing a Handler"
Bottom are codes I am having questions:
I see writer ( ISR ) and reader ( which is waken-up by ISR ) they are touching on same buffer ( short_queue ), since they are touching on the shared resource, doesn't it worry about the case where "short_i_read" got interrupted by the writer ISR while it is working on buffer?
I can understand ISR writer won't get interrupted since it is ISR and normally IRQ will be disabled until completion. But for buffer read "short_i_read" , I don't see any place to guarantee atomic operation.
The one thing I notice is :
buffer writer(ISR) only increment on short_head
buffer reader only increment on short_tail
Does that mean this code here let writer and reader only touch different variable to have it achieve kind of lock-free circular buffer?
irqreturn_t short_interrupt(int irq, void *dev_id, struct pt_regs *regs) {
struct timeval tv;
int written;
do_gettimeofday(&tv);
/* Write a 16 byte record. Assume PAGE_SIZE is a multiple of 16 */
written = sprintf((char *)short_head,"%08u.%06u\n", (int)(tv.tv_sec % 100000000), (int)(tv.tv_usec));
BUG_ON(written != 16);
short_incr_bp(&short_head, written);
wake_up_interruptible(&short_queue);
/* awake any reading process */
return IRQ_HANDLED;
}
static inline void short_incr_bp(volatile unsigned long *index, int delta) {
unsigned long new = *index + delta;
barrier(); /* Don't optimize these two together */
*index = (new >= (short_buffer + PAGE_SIZE)) ? short_buffer : new;
}
ssize_t short_i_read (struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
{
int count0;
DEFINE_WAIT(wait);
while (short_head == short_tail) {
prepare_to_wait(&short_queue, &wait, TASK_INTERRUPTIBLE);
if (short_head == short_tail)
schedule();
finish_wait(&short_queue, &wait);
if (signal_pending (current)) /* a signal arrived */
return -ERESTARTSYS; /* tell the fs layer to handle it */
}
/* count0 is the number of readable data bytes */
count0 = short_head - short_tail;
if (count0 < 0) /* wrapped */
count0 = short_buffer + PAGE_SIZE - short_tail;
if (count0 < count) count = count0;
if (copy_to_user(buf, (char *)short_tail, count))
return -EFAULT;
short_incr_bp (&short_tail, count);
return count;
}

how to implement splice_read for a character device file with uncached DMA buffer

I have a character device driver. It includes a 4MB coherent DMA buffer. The buffer is implemented as a ring buffer. I also implemente the splice_read call for the driver to improve the performance. But this implementation does not work well. Below is the using example:
(1)splice the 16 pages of device buffer data to a pipefd[1]. (the DMA buffer is managed as in page unit).
(2)splice the pipefd[0] to the socket.
(3)the receiving side (tcp client) receives the data, and then check the correctness.
I found that the tcp client got errors. The splice_read implementation is show below (I steal it from the vmsplice implementation):
/* splice related functions */
static void rdma_ring_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
put_page(buf->page);
buf->flags &= ~PIPE_BUF_FLAG_LRU;
}
void rdma_ring_spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
put_page(spd->pages[i]);
}
static const struct pipe_buf_operations rdma_ring_page_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = rdma_ring_pipe_buf_release,
.steal = generic_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
/* in order to simplify the caller work, the parameter meanings of ppos, len
* has been changed to adapt the internal ring buffer of the driver. The ppos
* indicate wich page is refferred(shoud start from 1, as the csr page are
* not allowed to do the splice), The len indicate how many pages are needed.
* Also, we constrain that maximum page number for each splice shoud not
* exceed 16 pages, if else, a EINVAL will return. If a high speed device
* need a more big page number, it can rework this routing. The off is also
* used to return the total bytes shoud be transferred, use can compare it
* with the return value to determint whether all bytes has been transfered.
*/
static ssize_t do_rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct rdma_ring *priv = to_rdma_ring(in->private_data);
struct rdma_ring_buf *data_buf;
struct rdma_ring_dstatus *dsta_buf;
struct page *pages[PIPE_DEF_BUFFERS];
struct partial_page partial[PIPE_DEF_BUFFERS];
ssize_t total_sz = 0, error;
int i;
unsigned offset;
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
.nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &rdma_ring_page_pipe_buf_ops,
.spd_release = rdma_ring_spd_release_page,
};
/* init the spd, currently we omit the packet header, if a control
* is needed, it may be implemented by define a control variable in
* the device struct */
spd.nr_pages = len;
for (i = 0; i < len; i++) {
offset = (unsigned)(*ppos) + i;
data_buf = get_buf(priv, offset);
dsta_buf = get_dsta_buf(priv, offset);
pages[i] = virt_to_page(data_buf);
get_page(pages[i]);
partial[i].offset = 0;
partial[i].len = dsta_buf->bytes_xferred;
total_sz += partial[i].len;
}
error = _splice_to_pipe(pipe, &spd);
/* use the ppos to return the theory total bytes shoud transfer */
*ppos = total_sz;
return error;
}
/* splice read */
static ssize_t rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags)
{
ssize_t ret;
MY_PRINT("%s: *ppos = %lld, len = %ld\n", __func__, *ppos, (long)len);
if (unlikely(len > PIPE_DEF_BUFFERS))
return -EINVAL;
ret = do_rdma_ring_splice_read(in, ppos, pipe, len, flags);
return ret;
}
The _splice_to_pipe is just the same one as the splice_to_pipe in kernel. As this function is not an exported symbol, so I re-implemented it.
I think the main cause is that the some kind of lock of pages are omitted, but
I don't know where and how.
My kernel version is 3.10.

accessing i2c platform device from userspace program

I'm trying to access an 24c256 eeprom content from user space in a am335x_starter_kit.
I dont have to add eeprom driver into kernel and make modifications in board.c file because board already uses eeprom to access some board configuration and Mac address information.
I just want to access eeprom content from user space.
I used read and write functions for character devices before but i2c platform devices doesnt have these functions.
struct i2c_driver {
unsigned int class;
int (* attach_adapter) (struct i2c_adapter *);
int (* probe) (struct i2c_client *, const struct i2c_device_id *);
int (* remove) (struct i2c_client *);
void (* shutdown) (struct i2c_client *);
void (* alert) (struct i2c_client *, unsigned int data);
int (* command) (struct i2c_client *client, unsigned int cmd, void *arg);
struct device_driver driver;
const struct i2c_device_id * id_table;
int (* detect) (struct i2c_client *, struct i2c_board_info *);
const unsigned short * address_list;
struct list_head clients;
};
This is the eeprom driver. Board file uses it from kernel to get mac address and board configuration data.
/*
* at24.c - handle most I2C EEPROMs
*
* Copyright (C) 2005-2007 David Brownell
* Copyright (C) 2008 Wolfram Sang, Pengutronix
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/sysfs.h>
#include <linux/mod_devicetable.h>
#include <linux/log2.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/of.h>
#include <linux/i2c.h>
#include <linux/i2c/at24.h>
/*
* I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
* Differences between different vendor product lines (like Atmel AT24C or
* MicroChip 24LC, etc) won't much matter for typical read/write access.
* There are also I2C RAM chips, likewise interchangeable. One example
* would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
*
* However, misconfiguration can lose data. "Set 16-bit memory address"
* to a part with 8-bit addressing will overwrite data. Writing with too
* big a page size also loses data. And it's not safe to assume that the
* conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
* uses 0x51, for just one example.
*
* Accordingly, explicit board-specific configuration data should be used
* in almost all cases. (One partial exception is an SMBus used to access
* "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
*
* So this driver uses "new style" I2C driver binding, expecting to be
* told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
* similar kernel-resident tables; or, configuration data coming from
* a bootloader.
*
* Other than binding model, current differences from "eeprom" driver are
* that this one handles write access and isn't restricted to 24c02 devices.
* It also handles larger devices (32 kbit and up) with two-byte addresses,
* which won't work on pure SMBus systems.
*/
struct at24_data {
struct at24_platform_data chip;
struct memory_accessor macc;
int use_smbus;
/*
* Lock protects against activities from other Linux tasks,
* but not from changes by other I2C masters.
*/
struct mutex lock;
struct bin_attribute bin;
u8 *writebuf;
unsigned write_max;
unsigned num_addresses;
/*
* Some chips tie up multiple I2C addresses; dummy devices reserve
* them for us, and we'll use them with SMBus calls.
*/
struct i2c_client *client[];
};
/*
* This parameter is to help this driver avoid blocking other drivers out
* of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
* clock, one 256 byte read takes about 1/43 second which is excessive;
* but the 1/170 second it takes at 400 kHz may be quite reasonable; and
* at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
*
* This value is forced to be a power of two so that writes align on pages.
*/
static unsigned io_limit = 128;
module_param(io_limit, uint, 0);
MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
/*
* Specs often allow 5 msec for a page write, sometimes 20 msec;
* it's important to recover from write timeouts.
*/
static unsigned write_timeout = 25;
module_param(write_timeout, uint, 0);
MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
#define AT24_SIZE_BYTELEN 5
#define AT24_SIZE_FLAGS 8
#define AT24_BITMASK(x) (BIT(x) - 1)
/* create non-zero magic value for given eeprom parameters */
#define AT24_DEVICE_MAGIC(_len, _flags) \
((1 << AT24_SIZE_FLAGS | (_flags)) \
<< AT24_SIZE_BYTELEN | ilog2(_len))
static const struct i2c_device_id at24_ids[] = {
/* needs 8 addresses as A0-A2 are ignored */
{ "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
/* old variants can't be handled with this generic entry! */
{ "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
{ "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
/* spd is a 24c02 in memory DIMMs */
{ "spd", AT24_DEVICE_MAGIC(2048 / 8,
AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
{ "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
/* 24rf08 quirk is handled at i2c-core */
{ "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
{ "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
{ "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
{ "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
{ "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
{ "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
{ "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
{ "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
{ "at24", 0 },
{ /* END OF LIST */ }
};
MODULE_DEVICE_TABLE(i2c, at24_ids);
/*-------------------------------------------------------------------------*/
/*
* This routine supports chips which consume multiple I2C addresses. It
* computes the addressing information to be used for a given r/w request.
* Assumes that sanity checks for offset happened at sysfs-layer.
*/
static struct i2c_client *at24_translate_offset(struct at24_data *at24,
unsigned *offset)
{
unsigned i;
if (at24->chip.flags & AT24_FLAG_ADDR16) {
i = *offset >> 16;
*offset &= 0xffff;
} else {
i = *offset >> 8;
*offset &= 0xff;
}
return at24->client[i];
}
static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
unsigned offset, size_t count)
{
struct i2c_msg msg[2];
u8 msgbuf[2];
struct i2c_client *client;
unsigned long timeout, read_time;
int status, i;
memset(msg, 0, sizeof(msg));
/*
* REVISIT some multi-address chips don't rollover page reads to
* the next slave address, so we may need to truncate the count.
* Those chips might need another quirk flag.
*
* If the real hardware used four adjacent 24c02 chips and that
* were misconfigured as one 24c08, that would be a similar effect:
* one "eeprom" file not four, but larger reads would fail when
* they crossed certain pages.
*/
/*
* Slave address and byte offset derive from the offset. Always
* set the byte address; on a multi-master board, another master
* may have changed the chip's "current" address pointer.
*/
client = at24_translate_offset(at24, &offset);
if (count > io_limit)
count = io_limit;
switch (at24->use_smbus) {
case I2C_SMBUS_I2C_BLOCK_DATA:
/* Smaller eeproms can work given some SMBus extension calls */
if (count > I2C_SMBUS_BLOCK_MAX)
count = I2C_SMBUS_BLOCK_MAX;
break;
case I2C_SMBUS_WORD_DATA:
count = 2;
break;
case I2C_SMBUS_BYTE_DATA:
count = 1;
break;
default:
/*
* When we have a better choice than SMBus calls, use a
* combined I2C message. Write address; then read up to
* io_limit data bytes. Note that read page rollover helps us
* here (unlike writes). msgbuf is u8 and will cast to our
* needs.
*/
i = 0;
if (at24->chip.flags & AT24_FLAG_ADDR16)
msgbuf[i++] = offset >> 8;
msgbuf[i++] = offset;
msg[0].addr = client->addr;
msg[0].buf = msgbuf;
msg[0].len = i;
msg[1].addr = client->addr;
msg[1].flags = I2C_M_RD;
msg[1].buf = buf;
msg[1].len = count;
}
/*
* Reads fail if the previous write didn't complete yet. We may
* loop a few times until this one succeeds, waiting at least
* long enough for one entire page write to work.
*/
timeout = jiffies + msecs_to_jiffies(write_timeout);
do {
read_time = jiffies;
switch (at24->use_smbus) {
case I2C_SMBUS_I2C_BLOCK_DATA:
status = i2c_smbus_read_i2c_block_data(client, offset,
count, buf);
break;
case I2C_SMBUS_WORD_DATA:
status = i2c_smbus_read_word_data(client, offset);
if (status >= 0) {
buf[0] = status & 0xff;
buf[1] = status >> 8;
status = count;
}
break;
case I2C_SMBUS_BYTE_DATA:
status = i2c_smbus_read_byte_data(client, offset);
if (status >= 0) {
buf[0] = status;
status = count;
}
break;
default:
status = i2c_transfer(client->adapter, msg, 2);
if (status == 2)
status = count;
}
dev_dbg(&client->dev, "read %zu#%d --> %d (%ld)\n",
count, offset, status, jiffies);
if (status == count)
return count;
/* REVISIT: at HZ=100, this is sloooow */
msleep(1);
} while (time_before(read_time, timeout));
return -ETIMEDOUT;
}
static ssize_t at24_read(struct at24_data *at24,
char *buf, loff_t off, size_t count)
{
ssize_t retval = 0;
if (unlikely(!count))
return count;
/*
* Read data from chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
*/
mutex_lock(&at24->lock);
while (count) {
ssize_t status;
status = at24_eeprom_read(at24, buf, off, count);
if (status <= 0) {
if (retval == 0)
retval = status;
break;
}
buf += status;
off += status;
count -= status;
retval += status;
}
mutex_unlock(&at24->lock);
return retval;
}
static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
struct at24_data *at24;
at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
return at24_read(at24, buf, off, count);
}
/*
* Note that if the hardware write-protect pin is pulled high, the whole
* chip is normally write protected. But there are plenty of product
* variants here, including OTP fuses and partial chip protect.
*
* We only use page mode writes; the alternative is sloooow. This routine
* writes at most one page.
*/
static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
unsigned offset, size_t count)
{
struct i2c_client *client;
struct i2c_msg msg;
ssize_t status;
unsigned long timeout, write_time;
unsigned next_page;
/* Get corresponding I2C address and adjust offset */
client = at24_translate_offset(at24, &offset);
/* write_max is at most a page */
if (count > at24->write_max)
count = at24->write_max;
/* Never roll over backwards, to the start of this page */
next_page = roundup(offset + 1, at24->chip.page_size);
if (offset + count > next_page)
count = next_page - offset;
/* If we'll use I2C calls for I/O, set up the message */
if (!at24->use_smbus) {
int i = 0;
msg.addr = client->addr;
msg.flags = 0;
/* msg.buf is u8 and casts will mask the values */
msg.buf = at24->writebuf;
if (at24->chip.flags & AT24_FLAG_ADDR16)
msg.buf[i++] = offset >> 8;
msg.buf[i++] = offset;
memcpy(&msg.buf[i], buf, count);
msg.len = i + count;
}
/*
* Writes fail if the previous one didn't complete yet. We may
* loop a few times until this one succeeds, waiting at least
* long enough for one entire page write to work.
*/
timeout = jiffies + msecs_to_jiffies(write_timeout);
do {
write_time = jiffies;
if (at24->use_smbus) {
status = i2c_smbus_write_i2c_block_data(client,
offset, count, buf);
if (status == 0)
status = count;
} else {
status = i2c_transfer(client->adapter, &msg, 1);
if (status == 1)
status = count;
}
dev_dbg(&client->dev, "write %zu#%d --> %zd (%ld)\n",
count, offset, status, jiffies);
if (status == count)
return count;
/* REVISIT: at HZ=100, this is sloooow */
msleep(1);
} while (time_before(write_time, timeout));
return -ETIMEDOUT;
}
static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
size_t count)
{
ssize_t retval = 0;
if (unlikely(!count))
return count;
/*
* Write data to chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
*/
mutex_lock(&at24->lock);
while (count) {
ssize_t status;
status = at24_eeprom_write(at24, buf, off, count);
if (status <= 0) {
if (retval == 0)
retval = status;
break;
}
buf += status;
off += status;
count -= status;
retval += status;
}
mutex_unlock(&at24->lock);
return retval;
}
static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
struct at24_data *at24;
at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
return at24_write(at24, buf, off, count);
}
/*-------------------------------------------------------------------------*/
/*
* This lets other kernel code access the eeprom data. For example, it
* might hold a board's Ethernet address, or board-specific calibration
* data generated on the manufacturing floor.
*/
static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
off_t offset, size_t count)
{
struct at24_data *at24 = container_of(macc, struct at24_data, macc);
return at24_read(at24, buf, offset, count);
}
static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf,
off_t offset, size_t count)
{
struct at24_data *at24 = container_of(macc, struct at24_data, macc);
return at24_write(at24, buf, offset, count);
}
/*-------------------------------------------------------------------------*/
#ifdef CONFIG_OF
static void at24_get_ofdata(struct i2c_client *client,
struct at24_platform_data *chip)
{
const __be32 *val;
struct device_node *node = client->dev.of_node;
if (node) {
if (of_get_property(node, "read-only", NULL))
chip->flags |= AT24_FLAG_READONLY;
val = of_get_property(node, "pagesize", NULL);
if (val)
chip->page_size = be32_to_cpup(val);
}
}
#else
static void at24_get_ofdata(struct i2c_client *client,
struct at24_platform_data *chip)
{ }
#endif /* CONFIG_OF */
static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
{
struct at24_platform_data chip;
bool writable;
int use_smbus = 0;
struct at24_data *at24;
int err;
unsigned i, num_addresses;
kernel_ulong_t magic;
if (client->dev.platform_data) {
chip = *(struct at24_platform_data *)client->dev.platform_data;
} else {
if (!id->driver_data) {
err = -ENODEV;
goto err_out;
}
magic = id->driver_data;
chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
magic >>= AT24_SIZE_BYTELEN;
chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
/*
* This is slow, but we can't know all eeproms, so we better
* play safe. Specifying custom eeprom-types via platform_data
* is recommended anyhow.
*/
chip.page_size = 1;
/* update chipdata if OF is present */
at24_get_ofdata(client, &chip);
chip.setup = NULL;
chip.context = NULL;
}
if (!is_power_of_2(chip.byte_len))
dev_warn(&client->dev,
"byte_len looks suspicious (no power of 2)!\n");
if (!chip.page_size) {
dev_err(&client->dev, "page_size must not be 0!\n");
err = -EINVAL;
goto err_out;
}
if (!is_power_of_2(chip.page_size))
dev_warn(&client->dev,
"page_size looks suspicious (no power of 2)!\n");
/* Use I2C operations unless we're stuck with SMBus extensions. */
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
if (chip.flags & AT24_FLAG_ADDR16) {
err = -EPFNOSUPPORT;
goto err_out;
}
if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
use_smbus = I2C_SMBUS_I2C_BLOCK_DATA;
} else if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_WORD_DATA)) {
use_smbus = I2C_SMBUS_WORD_DATA;
} else if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
use_smbus = I2C_SMBUS_BYTE_DATA;
} else {
err = -EPFNOSUPPORT;
goto err_out;
}
}
//???????????????
if (chip.flags & AT24_FLAG_TAKE8ADDR)
num_addresses = 8;
else
num_addresses = DIV_ROUND_UP(chip.byte_len, (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
at24 = kzalloc(sizeof(struct at24_data) + num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
if (!at24) {
err = -ENOMEM;
goto err_out;
}
mutex_init(&at24->lock);
at24->use_smbus = use_smbus;
at24->chip = chip;
at24->num_addresses = num_addresses;
/*
* Export the EEPROM bytes through sysfs, since that's convenient.
* By default, only root should see the data (maybe passwords etc)
*/
sysfs_bin_attr_init(&at24->bin);
at24->bin.attr.name = "eeprom";
at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
at24->bin.read = at24_bin_read;
at24->bin.size = chip.byte_len;
at24->macc.read = at24_macc_read;
writable = !(chip.flags & AT24_FLAG_READONLY);
if (writable) {
if (!use_smbus || i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
unsigned write_max = chip.page_size;
at24->macc.write = at24_macc_write;
at24->bin.write = at24_bin_write;
at24->bin.attr.mode |= S_IWUSR;
if (write_max > io_limit)
write_max = io_limit;
if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
write_max = I2C_SMBUS_BLOCK_MAX;
at24->write_max = write_max;
/* buffer (data + address at the beginning) */
at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL);
if (!at24->writebuf) {
err = -ENOMEM;
goto err_struct;
}
} else {
dev_warn(&client->dev,
"cannot write due to controller restrictions.");
}
}
at24->client[0] = client;
/* use dummy devices for multiple-address chips */
for (i = 1; i < num_addresses; i++) {
at24->client[i] = i2c_new_dummy(client->adapter,
client->addr + i);
if (!at24->client[i]) {
dev_err(&client->dev, "address 0x%02x unavailable\n",
client->addr + i);
err = -EADDRINUSE;
goto err_clients;
}
}
err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
if (err)
goto err_clients;
i2c_set_clientdata(client, at24);
dev_info(&client->dev, "%zu byte %s EEPROM, %s, %u bytes/write\n", at24->bin.size, client->name,
writable ? "writable" : "read-only", at24->write_max);
if (use_smbus == I2C_SMBUS_WORD_DATA ||
use_smbus == I2C_SMBUS_BYTE_DATA) {
dev_notice(&client->dev, "Falling back to %s reads, "
"performance will suffer\n", use_smbus ==
I2C_SMBUS_WORD_DATA ? "word" : "byte");
}
/* export data to kernel code */
if (chip.setup)
chip.setup(&at24->macc, chip.context);
return 0;
err_clients:
for (i = 1; i < num_addresses; i++)
if (at24->client[i])
i2c_unregister_device(at24->client[i]);
kfree(at24->writebuf);
err_struct:
kfree(at24);
err_out:
dev_dbg(&client->dev, "probe error %d\n", err);
return err;
}
/*-------------------------------------------------------------------------*/
static int __devexit at24_remove(struct i2c_client *client)
{
struct at24_data *at24;
int i;
at24 = i2c_get_clientdata(client);
sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
for (i = 1; i < at24->num_addresses; i++)
i2c_unregister_device(at24->client[i]);
kfree(at24->writebuf);
kfree(at24);
return 0;
}
/*-------------------------------------------------------------------------*/
static struct i2c_driver at24_driver = {
.driver = {
.name = "at24",
.owner = THIS_MODULE,
},
.probe = at24_probe,
.remove = __devexit_p(at24_remove),
.id_table = at24_ids,
};
static int __init at24_init(void)
{
if (!io_limit) {
pr_err("at24: io_limit must not be 0!\n");
return -EINVAL;
}
io_limit = rounddown_pow_of_two(io_limit);
return i2c_add_driver(&at24_driver);
}
module_init(at24_init);
static void __exit at24_exit(void)
{
i2c_del_driver(&at24_driver);
}
module_exit(at24_exit);
MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
MODULE_AUTHOR("David Brownell and Wolfram Sang");
MODULE_LICENSE("GPL");
These are snippets from board file:
static struct i2c_board_info __initdata am335x_i2c0_boardinfo[] = {
{
/* Baseboard board EEPROM */
I2C_BOARD_INFO("24c256", BASEBOARD_I2C_ADDR),
.platform_data = &am335x_baseboard_eeprom_info,
},
.
.
static struct at24_platform_data am335x_baseboard_eeprom_info = {
.byte_len = (256*1024) / 8,
.page_size = 64,
.flags = AT24_FLAG_ADDR16,
.setup = am335x_evm_setup,
.context = (void *)NULL,
};
static void am335x_evm_setup(struct memory_accessor *mem_acc, void *context)
{
int ret;
char tmp[10];
struct device *mpu_dev;
/* 1st get the MAC address from EEPROM */
ret = mem_acc->read(mem_acc, (char *)&am335x_mac_addr,
EEPROM_MAC_ADDRESS_OFFSET, sizeof(am335x_mac_addr));
.
.
.
How can i read from/write into eeprom content from user space.
Should i use sysfs? What should i do?
EEPROM:
It's part of setting the MAC and serial number, but the only way to know if the EEPROM is working is to read its content.
$ cat /sys/bus/i2c/devices/2-0057/eeprom | hexdump -C

How to make the copy of the packet?

I want to make a copy of the packet (and send it to queue that is made by me) at the Net Filter hook.
Will skb_copy work for me? i also have to add the seq no before the packet,skb_reserve will do that?
I have written the following code to capture packet
unsigned int hook_func(unsigned int hooknum,
struct sk_buff **skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
if (strcmp(in->name, drop_if) == 0) {
printk("Dropped packet on %s...\n", drop_if);
return NF_DROP;
} else {
return NF_ACCEPT;
}
}
/* Initialisation routine */
int init_module()
{
/* Fill in our hook structure */
nfho.hook = hook_func; /* Handler function */
nfho.hooknum = NF_IP_PRE_ROUTING; /* First hook for IPv4 */
nfho.pf = PF_INET;
nfho.priority = NF_IP_PRI_FIRST; /* Make our function first */
nf_register_hook(&nfho);
return 0;
}
I do agree with Rachit Jain, unless you have a valid reason to do this in Kernel space, I do suggest you use libpcap to do it in user-space.
Anyhow, if you just wanna copy the packet and then amend some data, I suggest you allocate a new skb with enough space to copy the data you already have in the skb you received + enough space to add a header.
Here's a code that I once used, it doesn't do any copying from an already existing skb but it can be useful to you. I am crafting a special kind of ICMP echo message here
int sendICMPEcho(unsigned char *msg, unsigned int length,
__be32 source, __be32 dest)
{
struct ethhdr *eth;
struct iphdr *iph;
struct icmphdr *icmph;
struct sk_buff *newPacket;
unsigned char *data;
unsigned int skbSize = length + sizeof(struct icmphdr)
+ sizeof(struct iphdr)
+ sizeof(struct ethhdr);
/* Allocate the skb */
newPacket = alloc_skb(skbSize, GFP_ATOMIC);
if(newPacket == NULL)
return SEND_FAIL_MEMORY;
/* Reserve the headers area */
skb_reserve(newPacket, sizeof(struct icmphdr)
+ sizeof(struct iphdr)
+ sizeof(struct ethhdr));
/* Extend the data area from 0 to the message length */
data = skb_put(newPacket, length);
/* Copy the data from the message buffer to the newPacket */
memcpy(data, msg, length);
/************** ICMP HEADER***************/
/* skb_push - pushing the icmp header in the packet data */
icmph = (struct icmphdr *) skb_push(newPacket,
sizeof(struct icmphdr));
/*set ICMP header here */
icmph->type = ICMP_ECHO;
icmph->code = 100; /* Our magic number */
icmph->un.echo.id = 0;
icmph->un.echo.sequence = htons(sendCounter);
icmph->checksum= 0;
icmph->checksum = in_cksum((unsigned short *)icmph,
sizeof(struct icmphdr) + length);
/************** END ICMP HEADER**************/
/************** IP HEADER ***************/
iph = (struct iphdr *) skb_push(newPacket,
sizeof(struct iphdr));
/* set IP header here */
iph->ihl = 5;/* 5 * 32(bits) */
iph->version = 4;
iph->tos = 255; /* Just a magic number - remove it */
iph->tot_len = htons( sizeof(struct iphdr)
+ sizeof(struct icmphdr)
+ length);
iph->id = 0;
iph->frag_off = 0; /* No fragementation */
iph->ttl = 65;
iph->protocol = IPPROTO_ICMP;
iph->saddr = source;
iph->daddr = dest;
iph->check = 0;
iph->check = in_cksum((unsigned short *)iph, sizeof(struct iphdr));
/************** END IP HEADER ***************/
/*WARNING: THE CODE BELOW SHOULD BE REPLACED BY SOMETHING MORE ROBUST
THAT USES THE KERNEL ROUTING!
AND USES IP_LOCAL_OUT INSTEAD OF WHAT WE ARE DOING */
/* Set up the net-device for the new packet */
/* In my code, there was a function findDeviceByIp that does the routing and return which net_device to use for transmission*/
newPacket->dev = findDeviceByIP(source);
if(newPacket->dev == NULL)
{
kfree_skb(newPacket);
return SEND_DEV_FAIL;
}
/************** ETH HEADER ***************/
eth = (struct ethhdr *) skb_push(newPacket, sizeof(struct ethhdr));
if(strcmp(newPacket->dev->name, "wlan0") == 0)
memcpy(eth->h_dest, wifiMAC, 6);
else if(strcmp(newPacket->dev->name, "eth0") == 0)
memcpy(eth->h_dest, etherMAC, 6);
else
{
kfree_skb(newPacket);
return SEND_FAIL_SEND;
}
memcpy(eth->h_source, newPacket->dev->dev_addr, 6);
eth->h_proto = htons(ETH_P_IP);
/************** END ETH HEADER ***************/
dev_queue_xmit(newPacket);/* Transmite the packet */
/* END OF THE WARNING AREA */
++sendCounter;
return SEND_SUCCESS;
}
There are many helper functions provided by linux kernel to work on skb's. it depends on the usecase which one you want to use.
skb_clone ==>copies the skbuff header and increments the reference counter for data buffer. If you are only interested in modifying the skbuff header then you can use skb_clone
skb_copy ==> copies skbuff header, data buffer as well as fragments. Use when you are interested in modifying the data in main buffer as well as in fragments buffer
pskb_copy ==> copies skbuff header + only the data buffer but not the fragments, thus if you want to modify skb except fragment buffer then you can use this one. which takes headroom also as arguement.
Better read the helper function provided by linux kernel(net/core/skbuff.c) to do the skb operations efficiently and to avoid any pitfalls.

Resources