Creating dynamically sized MPI file views - io

I would like to write out a binary file using collective MPI I/O. My plan is to create an MPI derived type analogous to
struct soln_dynamic_t
{
int int_data[2];
double *u; /* Length constant for all instances of this struct */
};
Each processor then creates a view based on the derived type, and writes into the view.
I have this all working for the case in which *u is replaced with u[10] (see complete code below), but ultimately, I'd like to have a dynamic length array for u. (In case it matters, the length will be fixed for all instances of soln_dynamic_t for any run, but not known at compile time.)
What is the best way to handle this?
I have read several posts on why I can't use soln_dynamic_t
directly as an MPI structure. The problem is that processors are not guaranteed to have the same offset between u[0] and int_data[0]. (Is that right?)
On the other hand, the structure
struct soln_static_t
{
int int_data[2];
double u[10]; /* fixed at compile time */
};
works because the offsets are guaranteed to be the same across processors.
I've considered several approaches :
Create the view based on manually defined offsets, etc, rather than using a derived type.
Base the MPI structure on another MPI type, i.e. an contiguous type for ``*u` (is that allowed?)
I am guessing there must be a standard way to do this. Any suggestions would be very helpful.
Several other posts on this issue have been helpful, although they mostly deal with communication and not file I/O.
Here is the complete code::
#include <mpi.h>
typedef struct
{
int int_data[2];
double u[10]; /* Make this a dynamic length (but fixed) */
} soln_static_t;
void build_soln_type(int n, int* int_data, double *u, MPI_Datatype *soln_t)
{
int block_lengths[2] = {2,n};
MPI_Datatype typelist[2] = {MPI_INT, MPI_DOUBLE};
MPI_Aint disp[2], start_address, address;
MPI_Address(int_data,&start_address);
MPI_Address(u,&address);
disp[0] = 0;
disp[1] = address-start_address;
MPI_Datatype tmp_type;
MPI_Type_create_struct(2,block_lengths,disp,typelist,&tmp_type);
MPI_Aint extent;
extent = block_lengths[0]*sizeof(int) + block_lengths[1]*sizeof(double);
MPI_Type_create_resized(tmp_type, 0, extent, soln_t);
MPI_Type_commit(soln_t);
}
void main(int argc, char** argv)
{
MPI_File file;
int globalsize, localsize, starts, order;
MPI_Datatype localarray, soln_t;
int rank, nprocs, nsize = 10; /* must match size in struct above */
/* --- Initialize MPI */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
/* --- Set up data to write out */
soln_static_t data;
data.int_data[0] = nsize;
data.int_data[1] = rank;
data.u[0] = 3.14159; /* To check that data is written as expected */
build_soln_type(nsize, data.int_data, data.u, &soln_t);
MPI_File_open(MPI_COMM_WORLD, "bin.out",
MPI_MODE_CREATE|MPI_MODE_WRONLY,
MPI_INFO_NULL, &file);
/* --- Create file view for this processor */
globalsize = nprocs;
localsize = 1;
starts = rank;
order = MPI_ORDER_C;
MPI_Type_create_subarray(1, &globalsize, &localsize, &starts, order,
soln_t, &localarray);
MPI_Type_commit(&localarray);
MPI_File_set_view(file, 0, soln_t, localarray,
"native", MPI_INFO_NULL);
/* --- Write data into view */
MPI_File_write_all(file, data.int_data, 1, soln_t, MPI_STATUS_IGNORE);
/* --- Clean up */
MPI_File_close(&file);
MPI_Type_free(&localarray);
MPI_Type_free(&soln_t);
MPI_Finalize();
}

Since the size of the u array of the soln_dynamic_t type is known at runtime and will not change after that, I'd rather suggest an other approach.
Basically, you store all the data contiguous in memory :
typedef struct
{
int int_data[2];
double u[]; /* Make this a dynamic length (but fixed) */
} soln_dynamic_t;
Then you have to manually allocate this struct
soln_dynamic_t * alloc_soln(int nsize, int count) {
return (soln_dynamic_t *)calloc(offsetof(soln_dynamic_t, u)+nsize*sizeof(double), count);
}
Note you cannot directly access an array of soln_dynamic_t because the size is unknown at compile time. Instead, you have to manually calculate the pointers.
soln_dynamic_t *p = alloc_soln(10, 2);
p[0].int_data[0] = 1; // OK
p[0].u[0] = 2; // OK
p[1].int_data[0] = 3; // KO ! since sizeof(soln_dynamic_t) is unknown at compile time.
Here is the full rewritten version of your program
#include <mpi.h>
#include <malloc.h>
typedef struct
{
int int_data[2];
double u[]; /* Make this a dynamic length (but fixed) */
} soln_dynamic_t;
void build_soln_type(int n, MPI_Datatype *soln_t)
{
int block_lengths[2] = {2,n};
MPI_Datatype typelist[2] = {MPI_INT, MPI_DOUBLE};
MPI_Aint disp[2];
disp[0] = offsetof(soln_dynamic_t, int_data);
disp[1] = offsetof(soln_dynamic_t, u);
MPI_Datatype tmp_type;
MPI_Type_create_struct(2,block_lengths,disp,typelist,&tmp_type);
MPI_Aint extent;
extent = offsetof(soln_dynamic_t, u) + block_lengths[1]*sizeof(double);
MPI_Type_create_resized(tmp_type, 0, extent, soln_t);
MPI_Type_free(&tmp_type);
MPI_Type_commit(soln_t);
}
soln_dynamic_t * alloc_soln(int nsize, int count) {
return (soln_dynamic_t *)calloc(offsetof(soln_dynamic_t, u) + nsize*sizeof(double), count);
}
int main(int argc, char** argv)
{
MPI_File file;
int globalsize, localsize, starts, order;
MPI_Datatype localarray, soln_t;
int rank, nprocs, nsize = 10; /* must match size in struct above */
/* --- Initialize MPI */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
/* --- Set up data to write out */
soln_dynamic_t *data = alloc_soln(nsize,1);
data->int_data[0] = nsize;
data->int_data[1] = rank;
data->u[0] = 3.14159; /* To check that data is written as expected */
build_soln_type(nsize, &soln_t);
MPI_File_open(MPI_COMM_WORLD, "bin2.out",
MPI_MODE_CREATE|MPI_MODE_WRONLY,
MPI_INFO_NULL, &file);
/* --- Create file view for this processor */
globalsize = nprocs;
localsize = 1;
starts = rank;
order = MPI_ORDER_C;
MPI_Type_create_subarray(1, &globalsize, &localsize, &starts, order,
soln_t, &localarray);
MPI_Type_commit(&localarray);
MPI_File_set_view(file, 0, soln_t, localarray,
"native", MPI_INFO_NULL);
/* --- Write data into view */
MPI_File_write_all(file, data, 1, soln_t, MPI_STATUS_IGNORE);
/* --- Clean up */
MPI_File_close(&file);
MPI_Type_free(&localarray);
MPI_Type_free(&soln_t);
MPI_Finalize();
return 0;
}

Related

Question in Book Linux device driver 3rd about interrupt handler for circular buffer

I am reading LLDR3, having a question on P.271 in section "Implementing a Handler"
Bottom are codes I am having questions:
I see writer ( ISR ) and reader ( which is waken-up by ISR ) they are touching on same buffer ( short_queue ), since they are touching on the shared resource, doesn't it worry about the case where "short_i_read" got interrupted by the writer ISR while it is working on buffer?
I can understand ISR writer won't get interrupted since it is ISR and normally IRQ will be disabled until completion. But for buffer read "short_i_read" , I don't see any place to guarantee atomic operation.
The one thing I notice is :
buffer writer(ISR) only increment on short_head
buffer reader only increment on short_tail
Does that mean this code here let writer and reader only touch different variable to have it achieve kind of lock-free circular buffer?
irqreturn_t short_interrupt(int irq, void *dev_id, struct pt_regs *regs) {
struct timeval tv;
int written;
do_gettimeofday(&tv);
/* Write a 16 byte record. Assume PAGE_SIZE is a multiple of 16 */
written = sprintf((char *)short_head,"%08u.%06u\n", (int)(tv.tv_sec % 100000000), (int)(tv.tv_usec));
BUG_ON(written != 16);
short_incr_bp(&short_head, written);
wake_up_interruptible(&short_queue);
/* awake any reading process */
return IRQ_HANDLED;
}
static inline void short_incr_bp(volatile unsigned long *index, int delta) {
unsigned long new = *index + delta;
barrier(); /* Don't optimize these two together */
*index = (new >= (short_buffer + PAGE_SIZE)) ? short_buffer : new;
}
ssize_t short_i_read (struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
{
int count0;
DEFINE_WAIT(wait);
while (short_head == short_tail) {
prepare_to_wait(&short_queue, &wait, TASK_INTERRUPTIBLE);
if (short_head == short_tail)
schedule();
finish_wait(&short_queue, &wait);
if (signal_pending (current)) /* a signal arrived */
return -ERESTARTSYS; /* tell the fs layer to handle it */
}
/* count0 is the number of readable data bytes */
count0 = short_head - short_tail;
if (count0 < 0) /* wrapped */
count0 = short_buffer + PAGE_SIZE - short_tail;
if (count0 < count) count = count0;
if (copy_to_user(buf, (char *)short_tail, count))
return -EFAULT;
short_incr_bp (&short_tail, count);
return count;
}

how to implement splice_read for a character device file with uncached DMA buffer

I have a character device driver. It includes a 4MB coherent DMA buffer. The buffer is implemented as a ring buffer. I also implemente the splice_read call for the driver to improve the performance. But this implementation does not work well. Below is the using example:
(1)splice the 16 pages of device buffer data to a pipefd[1]. (the DMA buffer is managed as in page unit).
(2)splice the pipefd[0] to the socket.
(3)the receiving side (tcp client) receives the data, and then check the correctness.
I found that the tcp client got errors. The splice_read implementation is show below (I steal it from the vmsplice implementation):
/* splice related functions */
static void rdma_ring_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
put_page(buf->page);
buf->flags &= ~PIPE_BUF_FLAG_LRU;
}
void rdma_ring_spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
put_page(spd->pages[i]);
}
static const struct pipe_buf_operations rdma_ring_page_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = rdma_ring_pipe_buf_release,
.steal = generic_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
/* in order to simplify the caller work, the parameter meanings of ppos, len
* has been changed to adapt the internal ring buffer of the driver. The ppos
* indicate wich page is refferred(shoud start from 1, as the csr page are
* not allowed to do the splice), The len indicate how many pages are needed.
* Also, we constrain that maximum page number for each splice shoud not
* exceed 16 pages, if else, a EINVAL will return. If a high speed device
* need a more big page number, it can rework this routing. The off is also
* used to return the total bytes shoud be transferred, use can compare it
* with the return value to determint whether all bytes has been transfered.
*/
static ssize_t do_rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct rdma_ring *priv = to_rdma_ring(in->private_data);
struct rdma_ring_buf *data_buf;
struct rdma_ring_dstatus *dsta_buf;
struct page *pages[PIPE_DEF_BUFFERS];
struct partial_page partial[PIPE_DEF_BUFFERS];
ssize_t total_sz = 0, error;
int i;
unsigned offset;
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
.nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &rdma_ring_page_pipe_buf_ops,
.spd_release = rdma_ring_spd_release_page,
};
/* init the spd, currently we omit the packet header, if a control
* is needed, it may be implemented by define a control variable in
* the device struct */
spd.nr_pages = len;
for (i = 0; i < len; i++) {
offset = (unsigned)(*ppos) + i;
data_buf = get_buf(priv, offset);
dsta_buf = get_dsta_buf(priv, offset);
pages[i] = virt_to_page(data_buf);
get_page(pages[i]);
partial[i].offset = 0;
partial[i].len = dsta_buf->bytes_xferred;
total_sz += partial[i].len;
}
error = _splice_to_pipe(pipe, &spd);
/* use the ppos to return the theory total bytes shoud transfer */
*ppos = total_sz;
return error;
}
/* splice read */
static ssize_t rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags)
{
ssize_t ret;
MY_PRINT("%s: *ppos = %lld, len = %ld\n", __func__, *ppos, (long)len);
if (unlikely(len > PIPE_DEF_BUFFERS))
return -EINVAL;
ret = do_rdma_ring_splice_read(in, ppos, pipe, len, flags);
return ret;
}
The _splice_to_pipe is just the same one as the splice_to_pipe in kernel. As this function is not an exported symbol, so I re-implemented it.
I think the main cause is that the some kind of lock of pages are omitted, but
I don't know where and how.
My kernel version is 3.10.

accessing i2c platform device from userspace program

I'm trying to access an 24c256 eeprom content from user space in a am335x_starter_kit.
I dont have to add eeprom driver into kernel and make modifications in board.c file because board already uses eeprom to access some board configuration and Mac address information.
I just want to access eeprom content from user space.
I used read and write functions for character devices before but i2c platform devices doesnt have these functions.
struct i2c_driver {
unsigned int class;
int (* attach_adapter) (struct i2c_adapter *);
int (* probe) (struct i2c_client *, const struct i2c_device_id *);
int (* remove) (struct i2c_client *);
void (* shutdown) (struct i2c_client *);
void (* alert) (struct i2c_client *, unsigned int data);
int (* command) (struct i2c_client *client, unsigned int cmd, void *arg);
struct device_driver driver;
const struct i2c_device_id * id_table;
int (* detect) (struct i2c_client *, struct i2c_board_info *);
const unsigned short * address_list;
struct list_head clients;
};
This is the eeprom driver. Board file uses it from kernel to get mac address and board configuration data.
/*
* at24.c - handle most I2C EEPROMs
*
* Copyright (C) 2005-2007 David Brownell
* Copyright (C) 2008 Wolfram Sang, Pengutronix
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/sysfs.h>
#include <linux/mod_devicetable.h>
#include <linux/log2.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/of.h>
#include <linux/i2c.h>
#include <linux/i2c/at24.h>
/*
* I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
* Differences between different vendor product lines (like Atmel AT24C or
* MicroChip 24LC, etc) won't much matter for typical read/write access.
* There are also I2C RAM chips, likewise interchangeable. One example
* would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
*
* However, misconfiguration can lose data. "Set 16-bit memory address"
* to a part with 8-bit addressing will overwrite data. Writing with too
* big a page size also loses data. And it's not safe to assume that the
* conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
* uses 0x51, for just one example.
*
* Accordingly, explicit board-specific configuration data should be used
* in almost all cases. (One partial exception is an SMBus used to access
* "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
*
* So this driver uses "new style" I2C driver binding, expecting to be
* told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
* similar kernel-resident tables; or, configuration data coming from
* a bootloader.
*
* Other than binding model, current differences from "eeprom" driver are
* that this one handles write access and isn't restricted to 24c02 devices.
* It also handles larger devices (32 kbit and up) with two-byte addresses,
* which won't work on pure SMBus systems.
*/
struct at24_data {
struct at24_platform_data chip;
struct memory_accessor macc;
int use_smbus;
/*
* Lock protects against activities from other Linux tasks,
* but not from changes by other I2C masters.
*/
struct mutex lock;
struct bin_attribute bin;
u8 *writebuf;
unsigned write_max;
unsigned num_addresses;
/*
* Some chips tie up multiple I2C addresses; dummy devices reserve
* them for us, and we'll use them with SMBus calls.
*/
struct i2c_client *client[];
};
/*
* This parameter is to help this driver avoid blocking other drivers out
* of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
* clock, one 256 byte read takes about 1/43 second which is excessive;
* but the 1/170 second it takes at 400 kHz may be quite reasonable; and
* at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
*
* This value is forced to be a power of two so that writes align on pages.
*/
static unsigned io_limit = 128;
module_param(io_limit, uint, 0);
MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
/*
* Specs often allow 5 msec for a page write, sometimes 20 msec;
* it's important to recover from write timeouts.
*/
static unsigned write_timeout = 25;
module_param(write_timeout, uint, 0);
MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
#define AT24_SIZE_BYTELEN 5
#define AT24_SIZE_FLAGS 8
#define AT24_BITMASK(x) (BIT(x) - 1)
/* create non-zero magic value for given eeprom parameters */
#define AT24_DEVICE_MAGIC(_len, _flags) \
((1 << AT24_SIZE_FLAGS | (_flags)) \
<< AT24_SIZE_BYTELEN | ilog2(_len))
static const struct i2c_device_id at24_ids[] = {
/* needs 8 addresses as A0-A2 are ignored */
{ "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
/* old variants can't be handled with this generic entry! */
{ "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
{ "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
/* spd is a 24c02 in memory DIMMs */
{ "spd", AT24_DEVICE_MAGIC(2048 / 8,
AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
{ "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
/* 24rf08 quirk is handled at i2c-core */
{ "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
{ "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
{ "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
{ "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
{ "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
{ "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
{ "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
{ "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
{ "at24", 0 },
{ /* END OF LIST */ }
};
MODULE_DEVICE_TABLE(i2c, at24_ids);
/*-------------------------------------------------------------------------*/
/*
* This routine supports chips which consume multiple I2C addresses. It
* computes the addressing information to be used for a given r/w request.
* Assumes that sanity checks for offset happened at sysfs-layer.
*/
static struct i2c_client *at24_translate_offset(struct at24_data *at24,
unsigned *offset)
{
unsigned i;
if (at24->chip.flags & AT24_FLAG_ADDR16) {
i = *offset >> 16;
*offset &= 0xffff;
} else {
i = *offset >> 8;
*offset &= 0xff;
}
return at24->client[i];
}
static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
unsigned offset, size_t count)
{
struct i2c_msg msg[2];
u8 msgbuf[2];
struct i2c_client *client;
unsigned long timeout, read_time;
int status, i;
memset(msg, 0, sizeof(msg));
/*
* REVISIT some multi-address chips don't rollover page reads to
* the next slave address, so we may need to truncate the count.
* Those chips might need another quirk flag.
*
* If the real hardware used four adjacent 24c02 chips and that
* were misconfigured as one 24c08, that would be a similar effect:
* one "eeprom" file not four, but larger reads would fail when
* they crossed certain pages.
*/
/*
* Slave address and byte offset derive from the offset. Always
* set the byte address; on a multi-master board, another master
* may have changed the chip's "current" address pointer.
*/
client = at24_translate_offset(at24, &offset);
if (count > io_limit)
count = io_limit;
switch (at24->use_smbus) {
case I2C_SMBUS_I2C_BLOCK_DATA:
/* Smaller eeproms can work given some SMBus extension calls */
if (count > I2C_SMBUS_BLOCK_MAX)
count = I2C_SMBUS_BLOCK_MAX;
break;
case I2C_SMBUS_WORD_DATA:
count = 2;
break;
case I2C_SMBUS_BYTE_DATA:
count = 1;
break;
default:
/*
* When we have a better choice than SMBus calls, use a
* combined I2C message. Write address; then read up to
* io_limit data bytes. Note that read page rollover helps us
* here (unlike writes). msgbuf is u8 and will cast to our
* needs.
*/
i = 0;
if (at24->chip.flags & AT24_FLAG_ADDR16)
msgbuf[i++] = offset >> 8;
msgbuf[i++] = offset;
msg[0].addr = client->addr;
msg[0].buf = msgbuf;
msg[0].len = i;
msg[1].addr = client->addr;
msg[1].flags = I2C_M_RD;
msg[1].buf = buf;
msg[1].len = count;
}
/*
* Reads fail if the previous write didn't complete yet. We may
* loop a few times until this one succeeds, waiting at least
* long enough for one entire page write to work.
*/
timeout = jiffies + msecs_to_jiffies(write_timeout);
do {
read_time = jiffies;
switch (at24->use_smbus) {
case I2C_SMBUS_I2C_BLOCK_DATA:
status = i2c_smbus_read_i2c_block_data(client, offset,
count, buf);
break;
case I2C_SMBUS_WORD_DATA:
status = i2c_smbus_read_word_data(client, offset);
if (status >= 0) {
buf[0] = status & 0xff;
buf[1] = status >> 8;
status = count;
}
break;
case I2C_SMBUS_BYTE_DATA:
status = i2c_smbus_read_byte_data(client, offset);
if (status >= 0) {
buf[0] = status;
status = count;
}
break;
default:
status = i2c_transfer(client->adapter, msg, 2);
if (status == 2)
status = count;
}
dev_dbg(&client->dev, "read %zu#%d --> %d (%ld)\n",
count, offset, status, jiffies);
if (status == count)
return count;
/* REVISIT: at HZ=100, this is sloooow */
msleep(1);
} while (time_before(read_time, timeout));
return -ETIMEDOUT;
}
static ssize_t at24_read(struct at24_data *at24,
char *buf, loff_t off, size_t count)
{
ssize_t retval = 0;
if (unlikely(!count))
return count;
/*
* Read data from chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
*/
mutex_lock(&at24->lock);
while (count) {
ssize_t status;
status = at24_eeprom_read(at24, buf, off, count);
if (status <= 0) {
if (retval == 0)
retval = status;
break;
}
buf += status;
off += status;
count -= status;
retval += status;
}
mutex_unlock(&at24->lock);
return retval;
}
static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
struct at24_data *at24;
at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
return at24_read(at24, buf, off, count);
}
/*
* Note that if the hardware write-protect pin is pulled high, the whole
* chip is normally write protected. But there are plenty of product
* variants here, including OTP fuses and partial chip protect.
*
* We only use page mode writes; the alternative is sloooow. This routine
* writes at most one page.
*/
static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
unsigned offset, size_t count)
{
struct i2c_client *client;
struct i2c_msg msg;
ssize_t status;
unsigned long timeout, write_time;
unsigned next_page;
/* Get corresponding I2C address and adjust offset */
client = at24_translate_offset(at24, &offset);
/* write_max is at most a page */
if (count > at24->write_max)
count = at24->write_max;
/* Never roll over backwards, to the start of this page */
next_page = roundup(offset + 1, at24->chip.page_size);
if (offset + count > next_page)
count = next_page - offset;
/* If we'll use I2C calls for I/O, set up the message */
if (!at24->use_smbus) {
int i = 0;
msg.addr = client->addr;
msg.flags = 0;
/* msg.buf is u8 and casts will mask the values */
msg.buf = at24->writebuf;
if (at24->chip.flags & AT24_FLAG_ADDR16)
msg.buf[i++] = offset >> 8;
msg.buf[i++] = offset;
memcpy(&msg.buf[i], buf, count);
msg.len = i + count;
}
/*
* Writes fail if the previous one didn't complete yet. We may
* loop a few times until this one succeeds, waiting at least
* long enough for one entire page write to work.
*/
timeout = jiffies + msecs_to_jiffies(write_timeout);
do {
write_time = jiffies;
if (at24->use_smbus) {
status = i2c_smbus_write_i2c_block_data(client,
offset, count, buf);
if (status == 0)
status = count;
} else {
status = i2c_transfer(client->adapter, &msg, 1);
if (status == 1)
status = count;
}
dev_dbg(&client->dev, "write %zu#%d --> %zd (%ld)\n",
count, offset, status, jiffies);
if (status == count)
return count;
/* REVISIT: at HZ=100, this is sloooow */
msleep(1);
} while (time_before(write_time, timeout));
return -ETIMEDOUT;
}
static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
size_t count)
{
ssize_t retval = 0;
if (unlikely(!count))
return count;
/*
* Write data to chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
*/
mutex_lock(&at24->lock);
while (count) {
ssize_t status;
status = at24_eeprom_write(at24, buf, off, count);
if (status <= 0) {
if (retval == 0)
retval = status;
break;
}
buf += status;
off += status;
count -= status;
retval += status;
}
mutex_unlock(&at24->lock);
return retval;
}
static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
struct at24_data *at24;
at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
return at24_write(at24, buf, off, count);
}
/*-------------------------------------------------------------------------*/
/*
* This lets other kernel code access the eeprom data. For example, it
* might hold a board's Ethernet address, or board-specific calibration
* data generated on the manufacturing floor.
*/
static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
off_t offset, size_t count)
{
struct at24_data *at24 = container_of(macc, struct at24_data, macc);
return at24_read(at24, buf, offset, count);
}
static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf,
off_t offset, size_t count)
{
struct at24_data *at24 = container_of(macc, struct at24_data, macc);
return at24_write(at24, buf, offset, count);
}
/*-------------------------------------------------------------------------*/
#ifdef CONFIG_OF
static void at24_get_ofdata(struct i2c_client *client,
struct at24_platform_data *chip)
{
const __be32 *val;
struct device_node *node = client->dev.of_node;
if (node) {
if (of_get_property(node, "read-only", NULL))
chip->flags |= AT24_FLAG_READONLY;
val = of_get_property(node, "pagesize", NULL);
if (val)
chip->page_size = be32_to_cpup(val);
}
}
#else
static void at24_get_ofdata(struct i2c_client *client,
struct at24_platform_data *chip)
{ }
#endif /* CONFIG_OF */
static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
{
struct at24_platform_data chip;
bool writable;
int use_smbus = 0;
struct at24_data *at24;
int err;
unsigned i, num_addresses;
kernel_ulong_t magic;
if (client->dev.platform_data) {
chip = *(struct at24_platform_data *)client->dev.platform_data;
} else {
if (!id->driver_data) {
err = -ENODEV;
goto err_out;
}
magic = id->driver_data;
chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
magic >>= AT24_SIZE_BYTELEN;
chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
/*
* This is slow, but we can't know all eeproms, so we better
* play safe. Specifying custom eeprom-types via platform_data
* is recommended anyhow.
*/
chip.page_size = 1;
/* update chipdata if OF is present */
at24_get_ofdata(client, &chip);
chip.setup = NULL;
chip.context = NULL;
}
if (!is_power_of_2(chip.byte_len))
dev_warn(&client->dev,
"byte_len looks suspicious (no power of 2)!\n");
if (!chip.page_size) {
dev_err(&client->dev, "page_size must not be 0!\n");
err = -EINVAL;
goto err_out;
}
if (!is_power_of_2(chip.page_size))
dev_warn(&client->dev,
"page_size looks suspicious (no power of 2)!\n");
/* Use I2C operations unless we're stuck with SMBus extensions. */
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
if (chip.flags & AT24_FLAG_ADDR16) {
err = -EPFNOSUPPORT;
goto err_out;
}
if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
use_smbus = I2C_SMBUS_I2C_BLOCK_DATA;
} else if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_WORD_DATA)) {
use_smbus = I2C_SMBUS_WORD_DATA;
} else if (i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
use_smbus = I2C_SMBUS_BYTE_DATA;
} else {
err = -EPFNOSUPPORT;
goto err_out;
}
}
//???????????????
if (chip.flags & AT24_FLAG_TAKE8ADDR)
num_addresses = 8;
else
num_addresses = DIV_ROUND_UP(chip.byte_len, (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
at24 = kzalloc(sizeof(struct at24_data) + num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
if (!at24) {
err = -ENOMEM;
goto err_out;
}
mutex_init(&at24->lock);
at24->use_smbus = use_smbus;
at24->chip = chip;
at24->num_addresses = num_addresses;
/*
* Export the EEPROM bytes through sysfs, since that's convenient.
* By default, only root should see the data (maybe passwords etc)
*/
sysfs_bin_attr_init(&at24->bin);
at24->bin.attr.name = "eeprom";
at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
at24->bin.read = at24_bin_read;
at24->bin.size = chip.byte_len;
at24->macc.read = at24_macc_read;
writable = !(chip.flags & AT24_FLAG_READONLY);
if (writable) {
if (!use_smbus || i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
unsigned write_max = chip.page_size;
at24->macc.write = at24_macc_write;
at24->bin.write = at24_bin_write;
at24->bin.attr.mode |= S_IWUSR;
if (write_max > io_limit)
write_max = io_limit;
if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
write_max = I2C_SMBUS_BLOCK_MAX;
at24->write_max = write_max;
/* buffer (data + address at the beginning) */
at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL);
if (!at24->writebuf) {
err = -ENOMEM;
goto err_struct;
}
} else {
dev_warn(&client->dev,
"cannot write due to controller restrictions.");
}
}
at24->client[0] = client;
/* use dummy devices for multiple-address chips */
for (i = 1; i < num_addresses; i++) {
at24->client[i] = i2c_new_dummy(client->adapter,
client->addr + i);
if (!at24->client[i]) {
dev_err(&client->dev, "address 0x%02x unavailable\n",
client->addr + i);
err = -EADDRINUSE;
goto err_clients;
}
}
err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
if (err)
goto err_clients;
i2c_set_clientdata(client, at24);
dev_info(&client->dev, "%zu byte %s EEPROM, %s, %u bytes/write\n", at24->bin.size, client->name,
writable ? "writable" : "read-only", at24->write_max);
if (use_smbus == I2C_SMBUS_WORD_DATA ||
use_smbus == I2C_SMBUS_BYTE_DATA) {
dev_notice(&client->dev, "Falling back to %s reads, "
"performance will suffer\n", use_smbus ==
I2C_SMBUS_WORD_DATA ? "word" : "byte");
}
/* export data to kernel code */
if (chip.setup)
chip.setup(&at24->macc, chip.context);
return 0;
err_clients:
for (i = 1; i < num_addresses; i++)
if (at24->client[i])
i2c_unregister_device(at24->client[i]);
kfree(at24->writebuf);
err_struct:
kfree(at24);
err_out:
dev_dbg(&client->dev, "probe error %d\n", err);
return err;
}
/*-------------------------------------------------------------------------*/
static int __devexit at24_remove(struct i2c_client *client)
{
struct at24_data *at24;
int i;
at24 = i2c_get_clientdata(client);
sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
for (i = 1; i < at24->num_addresses; i++)
i2c_unregister_device(at24->client[i]);
kfree(at24->writebuf);
kfree(at24);
return 0;
}
/*-------------------------------------------------------------------------*/
static struct i2c_driver at24_driver = {
.driver = {
.name = "at24",
.owner = THIS_MODULE,
},
.probe = at24_probe,
.remove = __devexit_p(at24_remove),
.id_table = at24_ids,
};
static int __init at24_init(void)
{
if (!io_limit) {
pr_err("at24: io_limit must not be 0!\n");
return -EINVAL;
}
io_limit = rounddown_pow_of_two(io_limit);
return i2c_add_driver(&at24_driver);
}
module_init(at24_init);
static void __exit at24_exit(void)
{
i2c_del_driver(&at24_driver);
}
module_exit(at24_exit);
MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
MODULE_AUTHOR("David Brownell and Wolfram Sang");
MODULE_LICENSE("GPL");
These are snippets from board file:
static struct i2c_board_info __initdata am335x_i2c0_boardinfo[] = {
{
/* Baseboard board EEPROM */
I2C_BOARD_INFO("24c256", BASEBOARD_I2C_ADDR),
.platform_data = &am335x_baseboard_eeprom_info,
},
.
.
static struct at24_platform_data am335x_baseboard_eeprom_info = {
.byte_len = (256*1024) / 8,
.page_size = 64,
.flags = AT24_FLAG_ADDR16,
.setup = am335x_evm_setup,
.context = (void *)NULL,
};
static void am335x_evm_setup(struct memory_accessor *mem_acc, void *context)
{
int ret;
char tmp[10];
struct device *mpu_dev;
/* 1st get the MAC address from EEPROM */
ret = mem_acc->read(mem_acc, (char *)&am335x_mac_addr,
EEPROM_MAC_ADDRESS_OFFSET, sizeof(am335x_mac_addr));
.
.
.
How can i read from/write into eeprom content from user space.
Should i use sysfs? What should i do?
EEPROM:
It's part of setting the MAC and serial number, but the only way to know if the EEPROM is working is to read its content.
$ cat /sys/bus/i2c/devices/2-0057/eeprom | hexdump -C

Memory allocation of struct in C++

My struct is as follows:
typedef struct KeypointSt {
float row, col;
float scale, ori;
unsigned char *descrip; /* Vector of descriptor values */
struct KeypointSt *next;
} *Keypoint;
The following is a part of a code in C. How can I translate it to C++, considering allocation and de-allocation of heap.
Keypoint k, keys = NULL;
for (i = 0; i < num; i++) {
/* Allocate memory for the keypoint. */
k = (Keypoint) malloc(sizeof(struct KeypointSt));
k->next = keys;
keys = k;
k->descrip = malloc(len);
for (j = 0; j < len; j++) {
k->descrip[j] = (unsigned char) val;
}
}
One possible way of converting to C++ is:
#include <cstring> // memset()
typedef struct KeypointSt
{
float row, col;
float scale, ori;
size_t len;
unsigned char *descrip; /* Vector of descriptor values */
KeypointSt *next;
KeypointSt(int p_len, int p_val) : row(0.0), col(0.0), scale(0.0),
ori(0.0), len(p_len),
descrip(new unsigned char[len]), next(0)
{ memset(descrip, len, p_val); }
~KeypointSt() { delete descrip; }
} *Keypoint;
extern KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val);
extern void free_keypoints(KeypointSt *list);
KeypointSt *init_keypoints(size_t num, size_t len, unsigned char val)
{
KeypointSt *keys = NULL;
for (size_t i = 0; i < num; i++)
{
/* Allocate memory for the keypoint. */
KeypointSt *k = new KeypointSt(len, val);
k->next = keys;
keys = k;
}
return keys;
}
void free_keypoints(KeypointSt *list)
{
while (list != 0)
{
KeypointSt *next = list->next;
delete list;
list = next;
}
}
int main(void)
{
KeypointSt *keys = init_keypoints(4, 5, 6);
free_keypoints(keys);
return 0;
}
The only reason I've kept the typedef in place is because you have existing code; the C++ code would be better using KeypointSt * everywhere — or renaming the structure tag to Keypoint and using Keypoint * in place of your original Keypoint. I don't like non-opaque types where the typedef conceals a pointer. If I see a declaration XYZ xyz;, and it is a structure or class type, I expect to use xyz.pqr and not xyz->pqr.
We can debate code layout of the constructor code, the absence of a default constructor (no arrays), and the absence of a copy constructor and an assignment operator (both needed because of the allocation for descrip). The code of init_keypoints() is not exception safe; a memory allocation failure will leak memory. Fixing that is left as an exercise (it isn't very hard, I think, but I don't claim exception-handling expertise). I've not attempted to consider any extra requirements imposed by C++11. Simply translating from C to C++ is 'easy' until you look at the extra demands that C++ makes — demands that make your life easier in the long run, but at a short-term cost in pain.

Linux kernel socket programming:sendmsg function msg address can not access

I want to send a message with socket->ops->sendmsg() in kernel module. One of the arguments of func sendmsg struct msghdr has a pointer msg_iov to the send buffer.
But except NULL , whatever buffer address I assign to the msg_iov the sendmsg() will return a EFAULT error to me. That means the address I assigned to the pointer can not be accessed.
So please help me and thank you very much.
P.S: here is part of my code. I omitted the irrelevant codes.
struct iovec vec;
char *buff = (char *)kmalloc(7, GFP_KERNEL);
unsigned long user_addr=0;
size_t count = 16;
buff[0] = 'H';
buff[1] = 'e';
buff[2] = 'l';
buff[3] = 'l';
buff[4] = 'o';
buff[5] = '\n';
buff[6] = '\0';
down_write(&current->mm->mmap_sem);
user_addr = do_mmap_pgoff(NULL, 0, count, PROT_READ|PROT_WRITE,\
MAP_PRIVATE|MAP_ANONYMOUS, 0);
up_write(&current->mm->mmap_sem);
__copy_to_user((void*)user_addr, (void*)buff, 7);
vec.iov_base = (void*)user_addr;
vec.iov_len = strlen( (char*)user_addr );
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_flags = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
error = NewSock->ops->sendmsg(&kiocb,NewSock, &msg, 7);
do_munmap( &current->mm, user_addr, strlen( (char*) user_addr));
You can't call this function with data that lives in the kernel's address space:
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
/* ... */
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
/* ... */
if ((err = skb_add_data(skb, from, copy)) != 0)
goto do_fault;
static inline int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
/* ... */
__wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy),
copy, 0, &err);
#define csum_and_copy_from_user csum_partial_copy_from_user
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
/* ... */
if (!likely(access_ok(VERIFY_READ, src, len)))
goto out_err;
/* ... */
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
access_ok on x86 checks for userspace pointers:
/**
* access_ok: - Checks if a user space pointer is valid
* #type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
* %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
* to write to a block, it is always safe to read from it.
* #addr: User space pointer to start of block to check
* #size: Size of block to check
*
* Context: User context only. This function may sleep.
*
* Checks if a pointer to a block of memory in user space is valid.
*
* Returns true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*
* Note that, depending on architecture, this function probably just
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
The comment on __range_not_ok() looks similar:
/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
*
* This is equivalent to the following test:
* (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
*
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
While I followed the x86-specific code paths whenever architecture-specific code was involved, I expect other architectures to enforce this behavior to the best of their respective abilities.
It looks like you cannot call sendmsg() on in-kernel struct iovec memory.

Resources