How to splice onto socketfd? - linux

The manual mentioned splice() can transfer data between two arbitrary filedescriptors, also onto a socketfd. This works if the file is send at once. Therefore the filesize has to be lower than PIPE_BUF_SZ (=65536).
But, how to handle bigger files? I want to understand the difference to sendfile() syscall. How would you rewrite the sendfile() syscall?
The second splice returns with Invalid argument. I guess it is because the socketfd is not seekable.
size_t len = 800000; //e.g.
static int do_copy(int in_fd, int out_fd)
{
loff_t in_off = 0, out_off = 0;
static int buf_size = 65536;
off_t len;
int filedes[2];
int err = -1;
if(pipe(filedes) < 0) {
perror("pipe:");
goto out;
}
while(len > 0) {
if(buf_size > len) buf_size = len;
/* move to pipe buffer. */
err = splice(in_fd, &in_off, filedes[1], NULL, buf_size, SPLICE_F_MOVE | SPLICE_F_MORE);
if(err < 0) {
perror("splice:");
goto out_close;
}
/* move from pipe buffer to out_fd */
err = splice(filedes[0], NULL, out_fd, &out_off, buf_size, SPLICE_F_MOVE | SPLICE_F_MORE);
if(err < 0) {
perror("splice2:");
goto out_close;
}
len -= buf_size;
}
err = 0;
out_close:
close(filedes[0]);
close(filedes[1]);
out:
return err;
}

sendfile() systemcall does not check if the filedescriptor is seekable. The only check onto that fd is, if you can read (FMODE_READ) onto the fd.
splice() does some more checks. Among others, if the fd is seekable (FMODE_PREAD) / (FMODE_PWRITE).
That's why sendfile works, but splice won't.

Related

Extracting filedescriptors from fd_set

I have to implement a conversion tool between two different protocols.
It should be a relatively simply loop, triggered by filedescriptor events.
The protocol APIs however:
st API: Is meant for use with select(), it expose a GetFD() method that will throw it fd's into the given fd_set using FD_SET
nd API: Only works with poll() (it is ZMQ)
Is there any way to extract the fds from a fd_set, without calling select()? (Preferably portable)
I have tried looking into the source of the fd_set structure, but it is not really readable and probably a hint that you should not touch it through anything than the 4 macro, FD_CLR/SET/ISSET/ZERO.
//Pseudo-code of what i hope to achieve
// Get fd's from API1
fd_set readfds;
FD_ZERO( & readfds );
int max = api1.GetFd(readfds);
struct zmq_pollitem_t poll_items[MAX_COUNT];
int fd_count = 0;
convert_fdset_to_pollitem( readfds , poll_items , fd_count ); //what i need
poll_items[fd_count].fd = api2.GetFD();
poll_items[fd_count].socket = api2.GetSocket();
fd_count++;
const uint32_t timeout_msec = 500;
int ret = zmq_poll( poll_items , fd_count , timeout_msec);
An example of a select() function. You can compare the return then with each of your different protocol sockets - e.g. if (select_socket(s1,s2) == protocol_1_sd). I am using that function, to handle http and https sockets.
// selecting corresponding socket from FD_SET
int select_socket(int sock_1, int sock_2) {
fd_set read;
struct timeval timeout;
int ret = -1;
FD_ZERO(&read);
FD_SET(sock_1, &read);
FD_SET(sock_2, &read);
timeout.tv_sec= 10;
timeout.tv_usec = 0;
if (select(FD_SETSIZE, &read, NULL, NULL, &timeout ) < 0)
perror("select");
for (int i = 0; i < FD_SETSIZE; i++) {
if (FD_ISSET(i, &read)) {
if (i == sock_1)
ret = sock_1;
else if (i == sock_2)
ret = sock_2;
else {
FD_CLR(i, &read);
ret = -1;
}
}
}
return ret;
}

How to use the dmafd from iotcl VIDIOC_EXPBUF with sendfile?

The ioctl VIDIOC_EXPBUF exports the dma memory as a filedescriptor. I want so transfer these memory over the network using sendfile (because it is really fast compared to send() or write()).
But sendfile() does not work on these filedescriptor. sendfile() returns with Error -1 and Errno is: Illegal seek.
How to get this working?
int buffer_export(int *vfd, int index, int *dmafd)
{
struct v4l2_exportbuffer expbuf;
memset(&expbuf, 0, sizeof(expbuf));
expbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
expbuf.index = index;
if (ioctl(*vfd, VIDIOC_EXPBUF, &expbuf) == -1) {
perror("VIDIOC_EXPBUF");
return -1;
}
*dmafd = expbuf.fd;
return 0;
}
Here the transfer code.
int *vfd;
//Capture Image
int dmafd, err, bytes_send;
int frame_size = 721920;
off_t offset = 0;
err = buffer_export( vfd, 0, &dmafd);
// returns 0
err = sendfile(socket_fd, dmafd ,&offset, frame_size);
//returns with -1
Update:
Browsing the sendfile source I came across the error occurs while checking the input fd.
retval = -ESPIPE; //=-29
if( !(in.file->f_mode & FMODE_PREAD) )
goto in_fd; //error
f_mode=5; which is
/* file is open for reading */
#define FMODE_READ ((__force fmode_t)0x1)
/* file is open for writing */
#define FMODE_WRITE ((__force fmode_t)0x2)
/* file is seekable */
#define FMODE_LSEEK ((__force fmode_t)0x4)
/* file can be accessed using pread */
#define FMODE_PREAD ((__force fmode_t)0x8)
/* file can be accessed using pwrite */
fails

Share socket between unrelated processes like systemd

There are multiple questions and answers how to do it, but both processes must cooperate
Can I open a socket and pass it to another process in Linux
Share socket (listen) between unrelated processes
Portable way to pass file descriptor between different processes
etc.
In systemd, there is feature socket activation, you just have opened and prepared file descriptotr in your process without any cooperation. You can just use file descriptor 3 (SD_LISTEN_FDS_START) and it is already activated socket by systemd.
How does systemd do this? I can't find any relevant source code.
Edit:
I know, how to write systemd socket activated service, but I'm interested in the process of passing file descriptor to my service form the systemd point of view.
E.g. if I would like to write my own socket activator, that behaves exactly as systemd.
systemd is not unrelated to the processes who share the sockets. systemd starts up and supervises the entire system, so it can pass the socket file descriptors during exec() easily. systemd listens on behalf of the services and whenever a connection would come in, an instance of the respective service would be spawned. Here is the implementation:
int main(int argc, char **argv, char **envp) {
int r, n;
int epoll_fd = -1;
log_parse_environment();
log_open();
r = parse_argv(argc, argv);
if (r <= 0)
return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
r = install_chld_handler();
if (r < 0)
return EXIT_FAILURE;
n = open_sockets(&epoll_fd, arg_accept);
if (n < 0)
return EXIT_FAILURE;
if (n == 0) {
log_error("No sockets to listen on specified or passed in.");
return EXIT_FAILURE;
}
for (;;) {
struct epoll_event event;
r = epoll_wait(epoll_fd, &event, 1, -1);
if (r < 0) {
if (errno == EINTR)
continue;
log_error_errno(errno, "epoll_wait() failed: %m");
return EXIT_FAILURE;
}
log_info("Communication attempt on fd %i.", event.data.fd);
if (arg_accept) {
r = do_accept(argv[optind], argv + optind, envp, event.data.fd);
if (r < 0)
return EXIT_FAILURE;
} else
break;
}
...
}
Once a connection comes in, it will call do_accept():
static int do_accept(const char* name, char **argv, char **envp, int fd) {
_cleanup_free_ char *local = NULL, *peer = NULL;
_cleanup_close_ int fd_accepted = -1;
fd_accepted = accept4(fd, NULL, NULL, 0);
if (fd_accepted < 0)
return log_error_errno(errno, "Failed to accept connection on fd:%d: %m", fd);
getsockname_pretty(fd_accepted, &local);
getpeername_pretty(fd_accepted, true, &peer);
log_info("Connection from %s to %s", strna(peer), strna(local));
return fork_and_exec_process(name, argv, envp, fd_accepted);
}
finally, it calls execvpe(name, argv, envp); and wrap the fd up in envp. There is a trick in it, if fd_accepted is not equal to SD_LISTEN_FDS_START, it call dup2() to makes SD_LISTEN_FDS_START be the copy of fd_accepted:
if (start_fd != SD_LISTEN_FDS_START) {
assert(n_fds == 1);
r = dup2(start_fd, SD_LISTEN_FDS_START);
if (r < 0)
return log_error_errno(errno, "Failed to dup connection: %m");
safe_close(start_fd);
start_fd = SD_LISTEN_FDS_START;
}
So you can just use file descriptor 3 like this in your application, sd_listen_fds will parse the environment variable LISTEN_FDS passed from envp:
int listen_sock;
int fd_count = sd_listen_fds(0);
if (fd_count == 1) { // assume one socket only
listen_sock = SD_LISTEN_FDS_START; // SD_LISTEN_FDS_START is a macro defined to 3
} else {
// error
}
struct sockaddr addr;
socklen_t addrlen;
while (int client_sock = accept(listen_sock, &addr, &addrlen)) {
// do something
}

Error when reading from Linux FIFO

In the embedded application I'm working on we have a serial port abstraction, and I'm currently working on a simulated variant of said abstraction to use when you are not running on the 'real' hardware. I'm using FIFO files for this, as you can then plug in whathever software you want to communicate with the actual application but I'm having trouble with the "read" function, which flags that you gave it an invalid fd. Though I have used debugging tools to verify that the fd passed to it is the same as has been opened earlier so it should be valid. I cannot find any cause for this problem.
FIFO files are opened through this function:
int createOpenFifo(const std::string& path, int flags)
{
int fd = open(path.c_str(), flags);
if (fd < 0)
{
mkfifo(path.c_str(), 0777);
fd = open(path.c_str(), flags);
if (fd < 0)
{
return -1;
}
}
return fd;
}
And the FIFOs are then written to using the following function:
int write_serial(handle_t handle, size_t length, const uint8_t* pData)
{
SerialFifoData* data = static_cast<SerialFifoData*>(handle);
size_t written = 0;
while (written < length)
{
int result = write(data->writeFd, pData + written, length - written);
if (result < 0)
{
return -1;
}
written += result;
}
return 0;
}
And finally read from using this function:
int read_serial(handle_t handle, size_t buffer_size, uint8_t* pBuffer, size_t* bytes_read)
{
SerialFifoData* data = static_cast<SerialFifoData*>(handle);
int return_val = read(data->readFd, pBuffer, buffer_size);
if (return_val < 0)
{
if (errno == EAGAIN || errno == EWOULDBLOCK) // Non-blocking, no data
// which flag is raised
// varies between POSIX
// implementations
{
*bytes_read = 0;
return -2;
}
return -1;
}
*bytes_read = return_val;
return 0;
}
I have verified that each function recieves correct input, and the read and write calls are nearly identical to those used for the actual serial port code (the only difference is how the FD is extracted from the handle) where they work just fine.

Why msync() doesn't change the st_mtime of file

I have a question when I use msync. Thank you very much for your help!
In brief, I mmap file A, and modify it, and the msync, but the st_mtime doesn't change. even munmap file A and exit the process, the st_mtime is also unchanged.
The following is the code.
int main() {
const char *file_name = "txt";
int ret = -1;
int fd = open(file_name, O_RDWR, 0666);
if (fd < 0) {
printf("FATAL, Fail to open file[%s]\n", file_name);
return -1;
}
struct stat st;
fstat(fd, &st);
void * buffer = mmap(NULL, st.st_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
if (MAP_FAILED == buffer) {
printf("FATAL, Fail to mmap, file[%s], size[%d]\n",
file_name, st.st_size);
return -1;
}
printf("m_time[%d]\n", st.st_mtime);
for (int i=0; i<10;i++) {
int *ptr = (int *)buffer;
printf("%d\n", *ptr);
*ptr += 1;
sleep(1);
ret = msync(buffer, st.st_size, MS_ASYNC);
if (0 != ret) {
printf("FATAL, Fail to msync, file[%s], size[%d]\n",
file_name, st.st_size);
return -1;
}
fstat(fd, &st);
printf("m_time[%d]\n", st.st_mtime);
}
ret = munmap(buffer, st.st_size);
if (0 != ret) {
printf("FATAL, Fail to munmap, file[%s], size[%d]\n",
file_name, st.st_size);
return -1;
}
fstat(fd, &st);
printf("m_time[%d]\n", st.st_mtime);
fsync(fd);
fstat(fd, &st);
printf("m_time[%d]\n", st.st_mtime);
return 0;
}
The relevant excerpt of the mmap manpage is:
The st_ctime and st_mtime field for a file mapped with PROT_WRITE and MAP_SHARED will be updated after a write to the mapped region, and before a subsequent msync(2) with the MS_SYNC or MS_ASYNC flag, if one occurs.
That means that, in your program, st_mtime might be updated anytime between the line which reads
*ptr += 1`
and and the line which reads
ret = msync(buffer, st.st_size, MS_ASYNC);
Your sleep(1) is in between those lines, which means that by the time the sleep occurs, the st_mtime might have already been modified. So when you fstat the file a second time, you might be getting the same value as when you statted it the first time, just because not enough time has elapsed.
Try putting your sleep(1) before the *ptr += 1. This should guarantee that at least one second elapses between the original fstat and the update to the st_mtime.
Looks like a fix is on the way.
http://thread.gmane.org/gmane.linux.kernel/1549524/focus=55700

Resources