Opening sockets to the Xserver directly - linux

I'm looking to understand how Linux desktop environments work with the X server. I was reading that most window managers don't open sockets directly; instead they use Xlib bindings for whichever language the WM is being written in, or the higher-level XCB bindings. But I would like to know: what are the advantages of opening a socket directly to the X server?

I'm not going to tell you the advantages, but I'm going to tell you how to do it.
Some time ago I found an example of someone doing this, and I can't find it anymore, but here's more or less his code. It's quite interesting to know what's going on under the hood of client libraries like XCB.
In order to send a request to the X server, you need:
a socket connection to the X server
the opcode of the request
data associated to the request
The API to open a socket, create a graphics context, create a window, and map a window can look something like:
x11_connection_t connection = {0};
x11_init_connection(&connection);
x11_state_t state;
state.socket_fd = connection.socket_fd;
state.id_base = connection.setup->id_base;
state.root_window = connection.root[0].window_id;
state.root_visual = connection.root[0].visual_id;
state.root_depth = connection.root[0].depth;
state.gcontext = x11_init_gc(&state, X11_GC_GRAPHICS_EXPOSURES, (u32[]){X11_EXPOSURES_NOT_ALLOWED});
state.window = x11_init_window(&state, 0,0, WIDTH,HEIGHT, state.root_window, state.root_visual,
X11_CW_BACK_PIXEL | X11_CW_EVENT_MASK,
(u32[]){XCB_RGB_BLUE, X11_EVENT_MASK_KEY_PRESS | X11_EVENT_MASK_POINTER_MOTION}); // X11_EVENT_MASK_KEY_PRESS | X11_EVENT_MASK_POINTER_MOTION | X11_EVENT_MASK_STRUCTURE_NOTIFY | X11_EVENT_MASK_EXPOSURE
x11_map_window(&state, state.window);
But first we need to implement it.
For illustration, here are the first thirteen X protocol opcodes:
// Core X protocol request opcodes (the first thirteen of roughly 120).
// The opcode occupies the low byte of the first 32-bit word of every request.
#define X11_OPCODE_CREATE_WINDOW 1
#define X11_OPCODE_CHANGE_WINDOW_ATTRIBUTES 2
#define X11_OPCODE_GET_WINDOW_ATTRIBUTES 3
#define X11_OPCODE_DESTROY_WINDOW 4
#define X11_OPCODE_DESTROY_SUBWINDOWS 5
#define X11_OPCODE_CHANGE_SAVE_SET 6
#define X11_OPCODE_REPARENT_WINDOW 7
#define X11_OPCODE_MAP_WINDOW 8
#define X11_OPCODE_MAP_SUBWINDOWS 9
#define X11_OPCODE_UNMAP_WINDOW 10
#define X11_OPCODE_UNMAP_SUBWINDOWS 11
#define X11_OPCODE_CONFIGURE_WINDOW 12
#define X11_OPCODE_CIRCULATE_WINDOW 13
You can open the socket with a function like the following,
// Open a Unix-domain stream socket to the X server's display-:0 socket.
// Returns the connected fd, or -1 on failure (errno set by socket/connect).
// NOTE(review): the path is hard-coded to display :0; a complete client
// would derive it from $DISPLAY.
int x11_init_socket(){
    int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0); // Create the socket!
    if(socket_fd < 0)
        return -1; // could not create the socket at all
    struct sockaddr_un serv_addr = {0};
    serv_addr.sun_family = AF_UNIX;
    strcpy(serv_addr.sun_path, "/tmp/.X11-unix/X0");
    if(connect(socket_fd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0){
        close(socket_fd); // don't leak the descriptor on a failed connect
        return -1;
    }
    return socket_fd;
}
Then you need the handshake,
// Perform the X11 connection handshake and parse the server's setup reply.
// Fills connection->header and the setup/format/root/depth/visual pointers.
// Returns the reply's success byte: non-zero on success, 0 on failure.
// NOTE(review): each read() is assumed to return the full reply in one call;
// on a stream socket a short read would leave the parse below running on
// partial data -- a robust version would loop until all bytes arrive.
int x11_init_connection(x11_connection_t* connection){
connection->socket_fd = x11_init_socket();
x11_connection_request_t request = {0};
request.order = 'l'; // Little endian!
request.major = 11;
request.minor = 0; // Version 11.0
write(connection->socket_fd, &request, sizeof(x11_connection_request_t)); // Send request
read(connection->socket_fd, &connection->header, sizeof(x11_connection_reply_t)); // Read reply header
if(connection->header.success == 0) return connection->header.success; // Error handling!
// header.length counts the remaining reply in 4-byte words.
// NOTE(review): sbrk() memory is never released; fine for a demo, a leak
// for a library.
connection->setup = sbrk(connection->header.length * 4); // Allocate memory for remainder of data
read(connection->socket_fd, connection->setup, connection->header.length * 4); // Read remainder of data
// Walk the variable-length tail of the setup block. Arithmetic on void* is
// a GCC extension (treated as byte-sized).
void* p = ((void*)connection->setup);
p += sizeof(x11_connection_setup_t) + connection->setup->vendor_length; // Ignore the vendor
// NOTE(review): the protocol pads the vendor string to a 4-byte boundary;
// if vendor_length is not a multiple of 4 these pointers drift -- confirm.
connection->format = p; p += sizeof(x11_pixmap_format_t) * connection->setup->formats; // Align struct with format sections. Move pointer to end of section
connection->root = p; p += sizeof(x11_root_t) * connection->setup->roots; // Align struct with root section(s). Move pointer to end of section
connection->depth = p; p += sizeof(x11_depth_t); // Align depth struct with first depth section. Move pointer to end of section
connection->visual = p; // Align visual with first visual for first depth
return connection->header.success;
}
and a bunch of data structures that the X server likes:
// Wire-format structs for the handshake, written/read verbatim by
// x11_init_connection. Field order and sizes must match the X11 setup
// encoding exactly; u8/u16/u32 are fixed-width integer typedefs.
typedef struct{
u8 order; // byte-order marker: 'l' (little) or 'B' (big)
u8 pad1;
u16 major, minor;
u16 auth_proto, auth_data; // lengths of the auth strings (0 = none here)
u16 pad2;
}x11_connection_request_t;
// Fixed-size head of the server's reply; `length` counts the remaining
// reply in 4-byte words.
typedef struct{
u8 success;
u8 pad1;
u16 major, minor;
u16 length;
}x11_connection_reply_t;
// Fixed-size head of the setup block; followed on the wire by the vendor
// string, `formats` pixmap-format entries, and `roots` screen sections.
typedef struct{
u32 release;
u32 id_base, id_mask; // resource-id base/mask handed to this client
u32 motion_buffer_size;
u16 vendor_length;
u16 request_max;
u8 roots;
u8 formats;
u8 image_order;
u8 bitmap_order;
u8 scanline_unit, scanline_pad;
u8 keycode_min, keycode_max;
u32 pad;
}x11_connection_setup_t;
// One pixmap-format entry from the setup block.
typedef struct{
u8 depth;
u8 bpp;
u8 scanline_pad;
u8 pad1;
u32 pad2;
}x11_pixmap_format_t;
// One screen ("root") section from the setup block.
typedef struct{
u32 window_id; // id of this screen's root window
u32 colormap;
u32 white, black; // white/black pixel values
u32 input_mask;
u16 width, height; // screen size in pixels
u16 width_mm, height_mm; // physical size in millimetres
u16 maps_min, maps_max;
u32 visual_id; // the root window's visual
u8 backing_store;
u8 save_unders;
u8 depth;
u8 depths; // number of depth sections that follow this screen
}x11_root_t;
// One depth section; followed on the wire by `visuals` visual entries.
typedef struct{
u8 depth;
u8 pad1;
u16 visuals;
u32 pad2;
}x11_depth_t;
// One visual entry for a depth section.
typedef struct{
u8 group;
u8 bits;
u16 colormap_entries;
u32 mask_red, mask_green, mask_blue;
u32 pad;
}x11_visual_t;
// Generic 32-byte event record as delivered by the server.
typedef struct{
u8 response_type; /**< Type of the response */
u8 pad0; /**< Padding */
u16 sequence; /**< Sequence number */
u32 pad[7]; /**< Padding */
u32 full_sequence; /**< full sequence */
}x11_generic_event_t;
// Error record (delivered when response_type is 0).
typedef struct{
u8 success;
u8 code; // error code
u16 seq; // sequence number of the failed request
u32 id;
u16 op_major;
u8 op_minor;
u8 pad[21];
}x11_error_t;
// Parsed connection state: the socket plus pointers into the setup block
// read by x11_init_connection (format/root/depth/visual point INTO `setup`,
// they are not separate allocations).
typedef struct{
int socket_fd;
x11_connection_reply_t header;
x11_connection_setup_t* setup;
x11_pixmap_format_t* format;
x11_root_t* root;
x11_depth_t* depth;
x11_visual_t* visual;
}x11_connection_t;
// Minimal per-window state that the request helpers below need.
typedef struct{
int socket_fd;
u32 id_base; // We'll use this to generate 32-bit identifiers!
u32 root_window;
u32 root_visual;
u8 root_depth;
u32 window;
u32 gcontext;
}x11_state_t;
You also need a function to generate 32-bit identifiers:
// Produce a fresh 32-bit X resource id by combining a monotonically
// increasing counter with the server-assigned id base from the setup block.
u32 x11_generate_id(x11_connection_t* connection){
    static u32 next = 0;
    u32 fresh = connection->setup->id_base | next;
    next += 1;
    return fresh;
}
Finally, you need the actual functions that assemble the packets with the requests. The following are helper functions to send X11_OPCODE_CREATE_GC, X11_OPCODE_CREATE_WINDOW, X11_OPCODE_MAP_WINDOW, and X11_OPCODE_PUT_IMAGE.
// Well-known zero-valued constants shared by many core requests.
#define X11_NONE 0x00000000L // X11_NONE is the universal null resource or null atom parameter value for many core X requests
#define X11_COPY_FROM_PARENT 0x00000000L // X11_COPY_FROM_PARENT can be used for many xcb_create_window parameters
#define X11_CURRENT_TIME 0x00000000L // X11_CURRENT_TIME can be used in most requests that take an xcb_timestamp_t
#define X11_NO_SYMBOL 0x00000000L // X11_NO_SYMBOL fills in unused entries in xcb_keysym_t tables
// Values for the X11_GC_GRAPHICS_EXPOSURES graphics-context attribute.
enum x11_exposures_t{
X11_EXPOSURES_NOT_ALLOWED = 0,
X11_EXPOSURES_ALLOWED = 1,
X11_EXPOSURES_DEFAULT = 2
};
// CreateGC value-mask bits. The value list passed to x11_init_gc supplies
// one u32 per set bit, in ascending bit order (same convention as xcb).
enum x11_gc_t{
X11_GC_FUNCTION = 1<<0,
X11_GC_PLANE_MASK = 1<<1,
X11_GC_FOREGROUND = 1<<2,
X11_GC_BACKGROUND = 1<<3,
X11_GC_LINE_WIDTH = 1<<4,
X11_GC_LINE_STYLE = 1<<5,
X11_GC_CAP_STYLE = 1<<6,
X11_GC_JOIN_STYLE = 1<<7,
X11_GC_FILL_STYLE = 1<<8,
X11_GC_FILL_RULE = 1<<9,
X11_GC_TILE = 1<<10,
X11_GC_STIPPLE = 1<<11,
X11_GC_TILE_STIPPLE_ORIGIN_X = 1<<12,
X11_GC_TILE_STIPPLE_ORIGIN_Y = 1<<13,
X11_GC_FONT = 1<<14,
X11_GC_SUBWINDOW_MODE = 1<<15,
X11_GC_GRAPHICS_EXPOSURES = 1<<16,
X11_GC_CLIP_ORIGIN_X = 1<<17,
X11_GC_CLIP_ORIGIN_Y = 1<<18,
X11_GC_CLIP_MASK = 1<<19,
X11_GC_DASH_OFFSET = 1<<20,
X11_GC_DASH_LIST = 1<<21,
X11_GC_ARC_MODE = 1<<22,
};
// Send a CreateGC request and return the new graphics-context id.
// value_mask selects X11_GC_* attributes; value_list supplies one u32 per
// set bit, in ascending bit order.
// NOTE(review): x11_generate_id is declared to take an x11_connection_t*
// but receives the x11_state_t* here -- confirm the intended parameter type.
u32 x11_init_gc(x11_state_t* state, u32 value_mask, u32* value_list){
    u32 gcontext_id = x11_generate_id(state);
    u16 flag_count = popcnt(value_mask);
    u16 length = 4 + flag_count; // request length in 4-byte words
    // Stack buffer instead of the original sbrk()/sbrk(-n) pair: shrinking
    // the break is only safe if nothing else moved it in between, which is
    // impossible to guarantee. 23 defined GC flags -> at most 4+23 words;
    // 4+32 leaves headroom for any u32 mask.
    u32 packet[4 + 32];
    // The first u32 of every request is the opcode (low byte) plus the
    // request length in words (high 16 bits).
    packet[0] = X11_OPCODE_CREATE_GC | (length<<16);
    packet[1] = gcontext_id;
    packet[2] = state->root_window; // drawable the GC is created for
    packet[3] = value_mask;
    for(int i=0; i < flag_count; ++i)
        packet[4 + i] = value_list[i];
    write(state->socket_fd, packet, length * 4);
    return gcontext_id;
}
// CreateWindow value-mask bits (same ascending-bit-order value-list
// convention as the GC mask above).
enum x11_cw_t{
X11_CW_BACK_PIXMAP = 1<<0,
X11_CW_BACK_PIXEL = 1<<1,
X11_CW_BORDER_PIXMAP = 1<<2,
X11_CW_BORDER_PIXEL = 1<<3,
X11_CW_BIT_GRAVITY = 1<<4,
X11_CW_WIN_GRAVITY = 1<<5,
X11_CW_BACKING_STORE = 1<<6,
X11_CW_BACKING_PLANES = 1<<7,
X11_CW_BACKING_PIXEL = 1<<8,
X11_CW_OVERRIDE_REDIRECT = 1<<9,
X11_CW_SAVE_UNDER = 1<<10,
X11_CW_EVENT_MASK = 1<<11,
X11_CW_DONT_PROPAGATE = 1<<12,
X11_CW_COLORMAP = 1<<13,
X11_CW_CURSOR = 1<<14
};
// Event-mask bits for the X11_CW_EVENT_MASK window attribute.
enum x11_event_mask_t{
X11_EVENT_MASK_NO_EVENT = 0,
X11_EVENT_MASK_KEY_PRESS = 1<<0, // 0x00000001
X11_EVENT_MASK_KEY_RELEASE = 1<<1, // 0x00000002
X11_EVENT_MASK_BUTTON_PRESS = 1<<2, // 0x00000004
X11_EVENT_MASK_BUTTON_RELEASE = 1<<3, // 0x00000008
X11_EVENT_MASK_ENTER_WINDOW = 1<<4, // 0x00000010
X11_EVENT_MASK_LEAVE_WINDOW = 1<<5, // 0x00000020
X11_EVENT_MASK_POINTER_MOTION = 1<<6, // 0x00000040
X11_EVENT_MASK_POINTER_MOTION_HINT = 1<<7, // 0x00000080
X11_EVENT_MASK_BUTTON_1_MOTION = 1<<8, // 0x00000100
X11_EVENT_MASK_BUTTON_2_MOTION = 1<<9, // 0x00000200
X11_EVENT_MASK_BUTTON_3_MOTION = 1<<10, // 0x00000400
X11_EVENT_MASK_BUTTON_4_MOTION = 1<<11, // 0x00000800
X11_EVENT_MASK_BUTTON_5_MOTION = 1<<12, // 0x00001000
X11_EVENT_MASK_BUTTON_MOTION = 1<<13, // 0x00002000
X11_EVENT_MASK_KEYMAP_STATE = 1<<14, // 0x00004000
X11_EVENT_MASK_EXPOSURE = 1<<15, // 0x00008000
X11_EVENT_MASK_VISIBILITY_CHANGE = 1<<16, // 0x00010000
X11_EVENT_MASK_STRUCTURE_NOTIFY = 1<<17, // 0x00020000
X11_EVENT_MASK_RESIZE_REDIRECT = 1<<18, // 0x00040000
X11_EVENT_MASK_SUBSTRUCTURE_NOTIFY = 1<<19, // 0x00080000
X11_EVENT_MASK_SUBSTRUCTURE_REDIRECT = 1<<20, // 0x00100000
X11_EVENT_MASK_FOCUS_CHANGE = 1<<21, // 0x00200000
X11_EVENT_MASK_PROPERTY_CHANGE = 1<<22, // 0x00400000
X11_EVENT_MASK_COLOR_MAP_CHANGE = 1<<23, // 0x00800000
X11_EVENT_MASK_OWNER_GRAB_BUTTON = 1<<24 // 0x01000000
};
#define X11_DEFAULT_BORDER 0
#define X11_DEFAULT_GROUP 0
// Send a CreateWindow request and return the new window id.
// x,y,w,h give the initial geometry; value_mask selects X11_CW_* attributes
// whose values follow in value_list (ascending bit order).
// NOTE(review): x11_generate_id is declared to take an x11_connection_t*
// but receives the x11_state_t* here -- confirm the intended parameter type.
u32 x11_init_window(x11_state_t* state, u16 x, u16 y, u16 w, u16 h, u32 window_parent, u32 visual, u32 value_mask, u32* value_list){
    u32 window_id = x11_generate_id(state);
    u16 flag_count = popcnt(value_mask);
    u16 length = 8 + flag_count; // request length in 4-byte words
    // Stack buffer instead of the original sbrk()/sbrk(-n) pair: shrinking
    // the break is only safe if nothing else moved it in between. 15 defined
    // CW flags -> at most 8+15 words; 8+32 covers any u32 mask.
    u32 packet[8 + 32];
    // First u32 is always the opcode (low byte) plus length in words.
    packet[0] = X11_OPCODE_CREATE_WINDOW | (length<<16);
    packet[1] = window_id;
    packet[2] = window_parent;
    packet[3] = x | (y<<16); // two 16-bit fields packed per word
    packet[4] = w | (h<<16);
    packet[5] = (X11_DEFAULT_BORDER<<16) | X11_DEFAULT_GROUP;
    packet[6] = visual;
    packet[7] = value_mask;
    for(int i=0;i < flag_count; ++i)
        packet[8 + i] = value_list[i];
    write(state->socket_fd, packet, length * 4);
    return window_id;
}
// Ask the server to map (make visible) the given window.
void x11_map_window(x11_state_t* state, u32 window_id){
    u32 request[2];
    request[0] = X11_OPCODE_MAP_WINDOW | (2<<16); // opcode + length in words
    request[1] = window_id;
    write(state->socket_fd, request, sizeof(request)); // 8 bytes on the wire
}
// Pixel-transfer formats for the PutImage request.
enum x11_image_format_t{
X11_IMAGE_FORMAT_XY_BITMAP = 0,
X11_IMAGE_FORMAT_XY_PIXMAP = 1,
X11_IMAGE_FORMAT_Z_PIXMAP = 2
};
// Send a PutImage request: a 24-byte header followed by the raw pixels.
// NOTE(review): state->tile_width / state->tile_height are NOT members of
// the x11_state_t defined above -- this snippet assumes an extended state
// struct. X11_OPCODE_PUT_IMAGE is likewise not among the opcodes defined
// above. Confirm against the full original source.
// NOTE(review): the length computation assumes 32bpp pixels with no row
// padding (one u32 per pixel); the protocol requires the data padded to a
// 4-byte boundary, which holds only under that assumption.
void x11_put_img(x11_state_t* state, u16 x, u16 y, u32* data){
u32 packet[6];
u16 const length = ((state->tile_width * state->tile_height)) + 6;
packet[0] = X11_OPCODE_PUT_IMAGE | (X11_IMAGE_FORMAT_Z_PIXMAP<<8) | (length<<16); // The first `u32` in the packet is always the opcode and the length of the packet!
packet[1] = state->window;
packet[2] = state->gcontext;
packet[3] = state->tile_width | (state->tile_height<<16);
packet[4] = x | (y<<16);
packet[5] = state->root_depth<<8;
write(state->socket_fd, packet, 24); // header first...
write(state->socket_fd, data, state->tile_width * state->tile_height * 4); // ...then the pixel payload
}

I've done it with node/javascript ( https://github.com/sidorares/node-x11/ )
It's not as scary as it seems, and for some particular tasks it might work better than Xlib (non-trivial parallelism, resource dependencies, or indirect GLX), but most of the value in this is probably educational. If you just want to get some work done, there is probably a solution already available, and you don't need to reinvent the wheel.
Starting point would be x11 protocol documentation: https://www.x.org/releases/X11R7.7/doc/xproto/x11protocol.html

The question should rather be "What are the advantages of using Xlib instead of a graphical toolkit like gtk"?
Even to master Xlib, you have to spend months or years! It's so intricate that almost every app that has graphical content to display uses a toolkit instead.
Using a plain socket means speaking the X11 protocol directly, which means you would end up recreating Xlib (no, you wouldn't finish!)
As an analogy, plain socket ~ machine code, xlib ~ assembly code, toolkit ~ higher level language. (notwithstanding that xlib does a little maintenance for you, but I guess it's rather neglectable)

Related

Reading and writing a std::vector<int> to / from registry with WriteProfileBinary

I am getting myself into a pickle here!
I have a simple std::vector<int> container and what I want to do is read / write it to a binary registry key. I started with:
std::vector<int> vSortedColumnIndexInfo = dlgColumns.SortedEditorColumnIndexInfo();
theApp.WriteProfileBinary(_T("Options"), _T("EditorSortedColumnIndexInfo"),
vSortedColumnIndexInfo.data,
vSortedColumnIndexInfo.size);
But this will not compile:
error C3867:
std::vector<int,std::allocator<int>>::data: non-standard syntax; use
& to create a pointer to member
error C3867:
std::vector<int,std::allocator<int>>::size: non-standard syntax; use
& to create a pointer to member
Why is it saying this? What is the correct way to read / write a std::vector<int> to / from a binary registry key? It can be changed from an int if required.
Update 1
Based on the comments I now have:
std::vector<int> vSortedColumnIndexInfo = dlgColumns.SortedEditorColumnIndexInfo();
theApp.WriteProfileBinary(_T("Options"), _T("EditorSortedColumnIndexInfo"),
reinterpret_cast<BYTE*>(vSortedColumnIndexInfo.data()),
gsl::narrow<UINT>(vSortedColumnIndexInfo.size() * sizeof(int)));
Since I know my values will be under 256 I have decided to stick with BYTE for the container.
Writing to Registry
std::vector<BYTE> vExcludedColumns;
// Populate vector
UINT uSize = gsl::narrow<UINT>(sizeof(BYTE) * vExcludedColumns.size());
theApp.WriteProfileBinary(
strSection,
_T("AssignStatesEx"),
vExcludedColumns.data(),
uSize
);
theApp.WriteProfileInt(strSection, _T("AssignStatesExSize"), uSize);
Reading from Registry
std::vector<BYTE> vExcludedColumns;
UINT uSize = theApp.GetProfileInt(strSection, _T("AssignStatesExSize"), 0);
UINT uSizeRead = 0;
BYTE* temp = nullptr;
theApp.GetProfileBinary(strSection, _T("AssignStatesEx"), &temp, &uSizeRead);
if (uSizeRead == uSize)
{
vExcludedColumns.resize(uSizeRead, 0);
memcpy(vExcludedColumns.data(), temp, uSizeRead);
}
delete[] temp;
temp = nullptr;
I believe that this will still work for both 32 bit and 64 bit.
I am open to comments if this code can be improved or simplified.
Updated
Here is the same code, but added into public app methods:
// Read back a binary registry value written by WriteProfileVector.
// The companion "<strKey>Size" integer is compared against the number of
// bytes actually read; on any mismatch (or missing key) an empty vector is
// returned silently.
std::vector<BYTE> CMeetingScheduleAssistantApp::GetProfileVector(CString strSection, CString strKey)
{
std::vector<BYTE> vData;
UINT uSize = theApp.GetProfileInt(strSection, strKey + _T("Size"), 0);
UINT uSizeRead = 0;
BYTE* temp = nullptr;
// GetProfileBinary allocates *temp with new[]; the caller must delete[] it.
theApp.GetProfileBinary(strSection, strKey, &temp, &uSizeRead);
if (uSizeRead == uSize)
{
vData.resize(uSizeRead, 0);
memcpy(vData.data(), temp, uSizeRead);
}
delete[] temp;
temp = nullptr;
return vData;
}
void CMeetingScheduleAssistantApp::WriteProfileVector(CString strSection, CString strKey, std::vector<BYTE> vData)
{
UINT uSize = gsl::narrow<UINT>(sizeof(BYTE) * vData.size());
theApp.WriteProfileBinary(
strSection,
strKey,
vData.data(),
uSize
);
theApp.WriteProfileInt(strSection, strKey + _T("Size"), uSize);
}

GDB: Displaying incorrect values in struct

I'm trying to implement the malloc function and it looks like that gdb is giving me some weird values from this struct:
struct MemoryBlock {
struct MemoryBlock * next;
size_t size;
signed char is_free;
} startBlock;
And that's the function where I'm debugging it with gdb:
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block; // HERE'S the breakpoint
}
So here's the issue (I'm at the breakpoint return ret_block):
If I want to see what kind of values are inside of the ret_block pointer, then I'm getting this:
(gdb) p (struct MemoryBlock) ret_block
$26 = {next = 0x555555559000, size = 140737488347680, is_free = -53 '\313'}
size is fine, because if I convert it into the decimal system then I get 3 as expected. (the argument size from the function is currently 3)
But I'm surprised that next and is_free aren't 0 since the last three lines should set both to 0.
So I looked up what is in the memory:
As you can see each value is correctly stored in my heap. But why am I getting these values if I do p (struct MemoryBlock) ret_block?
If you need the whole code
#include <unistd.h>
#include <stdio.h>
/* ============
* Structs
* ============ */
// Header stored in front of each allocation; blocks form a singly linked
// list rooted at the statically allocated startBlock sentinel.
struct MemoryBlock {
struct MemoryBlock * next; // next block in allocation order, NULL at tail
size_t size; // usable bytes requested by the caller (header excluded)
signed char is_free; // nonzero when the block may be reused
} startBlock;
/* ==============
* Functions
* ============== */
struct MemoryBlock * create_new_block(size_t size);
void * malloc(size_t size);
/* ==================
* Main Programm
* ================== */
// Demo driver: allocate two 3-byte buffers from the custom malloc below,
// fill each with two characters plus a NUL terminator, and print them.
int main()
{
char * buffer;
char * b2;
unsigned short index;
// The start of my heap :D
startBlock.is_free = 0;
startBlock.size = 0;
buffer = malloc(3);
b2 = malloc(3);
// ----- ERROR -----
if (buffer == NULL || b2 == NULL)
return 1;
// ----- ERROR -----
// fill the buffers with random stuff
for (index=0; index<2; index++) {
buffer[index] = 'a';
b2[index] = 'b';
}
buffer[index] = '\0'; // index == 2 here: third and final byte
b2[index] = '\0';
puts(buffer);
puts(b2);
return 0;
}
// Grow the heap with sbrk() and carve out a new block whose usable size is
// the caller's request (the header sits immediately in front of it).
// Returns NULL if sbrk() fails.
struct MemoryBlock * create_new_block(size_t size)
{
struct MemoryBlock * ret_block;
// add some space for the struct block
size += sizeof(struct MemoryBlock);
ret_block = (void *) sbrk(size);
// test first, if we can allocate that much of ram
if (ret_block == (void *) -1)
return NULL;
ret_block->size = size - sizeof(struct MemoryBlock);
ret_block->is_free = 0;
ret_block->next = NULL;
return ret_block;
}
// First-fit allocator over the MemoryBlock list rooted at startBlock.
// NOTE(review): this returns a pointer to the block HEADER, not to the
// usable memory behind it (should be ret_block + 1). The caller's writes
// therefore clobber next/size/is_free -- which is exactly why the gdb
// session above shows garbage in the header fields after main() fills the
// buffers.
void * malloc (size_t size)
{
struct MemoryBlock * ret_block;
struct MemoryBlock * prev_block;
prev_block = &startBlock;
ret_block = startBlock.next;
// go through the linked lists and look if you can find a suitable block
while (ret_block != NULL && (ret_block->size < size || !ret_block->is_free))
{
prev_block = ret_block;
ret_block = ret_block->next;
}
// couldn't find a suitable block => create a new one
if (ret_block == NULL) {
ret_block = create_new_block(size);
if (ret_block == NULL)
return NULL;
}
// link the (possibly new) block after the last node visited; when an
// existing free block was found mid-list this assignment is a no-op.
prev_block->next = ret_block;
ret_block->is_free = 0;
return ret_block;
}
Ok, one of my friends told me my issue... The casting was wrong! Here's the solution:
(gdb) p * ret_block
$57 = {next = 0x0, size = 3, is_free = 0 '\000'}
A star was enough to get the desired output...

skbuff packet sent with zero payload

Using Netfilter's hooks (NF_INET_PRE_ROUTING and NF_INET_POST_ROUTING) I implemented a kernel module that monitors all incoming and outgoing packets for a given host. By looking at the skbuffs I can classify packets and identify those that I am interested in. Each time I detect such packets I want to generate my own packet and send it out in the network (note: I am not copying/cloning the original packet but instead create one from "scratch" and send it out using dev_queue_xmit. The only similarity with the original packet is that my packet goes towards the same destination, but has different port/protocol etc.).
The problem is that my generated packets are being sent out with empty payload. Packets successfully reach the destination, the size of the payload is correct, but its content is all set to zeros. Two observations: (1) The function that I am using to construct skbuffs and send out has been tested before and seemed to work. (2) The problem appear only when I classify outgoing packets and attempt to send my own along the way, seemingly the same code works fine when I classify packets using NF_INET_PRE_ROUTING.
Please see the code below:
static struct nf_hook_ops nfout;
// Module init: register hook_func_out on IPv4 POST_ROUTING with top
// priority so every outgoing packet passes through it.
// NOTE(review): no matching module_exit/nf_unregister_hook is shown in
// this excerpt; without it the hook dangles after module unload.
static int __init init_main(void) {
nfout.hook = hook_func_out;
nfout.hooknum = NF_INET_POST_ROUTING;
nfout.pf = PF_INET;
nfout.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfout);
printk(KERN_INFO "Loading Postrouting hook\n");
return 0;
}
// POST_ROUTING hook: classify outgoing skbs and emit a companion packet
// for the interesting ones. Always lets the original packet through.
// NOTE(review): the if-condition below is the author's prose placeholder,
// not code -- this excerpt does not compile as shown.
static unsigned int hook_func_out(const struct nf_hook_ops *ops,
struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, int (*okfn)(struct sk_buff *)) {
if (skb is the packet that I am looking for, omitting details...) {
generate_send_packet(skb);
}
return NF_ACCEPT;
}
// Build and transmit a small UDP packet toward the same destination as
// target_skb, carrying a fixed 4-byte marker payload.
// NOTE(review): `dev`, `mac`, and `dst_port` are not declared anywhere in
// this snippet, and the call below passes 9 arguments while
// construct_udp_skb is declared with 11 (ttl and tcp_seq are missing) --
// this excerpt cannot compile as shown; the author trimmed it for the post.
void generate_send_packet(struct sk_buff *target_skb) {
// NOTE(review): static locals are shared across concurrent hook
// invocations -- a data race on an SMP hook path.
static struct tcphdr * tcph;
static struct iphdr * iph;
static struct ethhdr * ethh;
struct sk_buff * skb1;
iph = ip_hdr(target_skb);
tcph = tcp_hdr(target_skb);
ethh = eth_hdr(target_skb);
int payload = 123456789;
skb1 = construct_udp_skb(dev,
dev->dev_addr, mac,
iph->saddr, iph->daddr,
ntohs(tcph->source), dst_port,
(unsigned char *)&payload, sizeof(int)
);
if (dev_queue_xmit(skb1) != NET_XMIT_SUCCESS) {
printk(KERN_INFO "Sending failed\n");
}
}
// Build a complete Ethernet+IPv4+UDP frame around usr_data, ready to hand
// to dev_queue_xmit(). Returns the new skb, or NULL on allocation failure.
// NOTE(review): ttl and tcp_seq are kept for interface compatibility but
// unused -- the IP TTL is fixed at 128 and UDP has no sequence number.
struct sk_buff* construct_udp_skb(struct net_device *dev,
unsigned char * src_mac, unsigned char * dst_mac,
uint32_t src_ip, uint32_t dst_ip,
uint32_t src_port, uint32_t dst_port,
uint32_t ttl, uint32_t tcp_seq,
unsigned char * usr_data, uint16_t usr_data_len) {
	/* Locals were `static` in the original -- shared across calls, a latent
	 * race on any concurrent hook path. Plain automatics are correct. */
	struct ethhdr *ethh;
	struct iphdr *iph;
	struct udphdr *udph;
	struct sk_buff *skb;
	const uint16_t header_len = 300; /* headroom reserved up front */
	unsigned char *p_usr_data;
	int udplen;

	skb = alloc_skb(1000, GFP_KERNEL);
	if (!skb)
		return NULL; /* original would have oopsed in skb_reserve */
	skb_reserve(skb, header_len);
	/* Push headers innermost-last so they land in wire order. */
	udph = (struct udphdr*) skb_push(skb, sizeof(struct udphdr));
	iph = (struct iphdr*) skb_push(skb, sizeof(struct iphdr));
	ethh = (struct ethhdr*) skb_push(skb, sizeof(struct ethhdr));
	memset(udph, 0, sizeof(struct udphdr));
	memset(iph, 0, sizeof(struct iphdr));
	skb_set_mac_header(skb, 0);
	skb_set_network_header(skb, sizeof(struct ethhdr));
	skb_set_transport_header(skb, sizeof(struct ethhdr) + sizeof(struct iphdr));
	/* Ethernet --------------------------------------------------------- */
	memcpy(ethh->h_source, src_mac, 6);
	memcpy(ethh->h_dest, dst_mac, 6);
	ethh->h_proto = htons(ETH_P_IP);
	/* IPv4 ------------------------------------------------------------- */
	iph->ihl = 5;
	iph->version = 4;
	iph->ttl = 128;
	iph->tos = 0;
	iph->protocol = IPPROTO_UDP;
	iph->saddr = src_ip;
	iph->daddr = dst_ip;
	iph->id = htons(222);
	iph->frag_off = 0;
	iph->tot_len = htons(sizeof(struct iphdr) + sizeof(struct udphdr) + usr_data_len);
	/* One checksum pass over the finished header. The original also called
	 * ip_fast_csum() BEFORE id/frag_off/tot_len were set, producing a bogus
	 * value that ip_send_check() then overwrote anyway. */
	ip_send_check(iph);
	/* UDP -------------------------------------------------------------- */
	udph->source = htons(src_port);
	udph->dest = htons(dst_port);
	skb->dev = dev;
	/* skb->protocol holds the ETHERTYPE in network byte order, not an IP
	 * protocol number; the original stored IPPROTO_UDP here. */
	skb->protocol = htons(ETH_P_IP);
	skb->priority = 0;
	skb->pkt_type = PACKET_OUTGOING;
	/* Payload: usr_data is a KERNEL pointer. The original copied it with
	 * csum_and_copy_from_user(), whose access_ok() check rejects kernel
	 * addresses and silently leaves the payload zeroed -- the "empty
	 * payload" bug this thread is about. A plain memcpy is correct. */
	p_usr_data = skb_put(skb, usr_data_len);
	printk(KERN_INFO "Sending [%i]\n", *(int*)usr_data);
	memcpy(p_usr_data, usr_data, usr_data_len);
	/* UDP checksum over header+payload; udph->check is still 0 here, so
	 * csum_partial folds in the pseudo-header-free sum correctly. */
	udplen = sizeof(struct udphdr) + usr_data_len;
	udph->len = htons(udplen);
	udph->check = udp_v4_check(udplen, iph->saddr, iph->daddr,
			csum_partial(udph, udplen, 0));
	return skb;
}
What could be the possible reason why dev_queue_xmit would nullify the payload in the skbuff?
Thanks!
I assume that the result of debug print message for printk(KERN_INFO "Sending [%i]\n", (int)usr_data) is as expected. But what about the skb->data? Can you try to print it after csum_and_copy_from_user() to see whether it is empty or not? It seems very likely that you will see the zero payload at this point already.

how to implement splice_read for a character device file with uncached DMA buffer

I have a character device driver. It includes a 4MB coherent DMA buffer. The buffer is implemented as a ring buffer. I also implemented the splice_read call for the driver to improve performance. But this implementation does not work well. Below is a usage example:
(1)splice the 16 pages of device buffer data to a pipefd[1]. (the DMA buffer is managed as in page unit).
(2)splice the pipefd[0] to the socket.
(3)the receiving side (tcp client) receives the data, and then check the correctness.
I found that the tcp client got errors. The splice_read implementation is shown below (I stole it from the vmsplice implementation):
/* splice related functions */
// Pipe-buffer release op: drop the page reference taken in
// do_rdma_ring_splice_read and clear the LRU hint flag.
static void rdma_ring_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
put_page(buf->page);
buf->flags &= ~PIPE_BUF_FLAG_LRU;
}
// splice_pipe_desc release callback: drop the reference on page i when the
// descriptor could not be fully spliced into the pipe.
void rdma_ring_spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
put_page(spd->pages[i]);
}
// Pipe buffer operations: generic helpers throughout, except release which
// undoes our get_page(). (.map/.unmap/.can_merge exist in ~3.x kernels,
// matching the author's 3.10 -- this table would not build on modern ones.)
static const struct pipe_buf_operations rdma_ring_page_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
.confirm = generic_pipe_buf_confirm,
.release = rdma_ring_pipe_buf_release,
.steal = generic_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
/* In order to simplify the caller's work, the meanings of the ppos and len
 * parameters have been changed to fit the driver's internal ring buffer.
 * ppos indicates which page is referred to (should start from 1, as the
 * csr page is not allowed to be spliced); len indicates how many pages are
 * needed. We also constrain the maximum page count per splice to 16 pages;
 * beyond that EINVAL is returned. A high-speed device needing a larger
 * count can rework this routine. ppos is also used to return the total
 * bytes that should be transferred; the caller can compare it with the
 * return value to determine whether all bytes were transferred.
 */
static ssize_t do_rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct rdma_ring *priv = to_rdma_ring(in->private_data);
struct rdma_ring_buf *data_buf;
struct rdma_ring_dstatus *dsta_buf;
struct page *pages[PIPE_DEF_BUFFERS];
struct partial_page partial[PIPE_DEF_BUFFERS];
ssize_t total_sz = 0, error;
int i;
unsigned offset;
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
.nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &rdma_ring_page_pipe_buf_ops,
.spd_release = rdma_ring_spd_release_page,
};
/* init the spd, currently we omit the packet header, if a control
 * is needed, it may be implemented by define a control variable in
 * the device struct */
spd.nr_pages = len;
for (i = 0; i < len; i++) {
offset = (unsigned)(*ppos) + i;
data_buf = get_buf(priv, offset);
dsta_buf = get_dsta_buf(priv, offset);
// NOTE(review): virt_to_page() is only valid for normal lowmem kernel
// pages; for memory obtained from dma_alloc_coherent() the resulting
// struct page (and the get_page/put_page refcounting on it) is not
// generally legitimate -- a plausible cause of the corruption seen at
// the receiver. Confirm how the 4MB ring is actually allocated.
pages[i] = virt_to_page(data_buf);
get_page(pages[i]);
partial[i].offset = 0;
// NOTE(review): the ring page is handed to the pipe zero-copy; if the
// device overwrites this slot before the pipe consumer drains it, the
// consumer sees the new data (no snapshot is taken here).
partial[i].len = dsta_buf->bytes_xferred;
total_sz += partial[i].len;
}
error = _splice_to_pipe(pipe, &spd);
/* use ppos to return the theoretical total bytes to transfer */
*ppos = total_sz;
return error;
}
/* splice read */
/* splice_read entry point: trace the call, bound the page count, and
 * delegate the real work to do_rdma_ring_splice_read(). */
static ssize_t rdma_ring_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags)
{
	MY_PRINT("%s: *ppos = %lld, len = %ld\n", __func__, *ppos, (long)len);
	/* At most PIPE_DEF_BUFFERS pages per splice call. */
	if (unlikely(len > PIPE_DEF_BUFFERS))
		return -EINVAL;
	return do_rdma_ring_splice_read(in, ppos, pipe, len, flags);
}
The _splice_to_pipe is just the same as the splice_to_pipe in the kernel. As that function is not an exported symbol, I re-implemented it.
I think the main cause is that the some kind of lock of pages are omitted, but
I don't know where and how.
My kernel version is 3.10.

Linux kernel socket programming:sendmsg function msg address can not access

I want to send a message with socket->ops->sendmsg() in kernel module. One of the arguments of func sendmsg struct msghdr has a pointer msg_iov to the send buffer.
But except for NULL, whatever buffer address I assign to msg_iov, sendmsg() will return an EFAULT error to me. That means the address I assigned to the pointer cannot be accessed.
So please help me and thank you very much.
P.S: here is part of my code. I omitted the irrelevant codes.
struct iovec vec;
char *buff = (char *)kmalloc(7, GFP_KERNEL);
unsigned long user_addr=0;
size_t count = 16;
buff[0] = 'H';
buff[1] = 'e';
buff[2] = 'l';
buff[3] = 'l';
buff[4] = 'o';
buff[5] = '\n';
buff[6] = '\0';
down_write(&current->mm->mmap_sem);
user_addr = do_mmap_pgoff(NULL, 0, count, PROT_READ|PROT_WRITE,\
MAP_PRIVATE|MAP_ANONYMOUS, 0);
up_write(&current->mm->mmap_sem);
__copy_to_user((void*)user_addr, (void*)buff, 7);
vec.iov_base = (void*)user_addr;
vec.iov_len = strlen( (char*)user_addr );
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_flags = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
error = NewSock->ops->sendmsg(&kiocb,NewSock, &msg, 7);
do_munmap( &current->mm, user_addr, strlen( (char*) user_addr));
You can't call this function with data that lives in the kernel's address space:
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
/* ... */
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
/* ... */
if ((err = skb_add_data(skb, from, copy)) != 0)
goto do_fault;
static inline int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
/* ... */
__wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy),
copy, 0, &err);
#define csum_and_copy_from_user csum_partial_copy_from_user
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
/* ... */
if (!likely(access_ok(VERIFY_READ, src, len)))
goto out_err;
/* ... */
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
access_ok on x86 checks for userspace pointers:
/**
* access_ok: - Checks if a user space pointer is valid
* #type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
* %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
* to write to a block, it is always safe to read from it.
* #addr: User space pointer to start of block to check
* #size: Size of block to check
*
* Context: User context only. This function may sleep.
*
* Checks if a pointer to a block of memory in user space is valid.
*
* Returns true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*
* Note that, depending on architecture, this function probably just
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
The comment on __range_not_ok() looks similar:
/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
*
* This is equivalent to the following test:
* (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
*
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
While I followed the x86-specific code paths whenever architecture-specific code was involved, I expect other architectures to enforce this behavior to the best of their respective abilities.
It looks like you cannot call sendmsg() on in-kernel struct iovec memory.

Resources