Problem with hotplugging device driver module - linux

I'm writing a module that when you plug in a usb mouse prints "hello world". The problem comes when I plug in the mouse, dmesg prints six times the below message:
[ 7367.238560] helwor_mod: disagrees about version of symbol
module_layout
This is my code
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/usb.h>
#include <linux/usb/input.h>
#include <linux/hid.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Isaac Lleida <isakyllr#opmbx.org>");
MODULE_VERSION("0.1");
static struct usb_device_id usb_mouse_id_table [] = {
{ USB_INTERFACE_INFO(USB_INTERFACE_CLASS_HID, USB_INTERFACE_SUBCLASS_BOOT,
USB_INTERFACE_PROTOCOL_MOUSE) },
{ }
};
MODULE_DEVICE_TABLE(usb, usb_mouse_id_table);
static int mouse_probe(struct usb_interface *iface,
const struct usb_device_id *id)
{
pr_info("Hello World!");
return 0;
}
static void mouse_disconnect(struct usb_interface *iface)
{
pr_info("Bye World!");
}
static struct usb_driver mouse_driver = {
.name = "usbmouse",
.probe = mouse_probe,
.disconnect = mouse_disconnect,
.id_table = usb_mouse_id_table,
};
module_usb_driver(mouse_driver);
static int __init hello_init(void)
{
int res = 0;
res = usb_register(&mouse_driver);
if(res)
pr_err("usb_register failed with error %d", res);
return res;
}
static void __exit hello_exit(void)
{
pr_debug("USB Mouse Removed...");
usb_deregister(&mouse_driver);
}
I have been googling all the afternoon and still don't know how to solve it.
I hope someone can help me, thanks.

This is related:
module_layout version incompatibility
I'd bet that the kernel sources (headers) you're using to compile your module are from a different kernel version than then one you're running.

Problem solved. This issue is produced because the device is being controled by another driver and it's loaded in the system. Just unload it and load your driver module.
And sorry Gilles for the offtopic.

Related

nvcc under linux complains: Contains a vector, which is not supported in device code

I have the following code
#include <cuda.h>
#include <cuda_runtime.h>
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
//A bitset for the variable assignments
//The state for non existing variable 0 is stored as well, just to avoid +1/-1 adjustments
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
union {
uint32_t raw[1]; //don't worry about alignment, the compiler will not use aligned read/writes anyway.}
uint64_t raw64[1];
__m256i avxraw[1];
};
public:
__host__ __device__ friend bool operator==(const Atom_t& a, const Atom_t& b);
};
__host__ __device__ bool operator==(const Atom_t& a, const Atom_t& b) {
const auto IntCount = a.IntCount();
if (IntCount != b.IntCount()) { return false; }
#ifdef __CUDA_ARCH__
__shared__ bool isDifferent;
isDifferent = false;
for (auto i = ThreadId(); i < IntCount; i += BlockDim()) {
if (a.raw[i] != b.raw[i] || isDifferent) {
isDifferent = true;
break;
}
}
syncthreads();
return !isDifferent;
#else
auto result = true;
#ifdef _DEBUG
for (auto i = 0; i < IntCount; i++) {
if (a.raw[i] != b.raw[i]) { result = false; }
}
#endif
auto AvxCount = a.Avx2Count();
if (AvxCount != b.Avx2Count()) { if (result) { print("Atom_t == is incorrect"); } assert1(!result); return false; }
for (auto i = 0; i < AvxCount; i++) {
const auto packedCompare = _mm256_cmpeq_epi8(a.avxraw[i], b.avxraw[i]);
const auto bitmask = _mm256_movemask_epi8(packedCompare);
if (bitmask != -1) { if (result) { print("Atom_t == is incorrect"); } assert1(!result); return false; }
}
#endif
#ifndef __CUDA_ARCH__
assert(result);
#endif
return true;
}
The compiler complains
Description Resource Path Location Type
"__nv_bool (const Atom_t &, const Atom_t &)" contains a vector, which is not supported in device code
However, the vector is not in device code, only in the host code. How do I make this error go away in NSight Eclipse Edition 9.1 running CUDA 11.
I tried:
#ifdef __CUDA_ARCH__
# define DEAL_II_COMPILER_VECTORIZATION_LEVEL 0
#endif
But that does not work.
However, the vector is not in device code, only in the host code.
The error is coming about due to this line:
__m256i avxraw[1];
which is visible in both the host code and device code compilation trajectory.
According to my testing this may be a possible workaround:
$ cat t32.cpp
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include <iostream>
typedef char dummy[sizeof(__m256i)];
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
union {
uint32_t raw[1];
uint64_t raw64[1];
#ifndef FOO //hide the vectorized datastruct from cuda's view
__m256i avxraw[1];
#else
alignas(32) dummy foo[1];
#endif
};
};
int main(){
std::cout << sizeof(__m256i) << std::endl;
std::cout << sizeof(Atom_t) << std::endl;
}
$ g++ t32.cpp -o t32
$ ./t32
32
64
$ g++ t32.cpp -o t32 -DFOO
$ ./t32
32
64
(Fedora Core 29)
The alignas(32) directive is still probably somewhat fragile if the definition of __m256i changes dramatically. And, clearly, the above is not CUDA code in the exact frame that was presented. It would need to be adapted (e.g. replace #ifndef FOO with #ifndef __CUDA_ARCH__)
I'm not suggesting that this code is correct, defect-free, or suitable for any particular purpose; it is mostly code provided by OP. My objective here is to identify issues that I see and are asked about in the question, and suggest possible ways to address those issues. Use this at your own risk.
Found it!
The problem is not the code in the method, the problem is the presence of the _m256i within view of cuda.
The following patch fixes the issue:
struct Atom_t {
enum where { device, host};
enum BoolOp {opXor, opOr, opAnd };
public: //TODO make private later
int VarCount;
bool isValid;
union {
uint32_t raw[1]; //don't worry about alignment, the compiler will not use aligned read/writes anyway.}
uint64_t raw64[1];
#ifndef __CUDA_ARCH__ //hide the vectorized datastruct from cuda's view
__m256i avxraw[1];
#endif
};
Now that nvcc does not see the vectorized datatype it will stop worrying.

Attaching device with device driver

I am trying to learn about Linux platform drivers. I have taken a driver from the following tutorial:
http://linuxseekernel.blogspot.com/2014/05/platform-device-driver-practical.html
It is a basic platform driver. I have compiled it and loaded the module. It loads fine, however, its probe function is never executed. There is a lot of documentation that has said as long as the devices id and drivers id match, then the probe function is called. Well I have the following driver:
#include <linux/module.h>
#include <linux/kernel.h>
//for platform drivers....
#include <linux/platform_device.h>
#define DRIVER_NAME "twl12xx"
MODULE_LICENSE("GPL");
/**************/
static int sample_drv_probe(struct platform_device *pdev){
printk(KERN_ALERT "twl12xx: Probed\n");
return 0;
}
static int sample_drv_remove(struct platform_device *pdev){
printk(KERN_ALERT "twl12xx: Removing twl12xx\n");
return 0;
}
static const struct platform_device_id twl12xx_id_table[] = {
{ "twl12xx", 0},
{}
};
MODULE_DEVICE_TABLE(platform, twl12xx_id_table);
static struct platform_driver sample_pldriver = {
.probe = sample_drv_probe,
.remove = sample_drv_remove,
.driver = {
.name = DRIVER_NAME,
},
};
/**************/
int ourinitmodule(void)
{
printk(KERN_ALERT "\n Welcome to twl12xx driver.... \n");
/* Registering with Kernel */
platform_driver_register(&sample_pldriver);
return 0;
}
void ourcleanupmodule(void)
{
printk(KERN_ALERT "\n Thanks....Exiting twl12xx driver... \n");
/* Unregistering from Kernel */
platform_driver_unregister(&sample_pldriver);
return;
}
module_init(ourinitmodule);
module_exit(ourcleanupmodule);
I also have the following entry in my device tree:
twl12xx: twl12xx#2 {
compatible = "twl12xx";
};
I feel like I am must be missing something or incorrectly defining my device tree.
Whatever you have read is correct; driver and device ID both should match.
You have just create the skeleton of driver and you are using the device tree. So it looks fine. But you are missing the of_match_table entry in your code; which is very important for device ID match (same string as you have pass from the device tree) and driver probe calling.
So add the below changes your code:
#ifdef CONFIG_OF
static const struct of_device_id twl12xx_dt_ids[] = {
.compatible = "ti,twl12xx",
{}
};
MODULE_DEVICE_TABLE(of, twl12xx_dt_ids);
#endif
static struct platform_driver sample_pldriver = {
.probe = sample_drv_probe,
.remove = sample_drv_remove,
.driver = {
.name = DRIVER_NAME,
.of_match_table = of_match_ptr(twl12xx_dt_ids),
},
.id_table = twl12xx_id_table,
};
I had similar problem. Probe function also wasn't able to print anything. The reason in my case was: In my linux I had ready driver that was binded to the device . When I unbinded this driver, Probe function worked successfully.
(Note, to unbind:
cd /sys/bus/platform/drivers/<driver_name>
echo "device_name" > unbind
)

"Device has no release() function" - what does this mean?

I am trying to write a simple linux kernel driver to turn a GPIO pin on while the module is loaded. Module loading works, but when I call rmmod to remove it I get this error:
sudo rmmod psctl
[13051.599199] ------------[ cut here ]------------
[13051.608758] WARNING: at drivers/base/core.c:196 device_release+0x78/0x84()
[13051.620581] Device 'psctl.0' does not have a release() function, it is broken and must be fixed.
[13051.637898] Modules linked in: psctl(O-) tmp102 hwmon nfsd rtc_ds1307 i2c_dev snd_bcm2835 snd_pcm snd_page_alloc snd_seq snd_seq_device snd_timer snd spidev leds_gpio led_class spi_bcm2708 i2c_bcm2708 [last unloaded: psctl]
[13051.670268] [<c0013f64>] (unwind_backtrace+0x0/0xf0) from [<c001e80c>] (warn_slowpath_common+0x4c/0x64)
[13051.688743] [<c001e80c>] (warn_slowpath_common+0x4c/0x64) from [<c001e8b8>] (warn_slowpath_fmt+0x30/0x40)
[13051.707217] [<c001e8b8>] (warn_slowpath_fmt+0x30/0x40) from [<c0239240>] (device_release+0x78/0x84)
[13051.725132] [<c0239240>] (device_release+0x78/0x84) from [<c01f4ad8>] (kobject_release+0x50/0x84)
[13051.742880] [<c01f4ad8>] (kobject_release+0x50/0x84) from [<bf0e6064>] (gpiotest_exit+0x10/0x2c [psctl])
[13051.761326] [<bf0e6064>] (gpiotest_exit+0x10/0x2c [psctl]) from [<c005e140>] (sys_delete_module+0x1b4/0x240)
[13051.780050] [<c005e140>] (sys_delete_module+0x1b4/0x240) from [<c000dae0>] (ret_fast_syscall+0x0/0x30)
[13051.798205] ---[ end trace 2eaa6df2dbf1f2e2 ]---
[13051.810083] psctl: Unloaded module
What does this mean? I have had a look at drivers/base/core.c and it seems that neither the kobject or the kobj_type have a release method associated with them (or there is no kobj_type set on the kobj). Am I supposed to set the release method myself, or is there something else that I haven't done that would set this for me?
The code for the module is below:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/platform_device.h>
#define AUTHOR "Xian Stannard <xian#xianic.net>"
#define DESCRIPTION "Simple GPIO driver as proof of concept"
#define VERSION "0.1"
#define DRIVER_NAME "psctl"
#define OUT_GPIO 18
#define MPRINT(level, fmt, args...) printk(level DRIVER_NAME ": " fmt "\n", ## args)
struct ps_data_struct {
int out_gpio;
};
static int ps_probe(struct platform_device *pdev) {
struct ps_data_struct *data = pdev->dev.platform_data;
if(data == NULL)
MPRINT(KERN_DEBUG, "No platform data");
else {
MPRINT(KERN_DEBUG, "Has platform data: gpio %d", data->out_gpio);
}
return -ENODEV;
}
static int __exit ps_remove(struct platform_device *dev) {
MPRINT(KERN_DEBUG, "Remove");
return 0;
}
static void ps_release(struct device* dev) {
MPRINT(KERN_DEBUG, "Release");
}
static struct ps_data_struct ps_data = {
.out_gpio = OUT_GPIO
};
static struct platform_driver ps_driver = {
.driver = {
.name = DRIVER_NAME,
.owner = THIS_MODULE
},
.probe = ps_probe,
.remove = __exit_p(ps_remove)
};
static struct platform_device ps_device = {
.name = DRIVER_NAME,
.id = 0,
.dev = {
.platform_data = &ps_data
}
};
static int __init gpiotest_init(void) {
int retval = 0;
retval = platform_driver_register(&ps_driver);
if(retval != 0) {
MPRINT(KERN_ERR, "Could not register driver");
goto drvreg;
}
retval = platform_device_register(&ps_device);
if(retval != 0) {
MPRINT(KERN_ERR, "Could not create device");
goto devreg;
}
MPRINT(KERN_INFO, "Loaded module");
return 0;
devreg:
platform_driver_unregister(&ps_driver);
drvreg:
MPRINT(KERN_ERR, "Module load failed with error code %d", retval);
return retval;
}
static void __exit gpiotest_exit(void) {
platform_device_unregister(&ps_device);
platform_driver_unregister(&ps_driver);
MPRINT(KERN_INFO, "Unloaded module");
}
module_init(gpiotest_init);
module_exit(gpiotest_exit);
MODULE_AUTHOR(AUTHOR);
MODULE_DESCRIPTION(DESCRIPTION);
MODULE_VERSION(VERSION);
MODULE_LICENSE("GPL");
I think using platform_device_register() to register a device, the pdev->dev->release is not initialized, so when you try to remove this device, the OOPS message print out.
There are two solutions to solve this problem:
Using platform_device_alloc() and platform_device_add() to register your device;
You init the ps_device.dev.release yourself, if you insist on using platform_device_register().
When you call platform_device_alloc() to get an instance of device, the release method initialization is done to the default function, as can be see here.
struct platform_device *platform_device_alloc(const char *name, int id)
{
...
...
pa->pdev.dev.release = platform_device_release;
...
...
}
This is the default implementation of platform_device_release .
However, when you avoid platform_device_alloc, you obviously miss this initialization and hence need to provide your own implementation of release method for device.

Kernel Module to Ethernet Packet Echo

I develop a linux kernel module to re-transmit some ethernet packet (do the echo). THe packet arrives, i check the ethernet destination address, and if it is for me, i re-transmit. If not i do nothing.
I used dev_pack_eth to define my protocol handler to recieve all ethernet packet (EHT_P_ALL) and dev_queue_xmit to transmit the skb buff received.
It works, the echo is functional but...
Sometimes, very often. the kernel crashs and i don't know why.
When i re-transmit the packet, i return NET_RX_Sucess.
When i don't re-transmit i use kfree_skb to free the skb buff received and return NET_RX_DROP.
I think the problem my be on this issues. Can you help me?
If needed i could post the kernel module code.
Best Regards!
------------Edit:Code added--------
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <asm-generic/types.h>
/*Buscar as interfaces de rede*/
struct net_device *dev_eth0;
struct net_device *dev_eth1;
int contador;
static struct packet_type hook; /* Initialisation routine */
void handler_add_config (void);
void handler_remove(void);
void print_mac_hdr(struct ethhdr *eth);
static int hook_func( struct sk_buff *skb)
{
struct ethhdr *eth;
struct ethhdr aux;
eth= eth_hdr(skb)
print_mac_hdr(eth);
/*If destination isn't the same that dev_addr, the packet is not for me: do nothing*/
if(memcmp(eth->h_dest,skb->dev->dev_addr,ETH_ALEN)!=0)
{
printk("Não são iguais!!!\n");
}
else
{
/*Swap addr*/
memcpy(&(aux.h_dest),eth->h_dest,ETH_ALEN);
memcpy(eth->h_dest,eth->h_source,ETH_ALEN);
memcpy(eth->h_source,&(aux.h_dest),ETH_ALEN);
/*Re build ther hearders*/
skb->data = (unsigned char *)skb->mac_header;
skb->len += ETH_HLEN;
skb->pkt_type = PACKET_OUTGOING;
/*Send*/
if(dev_queue_xmit(skb)!= NET_XMIT_SUCCESS)
{
printk("Erro na transmissão\n");
}
else
{
printk("Trama retransmitida com sucesso\n");
return NET_RX_SUCCESS;
}
}
kfree_skb(skb);
return NET_RX_DROP;
}
/*Print eth headers*/
void print_mac_hdr(struct ethhdr *eth)
{
printk("Destino: %02x:%02x:%02x:%02x:%02x:%02x \n",eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]);
printk("Origem: %02x:%02x:%02x:%02x:%02x:%02x\n",eth->h_source[0],eth->h_source[1],eth->h_source[2],eth->h_source[3],eth->h_source[4],eth->h_source[5]);
printk("Proto: 0x%04x\n",ntohs(eth->h_proto));
}
/*Configure Protocol Handler*/
void handler_add_config (void)
{
hook.type = htons(ETH_P_ALL);
hook.func = (void *)hook_func;
hook.dev = NULL;
dev_add_pack(&hook);
printk("Handler Protocol adicionado!!!!\n");
}
/*Unregist protocol handler*/
void handler_remove(void)
{
dev_remove_pack(&hook);
printk("Handler Protocol removido!!!!\n");
synchronize_net();/*Sincronizar a rede!*/
}
/*Init module and protocol handler*/
static int __init hook_init(void)
{
printk("Hello:I'm the hook module!!!!\n");
contador =0;
dev_eth0=dev_get_by_name(&init_net,"eth0");
dev_eth1=dev_get_by_name(&init_net,"eth1");
handler_add_config();
return 0;
}
/*Remove module and protocol handler*/
static void __exit hook_exit(void)
{
printk("Hook module says Goodbye!!!!!\n");
handler_remove();
}
module_init(hook_init);
module_exit(hook_exit);
MODULE_LICENSE("GPL");
i think when u are returning return NET_RX_DROP; it causes problem because there are basically return types in hooks are ...
Return Code Meaning
NF_DROP Discard the packet.
NF_ACCEPT Keep the packet.
NF_STOLEN Forget about the packet.
NF_QUEUE Queue packet for userspace.
NF_REPEAT Call this hook function again.
& u are returning NET_RX_DROP so try to use NF_DROP.
Look at af_x25.c in net/x25 for a sample implementation of the same where they return 0 even on a drop. BTW didn't understand why are you incrementing skb->len when all you are doing is swapping the mac addresses ? I.e why the need to rebuild hdrs in that sense? Am i missing something here?
You are probably crashing because you are freeing the slngle copy of sk_buff by calling kfree_skb(skb);

Linux USB device driver not getting probed

I'm working on a device driver for Linux. It's a USB pen tablet. The problem is that the driver's probe callback never gets called. dmesg just shows:
generic-usb: probe of 0003:099A:2620.000F failed with error -22
and i never get to connect to the device. It seems like the systems drivers are overriding my driver in some way?
My code is registering & unregistering correctly using insmod / rmmod:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/usb.h>
#include <linux/slab.h>
MODULE_DEVICE_TABLE (usb, id_table);
struct usb_device_id id_table[] =
{
{USB_DEVICE(0x099a, 0x2620)}, //Zippy Technology Corp. Digi Tablet
{0}
};
void dt_disconnect(struct usb_interface *interface)
{
printk("dt_disconnect called\n");
}
int dt_probe(struct usb_interface *interface, const struct usb_device_id *id)
{
printk("dt_probe called\n");
return 0;
}
static struct usb_driver dt_driver =
{
.name = "Zippy Technology Corp. Digi Tablet",
.probe = dt_probe,
.disconnect = dt_disconnect,
.id_table = id_table
};
static int __init dt_init(void)
{
//0 means success
int error = usb_register(&dt_driver);
if(error)
printk("dt_init failed\n");
return 0;
}
static void __exit dt_exit(void)
{
//void
usb_deregister(&dt_driver);
}
module_init(dt_init);
module_exit(dt_exit);
MODULE_LICENSE("GPL");
dt_probe is never called. I'm using Linux 2.6.40 (Fedora 15's version of 3.0) and most documentation about this stuff is very old so I thought I'd ask here. Any thoughts?
Yes, usbhid driver overrides your driver. You need to remove the usbhid driver from the running kernel. First deattach your device from the system and use "modprobe -r usbhid" to remove the usbhid module. Now insert your module and attach the device, then your driver will be taken.

Resources