Write a Simple Linux Kernel Module

Note that all code shown in this article is dual-licensed under GPL-2.0 and MPL-2.0.

Recently, I have become interested in Linux kernel space and wanted to develop a simple kernel module. Although Rust for Linux is being merged into the mainline Linux git repository and will become available with the Linux 6.1 release, I still want to write this module in C: it is more native, and there is no need to pay attention to type conversion.

What we will do is create a custom char device that sends all data written to it to a remote cloud server over UDP.

What the module does

First, I assume you are running an Ubuntu or Debian distribution; other Linux flavors are also supported, but you should ensure that the Linux kernel header package is available. In my tests, Debian in Windows Subsystem for Linux and Debian for Raspberry Pi did not work. You can install the needed packages with the following command:

1
# apt install kmod build-essential linux-headers-`uname -r`

And then let’s create the following files:

Makefile

1
2
3
4
5
6
7
8
9
# Kbuild input: build repipe.o (from repipe.c) as a loadable module.
obj-m += repipe.o

# Build tree of the running kernel; override with `make KDIR=...` to
# build against a different kernel.
KDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(CURDIR)

# These targets do not produce files named "all" or "clean".
.PHONY: all clean

# Use $(MAKE) instead of a literal `make` so that command-line flags
# (-j, -n, ...) and the jobserver are passed on to the kernel build.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

chardev.h

1
2
3
4
5
6
7
8
9
/* Names of the repipe character device, shared by the module sources. */
#ifndef CHARDEV_H
#define CHARDEV_H

#include <linux/ioctl.h>

/* Name of the device node. */
#define DEVICE_FILE_NAME "repipe"
/* Full path of the node; adjacent string literals are joined at compile time. */
#define DEVICE_PATH "/dev/" DEVICE_FILE_NAME

#endif /* CHARDEV_H */

repipe.c

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/net.h>
#include <linux/sched.h>
#include <linux/socket.h>
#include <linux/tty.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

/* Module metadata, shown by modinfo. */
MODULE_LICENSE("Dual MPL/GPL");
MODULE_AUTHOR("Rui Li <me@lirui.org>");
MODULE_DESCRIPTION("Create a char device which sends data to remote server");

/*
 * Destination of the forwarded data. The address is parsed with in_aton()
 * in repipe_init(), which only understands dotted-quad IPv4 text and does
 * not resolve host names, so the default has to be a numeric address.
 */
static char *remote_host = "127.0.0.1";
static int remote_port = 1040;

/*
 * The last argument is the mode of the matching file under
 * /sys/module/repipe/parameters/. The values are only read in
 * repipe_init(), so writing to those files after loading does not
 * change the destination.
 */
module_param(remote_host, charp, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(remote_host, "Remote host to connect to");
module_param(remote_port, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(remote_port, "Remote port to connect to");

#include "chardev.h"

/* Name under which the character device major number is registered. */
#define DEVICE_NAME "repipe"
/* Upper bound on the number of bytes forwarded by a single write. */
#define BUF_LEN 1024

/* Values stored in pipe_inuse. */
enum {
    REPIPE_FREE = 0,
    REPIPE_INUSE = 1,
};

/* Character device bookkeeping. */
static int major;
static struct class *cls;
static atomic_t pipe_inuse = ATOMIC_INIT(REPIPE_FREE);

/* Staging buffer that device_write() fills from user space. */
static char msg[BUF_LEN + 1];

/* Destination address and the socket used to reach it. */
static struct sockaddr_in remote_addr;
static struct socket *remote_sock;

static int device_open(struct inode *inode, struct file *file);
static int device_release(struct inode *inode, struct file *file);
static ssize_t device_read(struct file *filp, char __user *buff, size_t len, loff_t *off);
static ssize_t device_write(struct file *filp, const char __user *buff, size_t len, loff_t *off);

static struct file_operations fops = {
    .read = device_read,
    .write = device_write,
    .open = device_open,
    .release = device_release,
};

/*
 * Module entry point: set up the UDP socket, then publish the char device.
 *
 * The socket is prepared first so that device_write() can never run with
 * remote_sock still unset. On any failure everything acquired so far is
 * released again, so a failed load leaves nothing registered.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init repipe_init(void) {
    struct device *dev;
    int ret_val;

    memset(&remote_addr, 0, sizeof(remote_addr));
    remote_addr.sin_family = AF_INET;
    remote_addr.sin_port = htons(remote_port);
    /* in_aton() parses dotted-quad IPv4 text only; it does not resolve host names. */
    remote_addr.sin_addr.s_addr = in_aton(remote_host);

    /*
     * sock_create_kern() allocates the socket itself. Calling sock_alloc()
     * beforehand would leak that socket when the pointer is overwritten.
     */
    ret_val = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, IPPROTO_UDP, &remote_sock);
    if (ret_val < 0) {
        pr_alert("Repipe failed to create socket: %d", ret_val);
        return ret_val;
    }

    ret_val = remote_sock->ops->connect(remote_sock, (struct sockaddr *)&remote_addr, sizeof(remote_addr), 0);
    if (ret_val < 0) {
        pr_alert("Repipe failed to connect to remote host: %d", ret_val);
        goto err_sock;
    }

    /* Major number 0 asks the kernel to pick a free one. */
    major = register_chrdev(0, DEVICE_NAME, &fops);
    if (major < 0) {
        pr_alert("Registering repipe device failed with %d", major);
        ret_val = major;
        goto err_sock;
    }

    cls = class_create(THIS_MODULE, DEVICE_FILE_NAME);
    if (IS_ERR(cls)) {
        ret_val = PTR_ERR(cls);
        pr_alert("Repipe failed to create device class: %d", ret_val);
        goto err_chrdev;
    }

    dev = device_create(cls, NULL, MKDEV(major, 0), NULL, DEVICE_FILE_NAME);
    if (IS_ERR(dev)) {
        ret_val = PTR_ERR(dev);
        pr_alert("Repipe failed to create device: %d", ret_val);
        goto err_class;
    }

    pr_info("Repipe device registered with major number %d on %s", major, DEVICE_PATH);

    return 0;

err_class:
    class_destroy(cls);
err_chrdev:
    unregister_chrdev(major, DEVICE_NAME);
err_sock:
    sock_release(remote_sock);
    remote_sock = NULL;
    return ret_val;
}

/*
 * Module exit point: remove the device node and its class, release the
 * socket and give the major number back.
 */
static void __exit repipe_exit(void)
{
    const dev_t devt = MKDEV(major, 0);

    device_destroy(cls, devt);
    class_destroy(cls);

    sock_release(remote_sock);

    unregister_chrdev(major, DEVICE_NAME);
    pr_info("Repipe device unregistered");
}

static int device_open(struct inode *inode, struct file *file) {
    if (atomic_cmpxchg(&pipe_inuse, REPIPE_FREE, REPIPE_INUSE) == REPIPE_INUSE) {
        return -EBUSY;
    }

    try_module_get(THIS_MODULE);

    return 0;
}

/*
 * release() handler: mark the device as free again and drop the module
 * reference taken in device_open(). Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
    atomic_set(&pipe_inuse, REPIPE_FREE);
    module_put(THIS_MODULE);
    return 0;
}

/* read() handler: the device is write-only, so every read fails with -EINVAL. */
static ssize_t device_read(struct file *filp, char __user *buff, size_t len, loff_t *off)
{
    const ssize_t not_supported = -EINVAL;

    return not_supported;
}

/*
 * write() handler: forward up to BUF_LEN bytes of user data as one UDP
 * datagram to remote_addr.
 *
 * Returns the number of bytes consumed (at most BUF_LEN; callers see a
 * short write for larger buffers), -EFAULT if the user buffer cannot be
 * read, or the negative error reported by kernel_sendmsg().
 */
static ssize_t device_write(struct file *filp, const char __user *buff, size_t len, loff_t *off) {
    /* Built fresh on every call so no state leaks from one write to the next. */
    struct msghdr msg_hdr = {
        .msg_name = &remote_addr,
        .msg_namelen = sizeof(remote_addr),
    };
    struct kvec vec;
    size_t count = min_t(size_t, len, BUF_LEN);
    int ret_val;

    /*
     * One bulk copy instead of a get_user() per byte. A fault is reported
     * to the caller rather than silently sending stale buffer contents.
     */
    if (copy_from_user(msg, buff, count)) {
        return -EFAULT;
    }

    vec.iov_base = msg;
    vec.iov_len = count;

    ret_val = kernel_sendmsg(remote_sock, &msg_hdr, &vec, 1, count);

    if (ret_val < 0) {
        pr_alert("Repipe failed to send message: %d", ret_val);
        return ret_val;
    }

    return count;
}

/* Register repipe_init/repipe_exit as the module's load and unload entry points. */
module_init(repipe_init);
module_exit(repipe_exit);

There are about 160 lines of code, and the main logic is concentrated in repipe.c. In the first lines we define the license, author name and description of the module using macros. Then come the two parameters it takes: the remote host and port. That’s fairly straightforward; those macros are defined in include/linux/module.h. The third argument of the module_param macro is the permission of the corresponding file in sysfs. In this example, the parameter is also available at /sys/module/repipe/parameters/remote_host.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
/* Module metadata, shown by modinfo. */
MODULE_LICENSE("Dual MPL/GPL");
MODULE_AUTHOR("Rui Li <me@lirui.org>");
MODULE_DESCRIPTION("Create a char device which sends data to remote server");

/*
 * Destination of the forwarded data. The address is parsed with in_aton()
 * in repipe_init(), which only understands dotted-quad IPv4 text and does
 * not resolve host names, so the default has to be a numeric address.
 */
static char *remote_host = "127.0.0.1";
static int remote_port = 1040;

/*
 * The last argument is the mode of the matching file under
 * /sys/module/repipe/parameters/. The values are only read in
 * repipe_init(), so writing to those files after loading does not
 * change the destination.
 */
module_param(remote_host, charp, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(remote_host, "Remote host to connect to");
module_param(remote_port, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
MODULE_PARM_DESC(remote_port, "Remote port to connect to");

The repipe_init function is called when the module is loaded; it is responsible for creating the char device and initializing the socket. On Linux, devices have a major number and a minor number; here is a list of them. We pass 0 as our major number, so Linux assigns one for us, which avoids conflicts.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Module entry point: set up the UDP socket, then publish the char device.
 *
 * The socket is prepared first so that device_write() can never run with
 * remote_sock still unset. On any failure everything acquired so far is
 * released again, so a failed load leaves nothing registered.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init repipe_init(void) {
    struct device *dev;
    int ret_val;

    memset(&remote_addr, 0, sizeof(remote_addr));
    remote_addr.sin_family = AF_INET;
    remote_addr.sin_port = htons(remote_port);
    /* in_aton() parses dotted-quad IPv4 text only; it does not resolve host names. */
    remote_addr.sin_addr.s_addr = in_aton(remote_host);

    /*
     * sock_create_kern() allocates the socket itself. Calling sock_alloc()
     * beforehand would leak that socket when the pointer is overwritten.
     */
    ret_val = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, IPPROTO_UDP, &remote_sock);
    if (ret_val < 0) {
        pr_alert("Repipe failed to create socket: %d", ret_val);
        return ret_val;
    }

    ret_val = remote_sock->ops->connect(remote_sock, (struct sockaddr *)&remote_addr, sizeof(remote_addr), 0);
    if (ret_val < 0) {
        pr_alert("Repipe failed to connect to remote host: %d", ret_val);
        goto err_sock;
    }

    /* Major number 0 asks the kernel to pick a free one. */
    major = register_chrdev(0, DEVICE_NAME, &fops);
    if (major < 0) {
        pr_alert("Registering repipe device failed with %d", major);
        ret_val = major;
        goto err_sock;
    }

    cls = class_create(THIS_MODULE, DEVICE_FILE_NAME);
    if (IS_ERR(cls)) {
        ret_val = PTR_ERR(cls);
        pr_alert("Repipe failed to create device class: %d", ret_val);
        goto err_chrdev;
    }

    dev = device_create(cls, NULL, MKDEV(major, 0), NULL, DEVICE_FILE_NAME);
    if (IS_ERR(dev)) {
        ret_val = PTR_ERR(dev);
        pr_alert("Repipe failed to create device: %d", ret_val);
        goto err_class;
    }

    pr_info("Repipe device registered with major number %d on %s", major, DEVICE_PATH);

    return 0;

err_class:
    class_destroy(cls);
err_chrdev:
    unregister_chrdev(major, DEVICE_NAME);
err_sock:
    sock_release(remote_sock);
    remote_sock = NULL;
    return ret_val;
}

Another important function is device_write. When we write data into /dev/repipe, device_open is called first, and then device_write. device_open ensures that only one process is writing data at a time. device_write copies the data from user space to kernel space and sends it. The struct msghdr describes the message to send; the most essential fields are msg_name (the remote address) and its length. The kvec struct is quite like iovec, but it is for kernel space. Finally, we can send our data:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * write() handler: forward up to BUF_LEN bytes of user data as one UDP
 * datagram to remote_addr.
 *
 * Returns the number of bytes consumed (at most BUF_LEN; callers see a
 * short write for larger buffers), -EFAULT if the user buffer cannot be
 * read, or the negative error reported by kernel_sendmsg().
 */
static ssize_t device_write(struct file *filp, const char __user *buff, size_t len, loff_t *off) {
    /* Built fresh on every call so no state leaks from one write to the next. */
    struct msghdr msg_hdr = {
        .msg_name = &remote_addr,
        .msg_namelen = sizeof(remote_addr),
    };
    struct kvec vec;
    size_t count = min_t(size_t, len, BUF_LEN);
    int ret_val;

    /*
     * One bulk copy instead of a get_user() per byte. A fault is reported
     * to the caller rather than silently sending stale buffer contents.
     */
    if (copy_from_user(msg, buff, count)) {
        return -EFAULT;
    }

    vec.iov_base = msg;
    vec.iov_len = count;

    ret_val = kernel_sendmsg(remote_sock, &msg_hdr, &vec, 1, count);

    if (ret_val < 0) {
        pr_alert("Repipe failed to send message: %d", ret_val);
        return ret_val;
    }

    return count;
}

Here is a screenshot from a real machine.

Module usage

Data received

By the way, it seems that a VM in Hyper-V has a very high UDP loss rate, or cannot send UDP datagrams at all. If you want to send data to other machines, don’t use a Hyper-V VM. I tested on my VPS, and it works fine.

That’s all for today. Thanks for reading. Hopefully, I will share more knowledge about Linux.