用户空间与内核空间交互
了解Linux的朋友都清楚,Linux有两种运行空间,即内核空间与用户空间,内核态程序与用户态程序所使用的地址空间是相互隔离的。在用户态程序与内核态程序通讯时,需要将用户态程序内存中的数据拷贝(传递)到内核态内存中使用,反之亦然。内核提供了统一的接口进行拷贝,如copy_[from|to]_user接口,通过寄存器传递参数或传递参数指针。内核自身也对这些接口进行了额外封装,常见的交互方式包括:
- procfs文件系统
- sysfs文件系统
- debugfs文件系统
- netlink套接字
- ioctl系统调用
这里给出netlink与ioctl两种交互方式的内核态代码与用户态代码。
netlink 套接字
套接字相较于其他进程间通讯方式(如管道、消息队列、共享内存与信号量等),具有如下优势:
- 支持全双工数据传输
- 通过如传输层协议 TCP,网络传输损失低
- 数据传输速率高,尤其在本机内传输
- 流控
- 异步通讯,消息可以排队,因此发送方不需要等待接收方
- 在其他与内核通信的方式中,诸如 procfs、sysfs、debugfs 以及 ioctl 等,用户空间总是作为传输的发起者;与之相反,netlink 实现下内核空间也可作为发起者
- 其他机制会造成文件系统命名污染,使用 netlink 套接字以及 ioctl 则不会有这样的困扰
netlink 套接字是什么
netlink 套接字是2.2内核之后引入的一类特殊套接字,只在Linux操作系统中存在,使用它可以完成用户空间的进程间通讯,也可以用它来与内核进行通讯。与之类似的是UDS(UNIX domain datagram socket),旨在进行本机内部的通讯。UDS使用路径名作为命名空间(一类特殊的套接字文件),netlink 套接字则使用PID(port ID)端口号。
编写用户空间套接字应用
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#define NETLINK_MY_UNIT_PROTO 31 /* netlink protocol number; registered by the kernel side */
/* Payload capacity in bytes; the message buffer is sized as NLMSG_SPACE(NLSPACE). */
#define NLSPACE 1024
/* Sample payload that main() sends to the kernel. */
static const char *thedata = "sample user data to send to kernel via netlink";
/*
 * Userspace netlink demo.
 *
 * Opens a raw netlink socket on protocol NETLINK_MY_UNIT_PROTO, binds it,
 * sends one message carrying `thedata` to the kernel (destination port ID 0),
 * then blocks until a reply arrives and prints the reply payload.
 *
 * Exits with EXIT_SUCCESS once a reply has been received and printed, and
 * with EXIT_FAILURE on any socket, allocation, send or receive error. The
 * socket and the message buffer are released on every path after creation.
 */
int main(int argc, char **argv)
{
	int sd;
	int status = EXIT_FAILURE;
	struct sockaddr_nl src_nl, dest_nl;
	struct nlmsghdr *nlhdr = NULL;
	struct iovec iov;
	struct msghdr msg;
	ssize_t nsent, nrecv;
	size_t datalen, end;
	const size_t bufsz = NLMSG_SPACE(NLSPACE);

	(void)argc;

	/* 1. Communication endpoint: the netlink socket */
	sd = socket(PF_NETLINK, SOCK_RAW, NETLINK_MY_UNIT_PROTO);
	if (sd < 0) {
		perror("netlink_u: netlink socket creation failed");
		exit(EXIT_FAILURE);
	}
	printf("%s:PID %d: netlink socket created\n", argv[0], getpid());

	/* 2. Set up the netlink source address structure and bind to it */
	memset(&src_nl, 0, sizeof(src_nl));
	src_nl.nl_family = AF_NETLINK;
	/*
	 * nl_pid is a netlink port ID, not a process ID as such; the process
	 * ID is merely used here as a conveniently unique value.
	 */
	src_nl.nl_pid = getpid();
	src_nl.nl_groups = 0x0; /* no multicast */
	if (bind(sd, (struct sockaddr *)&src_nl, sizeof(src_nl)) < 0) {
		perror("netlink_u: bind failed");
		goto out_close;
	}
	printf("%s: bind done\n", argv[0]);

	/* 3. Set up the netlink destination address structure */
	memset(&dest_nl, 0, sizeof(dest_nl));
	dest_nl.nl_family = AF_NETLINK;
	dest_nl.nl_groups = 0x0; /* no multicast */
	dest_nl.nl_pid = 0;      /* destined for the kernel */

	/* 4. Allocate and prepare the netlink header plus payload area */
	nlhdr = malloc(bufsz);
	if (!nlhdr) {
		fprintf(stderr, "netlink_u: malloc nlhdr failed");
		goto out_close;
	}
	memset(nlhdr, 0, bufsz);
	nlhdr->nlmsg_len = bufsz;
	nlhdr->nlmsg_pid = getpid();

	/*
	 * Copy the payload. The payload area holds exactly NLSPACE bytes, so
	 * at most NLSPACE-1 bytes are copied; the zeroed buffer supplies the
	 * terminating NUL. (Copying strnlen()+1 bytes could write one byte
	 * past the buffer for data of NLSPACE bytes or more.)
	 */
	datalen = strlen(thedata);
	if (datalen > NLSPACE - 1)
		datalen = NLSPACE - 1;
	memcpy(NLMSG_DATA(nlhdr), thedata, datalen);
	printf("%s: destination struct, netlink hdr, payload setup\n", argv[0]);

	/* 5. Set up the iovec describing the message buffer */
	memset(&iov, 0, sizeof(struct iovec));
	iov.iov_base = (void *)nlhdr;
	iov.iov_len = nlhdr->nlmsg_len;
	printf("%s: initialized iov structure (nl header folded in)\n", argv[0]);

	/* 6. Set up the message header structure */
	memset(&msg, 0, sizeof(struct msghdr));
	msg.msg_name = (void *)&dest_nl; /* destination address */
	msg.msg_namelen = sizeof(dest_nl);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1; /* number of elements in msg_iov */
	printf("%s: initialized msghdr structure (iov folded in)\n", argv[0]);

	/* 7. Send */
	nsent = sendmsg(sd, &msg, 0);
	if (nsent < 0) {
		perror("netlink_u: sendmsg(2) failed");
		goto out_free;
	} else if (nsent == 0) {
		printf(" 0 bytes sent\n");
		goto out_free;
	}
	/* ssize_t is printed with %zd; %ld is only right where ssize_t is long */
	printf("%s:sendmsg(): *** success, sent %zd bytes all-inclusive\n"
	       " (see kernel log for dtl)\n", argv[0], nsent);
	fflush(stdout);

	/* 8. Block until the kernel replies; the same buffer is reused */
	printf("%s: now blocking on kernel netlink msg via recvmsg() ...\n", argv[0]);
	iov.iov_len = bufsz;
	nrecv = recvmsg(sd, &msg, 0);
	if (nrecv < 0) {
		perror("netlink_u: recvmsg(2) failed");
		goto out_free;
	}
	if (nrecv < (ssize_t)NLMSG_HDRLEN) {
		fprintf(stderr, "netlink_u: short netlink message (%zd bytes)\n", nrecv);
		goto out_free;
	}
	/*
	 * The reply is not guaranteed to be NUL-terminated, and the buffer
	 * still holds the tail of the (longer) message sent above, so
	 * terminate explicitly right after the received bytes.
	 */
	end = (size_t)nrecv < bufsz ? (size_t)nrecv : bufsz - 1;
	((char *)nlhdr)[end] = '\0';
	printf("%s:recvmsg(): *** success, received %zd bytes:"
	       "\nmsg from kernel netlink: \"%s\"\n",
	       argv[0], nrecv, (char *)NLMSG_DATA(nlhdr));
	status = EXIT_SUCCESS;

out_free:
	free(nlhdr);
out_close:
	close(sd);
	exit(status);
}
编写内核空间netlink套接字代码模块
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <net/sock.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include "convenient.h"
#define pr_fmt(fmt) "%s:%s(): " fmt, KBUILD_MODNAME, __func__
/* Module metadata. */
MODULE_AUTHOR("xd");
MODULE_DESCRIPTION("netlink module");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION("1.0");
#define OURMODNAME "netlink_simple_intf"
#define NETLINK_MY_UNIT_PROTO 31 /* netlink protocol number this module registers */
/* Upper bound used when measuring the reply string length. */
#define NLSPACE 1024
/* Kernel-side netlink socket: created in module init, released in module exit. */
static struct sock *nlsock;
/*********************************************************
* netlink_recv_and_reply
* 用户空间进程/线程发送信息,将会调用这一回调函数;
* 需要注意的是,这一程序运行在进程上下文而非中断上下文;
* 在这一程序中,只是简单接收消息之后将负载信息回送回用户空间;
*********************************************************/
static void netlink_recv_and_reply(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
struct sk_buff *skb_tx;
char *reply = "Reply from kernel netlink";
int pid, msgsz, stat;
/* 用来确定当前程序运行的上下文,进程上下文/中断上下文 */
PRINT_CTX();
nlh = (struct nlmsghdr *)skb->data;
pid = nlh->nlmsg_pid; /* 发送进程的端口号 */
pr_info("received from PID %d:\n"
"\"%s\"\n", pid, (char *)NLMSG_DATA(nlh));
/* 应答 */
msgsz = strnlen(reply, NLSPACE);
skb_tx = nlmsg_new(msgsz, 0);
if (!skb_tx) {
pr_warn("skb alloc failed!\n");
return;
}
/* 配置负载信息 */
nlh = nlmsg_put(skb_tx, 0, 0, NLMSG_DONE, msgsz, 0);
NETLINK_CB(skb_tx).dst_group = 0; /* 单播 */
strncpy(nlmsg_data(nlh), reply, msgsz);
/* 发送,自动回收内存 */
stat = nlmsg_unicast(nlsock, skb_tx, pid);
if (stat < 0)
pr_warn("nlmsg_unicast() failed (err=%d)\n", stat);
pr_info("reply sent\n");
}
/*
 * Configuration passed to netlink_kernel_create(): messages arriving on the
 * socket are handed to netlink_recv_and_reply().
 */
static struct netlink_kernel_cfg nl_kernel_cfg = {
.input = netlink_recv_and_reply,
};
/*
 * netlink_simple_intf_init - module entry point
 *
 * Creates the kernel-side netlink socket for protocol NETLINK_MY_UNIT_PROTO
 * in the initial network namespace, using nl_kernel_cfg for its callbacks.
 *
 * Return: 0 on success, -ENOMEM if the socket could not be created.
 */
static int __init netlink_simple_intf_init(void)
{
	pr_info("creating kernel netlink socket\n");

	/* Create the socket that serves our protocol */
	nlsock = netlink_kernel_create(&init_net, NETLINK_MY_UNIT_PROTO,
				       &nl_kernel_cfg);
	if (!nlsock) {
		pr_warn("netlink_kernel_create failed\n");
		/*
		 * Failure is signalled by NULL, not an ERR_PTR, so
		 * PTR_ERR(nlsock) would be 0 and the module would load
		 * "successfully" without a socket. Return a real error.
		 */
		return -ENOMEM;
	}

	pr_info("inserted\n");
	return 0; /* success */
}
/*
 * netlink_simple_intf_exit - module exit point
 *
 * Releases the kernel netlink socket created at init time and logs the
 * removal.
 */
static void __exit netlink_simple_intf_exit(void)
{
netlink_kernel_release(nlsock);
pr_info("removed\n");
}
/* Register the module's load and unload handlers. */
module_init(netlink_simple_intf_init);
module_exit(netlink_simple_intf_exit);
ioctl
内核态代码编写
#define pr_fmt(fmt) "%s:%s(): " fmt, KBUILD_MODNAME, __func__
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
/* copy_[to|from]_user */
#include <linux/version.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 11, 0)
#include <linux/uaccess.h>
#else
#include <asm/uaccess.h>
#endif
#include "ioctl_llkd.h"
#include "convenient.h"
/* Name under which the character device is registered and unregistered. */
#define OURMODNAME "ioctl_llkd_kdrv"
MODULE_AUTHOR("xd");
MODULE_DESCRIPTION("ioctl test");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION("0.1");
/*
 * ioctl_intf_major: major number of the character device. It starts at 0 and
 * is set to the value returned by register_chrdev() during module init.
 * power: the state read and written through the IOCQPOWER/IOCSPOWER ioctls.
 */
static int ioctl_intf_major,
power = 1; /* 'powered on' by default */
/*
 * ioctl_intf_ioctl - ioctl method of the device
 * @filp: open file the request was issued on
 * @cmd:  ioctl command number
 * @arg:  command argument; a user pointer to an int for IOCTL_LLKD_IOCQPOWER,
 *        the new value itself for IOCTL_LLKD_IOCSPOWER
 *
 * Supported commands:
 *   IOCTL_LLKD_IOCRESET  - only logs a debug message
 *   IOCTL_LLKD_IOCQPOWER - copies the current `power` value to *arg
 *   IOCTL_LLKD_IOCSPOWER - sets `power` to arg
 * The two power commands require CAP_SYS_ADMIN.
 *
 * Return: 0 on success; -ENOTTY for a foreign magic number, an out-of-range
 * command number or an unknown command; -EPERM without CAP_SYS_ADMIN; the
 * error from put_user() (-EFAULT) if the user pointer is not writable.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
static long ioctl_intf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
#else
static int ioctl_intf_ioctl(struct inode *ino, struct file *filp, unsigned int cmd,
			    unsigned long arg)
#endif
{
	int retval = 0;

	pr_debug("In ioctl method, cmd=%d\n", _IOC_NR(cmd));

	/* Verify stuff: is the ioctl's for us? etc.. */
	if (_IOC_TYPE(cmd) != IOCTL_LLKD_MAGIC) {
		pr_info("ioctl fail; magic # mismatch\n");
		return -ENOTTY;
	}
	if (_IOC_NR(cmd) > IOCTL_LLKD_MAXIOCTL) {
		pr_info("ioctl fail; invalid cmd?\n");
		return -ENOTTY;
	}

	switch (cmd) {
	case IOCTL_LLKD_IOCRESET:
		pr_debug("In ioctl cmd option: IOCTL_LLKD_IOCRESET\n");
		break;
	case IOCTL_LLKD_IOCQPOWER: /* Get: arg is pointer to result */
		/* Print the full pointer; an (unsigned int) cast truncates it on 64-bit */
		pr_debug("In ioctl cmd option: IOCTL_LLKD_IOCQPOWER\n"
			 "arg=0x%lx (drv) power=%d\n", arg, power);
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		/*
		 * put_user() validates the user address before writing.
		 * __put_user() skips that check and is only legal after an
		 * explicit access_ok(), which this path never performed.
		 */
		retval = put_user(power, (int __user *)arg);
		break;
	case IOCTL_LLKD_IOCSPOWER: /* Set: arg is the value to set */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		power = arg;
		pr_debug("In ioctl cmd option: IOCTL_LLKD_IOCSPOWER\n"
			 "power=%d now.\n", power);
		break;
	default:
		return -ENOTTY;
	}
	return retval;
}
/*
 * File operations that ioctl_intf_open() installs on an opened device file
 * with minor number 0: llseek is set to no_llseek and ioctl requests are
 * handled by ioctl_intf_ioctl(). The kernel version selects which ioctl
 * member exists.
 */
static const struct file_operations ioctl_intf_fops = {
.llseek = no_llseek,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
.unlocked_ioctl = ioctl_intf_ioctl, // use the 'unlocked' version
#else
.ioctl = ioctl_intf_ioctl, // 'old' way
#endif
};
static int ioctl_intf_open(struct inode *inode, struct file *filp)
{
pr_debug("Device node with minor # %d being used\n", iminor(inode));
switch (iminor(inode)) {
case 0:
filp->f_op = &ioctl_intf_fops;
break;
default:
return -ENXIO;
}
if (filp->f_op && filp->f_op->open)
return filp->f_op->open(inode, filp); /* Minor-specific open */
return 0;
}
/*
 * File operations passed to register_chrdev(). Only open is provided; it
 * swaps in the minor-specific operations table.
 */
static struct file_operations ioctl_intf_open_fops = {
.open = ioctl_intf_open, /* just a means to get at the real open */
};
/*
 * ioctl_llkd_kdrv_init - module entry point
 *
 * Registers the character device under OURMODNAME with
 * ioctl_intf_open_fops. When ioctl_intf_major is 0, the value returned by
 * register_chrdev() is stored as the major number.
 *
 * Return: 0 on success, the negative value from register_chrdev() on
 * failure.
 */
static int __init ioctl_llkd_kdrv_init(void)
{
	int rc;

	pr_debug("ioctl_intf_major=%d\n", ioctl_intf_major);

	rc = register_chrdev(ioctl_intf_major, OURMODNAME, &ioctl_intf_open_fops);
	if (rc < 0) {
		pr_info("register_chrdev() failed trying to get ioctl_intf_major=%d\n", ioctl_intf_major);
		return rc;
	}

	/* Dynamic allocation: remember the major number we were given */
	if (!ioctl_intf_major)
		ioctl_intf_major = rc;

	pr_debug("registered:: ioctl_intf_major=%d\n", ioctl_intf_major);
	pr_info("initialized\n");

	return 0;
}
static void ioctl_llkd_kdrv_cleanup(void)
{
unregister_chrdev(ioctl_intf_major, OURMODNAME);
pr_info("removed\n");
}
/* Register the module's load and unload handlers. */
module_init(ioctl_llkd_kdrv_init);
module_exit(ioctl_llkd_kdrv_cleanup);
用户态代码编写
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "ioctl_llkd.h"
/*
 * Issue IOCTL_LLKD_IOCSPOWER with the given value on fd. On failure, report
 * the error, close fd and terminate the program with EXIT_FAILURE.
 */
static void set_power_or_exit(int fd, int value)
{
	if (ioctl(fd, IOCTL_LLKD_IOCSPOWER, value) != -1)
		return;

	perror("ioctl IOCTL_LLKD_IOCSPOWER failed");
	close(fd);
	exit(EXIT_FAILURE);
}

/*
 * Userspace test program for the ioctl driver.
 *
 * Usage: <prog> device_file
 *
 * Opens the device file, prints the ioctl command numbers, then exercises
 * the driver: reset, query the power state, and toggle it (0 -> 1
 * immediately, 1 -> 0 after a 3 second delay). Any other power value is left
 * untouched. Exits with EXIT_FAILURE on a usage error or on the first
 * failing call, EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
	int fd, power;
	const char *prog = argv[0];

	if (argc < 2) {
		fprintf(stderr,
			"Usage: %s device_file\n"
			"If device_file does not exist, create it using mknod(1) (as root)\n",
			prog);
		exit(EXIT_FAILURE);
	}

	fd = open(argv[1], O_RDWR, 0);
	if (fd == -1) {
		perror("open");
		exit(EXIT_FAILURE);
	}
	printf("device opened: fd=%d\n", fd);

	printf("(FYI, IOCTL_LLKD_IOCRESET = 0x%x IOCTL_LLKD_IOCQPOWER= 0x%x "
	       "IOCTL_LLKD_IOCSPOWER=0x%x)\n",
	       IOCTL_LLKD_IOCRESET, (unsigned int)IOCTL_LLKD_IOCQPOWER,
	       (unsigned int)IOCTL_LLKD_IOCSPOWER);

	/* Exercise each ioctl in turn */

	/* 1. Reset the device */
	if (ioctl(fd, IOCTL_LLKD_IOCRESET, 0) == -1) {
		perror("ioctl IOCTL_LLKD_IOCRESET failed");
		close(fd);
		exit(EXIT_FAILURE);
	}
	printf("%s: device reset.\n", prog);

	/* 2. Query its power status */
	if (ioctl(fd, IOCTL_LLKD_IOCQPOWER, &power) == -1) {
		perror("ioctl IOCTL_LLKD_IOCQPOWER failed");
		close(fd);
		exit(EXIT_FAILURE);
	}
	printf("%s: power=%d\n", prog, power);

	/* 3. Toggle its power status */
	switch (power) {
	case 0:
		printf("%s: Device OFF, powering it On now ...\n", prog);
		set_power_or_exit(fd, 1);
		printf("%s: power is ON now.\n", prog);
		break;
	case 1:
		printf("%s: Device ON, powering it OFF in 3s ...\n", prog);
		sleep(3); /* yes, careful here of sleep & signals! */
		set_power_or_exit(fd, 0);
		printf("%s: power OFF ok, exiting..\n", prog);
		break;
	default:
		/* Neither 0 nor 1: nothing to toggle */
		break;
	}

	close(fd);
	return EXIT_SUCCESS;
}