【转载】Linux系统调用SYSCALL_DEFINE详解

系统调用在内核中的入口函数都是sys_xxx,但实际上这些入口函数并不是直接手写的,而是统一通过SYSCALL_DEFINE系列宏来定义的。本文以socket系统调用为例来详解。

1 首先看一下SYSCALL_DEFINE的定义,如下:

复制代码
1 #define SYSCALL_DEFINE0(name)       asmlinkage long sys_##name(void)
2 #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
3 #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
4 #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
5 #define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
6 #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
7 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
复制代码

2 宏SYSCALL_DEFINEx的定义:

复制代码
 1 #define SYSCALL_DEFINEx(x, name, ...)                    \
 2     asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__));        \
 3     static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__));    \
 4     asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__))        \
 5     {                                \
 6         __SC_TEST##x(__VA_ARGS__);                \
 7         return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__));    \
 8     }                                \
 9     SYSCALL_ALIAS(sys##name, SyS##name);                \
10     static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))
复制代码

3 下面以socket系统调用为实例来分析,其定义:

复制代码
 1 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 2 {
 3     int retval;
 4     struct socket *sock;
 5     int flags;
 6 
 7     /* Check the SOCK_* constants for consistency.  */
 8     BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
 9     BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
10     BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
11     BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
12 
13     flags = type & ~SOCK_TYPE_MASK;
14     if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
15         return -EINVAL;
16     type &= SOCK_TYPE_MASK;
17 
18     if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
19         flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
20 
21     retval = sock_create(family, type, protocol, &sock);
22     if (retval < 0)
23         goto out;
24 
25     retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
26     if (retval < 0)
27         goto out_release;
28 
29 out:
30     /* It may be already another descriptor 8) Not kernel problem. */
31     return retval;
32 
33 out_release:
34     sock_release(sock);
35     return retval;
36 }
复制代码

3.1 ##和__VA_ARGS__

其中##是连接符,__VA_ARGS__代表前面...里面的可变参数。

3.2 socket系统调用对应的就是SYSCALL_DEFINE3

socket -> SYSCALL_DEFINE3 -> SYSCALL_DEFINEx 展开就是:

SYSCALL_DEFINEx(3, _socket,  int, family, int, type, int, protocol)

再继续展开如下:

复制代码
1 asmlinkage long sys_socket(__SC_DECL3(int, family, int, type, int, protocol));        \  ---- 详解1
2     static inline long SYSC_socket(__SC_DECL3(int, family, int, type, int, protocol));    \  --- 详解2 
3     asmlinkage long SyS_socket(__SC_LONG3(int, family, int, type, int, protocol))        \   --- 详解3
4     {                                \
5         __SC_TEST3(int, family, int, type, int, protocol);                \
6         return (long) SYSC_socket(__SC_CAST3(int, family, int, type, int, protocol));    \
7     }                                \
8     SYSCALL_ALIAS(sys_socket, SyS_socket);                \  ------ 详解4
9     static inline long SYSC_socket(__SC_DECL3(int, family, int, type, int, protocol))  详解5
复制代码

详解1: 函数sys_socket的声明

详解2 :函数SYSC_socket声明

详解3:函数SyS_socket的定义

详解4:SYSCALL_ALIAS,根据名字就可以知道,这个宏定义的意思其实就是将SyS_socket的别名设为sys_socket,也就是说调用sys_socket其实就是在调用SyS_socket。

1 #define SYSCALL_ALIAS(alias, name)                    \
2     asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n"    \
3          "\t.globl ." #alias "\n\t.set ." #alias ", ." #name)

3.3 宏__SC_DECL3,__SC_LONG3,__SC_CAST3

复制代码
 1 /*宏__SC_DECLx*/
 2 #define __SC_DECL1(t1, a1)    t1 a1
 3 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
 4 #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
 5 #define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__)
 6 #define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__)
 7 #define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__)
 8 /*宏__SC_LONGx*/
 9 #define __SC_LONG1(t1, a1)     long a1
10 #define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__)
11 #define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__)
12 #define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__)
13 #define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__)
14 #define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__)
15 /*宏__SC_CASTx*/
16 #define __SC_CAST1(t1, a1)    (t1) a1
17 #define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__)
18 #define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__)
19 #define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__)
20 #define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__)
21 #define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__)
22 /*宏__SC_TESTx*/
23 #define __SC_TEST(type)        BUILD_BUG_ON(sizeof(type) > sizeof(long))
24 #define __SC_TEST1(t1, a1)    __SC_TEST(t1)
25 #define __SC_TEST2(t2, a2, ...)    __SC_TEST(t2); __SC_TEST1(__VA_ARGS__)
26 #define __SC_TEST3(t3, a3, ...)    __SC_TEST(t3); __SC_TEST2(__VA_ARGS__)
27 #define __SC_TEST4(t4, a4, ...)    __SC_TEST(t4); __SC_TEST3(__VA_ARGS__)
28 #define __SC_TEST5(t5, a5, ...)    __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
29 #define __SC_TEST6(t6, a6, ...)    __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
30 /*宏BUILD_BUG_ON*/
31 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
复制代码

上面__SC_DECL3展开如下:

__SC_DECL3(int, family, int, type, int, protocol) -> 
int family, __SC_DECL2(int, type, int, protocol) -> 
int family, int type, __SC_DECL1(int, protocol) -> 
int family, int type, int protocol 

上面__SC_LONG3展开如下:

__SC_LONG3(int, family, int, type, int, protocol) -> 
long family, __SC_LONG2(int, type, int, protocol) ->
long family, long type , __SC_LONG1(int, protocol) -> 
long family, long type, long protocol

上面__SC_CAST3展开如下:

__SC_CAST3(int, family, int, type, int, protocol) -> 
(int) family, __SC_CAST2(int, type, int, protocol) -> 
(int) family, (int) type, __SC_CAST1(int, protocol) -> 
(int) family, (int) type, (int) protocol 

上面__SC_TEST3展开如下:

复制代码
__SC_TEST3(int, family, int, type, int, protocol) -> 
__SC_TEST(int); __SC_TEST2(int, type, int, protocol) ->
__SC_TEST(int); __SC_TEST(int); __SC_TEST1(int, protocol) ->
__SC_TEST(int); __SC_TEST(int); __SC_TEST(int); ->
BUILD_BUG_ON(sizeof(int) > sizeof(long));
BUILD_BUG_ON(sizeof(int) > sizeof(long));
BUILD_BUG_ON(sizeof(int) > sizeof(long));
复制代码

将这些东西代入前面展开的那一堆代码中,就能得到如下的比较清晰明了的代码了:

asmlinkage long sys_socket(int family, int type, int protocol); \
static inline long SYSC_socket(int family, int type, int protocol); \
asmlinkage long SyS_socket(long family, long type, long protocol) \
{ \
BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long)); \
return (long) SYSC_socket((int) family, (int) type, (int) protocol); \
} \
SYSCALL_ALIAS(sys_socket, SyS_socket); \
static inline long SYSC_socket(int family, int type, int protocol)
{
code...
}

 

也就是说,3.3节的这些宏先把系统调用的参数统一用long型来接收,再强制转换回int,也就是系统调用原本声明的参数类型。那么为什么要这样强转一下呢?原因是64位的Linux存在一个编号为CVE-2009-0029的漏洞:在部分64位平台上,用户态通过64位寄存器传入的32位参数可能没有被正确地符号扩展,先用long接收再强转回原类型,可以保证内核实际使用的是正确扩展后的值。这个漏洞的具体内容参看原文。

原文链接:https://www.cnblogs.com/xinghuo123/p/13796932.html

posted @ 2022-01-12 16:44  huansky  阅读(1863)  评论(0)  编辑  收藏  举报