【转载】Linux系统调用SYSCALL_DEFINE详解
系统调用在内核中的入口函数都是sys_xxx,但在内核源码中,这些函数实际上都是通过SYSCALL_DEFINE系列宏来定义的。本文以socket系统调用为例来详解。
1 首先看一下SYSCALL_DEFINE的定义,如下:
1 #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) 2 #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) 3 #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) 4 #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) 5 #define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) 6 #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) 7 #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
2 宏SYSCALL_DEFINEx的定义:
1 #define SYSCALL_DEFINEx(x, name, ...) \ 2 asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ 3 static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ 4 asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ 5 { \ 6 __SC_TEST##x(__VA_ARGS__); \ 7 return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__)); \ 8 } \ 9 SYSCALL_ALIAS(sys##name, SyS##name); \ 10 static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))
3 下面以socket系统调用为实例来分析,其定义:
1 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 2 { 3 int retval; 4 struct socket *sock; 5 int flags; 6 7 /* Check the SOCK_* constants for consistency. */ 8 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 9 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 10 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 11 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 12 13 flags = type & ~SOCK_TYPE_MASK; 14 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 15 return -EINVAL; 16 type &= SOCK_TYPE_MASK; 17 18 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 19 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 20 21 retval = sock_create(family, type, protocol, &sock); 22 if (retval < 0) 23 goto out; 24 25 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 26 if (retval < 0) 27 goto out_release; 28 29 out: 30 /* It may be already another descriptor 8) Not kernel problem. */ 31 return retval; 32 33 out_release: 34 sock_release(sock); 35 return retval; 36 }
3.1 ##和__VA_ARGS__
其中##是连接符,__VA_ARGS__代表前面...里面的可变参数。
3.2 socket系统调用对应的就是SYSCALL_DEFINE3
socket -> SYSCALL_DEFINE3 -> SYSCALL_DEFINEx 展开就是:
SYSCALL_DEFINEx(3, _socket, int, family, int, type, int, protocol)
再继续展开如下:
1 asmlinkage long sys_socket(__SC_DECL3(int, family, int, type, int, protocol)); \ ---- 详解1 2 static inline long SYSC_socket(__SC_DECL3(int, family, int, type, int, protocol)); \ --- 详解2 3 asmlinkage long SyS_socket(__SC_LONG3(int, family, int, type, int, protocol)) \ --- 详解3 4 { \ 5 __SC_TEST3(int, family, int, type, int, protocol); \ 6 return (long) SYSC_socket(__SC_CAST3(int, family, int, type, int, protocol)); \ 7 } \ 8 SYSCALL_ALIAS(sys_socket, SyS_socket); \ ------ 详解4 9 static inline long SYSC_socket(__SC_DECL3(int, family, int, type, int, protocol)) 详解5
详解1: 函数sys_socket的声明
详解2 :函数SYSC_socket声明
详解3:函数SyS_socket的定义
详解4:SYSCALL_ALIAS,根据名字就可以知道,这个宏定义的意思其实就是将SyS_socket的别名设为sys_socket,也就是说调用sys_socket其实就是在调用SyS_socket。
1 #define SYSCALL_ALIAS(alias, name) \ 2 asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ 3 "\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
详解5:函数SYSC_socket的定义,紧跟在宏展开结果后面的花括号代码(即SYSCALL_DEFINE3(socket, ...)之后的函数体)就是它的函数体。
3.3 宏__SC_DECL3,__SC_LONG3,__SC_CAST3
1 /*宏__SC_DECLx*/ 2 #define __SC_DECL1(t1, a1) t1 a1 3 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) 4 #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__) 5 #define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__) 6 #define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__) 7 #define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__) 8 /*宏__SC_LONGx*/ 9 #define __SC_LONG1(t1, a1) long a1 10 #define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__) 11 #define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__) 12 #define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__) 13 #define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__) 14 #define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__) 15 /*宏__SC_CASTx*/ 16 #define __SC_CAST1(t1, a1) (t1) a1 17 #define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__) 18 #define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__) 19 #define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__) 20 #define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__) 21 #define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__) 22 /*宏__SC_TESTx*/ 23 #define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long)) 24 #define __SC_TEST1(t1, a1) __SC_TEST(t1) 25 #define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__) 26 #define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__) 27 #define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__) 28 #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) 29 #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) 30 /*宏BUILD_BUG_ON*/ 31 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
上面__SC_DECL3展开如下:
__SC_DECL3(int, family, int, type, int, protocol) -> int family, __SC_DECL2(int, type, int, protocol) -> int family, int type, __SC_DECL1(int, protocol) -> int family, int type, int protocol
上面__SC_LONG3展开如下:
__SC_LONG3(int, family, int, type, int, protocol) -> long family, __SC_LONG2(int, type, int, protocol) -> long family, long type , __SC_LONG1(int, protocol) -> long family, long type, long protocol
上面__SC_CAST3展开如下:
__SC_CAST3(int, family, int, type, int, protocol) -> (int) family, __SC_CAST2(int, type, int, protocol) -> (int) family, (int) type, __SC_CAST1(int, protocol) -> (int) family, (int) type, (int) protocol
上面__SC_TEST3展开如下:
__SC_TEST3(int, family, int, type, int, protocol) -> __SC_TEST(int); __SC_TEST2(int, type, int, protocol) -> __SC_TEST(int); __SC_TEST(int); __SC_TEST1(int, protocol) -> __SC_TEST(int); __SC_TEST(int); __SC_TEST(int); -> BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long));
将这些东西代入前面展开的那一堆代码中,就能得到如下的比较清晰明了的代码了:
asmlinkage long sys_socket(int family, int type, int protocol); \ static inline long SYSC_socket(int family, int type, int protocol); \ asmlinkage long SyS_socket(long family, long type, long protocol) \ { \ BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long)); BUILD_BUG_ON(sizeof(int) > sizeof(long)); \ return (long) SYSC_socket((int) family, (int) type, (int) protocol); \ } \ SYSCALL_ALIAS(sys_socket, SyS_socket); \ static inline long SYSC_socket(int family, int type, int protocol) { code... }
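也就是说,3.3节的这些宏先将系统调用的参数统一用long型来接收,再强制转换为int,也就是系统调用原本声明的参数类型。那么为什么要多做这一次强转呢?原因是64位的Linux存在一个编号为CVE-2009-0029的漏洞:在部分64位平台(如s390、powerpc、sparc64、mips)上,ABI要求用户态程序把32位参数放入64位寄存器时做好符号扩展,而内核无法验证这一点;先按long接收再显式强转,就能保证内核中使用的是经过正确符号扩展的值。这个漏洞的具体内容参看原文。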
原文链接:https://www.cnblogs.com/xinghuo123/p/13796932.html