ADD系统调用
周末闲散,敲着键盘遍历文件夹,竟发现了一年前的笔记。那年青涩的我啊,一切都是懵懵懂懂;时至今日,虽然看的都是相同的代码、一模一样的函数,可为啥感觉就不一样了呢?“每一行有每一行的道”,IT行业的你们、她们、我们,每隔一年的变化就是那么实实在在的。
贴上note,纪念逝去那一年的青春。
# 如何修改系统函数的功能,甚至是自己加个系统调用。
errno全局变量,存储当前出错编号,后通过perror库函数把该变量翻译成用户可以理解的错误字符串。
<include/linux/syscalls.h>
/*
 * Entry point of a zero-argument system call: expands to the declarator
 * "asmlinkage long sys_<name>(void)"; whatever follows the macro use
 * (a function body or a semicolon) completes it.
 */
#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void)
系统调用号,一旦分配,就不能再变更!
<arch/cris/arch-v10/kernel/entry.S>//与体系结构有关,一般也放在entry.S中
sys_call_table:
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
.long sys_exit
.long sys_fork
.long sys_read
.long sys_write
.long sys_open /* 5 */
.long sys_close
.long sys_waitpid
.long sys_creat
.long sys_link
.long sys_unlink /* 10 */
。。。
。。。
系统调用处理程序通过软中断触发,以X86为例:
软中断: int $0x80指令产生,
产生异常,
切换到内核态,
执行128号异常处理程序,即:系统调用处理程序。
The system_call() function checks the validity of the given system call number by comparing it to NR_syscalls. If it is larger than or equal to NR_syscalls, the function returns -ENOSYS. Otherwise, the specified system call is invoked:
call *sys_call_table(,%rax,8)
<kernel/trace/trace_syscalls.c>
/*
 * reg_event_syscall_enter - enable entry tracing for one system call.
 * @ptr: the system call's name, passed as an untyped pointer to a C string.
 *
 * Resolves the name to a system call number, registers ftrace_syscall_enter
 * through register_trace_sys_enter() when sys_refcount_enter is still zero,
 * then sets the number's bit in enabled_enter_syscalls and bumps the
 * reference count.
 *
 * Returns 0 on success, -ENOSYS if the name does not map to a valid system
 * call number, or the error returned by register_trace_sys_enter().
 */
int reg_event_syscall_enter(void *ptr)
{
	char *name = (char *)ptr;
	int ret = 0;
	int num;

	/* Name -> system call number; see A (syscall_name_to_nr). */
	num = syscall_name_to_nr(name);

	/* A valid number is non-negative and strictly below NR_syscalls. */
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);

	/* Only the first enabled system call registers the probe. */
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);

	if (ret) {
		/*
		 * Adjacent string literals are concatenated with nothing in
		 * between, so the separating space must be written explicitly.
		 */
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}

	mutex_unlock(&syscall_trace_lock);

	return ret;
}
A:
<arch/x86/kernel/ftrace.c>
/*
 * syscall_name_to_nr - translate a system call name into its number.
 * @name: NUL-terminated name to look up.
 *
 * Scans syscalls_metadata[] from index 0 and returns the first index whose
 * metadata name equals @name. Returns -1 when the metadata array has not
 * been set up or when no entry matches.
 */
int syscall_name_to_nr(char *name)
{
	int nr;

	if (!syscalls_metadata)
		return -1;

	nr = 0;
	while (nr < NR_syscalls) {
		/*
		 * Empty slots are skipped. Element layout: see B
		 * (struct syscall_metadata).
		 */
		if (syscalls_metadata[nr] &&
		    strcmp(syscalls_metadata[nr]->name, name) == 0)
			return nr;
		nr++;
	}

	return -1;
}
B:
<kernel/trace/syscall.h>
struct syscall_metadata {
constchar*name; //系统调用名字
int nb_args;
constchar**types;
constchar**args;
int enter_id;
int exit_id;
struct ftrace_event_call *enter_event;
struct ftrace_event_call *exit_event;
};
关于这个结构体,举X86的例子来跟踪下:
内核加载之初,做的最最基本的工作之一:
将syscalls_metadata[ ]安置好。
<arch/x86/kernel/ftrace.c>
/*
 * arch_init_ftrace_syscalls - architecture-specific setup of the system call
 * metadata array.
 *
 * Allocates syscalls_metadata[] with NR_syscalls zeroed slots and fills slot
 * i with whatever find_syscall_meta() returns for entry i of sys_call_table
 * (NULL when no metadata matches).
 *
 * Returns 0 on success, or -ENOMEM (after a WARN_ON) if the allocation fails.
 */
static int __init arch_init_ftrace_syscalls(void)
{
	int i;
	struct syscall_metadata *meta;
	/*
	 * sys_call_table is the table defined in assembly (excerpt below),
	 * indexed by system call number. Taking the symbol's address and
	 * indexing through a pointer-to-pointer reads its entries one by one;
	 * a double pointer is a handy trick whenever assembly symbols are
	 * involved.
	 */
	unsigned long **psys_syscall_table = &sys_call_table;
	/*
	 * -- arch/x86/kernel/syscall_table_32.S --
	 * ENTRY(sys_call_table)
	 *	.long sys_restart_syscall
	 *	.long sys_exit
	 *	.long ptregs_fork
	 *	.long sys_read
	 *	.long sys_write
	 *	.long sys_open
	 *	.long sys_close
	 *	.long sys_waitpid
	 *	.long sys_creat
	 *	.long sys_link
	 *	.long sys_unlink
	 *	.long ptregs_execve
	 *	.long sys_chdir
	 *	.long sys_time
	 *	.long sys_mknod
	 *	...
	 *	...
	 */

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	/* One pass over the table builds the per-number metadata array. */
	for (i = 0; i < NR_syscalls; i++) {
		meta = find_syscall_meta(psys_syscall_table[i]); /* see C */
		syscalls_metadata[i] = meta;
	}

	return 0;
}
C:
<arch/x86/kernel/ftrace.c>
#ifdef CONFIG_FTRACE_SYSCALLS
extern unsigned long __start_syscalls_metadata[]; //这些变量是从其他地方导过来
extern unsigned long __stop_syscalls_metadata[];
extern unsigned long*sys_call_table;
staticstruct syscall_metadata **syscalls_metadata;
staticstruct syscall_metadata *find_syscall_meta(unsigned long*syscall)
{
struct syscall_metadata *start;
struct syscall_metadata *stop;
char str[KSYM_SYMBOL_LEN];
start = (struct syscall_metadata *)__start_syscalls_metadata;
stop = (struct syscall_metadata *)__stop_syscalls_metadata;
kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);//系统调用号的名字
for ( ; start < stop; start++) {
if (start->name &&!strcmp(start->name, str)) //find struct...
return start;
}
return NULL;
}
====系统调用的实现====
==>参数验证(老三样儿)
1. The pointer points to a region of memory in user-space. Processes must not be able to trick the kernel into reading data in kernel-space on their behalf.
2. The pointer points to a region of memory in the process’s address space. The process must not be able to trick the kernel into reading someone else’s data.
3. If reading, the memory is marked readable. If writing, the memory is marked writable. If executing, the memory is marked executable. The process must not be able to bypass memory access restrictions.
来几个BT的宏瞧瞧:
<include/linux/syscalls.h>
/*
 * __SYSCALL_DEFINEx(x, name, ...) - declare and start defining an x-argument
 * system call. In order, the expansion:
 *   1. declares sys<name>() with the __SC_DECL<x> parameter list;
 *   2. declares the static inline worker SYSC<name>();
 *   3. defines the wrapper SyS<name>(), which takes the __SC_LONG<x>
 *      parameter list, runs __SC_TEST<x> and returns the result of
 *      SYSC<name>() called with the __SC_CAST<x> arguments;
 *   4. ties sys<name> to SyS<name> through SYSCALL_ALIAS();
 *   5. ends on the header of SYSC<name>(), so the braces written after the
 *      macro use become the body of that worker function.
 * NOTE(review): __SC_LONG/__SC_TEST/__SC_CAST are not shown here; presumably
 * they widen, type-check and cast back the arguments -- confirm.
 */
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \
static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \
asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \
{ \
__SC_TEST##x(__VA_ARGS__); \
return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__)); \
} \
SYSCALL_ALIAS(sys##name, SyS##name); \
static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))
是不是很晕,内核里这样的宏比比皆是,写个main.c加入这个宏,gcc -E一下,便明了。
比如试下:
__SYSCALL_DEFINEx(3, jesse)
{
。。。
}
宏展开:
asmlinkage long sysjesse(__SC_DECL3());
static inline long SYSCjesse(__SC_DECL3());
asmlinkage long SySjesse(__SC_LONG3())
{
__SC_TEST3();
return (long) SYSCjesse(__SC_CAST3());
}
SYSCALL_ALIAS(sysjesse, SySjesse);
static inline long SYSCjesse(__SC_DECL3()) //其实最终定义的是这个函数
{
。。。
}
瞧,这就晓得了不。再试一个:
__SYSCALL_DEFINEx(3, jesse, hao);
宏展开:
asmlinkage long sysjesse (__SC_DECL3(hao)); //==>aa
static inline long SYSCjesse (__SC_DECL3(hao));
asmlinkage long SySjesse (__SC_LONG3(hao))
{ __SC_TEST3(hao);
return (long) SYSCjesse (__SC_CAST3(hao));
}
SYSCALL_ALIAS(sysjesse, SySjesse);
static inline long SYSCjesse (__SC_DECL3(hao)) //最终定义的是这个函数
{
。。。
}
aa :
/*
 * __SC_DECL<n>(type, name, ...) turns n (type, name) pairs into a C parameter
 * list: each level emits "type name," for its first pair and hands the
 * remaining arguments to __SC_DECL<n-1>; __SC_DECL1 ends the chain with the
 * final "type name".
 */
#define __SC_DECL1(t1, a1) t1 a1
#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)
#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__)
#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__)
#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__)
====
总结: __SYSCALL_DEFINEx宏就可以看出意义了,不仅声明了函数,而且还定义了函数。
====
#一个实例:
<kernel/sys.c>//系统调用函数的集中营!
/*
 * reboot system call.
 *
 * magic1, magic2: must match the LINUX_REBOOT_MAGIC* constants checked below.
 * cmd:            one of the LINUX_REBOOT_CMD_* values handled in the switch.
 * arg:            user-space string, read only for LINUX_REBOOT_CMD_RESTART2
 *                 and passed on to kernel_restart().
 *
 * Returns -EPERM without CAP_SYS_BOOT, -EINVAL for bad magic numbers or an
 * unknown cmd, -EFAULT if the RESTART2 string cannot be copied from user
 * space, otherwise ret (0, or the result of kernel_kexec()/hibernate()).
 * The big kernel lock is taken before the switch and released on every path
 * that leaves the function.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
void __user *, arg)
{
char buffer[256];
int ret =0;
/* We only trust the superuser with rebooting the system. */
if (!capable(CAP_SYS_BOOT))
return-EPERM;
/* For safety, we require "magic" arguments. */
if (magic1 != LINUX_REBOOT_MAGIC1 ||
(magic2 != LINUX_REBOOT_MAGIC2 &&
magic2 != LINUX_REBOOT_MAGIC2A &&
magic2 != LINUX_REBOOT_MAGIC2B &&
magic2 != LINUX_REBOOT_MAGIC2C))
return-EINVAL;
/* Instead of trying to make the power_off code look like
* halt when pm_power_off is not set do it the easy way.
*/
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) &&!pm_power_off)
cmd = LINUX_REBOOT_CMD_HALT;
lock_kernel();
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
break;
case LINUX_REBOOT_CMD_CAD_ON:
C_A_D =1;
break;
case LINUX_REBOOT_CMD_CAD_OFF:
C_A_D =0;
break;
case LINUX_REBOOT_CMD_HALT:
kernel_halt();
unlock_kernel();
do_exit(0);
/* NOTE(review): no break here; do_exit()/panic() are presumably not expected to return -- confirm. */
panic("cannot halt");
case LINUX_REBOOT_CMD_POWER_OFF:
kernel_power_off();
unlock_kernel();
do_exit(0);
break;
case LINUX_REBOOT_CMD_RESTART2:
/* Copy at most sizeof(buffer) - 1 bytes of the user string; drop the lock before bailing out. */
if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) -1) <0) {
unlock_kernel();
return-EFAULT;
}
/* Force NUL termination in case the copy filled the buffer. */
buffer[sizeof(buffer) -1] ='\0';
kernel_restart(buffer);
break;
#ifdef CONFIG_KEXEC
case LINUX_REBOOT_CMD_KEXEC:
ret = kernel_kexec();
break;
#endif
#ifdef CONFIG_HIBERNATION
case LINUX_REBOOT_CMD_SW_SUSPEND:
ret = hibernate();
break;
#endif
default:
ret =-EINVAL;
break;
}
unlock_kernel();
return ret;
}
最后就是一个小小的总结:
要自定义一个系统调用,首先要在体系结构相关的系统调用表里确实有这个东西,也就是在entry.S这个汇编文件的sys_call_table中增加它的名字,也就是说确实有这么个调用。
然后,给它分配个调用号,也就是在相应体系结构下的文件<asm/unistd.h>增加个编号。
最后就是定义它的实体,一般放在kernel/sys.c下就蛮简单,但也可以放在其他相关性比较大的文件里。
编译内核,开始使用:)
note - 2010.10.27