守护进程:看门狗watchdog的添加

本文为项目开发总结的原创文档。

 

本项目,添加一个watchdog守护进程,用来监控环境的三大进程mozart、bitbox、mplayer,任何一个进程出现故障,整个环境进行重启。

 

首先通过板级驱动 /arch/mips/xburst/soc-x1000/common# vim reset.c 
找到与看门狗有关的code[同事发现,牛!];因此主要是将核心代码从内核空间搬移到用户空间,及如何监控应用层的进程。
 
整体实现思路:
1.创建一个进程作为守护进程:watchdog
  
进程的添加:
在configs下添加watchdog.mak;
在src下添加watchdog包,用于加入watchdog相关的code;Makefile;
 
进程的启动:在app.c中的startall中调用mozart_system("watchdog -b");
 
2.调整进程优先级:
1)如何查看进程的优先级
2)如何修改进程的优先级;
 
3.watchdog守护进程如何监控mozart和bitbox和mplayer
我们知道内核会通过/proc虚拟文件系统导出系统中正在运行的进程信息,每个进程都有一个/proc/<pid>目录。因此我们可以将检测进程是否存在转换为检测/proc/<pid>目录是否存在,这样就简单多了。

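对应下文详细代码中的processExists。注意:下文的processExists实际是通过popen执行"ps | grep 进程名 | grep -v grep | wc -l"统计匹配的进程个数来判断进程是否存在,而不是直接检查/proc/<pid>目录;两种方式的目的相同。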

 

实现难点突破:

用户空间和内核空间操作的都是虚拟地址。
1)如果是拿到的是物理地址,用户空间可以通过mmap的方式将物理地址转成虚拟地址(每一次的地址值都不一样),可以直接对这个虚拟地址赋值。
如下:
static int dev_fd;
    dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);
 
    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return 0;
    }
 
    unsigned char *map_base=(unsigned char * )mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, WDT_IOBASE (这个地址是物理地址!));
 
*( unsigned long*)(map_base + TCU_TSCR) = (1 << 16);
 
close(dev_fd);
 
2)而内核空间,从物理地址转成虚拟地址,一般是固定的。例如物理地址0x10002000对应的内核虚拟地址为0xb0002000;
 
3)同一个物理地址转成虚拟地址,用户空间和内核空间是不相同的。
关于用户空间和内核空间:
物理地址在内核空间和用户空间映射地址不一样~~~

 

看门狗实际上就是一个定时器,其硬件内部维护了一个计数的寄存器。每当时钟信号到来时,计数寄存器减1。如果减到0,则重启系统。

如果减到0之前,系统又设置计数寄存器到一个较大的值,则系统永远不会重启。 

 

watchdog的基本实现原理是:

用户空间程序打开 /dev/mem设备(俗称“开门放狗”),

就会导致在内核中启动一个定时器(本项目wdt_start_count的入参是20000ms即20s),此后,用户空间程序需要保证在20秒之内向这个设备写入数据(俗称“定期喂狗”),每次写操作会导致重新设定定时器(本项目是每sleep 10s重新去设定)。如果用户空间程序在20秒之内没有写操作,定时器到期会导致一次系统Reboot操作(“狗咬人了”)。 

 

watchdog.c 内容如下:

#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <execinfo.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <unistd.h>
#include <linux/input.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#define WDT_IOBASE (0x10002000)     /* physical base passed to mmap() on /dev/mem */
#define MAP_SIZE        0xFF        /* length of the mapping; covers every offset used below */

#define JZ_EXTAL_RTC      32768     /* RTC extal freq: 32.768 KHz */
#define TCU_IOBASE      0x10002000
#define TCU_TSCR   (0x3C)   /* Timer Stop Clear Register */

#define WDT_TCSR                (0x0c)  /* rw, 32, 0x???????? */
#define WDT_TCER                (0x04)  /* rw, 32, 0x???????? */
#define WDT_TDR                 (0x00)  /* rw, 32, 0x???????? */
#define WDT_TCNT                (0x08)  /* rw, 32, 0x???????? */
#define TCU_TSSR   (0x2C)   /* Timer Stop Set Register */

#define WDT_TIMEOUT_MS      20000   /* hardware timeout armed on every feed */
#define WDT_FEED_PERIOD_S   10      /* seconds slept between two feeds */
#define WDT_MAX_TICKS       65535   /* largest value written to WDT_TDR */

/*
 * Map the watchdog/TCU register window into this process.
 *
 * User space cannot use the physical address directly, so the window is
 * mapped through /dev/mem. The descriptor is closed right away; the mapping
 * stays valid until wdt_unmap_regs() is called.
 *
 * Returns the mapped base, or NULL (after printing a diagnostic) on failure.
 */
static volatile unsigned char *wdt_map_regs(void)
{
    int fd = open("/dev/mem", O_RDWR | O_NDELAY);

    if (fd < 0) {
        printf("open(/dev/mem) failed: %s\n", strerror(errno));
        return NULL;
    }

    void *base = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                      fd, WDT_IOBASE);
    int saved_errno = errno;    /* close() may overwrite errno */

    close(fd);

    if (base == MAP_FAILED) {
        printf("mmap(/dev/mem) failed: %s\n", strerror(saved_errno));
        return NULL;
    }
    return base;
}

/*
 * Release a mapping obtained from wdt_map_regs(). Without this every feed
 * would leave one more mapping behind in a process that runs forever.
 */
static void wdt_unmap_regs(volatile unsigned char *regs)
{
    munmap((void *)regs, MAP_SIZE);
}

/*
 * Store a 32-bit value into the register at `offset` from `regs`.
 * The access is volatile so the compiler keeps every store and their order
 * (the TCER 0 -> 1 sequence must not be merged into a single write).
 */
static void wdt_write32(volatile unsigned char *regs, unsigned int offset,
                        uint32_t value)
{
    *(volatile uint32_t *)(regs + offset) = value;
}

/*
 * Arm (or re-arm) the hardware watchdog with a timeout of `msecs`.
 *
 * This is the user-space port of the kernel sequence
 *     outl(1 << 16, TCU_IOBASE + TCU_TSCR);
 *     outl(0, WDT_IOBASE + WDT_TCNT);
 *     outl(time, WDT_IOBASE + WDT_TDR);
 *     outl((3<<3 | 1<<1), WDT_IOBASE + WDT_TCSR);
 *     outl(0, WDT_IOBASE + WDT_TCER);
 *     outl(1, WDT_IOBASE + WDT_TCER);
 * with the physical addresses replaced by an mmap()ed view of /dev/mem.
 *
 * The tick count is JZ_EXTAL_RTC / 64 * msecs / 1000, capped at
 * WDT_MAX_TICKS. If the registers cannot be mapped the function returns
 * without touching the hardware.
 */
static void wdt_start_count(int msecs)
{
    volatile unsigned char *regs = wdt_map_regs();

    if (regs == NULL)
        return;

    /* Computed in 64 bits so that a large msecs cannot overflow. */
    int64_t ticks = (int64_t)(JZ_EXTAL_RTC / 64) * msecs / 1000;

    if (ticks > WDT_MAX_TICKS)
        ticks = WDT_MAX_TICKS;

    wdt_write32(regs, TCU_TSCR, 1u << 16);
    wdt_write32(regs, WDT_TCNT, 0);                     /* counter */
    wdt_write32(regs, WDT_TDR, (uint32_t)ticks);        /* data */
    /* NOTE(review): presumably RTC clock source with /64 prescaler, matching
     * the tick computation above - confirm against the SoC manual. */
    wdt_write32(regs, WDT_TCSR, (3 << 3) | (1 << 1));
    wdt_write32(regs, WDT_TCER, 0);
    wdt_write32(regs, WDT_TCER, 1);

    wdt_unmap_regs(regs);
}

/*
 * Stop the hardware watchdog so that the system is not rebooted after this
 * process goes away.
 *
 * User-space port of the kernel sequence
 *     outl(1 << 16, TCU_IOBASE + TCU_TSCR);
 *     outl(0, WDT_IOBASE + WDT_TCNT);
 *     outl(65535, WDT_IOBASE + WDT_TDR);
 *     outl(1 << 16, TCU_IOBASE + TCU_TSSR);
 */
static void wdt_stop_count(void)
{
    volatile unsigned char *regs = wdt_map_regs();

    if (regs == NULL)
        return;

    printf("\033[1;33mwdt_stop_count begin~~~. map_base = %p\n\033[m", (void *)regs);

    wdt_write32(regs, TCU_TSCR, 1u << 16);
    wdt_write32(regs, WDT_TCNT, 0);                     /* counter */
    wdt_write32(regs, WDT_TDR, WDT_MAX_TICKS);
    wdt_write32(regs, TCU_TSSR, 1u << 16);

    wdt_unmap_regs(regs);

    printf("wdt_stop_count end.\n");
}

/*
 * Feed the watchdog forever: arm it with WDT_TIMEOUT_MS, sleep
 * WDT_FEED_PERIOD_S, repeat. Never returns.
 *
 * NOTE(review): the "4 ms" in the message does not match the values used
 * here; it looks inherited from the kernel code this was ported from.
 */
void jz_wdt_restart(void)
{
    printf("Restarting after 4 ms\n");

    for (;;) {
        wdt_start_count(WDT_TIMEOUT_MS);
        sleep(WDT_FEED_PERIOD_S);
    }
}

/*
 * Check whether a process is running.
 *
 * Runs "ps | grep <name> | grep -v grep | wc -l" and parses the count.
 * Any command line containing `process_name` is counted as a match.
 *
 * Returns true when at least one match is reported; false when there is no
 * match, when the command cannot be started, or when its output cannot be
 * read.
 */
bool processExists(char * process_name)
{
    char cmd[128];
    char line[32];
    long count = 0;

    snprintf(cmd, sizeof(cmd), "ps | grep %s |grep -v grep| wc -l", process_name);

    FILE *pipe = popen(cmd, "r");

    if (pipe == NULL) {
        printf("popen failed while checking process %s: %s\n",
               process_name, strerror(errno));
        return false;
    }

    /* Parse only when a line was actually read; otherwise count stays 0. */
    if (fgets(line, sizeof(line), pipe) != NULL)
        count = strtol(line, NULL, 10);

    pclose(pipe);

    return count >= 1;
}

/* Signal number -> name table; unlisted slots (0, 32, 33) are NULL. */
static const char *const signal_str[] = {
    [1] = "SIGHUP",       [2] = "SIGINT",       [3] = "SIGQUIT",      [4] = "SIGILL",
    [5] = "SIGTRAP",      [6] = "SIGABRT",      [7] = "SIGBUS",       [8] = "SIGFPE",
    [9] = "SIGKILL",      [10] = "SIGUSR1",     [11] = "SIGSEGV",     [12] = "SIGUSR2",
    [13] = "SIGPIPE",     [14] = "SIGALRM",     [15] = "SIGTERM",     [16] = "SIGSTKFLT",
    [17] = "SIGCHLD",     [18] = "SIGCONT",     [19] = "SIGSTOP",     [20] = "SIGTSTP",
    [21] = "SIGTTIN",     [22] = "SIGTTOU",     [23] = "SIGURG",      [24] = "SIGXCPU",
    [25] = "SIGXFSZ",     [26] = "SIGVTALRM",   [27] = "SIGPROF",     [28] = "SIGWINCH",
    [29] = "SIGIO",       [30] = "SIGPWR",      [31] = "SIGSYS",      [34] = "SIGRTMIN",
    [35] = "SIGRTMIN+1",  [36] = "SIGRTMIN+2",  [37] = "SIGRTMIN+3",  [38] = "SIGRTMIN+4",
    [39] = "SIGRTMIN+5",  [40] = "SIGRTMIN+6",  [41] = "SIGRTMIN+7",  [42] = "SIGRTMIN+8",
    [43] = "SIGRTMIN+9",  [44] = "SIGRTMIN+10", [45] = "SIGRTMIN+11", [46] = "SIGRTMIN+12",
    [47] = "SIGRTMIN+13", [48] = "SIGRTMIN+14", [49] = "SIGRTMIN+15", [50] = "SIGRTMAX-14",
    [51] = "SIGRTMAX-13", [52] = "SIGRTMAX-12", [53] = "SIGRTMAX-11", [54] = "SIGRTMAX-10",
    [55] = "SIGRTMAX-9",  [56] = "SIGRTMAX-8",  [57] = "SIGRTMAX-7",  [58] = "SIGRTMAX-6",
    [59] = "SIGRTMAX-5",  [60] = "SIGRTMAX-4",  [61] = "SIGRTMAX-3",  [62] = "SIGRTMAX-2",
    [63] = "SIGRTMAX-1",  [64] = "SIGRTMAX",
};

/*
 * Look up a signal name in signal_str without ever indexing outside the
 * table; unknown or unnamed numbers yield "UNKNOWN".
 */
static const char *signal_name(int signo)
{
    const size_t entries = sizeof(signal_str) / sizeof(signal_str[0]);

    if (signo > 0 && (size_t)signo < entries && signal_str[signo] != NULL)
        return signal_str[signo];
    return "UNKNOWN";
}

/* Print the command-line help for `app_name`. */
static void usage(const char *app_name)
{
    printf("%s [-f file] -h\n"
           " -h help (show this usage text)\n"
           " -f file\n",
           app_name);
}

/*
 * Handler for the termination/crash signals registered in main(): stop the
 * hardware watchdog, then exit with status -1.
 *
 * NOTE: printf() is not async-signal-safe; it is tolerated here because the
 * process terminates immediately afterwards.
 */
void sig_handler(int signo)
{
    printf("watchdog: caught signal %s, stopping hardware watchdog.\n",
           signal_name(signo));
    wdt_stop_count();
    exit(-1);
}

/*
 * Entry point of the watchdog daemon.
 *
 * Options:
 *   -b / -B  detach and run in the background
 *   -f file  accepted but ignored
 *   -h       print usage and exit
 *
 * While mozart, bitbox and mplayer are all running, the hardware watchdog is
 * re-armed with WDT_TIMEOUT_MS every WDT_FEED_PERIOD_S seconds. As soon as
 * one of them is missing the loop ends and the process exits without
 * feeding again.
 */
int main(int argc, char **argv)
{
    int daemonize = 0;
    int c;

    printf("watchdog V1.7 start!!!!@_@\n");

    signal(SIGPIPE, SIG_IGN);
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGBUS, sig_handler);
    signal(SIGSEGV, sig_handler);
    signal(SIGABRT, sig_handler);

    while ((c = getopt(argc, argv, "bBf:h")) >= 0) {
        switch (c) {
        case 'b':
        case 'B':
            daemonize = 1;
            break;
        case 'f':
            break;
        case 'h':
            usage(argv[0]);
            return 0;
        default:
            return -1;
        }
    }

    /* run in the background */
    if (daemonize && daemon(0, 1)) {
        perror("daemon");
        return -1;
    }

    for (;;) {
        /* Sample each process once so the diagnostics below report exactly
         * the state that triggered the decision. */
        bool mozart_alive = processExists("mozart");
        bool bitbox_alive = processExists("bitbox");
        bool mplayer_alive = processExists("mplayer");

        if (!(mozart_alive && bitbox_alive && mplayer_alive)) {
            printf(" mozart or bitbox or mplayer is not exist, Reboot!!!!! \n");
            printf("Mozart process exist ???: %d\n", mozart_alive);
            printf("BitBox process exist ???: %d\n", bitbox_alive);
            printf("Mplayer process exist ???: %d\n", mplayer_alive);
            break;
        }

        wdt_start_count(WDT_TIMEOUT_MS);
        sleep(WDT_FEED_PERIOD_S);
    }

    return 0;
}

 

 

程序运行起来后,通过ps可查看到:

S 0 241 1 7868 716 0:0 13:16 00:00:01 watchdog -b

 

posted @ 2016-11-24 15:52  琳麻雀  阅读(7064)  评论(1编辑  收藏  举报