Linux kernel suspend resume学习:2.6.35与3.0.35比较【转】

转自:http://blog.csdn.net/njuitjf/article/details/18317149

Linux kernel suspend resume学习:2.6.35与3.0.35比较

最近在学习linux kernel中suspend和resume的处理。
只是一味的看代码,有点枯燥,刚好有两个项目使用了不同的内核版本,就以比较这两个间的处理差别为线索进行学习。
由于列举了很多代码,为了保持连续性,整篇文章还是放到了一个blog中。

首先看看公开出去的接口,都是文件/sys/power/state。
读该文件可以获取可能取值。
写该文件可以实现状态改变。

/sys/power/state定义的地方:
power_attr(state);

两个内核版本中,power_attr的定义相同。
power_attr的定义:
/*
 * power_attr - define a static kobj_attribute named <_name>_attr.
 *
 * The attribute's file name is the stringified _name, its mode is 0644,
 * and its show/store callbacks are <_name>_show and <_name>_store.
 */
#define power_attr(_name) \
static struct kobj_attribute _name##_attr = { \
 .attr = {    \
  .name = __stringify(_name), \
  .mode = 0644,   \
 },     \
 .show = _name##_show,   \
 .store = _name##_store,  \
}

power_attr就是往sysfs中添加一个文件,操作函数:
 .show = _name##_show,   
 .store = _name##_store, 

对应到state: 
 .show = state_show,   
 .store = state_store, 
 
内核中这两个函数的注释:
/**
 * state - control system power state.
 *
 * show() returns what states are supported, which is hard-coded to
 * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
 * 'disk' (Suspend-to-Disk).
 *
 * store() accepts one of those strings, translates it into the 
 * proper enumerated value, and initiates a suspend transition.
 */
 
state_show其实就是将数组pm_states的内容show出来。
数组pm_states的定义,在两个项目中是不同的。

kernel 2.6.35项目中:
/*
 * Suspend state names, indexed by suspend_state_t value (2.6.35 variant).
 * The "on" entry only exists when CONFIG_EARLYSUSPEND is enabled.
 */
const char *const pm_states[PM_SUSPEND_MAX] = {
#ifdef CONFIG_EARLYSUSPEND
 [PM_SUSPEND_ON]  = "on",
#endif
 [PM_SUSPEND_STANDBY] = "standby",
 [PM_SUSPEND_MEM] = "mem",
};

kernel 3.0.35项目中:
/*
 * Suspend state names, indexed by suspend_state_t value (3.0.35 variant).
 * Only "standby" and "mem" are listed; there is no "on" entry here.
 */
const char *const pm_states[PM_SUSPEND_MAX] = {
 [PM_SUSPEND_STANDBY] = "standby",
 [PM_SUSPEND_MEM] = "mem",
};

关于这些 state 的介绍,请参考 kernel 中的文档:
Documentation\power\states.txt

函数state_store定义,两个项目中的差别也就是kernel 2.6.35项目中多了对CONFIG_EARLYSUSPEND的处理。
将kernel 2.6.35项目中state_store的定义列出来:
/*
 * state_store - handle a write to the "state" power attribute.
 *
 * Only the text up to the first '\n' is considered (all n bytes when there
 * is no newline).  "disk" calls hibernate(); otherwise, when CONFIG_SUSPEND
 * is set, the text is compared against the entries of pm_states[].
 *
 * Returns n on success, otherwise the error code.  error starts out as
 * -EINVAL and is only replaced by hibernate(), enter_state(), or the
 * early-suspend branch.
 */
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
      const char *buf, size_t n)
{
#ifdef CONFIG_SUSPEND
/*
 * For reference, PM_SUSPEND_ON and the related definitions:
 * typedef int __bitwise suspend_state_t;
 *
 * #define PM_SUSPEND_ON  ((__force suspend_state_t) 0)
 * #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
 * #define PM_SUSPEND_MEM  ((__force suspend_state_t) 3)
 * #define PM_SUSPEND_MAX  ((__force suspend_state_t) 4)
 *
 * So when the "on" state exists (CONFIG_EARLYSUSPEND), the scan below has
 * to start from PM_SUSPEND_ON instead of PM_SUSPEND_STANDBY.
 */
#ifdef CONFIG_EARLYSUSPEND
 suspend_state_t state = PM_SUSPEND_ON;
#else
 suspend_state_t state = PM_SUSPEND_STANDBY;
#endif
 const char * const *s;
#endif
 char *p;
 int len;
 int error = -EINVAL;

 /* Length of the request, excluding a trailing newline if present. */
 p = memchr(buf, '\n', n);
 len = p ? p - buf : n;

 /* First, check if we are requested to hibernate */
 if (len == 4 && !strncmp(buf, "disk", len)) {
  error = hibernate();
  goto Exit;
 }

#ifdef CONFIG_SUSPEND
 /*
  * Walk pm_states[] and state in lockstep; unset (NULL) slots are skipped
  * by the *s check.  The loop breaks on the first exact-length match.
  */
 for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
  if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
   break;
 }
 /* The braceless if below guards whichever branch the #ifdef selects. */
 if (state < PM_SUSPEND_MAX && *s)
#ifdef CONFIG_EARLYSUSPEND
  /* Early-suspend build: "on" is always accepted, others must be valid. */
  if (state == PM_SUSPEND_ON || valid_state(state)) {
   error = 0;
   request_suspend_state(state);
  }
#else
  error = enter_state(state);
#endif
#endif

 Exit:
 return error ? error : n;
}

函数valid_state是判断是否对指定的state进行了支持:
/*
 * valid_state - tell whether the platform supports the given sleep state.
 *
 * Every state needs low-level support.  Without a registered suspend_ops
 * table, or without a ->valid() callback in it, no state is considered
 * valid; otherwise the answer is whatever ->valid(state) reports.
 */
bool valid_state(suspend_state_t state)
{
 if (!suspend_ops || !suspend_ops->valid)
  return false;

 return suspend_ops->valid(state);
}

suspend_ops在函数suspend_set_ops中被赋值:
/**
 * suspend_set_ops - Set the global suspend method table.
 * @ops: Pointer to ops structure.
 */
void suspend_set_ops(struct platform_suspend_ops *ops)
{
 /* Replace the global suspend_ops pointer while holding pm_mutex. */
 mutex_lock(&pm_mutex);
 suspend_ops = ops;
 mutex_unlock(&pm_mutex);
}
一般会在 architecture 相关的 pm 模块的 init 或者 probe 函数中调用 suspend_set_ops 。
valid 一般是判断该 architecture 中是否支持该 state 。

接下来看看差异中的 request_suspend_state 函数:
/*
 * request_suspend_state - record a requested state and queue the matching
 * early-suspend or late-resume work.
 *
 * Everything below runs under state_lock, except the workqueue flush,
 * for which the lock is dropped and re-taken.  The excerpt is abridged:
 * the "..." line stands for code omitted by the article (including the
 * declaration of mode).
 */
void request_suspend_state(suspend_state_t new_state)
{
 unsigned long irqflags;
 int old_sleep;

 spin_lock_irqsave(&state_lock, irqflags);
 /* Non-zero if a suspend had already been requested before this call. */
 old_sleep = state & SUSPEND_REQUESTED;
...

 /* A standby request is handled as "mem" with the EINK early-suspend mode. */
 if (new_state == PM_SUSPEND_STANDBY) {
  mode = EARLY_SUSPEND_MODE_EINK;
  new_state = PM_SUSPEND_MEM;
 } else
  mode = EARLY_SUSPEND_MODE_NORMAL;

 if (!old_sleep && new_state != PM_SUSPEND_ON) {
  /* New suspend request. */
  if ((state & SUSPENDED) && (last_mode != mode)) {
   /* flush the workqueue */
   /* The lock is released around the flush and re-acquired afterwards. */
   spin_unlock_irqrestore(&state_lock, irqflags);
   flush_workqueue(suspend_work_queue);
   spin_lock_irqsave(&state_lock, irqflags);
  }
  state |= SUSPEND_REQUESTED;
  queue_work(suspend_work_queue, &early_suspend_work); // queue early_suspend_work on suspend_work_queue
 } else if (old_sleep && new_state == PM_SUSPEND_ON) {    // resume request: a suspend was pending and "on" is now requested
  state &= ~SUSPEND_REQUESTED;
  wake_lock(&main_wake_lock);
  queue_work(suspend_work_queue, &late_resume_work);    // queue late_resume_work on suspend_work_queue
 }

 /* Remember the mode of the most recent non-"on" request. */
 if (new_state != PM_SUSPEND_ON)
  last_mode = mode;

 requested_suspend_state = new_state;
 spin_unlock_irqrestore(&state_lock, irqflags);
}

suspend_work_queue 在函数 wakelocks_init 中被创建:
 suspend_work_queue = create_singlethread_workqueue("suspend");
函数 wakelocks_init 为 core_initcall :
core_initcall(wakelocks_init);

early_suspend_work 和 late_resume_work 的定义:
static DECLARE_WORK(early_suspend_work, early_suspend);
static DECLARE_WORK(late_resume_work, late_resume);

#define DECLARE_WORK(n, f)     \
 struct work_struct n = __WORK_INITIALIZER(n, f)
 
#define __WORK_INITIALIZER(n, f) {    \
 .data = WORK_DATA_STATIC_INIT(),   \
 .entry = { &(n).entry, &(n).entry },   \
 .func = (f),      \
 __WORK_INIT_LOCKDEP_MAP(#n, &(n))   \
 }
 
early_suspend 和 late_resume 是处理函数。
两个函数中都对 early_suspend_handlers 进行了处理。
early_suspend 函数中依次调用了 early_suspend_handlers 中的 suspend 函数:
 list_for_each_entry(pos, &early_suspend_handlers, link) {
  if (pos->suspend != NULL) {
   pos->pm_mode = pwr_mode;
   pos->suspend(pos);
  }
 }
late_resume 函数中按相反的顺序依次调用了 early_suspend_handlers 中的 resume 函数:
 list_for_each_entry_reverse(pos, &early_suspend_handlers, link)
  if (pos->resume != NULL)
   pos->resume(pos);
   
early_suspend_handlers 的定义:
static LIST_HEAD(early_suspend_handlers);

函数 register_early_suspend 将 handler 注册到 early_suspend_handlers :
/*
 * register_early_suspend - add a handler to early_suspend_handlers.
 *
 * The handler is inserted in front of the first existing entry whose level
 * is greater than its own (or at the end of the list if there is none).
 * If the SUSPENDED bit is already set in state and the handler has a
 * suspend callback, that callback is invoked right away.
 * The whole operation runs under early_suspend_lock.
 */
void register_early_suspend(struct early_suspend *handler)
{
 struct list_head *pos;

 mutex_lock(&early_suspend_lock);
 /* Find the insertion point; pos stays at the list head if no entry matches. */
 list_for_each(pos, &early_suspend_handlers) {
  struct early_suspend *e;
  e = list_entry(pos, struct early_suspend, link);
  if (e->level > handler->level)
   break;
 }
 /* list_add_tail() on pos links the new handler immediately before pos. */
 list_add_tail(&handler->link, pos);
 if ((state & SUSPENDED) && handler->suspend)
  handler->suspend(handler);
 mutex_unlock(&early_suspend_lock);
}
需要进行early suspend处理的模块调用函数 register_early_suspend 注册 handler .
static struct early_suspend mxc_epdc_earlysuspend = {
 .level = EARLY_SUSPEND_LEVEL_DISABLE_FB,
 .suspend = mxc_epdc_early_suspend,
 .resume = mxc_epdc_late_resume,
};

 register_early_suspend(&mxc_epdc_earlysuspend);
 
看看 queue_work 的实现:
/**
 * queue_work - submit a work item to a workqueue
 * @wq: workqueue that should run the item
 * @work: work item to submit
 *
 * The item is queued for the CPU this function is called on, obtained with
 * get_cpu() and released with put_cpu() once queueing is done.  If that CPU
 * dies, the item can be processed by another CPU.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
 int queued = queue_work_on(get_cpu(), wq, work);

 put_cpu();
 return queued;
}
/**
 * queue_work_on - submit a work item to a workqueue on a given CPU
 * @cpu: number of the CPU that should execute the work
 * @wq: workqueue that should run the item
 * @work: work item to submit
 *
 * The caller must ensure that @cpu cannot go away.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
 /* The pending bit was already set: the item is queued, nothing to do. */
 if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
  return 0;

 BUG_ON(!list_empty(&work->entry));
 /*
  * Per the article: __queue_work() ends up adding the work to the
  * worklist of the cpu_workqueue_struct selected for this CPU.
  */
 __queue_work(wq_per_cpu(wq, cpu), work);
 return 1;
}

flush_workqueue 的实现:
/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 *
 * This function used to run the workqueues itself.  Now we just wait for the
 * helper threads to do it.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
 const struct cpumask *cpu_map = wq_cpu_map(wq);
 int cpu;

 /* This function blocks until the queued works have been handled. */
 might_sleep();
 /*
  * NOTE(review): acquire immediately followed by release on
  * wq->lockdep_map - presumably a lock-dependency annotation only;
  * confirm against the workqueue implementation.
  */
 lock_map_acquire(&wq->lockdep_map);
 lock_map_release(&wq->lockdep_map);
 /* Flush the per-CPU queue of every CPU in this workqueue's CPU map. */
 for_each_cpu(cpu, cpu_map)
  flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}

回到 state_store 函数。
我们发现函数中最初有个判断,如果设置的状态为 "disk" ,则调用函数 hibernate 。
根据 states.txt 中的说明,disk 即进入深度睡眠,更省电。
因为其将 snapshot 写入到了 disk 。之后可以 power down 。

来看看 hibernate 的实现。
两个内核版本中稍有差别,kernel 3.0.35中多了一些处理。下面把 kernel 3.0.35的实现列了出来:
/**
 * hibernate - Carry out system hibernation, including saving the image.
 */
int hibernate(void)
{
 int error;

 mutex_lock(&pm_mutex);
 /* The snapshot device should not be opened while we're running */
 /* Decrement snapshot_device_available unless it is already 0 (in use). */
 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  error = -EBUSY;
  goto Unlock;
 }

 pm_prepare_console();
 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
 if (error)
  goto Exit;

 error = usermodehelper_disable();
 if (error)
  goto Exit;

 /* Allocate memory management structures */
 error = create_basic_memory_bitmaps();
 if (error)
  goto Exit;

 printk(KERN_INFO "PM: Syncing filesystems ... ");
 sys_sync();
 printk("done.\n");

 error = prepare_processes();
 if (error)
  goto Finish;

 /* Test hooks: a non-zero result skips the snapshot and goes to Thaw. */
 if (hibernation_test(TEST_FREEZER))
  goto Thaw;

 if (hibernation_testmode(HIBERNATION_TESTPROC))
  goto Thaw;

 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
 if (error)
  goto Thaw;

 /*
  * in_suspend non-zero: write the image out.  Otherwise we are in the
  * restored-image path and only log the successful restore.
  */
 if (in_suspend) {
  unsigned int flags = 0;

  if (hibernation_mode == HIBERNATION_PLATFORM)
   flags |= SF_PLATFORM_MODE;
  if (nocompress)                      // not present in kernel 2.6.35
   flags |= SF_NOCOMPRESS_MODE;     // not present in kernel 2.6.35
  pr_debug("PM: writing image.\n");
  error = swsusp_write(flags);
  swsusp_free();
  /* Power down only if the image was written without error. */
  if (!error)
   power_down();
  in_suspend = 0;                      // not present in kernel 2.6.35
  pm_restore_gfp_mask();               // not present in kernel 2.6.35
 } else {
  pr_debug("PM: Image restored successfully.\n");
 }

 /* Unwind in reverse order of the setup steps above. */
 Thaw:
 thaw_processes();
 Finish:
 free_basic_memory_bitmaps();
 usermodehelper_enable();
 Exit:
 pm_notifier_call_chain(PM_POST_HIBERNATION);
 pm_restore_console();
 atomic_inc(&snapshot_device_available);
 Unlock:
 mutex_unlock(&pm_mutex);
 return error;
}

先看第一处差别, NOCOMPRESS 相关。
kernel 2.6.35中没有定义 SF_NOCOMPRESS_MODE 。
搜索代码发现,kernel 3.0.35中有3个地方使用了 SF_NOCOMPRESS_MODE 。
分别是判断 swap 是否有足够空间、写入 snapshot 、读取 snapshot 时。

第一处:
在函数 enough_swap 中。功能在注释中已经体现。该函数的实现:
/**
 * enough_swap - Check whether the free swap can hold the image.
 * @nr_pages: number of image pages to be saved.
 * @flags: image flags; SF_NOCOMPRESS_MODE selects the uncompressed estimate.
 *
 * Compares the free swap pages reported by count_swap_pages() for
 * root_swap with the estimated requirement.  Returns non-zero when the
 * free space is strictly larger than the requirement, zero otherwise.
 */

static int enough_swap(unsigned int nr_pages, unsigned int flags)
{
 unsigned int available = count_swap_pages(root_swap, 1);
 unsigned int needed;

 pr_debug("PM: Free swap pages: %u\n", available);

 /*
  * Uncompressed mode needs exactly as many pages as requested.  In
  * compressed mode the count is scaled by LZO_CMP_PAGES / LZO_UNC_PAGES
  * (per the article, the worst-case compression ratio) plus one page.
  */
 if (flags & SF_NOCOMPRESS_MODE)
  needed = nr_pages;
 else
  needed = (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1;
 needed += PAGES_FOR_IO;

 return available > needed;
}
看了下 kernel 2.6.35 中的实现,关键代码如下:
 return free_swap > nr_pages + PAGES_FOR_IO;
比较两个版本的 kernel 可知,2.6.35中只支持非压缩模式;3.0.35中增加了压缩模式的支持。

第二处:
在函数 swsusp_write 中。关键代码:
/**
 * swsusp_write - Write entire image and metadata.
 * @flags: flags to pass to the "boot" kernel in the image header
 *
 * It is important _NOT_ to umount filesystems at this point. We want
 * them synced (in case something goes wrong) but we DO not want to mark
 * filesystem clean: it is not. (And it does not matter, if we resume
 * correctly, we'll mark system clean, anyway.)
 */

/*
 * Abridged excerpt: each "..." line stands for code omitted by the article.
 */
int swsusp_write(unsigned int flags)
{
...
 pages = snapshot_get_image_size();
 error = get_swap_writer(&handle);
...
 // check whether swap has enough free space
 if (!enough_swap(pages, flags)) {
...
 }
 /* snapshot_read_next() expects a zeroed handle on its first call. */
 memset(&snapshot, 0, sizeof(struct snapshot_handle));
 error = snapshot_read_next(&snapshot);
...
 header = (struct swsusp_info *)data_of(snapshot);
 // write the metadata (image header)
 error = swap_write_page(&handle, header, NULL);
 if (!error) {
  // write the snapshot data: save_image() when SF_NOCOMPRESS_MODE is set, save_image_lzo() otherwise
  error = (flags & SF_NOCOMPRESS_MODE) ?
   save_image(&handle, &snapshot, pages - 1) :
   save_image_lzo(&handle, &snapshot, pages - 1);
 }
out_finish:
 error = swap_writer_finish(&handle, flags, error);
 return error;
}
函数 save_image 的注释:
/**
 * save_image - save the suspend image data
 */
函数 save_image_lzo 的注释:
/**
 * save_image_lzo - Save the suspend image data compressed with LZO.
 * @handle: Swap mam handle to use for saving the image.
 * @snapshot: Image to read data from.
 * @nr_to_write: Number of pages to save.
 */
lzo 压缩算法就不介绍了。具体写入的实现这儿也不深入了。
2.6.35 中函数 swsusp_write 的实现类似,只是在调用 enough_swap 时不会传入 flags 参数;
另外,没有函数 save_image_lzo ,只会调用 save_image 。
函数 swsusp_write 中调用的另外一个重要函数 snapshot_read_next 。其注释:
/**
 * snapshot_read_next - used for reading the system memory snapshot.
 *
 * On the first call to it @handle should point to a zeroed
 * snapshot_handle structure.  The structure gets updated and a pointer
 * to it should be passed to this function every next time.
 *
 * On success the function returns a positive number.  Then, the caller
 * is allowed to read up to the returned number of bytes from the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the end of data stream condition,
 * and a negative number is returned on error.  In such cases the
 * structure pointed to by @handle is not updated and should not be used
 * any more.
 */
swsusp_write 中第一次调用了函数 snapshot_read_next ,函数 save_image/save_image_lzo 中循环调用 snapshot_read_next 函数,直到读取完 snapshot 。

第三处:
在函数 swsusp_read 中,关键代码:
/**
 * swsusp_read - read the hibernation image.
 * @flags_p: flags passed by the "frozen" kernel in the image header should
 *    be written into this memory location
 */

/*
 * Abridged excerpt: each "..." line stands for code omitted by the article.
 */
int swsusp_read(unsigned int *flags_p)
{
...
 /* snapshot_write_next() expects a zeroed handle on its first call. */
 memset(&snapshot, 0, sizeof(struct snapshot_handle));
 error = snapshot_write_next(&snapshot);
...
 header = (struct swsusp_info *)data_of(snapshot);
 error = get_swap_reader(&handle, flags_p);
...
 /* Read the image header page first. */
 if (!error)
  error = swap_read_page(&handle, header, NULL);
 if (!error) {
  /* load_image() when SF_NOCOMPRESS_MODE is set in *flags_p, load_image_lzo() otherwise. */
  error = (*flags_p & SF_NOCOMPRESS_MODE) ?
   load_image(&handle, &snapshot, header->pages - 1) :
   load_image_lzo(&handle, &snapshot, header->pages - 1);
 }
 swap_reader_finish(&handle);
end:
 if (!error)
  pr_debug("PM: Image successfully loaded\n");
 else
  pr_debug("PM: Error %d resuming\n", error);
 return error;
}
load_image 的注释:
/**
 * load_image - load the image using the swap map handle
 * @handle and the snapshot handle @snapshot
 * (assume there are @nr_pages pages to load)
 */
 
load_image_lzo 的注释:
/**
 * load_image_lzo - Load compressed image data and decompress them with LZO.
 * @handle: Swap map handle to use for loading data.
 * @snapshot: Image to copy uncompressed data into.
 * @nr_to_read: Number of pages to load.
 */

 2.6.35 中函数 swsusp_read 的实现类似,只是少了对 SF_NOCOMPRESS_MODE 的处理,没有实现函数 load_image_lzo 。
swsusp_read 函数中调用了另外一个重要函数 snapshot_write_next ,其注释:
/**
 * snapshot_write_next - used for writing the system memory snapshot.
 *
 * On the first call to it @handle should point to a zeroed
 * snapshot_handle structure.  The structure gets updated and a pointer
 * to it should be passed to this function every next time.
 *
 * On success the function returns a positive number.  Then, the caller
 * is allowed to write up to the returned number of bytes to the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the "end of file" condition,
 * and a negative number is returned on error.  In such cases the
 * structure pointed to by @handle is not updated and should not be used
 * any more.
 */
swsusp_read 函数中第一次调用了 snapshot_write_next , load_image/load_image_lzo 中循环调用 snapshot_write_next ,直到处理完所有的 snapshot 。


第二处差别是在3.0.35中多了:
  in_suspend = 0; 
搜索代码,发现函数 create_image 中将 in_suspend 设置为了1.
函数 hibernate 调用了函数 hibernation_snapshot 。
函数 hibernation_snapshot 的注释 :
/**
 * hibernation_snapshot - Quiesce devices and create a hibernation image.
 * @platform_mode: If set, use platform driver to prepare for the transition.
 *
 * This routine must be called with pm_mutex held.
 */
函数 hibernation_snapshot 调用了函数 create_image 。
函数 create_image 的注释:
/**
 * create_image - Create a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
 * and execute the drivers' .thaw_noirq() callbacks.
 *
 * Control reappears in this routine after the subsequent restore.
 */
3.0.35中多了这么一句,难道只是为了防止 hibernate 函数中重复进入 if (in_suspend) 分支?
 
第三处差别是在3.0.35中多了:
  pm_restore_gfp_mask(); 
pm_restore_gfp_mask 的实现及相关定义:
#ifdef CONFIG_PM_SLEEP
/*
 * The following functions are used by the suspend/hibernate code to temporarily
 * change gfp_allowed_mask in order to avoid using I/O during memory allocations
 * while devices are suspended.  To avoid races with the suspend/hibernate code,
 * they should always be called with pm_mutex held (gfp_allowed_mask also should
 * only be modified with pm_mutex held, unless the suspend/hibernate code is
 * guaranteed not to run in parallel with that modification).
 */

static gfp_t saved_gfp_mask;

/*
 * pm_restore_gfp_mask - put back the gfp_allowed_mask saved earlier.
 *
 * Warns if pm_mutex is not held.  Does nothing when no mask has been
 * saved; otherwise restores it and clears the saved copy.
 */
void pm_restore_gfp_mask(void)
{
 WARN_ON(!mutex_is_locked(&pm_mutex));

 if (!saved_gfp_mask)
  return;

 gfp_allowed_mask = saved_gfp_mask;
 saved_gfp_mask = 0;
}

/*
 * pm_restrict_gfp_mask - save gfp_allowed_mask and drop GFP_IOFS from it.
 *
 * Warns if pm_mutex is not held or if a saved mask is still pending.
 */
void pm_restrict_gfp_mask(void)
{
 WARN_ON(!mutex_is_locked(&pm_mutex));
 WARN_ON(saved_gfp_mask);

 saved_gfp_mask = gfp_allowed_mask;
 gfp_allowed_mask = saved_gfp_mask & ~GFP_IOFS;
}
#endif /* CONFIG_PM_SLEEP */


回头看看 hibernate 函数。

 /* The snapshot device should not be opened while we're running */
 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  error = -EBUSY;
  goto Unlock;
 }
注释的意思是,我们在执行 hibernate 的时候,不允许别人再打开 snapshot 设备。
static struct miscdevice snapshot_device = {
 .minor = SNAPSHOT_MINOR,
 .name = "snapshot",
 .fops = &snapshot_fops,
};
snapshot_fops 的定义:
static const struct file_operations snapshot_fops = {
 .open = snapshot_open,
 .release = snapshot_release,
 .read = snapshot_read,
 .write = snapshot_write,
 .llseek = no_llseek,
 .unlocked_ioctl = snapshot_ioctl,
};
打开 snapshot 设备用的就是函数 snapshot_open 了。
实现在我们 running 的时候不让 snapshot 设备被打开的方法是通过变量 snapshot_device_available ,其定义:
atomic_t snapshot_device_available = ATOMIC_INIT(1);
snapshot_open 函数中有以下语句:
 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  error = -EBUSY;
  goto Unlock;
 }
add 个 -1 ,也就相当于减1操作。

继续 hibernate 函数。
 // console 相关处理
 pm_prepare_console();
 
 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
pm_notifier_call_chain 函数经过多层调用,调用到了函数 __blocking_notifier_call_chain ,其注释:
/**
 * __blocking_notifier_call_chain - Call functions in a blocking notifier chain
 * @nh: Pointer to head of the blocking notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: See comment for notifier_call_chain.
 * @nr_calls: See comment for notifier_call_chain.
 *
 * Calls each function in a notifier chain in turn.  The functions
 * run in a process context, so they are allowed to block.
 *
 * If the return value of the notifier can be and'ed
 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
 * will return immediately, with the return value of
 * the notifier function which halted execution.
 * Otherwise the return value is the return value
 * of the last notifier function called.
 */
 
继续 hibernate 函数。
/**
 * usermodehelper_disable - prevent new helpers from being started
 */
 error = usermodehelper_disable();
 
 /* Allocate memory management structures */
 error = create_basic_memory_bitmaps();
函数 create_basic_memory_bitmaps 的注释:
/**
 * create_basic_memory_bitmaps - create bitmaps needed for marking page
 * frames that should not be saved and free page frames.  The pointers
 * forbidden_pages_map and free_pages_map are only modified if everything
 * goes well, because we don't want the bits to be used before both bitmaps
 * are set up.
 */
 
继续 hibernate 函数。
 error = prepare_processes();
prepare_processes 函数的实现:
/*
 * prepare_processes - try to freeze processes before hibernation.
 *
 * Returns 0 when freeze_processes() succeeds.  On failure the processes
 * are thawed again and -EBUSY is returned.
 */
static int prepare_processes(void)
{
 if (!freeze_processes())
  return 0;

 thaw_processes();
 return -EBUSY;
}
可见函数 prepare_processes 的功能为:
尝试冷冻进程,如果失败,则解冻进程,并返回 -EBUSY 。
如何冷冻进程的先不看了。

继续 hibernate 函数。
 // Debug/test hook: per the article it only delays briefly; a non-zero result skips to Thaw
 if (hibernation_test(TEST_FREEZER))
  goto Thaw;
 // Same idea as above, for the TESTPROC hibernation mode
 if (hibernation_testmode(HIBERNATION_TESTPROC))
  goto Thaw;
 // Seen earlier: quiesces the devices and creates the hibernation image.
 // Per the article, create_image() (called from it) sets in_suspend to 1.
 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
 
 // in_suspend is non-zero right after the image has been created;
 // the else branch below is the path taken once an image has been restored
 if (in_suspend) {
  unsigned int flags = 0;

  if (hibernation_mode == HIBERNATION_PLATFORM)
   flags |= SF_PLATFORM_MODE;
  if (nocompress)  // uncompressed image requested?
   flags |= SF_NOCOMPRESS_MODE;
  pr_debug("PM: writing image.\n");
  // swsusp_write() was covered earlier: it writes the snapshot to swap
  error = swsusp_write(flags);
  /**
   * swsusp_free - free pages allocated for the suspend.
   *
   * Suspend pages are alocated before the atomic copy is made, so we
   * need to release them after the resume.
   */
  swsusp_free();
  if (!error)
   power_down();
  in_suspend = 0;
  pm_restore_gfp_mask();
 } else {
  pr_debug("PM: Image restored successfully.\n");
 }

函数 power_down 的实现:
/**
 * power_down - Shut the machine down for hibernation.
 *
 * Use the platform driver, if configured, to put the system into the sleep
 * state corresponding to hibernation, or try to power it off or reboot,
 * depending on the value of hibernation_mode.
 */
static void power_down(void)
{
 /*
  * Pick the shutdown method from hibernation_mode.  Every path that
  * leaves the switch continues to kernel_halt() below.
  */
 switch (hibernation_mode) {
 case HIBERNATION_TEST:
 case HIBERNATION_TESTPROC:
  break;
 case HIBERNATION_REBOOT:
  /**
   * kernel_restart - reboot the system
   * @cmd: pointer to buffer containing command to execute for restart
   *  or %NULL
   *
   * Shutdown everything and perform a clean reboot.
   * This is not safe to call in interrupt context.
   */
  kernel_restart(NULL);
  break;
 case HIBERNATION_PLATFORM:
  /**
   * hibernation_platform_enter - Power off the system using the platform driver.
   */
  hibernation_platform_enter();
  /*
   * No break here: if hibernation_platform_enter() returns, control
   * falls through to kernel_power_off().
   * NOTE(review): presumably an intentional fallback - confirm.
   */
 case HIBERNATION_SHUTDOWN:
  /**
   * kernel_power_off - power_off the system
   *
   * Shutdown everything and perform a clean system power_off.
   */
  kernel_power_off();
  break;
 }
 /**
  * kernel_halt - halt the system
  *
  * Shutdown everything and perform a clean system halt.
  */
 kernel_halt();
 /*
  * Valid image is on the disk, if we continue we risk serious data
  * corruption after resume.
  */
 printk(KERN_CRIT "PM: Please power down manually\n");
 /* Spin forever; this function has no path that returns to the caller. */
 while(1);
}

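到 power_down 函数,已经 power down 了,后面的代码怎么跑?
需要注意:从上面的实现可以看到, power_down 最后是 kernel_halt 和 while(1) ,本身不会返回。
重新 power up 并恢复 image 之后,控制流是从 create_image 中重新出现的(见前面 create_image 的注释),此时走的是 hibernate 函数中 if (in_suspend) 的 else 分支(打印 Image restored successfully),然后继续到 Thaw 。
而紧跟在 power_down 之后的下面两句代码,只有在 swsusp_write 出错、没有调用 power_down 的情况下才会执行: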
  in_suspend = 0;
  pm_restore_gfp_mask();
  
然后是:
Thaw:
 // Thaw the processes that were frozen earlier
 thaw_processes();
 Finish:
  /**
  * free_basic_memory_bitmaps - free memory bitmaps allocated by
  * create_basic_memory_bitmaps().  The auxiliary pointers are necessary
  * so that the bitmaps themselves are not referred to while they are being
  * freed.
  */
 free_basic_memory_bitmaps();
 // Re-enable what usermodehelper_disable() turned off earlier
 usermodehelper_enable();
 Exit:
 pm_notifier_call_chain(PM_POST_HIBERNATION);
 // Counterpart of the earlier pm_prepare_console()
 pm_restore_console();
 // Hibernation is no longer running, so the snapshot device may be used by others again
 atomic_inc(&snapshot_device_available);
 Unlock:
 mutex_unlock(&pm_mutex);
 return error;
}

看完了 hibernate 函数,即 Suspend-to-disk 的处理。
回到 state_store 函数继续。
根据写入的字符串,找到对应的 state ,并以该 state 为参数调用函数 enter_state 。
看看 enter_state 函数的实现:
/**
 * enter_state - Do common work of entering low-power state.
 * @state:  pm_state structure for state we're entering.
 *
 * Make sure we're the only ones trying to enter a sleep state. Fail
 * if someone has beat us to it, since we don't want anything weird to
 * happen when we wake up.
 * Then, do the setup for suspend, enter the state, and cleaup (after
 * we've woken up).
 */
int enter_state(suspend_state_t state)
{
 int error;

 // Seen earlier: checks whether the current architecture supports this state
 if (!valid_state(state))
  return -ENODEV;

 /* Only one sleep transition at a time: fail instead of waiting. */
 if (!mutex_trylock(&pm_mutex))
  return -EBUSY;

 printk(KERN_INFO "PM: Syncing filesystems ... ");
 sys_sync();
 printk("done.\n");

 pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
 // The implementation of suspend_prepare() is shown later in the article
 error = suspend_prepare();
 if (error)
  goto Unlock;

 if (suspend_test(TEST_FREEZER))
  goto Finish;

 pr_debug("PM: Entering %s sleep\n", pm_states[state]);
 /* The gfp mask is restricted only around the actual suspend call. */
 pm_restrict_gfp_mask();
 error = suspend_devices_and_enter(state);
 pm_restore_gfp_mask();

 Finish:
 pr_debug("PM: Finishing wakeup.\n");
 suspend_finish();
 Unlock:
 mutex_unlock(&pm_mutex);
 return error;
}

函数 suspend_prepare 的实现:
/**
 * suspend_prepare - Do prep work before entering low-power state.
 *
 * This is common code that is called for each state that we're entering.
 * Run suspend notifiers, allocate a console and stop all processes.
 */
static int suspend_prepare(void)
{
 int error;

 /* Without platform ops, or without an ->enter() callback, suspend is refused. */
 if (!suspend_ops || !suspend_ops->enter)
  return -EPERM;

 // Seen earlier: console-related preparation
 pm_prepare_console();

 // Also seen earlier, only the argument differs
 error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
 if (error)
  goto Finish;

 // Also seen earlier
 error = usermodehelper_disable();
 if (error)
  goto Finish;

 // Per the article: when the suspend freezer is supported this simply calls
 // freeze_processes(), similar to prepare_processes() in the hibernate path
 error = suspend_freeze_processes();
 // Everything went fine: return 0
 if (!error)
  return 0;

 /* Freezing failed: undo the steps above in reverse order. */
 suspend_thaw_processes();
 usermodehelper_enable();
 Finish:
 pm_notifier_call_chain(PM_POST_SUSPEND);
 pm_restore_console();
 return error;
}

回到函数 enter_state ,
 // Per the article: suspend_test() returns 1 only when CONFIG_PM_DEBUG is defined and a test is
 // running; otherwise it returns 0 and the suspend continues
 if (suspend_test(TEST_FREEZER))
  goto Finish;
  
 // Tighten the gfp mask; this is the last call made before suspending.
 // The later pm_restore_gfp_mask() is the first call made after resume and restores the gfp mask.
 // As seen earlier, kernel 3.0.35 also added this handling to hibernate().
 pm_restrict_gfp_mask();
 // The implementation of this function follows below
 error = suspend_devices_and_enter(state);
 
suspend_devices_and_enter 函数的实现:
/**
 * suspend_devices_and_enter - suspend devices and enter the desired system
 *        sleep state.
 * @state:    state to enter
 */
int suspend_devices_and_enter(suspend_state_t state)
{
 int error;

 if (!suspend_ops)
  return -ENOSYS;

 trace_machine_suspend(state);
 // suspend_ops appeared earlier: it is set through suspend_set_ops(), typically from the
 // init or probe function of the architecture's pm module
 if (suspend_ops->begin) {
  error = suspend_ops->begin(state);
  if (error)
   goto Close;
 }
 // The implementation of suspend_console() is shown later in the article
 suspend_console();
 // Per the article: suspend_test_start() and suspend_test_finish() measure and print timing
 suspend_test_start();
 // The implementation of dpm_suspend_start() is shown later in the article
 error = dpm_suspend_start(PMSG_SUSPEND);
 if (error) {
  printk(KERN_ERR "PM: Some devices failed to suspend\n");
  goto Recover_platform;
 }
 suspend_test_finish("suspend devices");
 // suspend_test() was discussed earlier
 if (suspend_test(TEST_DEVICES))
  goto Recover_platform;

 // The implementation of suspend_enter() is shown later in the article
 error = suspend_enter(state);

 Resume_devices:
 suspend_test_start();
 /**
  * dpm_resume_end - Execute "resume" callbacks and complete system transition.
  * @state: PM transition of the system being carried out.
  *
  * Execute "resume" callbacks for all devices and complete the PM transition of
  * the system.
  */
 // Per the article: dpm_resume_end() calls dpm_resume() and dpm_complete()
 /**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
  *
  * Execute the appropriate "resume" callback for all devices whose status
  * indicates that they are suspended.
  */
 /**
  * dpm_complete - Complete a PM transition for all non-sysdev devices.
  * @state: PM transition of the system being carried out.
  *
  * Execute the ->complete() callbacks for all devices whose PM status is not
  * DPM_ON (this allows new devices to be registered).
  */
 dpm_resume_end(PMSG_RESUME);
 suspend_test_finish("resume devices");
 // Console handling: counterpart of suspend_console() above
 resume_console();
 Close:
 if (suspend_ops->end)
  suspend_ops->end();
 trace_machine_suspend(PWR_EVENT_EXIT);
 return error;

 /* Error path: let the platform recover, then resume the devices as usual. */
 Recover_platform:
 if (suspend_ops->recover)
  suspend_ops->recover();
 goto Resume_devices;
}

函数 suspend_console 的实现:
/**
 * suspend_console - suspend the console subsystem
 *
 * This disables printk() while we go into suspend states
 */
void suspend_console(void)
{
 /* Nothing to do when console suspending is disabled. */
 if (!console_suspend_enabled)
  return;
 printk("Suspending console(s) (use no_console_suspend to debug)\n");
 /*
  * Set console_suspended while holding the console lock, then release
  * console_sem directly with up().
  * NOTE(review): presumably up() is used rather than the regular unlock
  * helper so that nothing is printed once the flag is set - confirm.
  */
 console_lock();
 console_suspended = 1;
 up(&console_sem);
}

函数 dpm_suspend_start 的实现:
/**
 * dpm_suspend_start - Run the prepare and suspend phases for devices.
 * @state: PM transition of the system being carried out.
 *
 * First calls dpm_prepare(), which executes the ->prepare() callback(s)
 * for all non-sysdev devices.  Only if that succeeds, calls dpm_suspend(),
 * which executes the "suspend" callbacks for them.
 *
 * Returns 0 on success, or the error code of the phase that failed.
 */
int dpm_suspend_start(pm_message_t state)
{
 int ret = dpm_prepare(state);

 if (ret)
  return ret;

 return dpm_suspend(state);
}
函数 dpm_prepare 和函数 dpm_suspend 中的处理涉及到哪些 devices 的 callback 被调用。
在函数 dpm_prepare 中,会遍历队列 dpm_list ,依次调用其中设备的 ->prepare() callback(s) ,
如果成功,则将其添加到 dpm_prepared_list 队列。
函数 dpm_suspend 遍历 dpm_prepared_list 队列,依次调用其中设备的 ->suspend() callback(s) 。
如果成功,则将其添加到 dpm_suspended_list 队列,后面调用到的 dpm_resume 函数会使用该队列。
dpm_resume 处理之后又将成员 move 到 dpm_prepared_list 队列。
函数 dpm_complete 中会处理 dpm_prepared_list 队列。
接下来的问题是, dpm_list 里的成员是谁添加的?
函数 device_pm_add  中会往 dpm_list 中添加成员:
/**
 * device_pm_add - Add a device to the PM core's list of active devices.
 * @dev: Device to add to the list.
 */
void device_pm_add(struct device *dev)
{
...
 list_add_tail(&dev->power.entry, &dpm_list);
...
}
函数 device_add 中调用了函数 device_pm_add :
/**
 * device_add - add device to device hierarchy.
 * @dev: device.
 *
 * This is part 2 of device_register(), though may be called
 * separately _iff_ device_initialize() has been called separately.
 *
 * This adds @dev to the kobject hierarchy via kobject_add(), adds it
 * to the global and sibling lists for the device, then
 * adds it to the other relevant subsystems of the driver model.
 *
 * NOTE: _Never_ directly free @dev after calling this function, even
 * if it returned an error! Always use put_device() to give up your
 * reference instead.
 */
int device_add(struct device *dev)
{
...
 device_pm_add(dev);
...
}
举一个 audio device driver 的例子。 driver 的 init 函数中调用了函数 platform_device_add 。
函数 platform_device_add 中调用了函数 device_add :
/**
 * platform_device_add - add a platform device to device hierarchy
 * @pdev: platform device we're adding
 *
 * This is part 2 of platform_device_register(), though may be called
 * separately _iff_ pdev was allocated by platform_device_alloc().
 */
int platform_device_add(struct platform_device *pdev)
{
...

 ret = device_add(&pdev->dev);
...
}


函数 suspend_enter 的实现:
/**
 * suspend_enter - enter the desired system sleep state.
 * @state:  state to enter
 *
 * This function should be called after devices have been suspended.
 */
static int suspend_enter(suspend_state_t state)
{
 int error;

 // suspend_ops: the platform callbacks registered through suspend_set_ops()
 if (suspend_ops->prepare) {
  error = suspend_ops->prepare();
  if (error)
   goto Platform_finish;
 }

 /**
  * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
  *
  * Prevent device drivers from receiving interrupts and call the "noirq" suspend
  * handlers for all non-sysdev devices.
  */
 error = dpm_suspend_noirq(PMSG_SUSPEND);
 if (error) {
  printk(KERN_ERR "PM: Some devices failed to power down\n");
  goto Platform_finish;
 }

 if (suspend_ops->prepare_late) {
  error = suspend_ops->prepare_late();
  if (error)
   goto Platform_wake;
 }

 if (suspend_test(TEST_PLATFORM))
  goto Platform_wake;

 // Per the article: takes every CPU other than the first one offline via _cpu_down()
 error = disable_nonboot_cpus();
 if (error || suspend_test(TEST_CPUS))
  goto Enable_cpus;

 arch_suspend_disable_irqs();
 BUG_ON(!irqs_disabled());

 /**
  * syscore_suspend - Execute all the registered system core suspend callbacks.
  *
  * This function is executed with one CPU on-line and disabled interrupts.
  */
 // Per the article: walks the syscore_ops_list and calls each member's suspend function;
 // members are added to syscore_ops_list by register_syscore_ops()
 /**
  * register_syscore_ops - Register a set of system core operations.
  * @ops: System core operations to register.
  */
 error = syscore_suspend();
 // On success, enter the if block
 if (!error) {
  if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
   // Article notes: the enter callback is usually implemented in the platform's pm.c.
   // In the Freescale i.MX6 pm.c, entering suspend calls a suspend routine located in iRAM
   // and finally waits for an interrupt to arrive.
   // That iRAM routine is copied there when the pm module is initialised.
   // In this example the i.MX6 runs kernel 3.0.35.
   // The i.MX5 on kernel 2.6.35 is handled similarly, differing only in details.
   error = suspend_ops->enter(state);
   events_check_enabled = false;
  }
  /**
   * syscore_resume - Execute all the registered system core resume callbacks.
   *
   * This function is executed with one CPU on-line and disabled interrupts.
   */
  // Back after wake-up; per the article this also works on syscore_ops_list
  syscore_resume();
 }

 // Counterpart of arch_suspend_disable_irqs()
 arch_suspend_enable_irqs();
 BUG_ON(irqs_disabled());

 Enable_cpus:
 // Counterpart of disable_nonboot_cpus()
 enable_nonboot_cpus();

 Platform_wake:
 if (suspend_ops->wake)
  suspend_ops->wake();

 // Counterpart of dpm_suspend_noirq()
 dpm_resume_noirq(PMSG_RESUME);

 Platform_finish:
 if (suspend_ops->finish)
  suspend_ops->finish();

 return error;
}

posted @ 2016-03-29 16:36  Sky&Zhang  阅读(832)  评论(0编辑  收藏  举报