MIT 6.S081入门lab6 cow

由于本实验的前置课程包括2部分 Interrupts和 Multiprocessors and locking，因此本次实验记录也分为2部分

一、参考资料阅读与总结

1.xv6 book书籍阅读（chapter 5 Interrupts and device drivers)

1. 概述

设备驱动程序： 位置：操作系统；作用：配置设备，执行操作，处理中断，并与上层进行交互；核心：驱动程序和其管理的设备是并发执行的
中断： 是引起trap的其中一种方式，在xv6中，中断经过中断管理程序devintr（kernel/trap.c），再到具体设备的中断处理程序
常见的中断形式： 上半部中断内核线程中运行，其本质上就是一种系统调用，调用设备的操作，下半部分在中断时候执行，即设备操作完成后，唤醒进程，同时让设备开始新工作。注意： 上半部和下半部是需要其并法的运行在多CPU上的。

2.代码：控制台输入

控制台： 通过UART串口硬件接受字符；处理特殊字符（Backpace和Ctrl等）；到达完整的一行，之后用户进程如shell，使用read读取输入，并进行处理；
xv6实现： QEMU仿真的16550芯片

内核访问： 内存映射的UART控制器，并连接到相应的物理地址，对这些物理地址进行读写操作；UART的内存映射地址起始于0x10000000或UART0 (kernel/memlayout.h:21)，控制器偏移量在(kernel/uart.c:22)中定义

kernel/uart.c:13

...
/ the UART control registers are memory-mapped
// at address UART0. this macro returns the
// address of one of the registers.
#define Reg(reg) ((volatile unsigned char *)(UART0 + reg)) //宏定义地址

// the UART control registers.
// some have different meanings for
// read vs write.
// see http://byterunner.com/16550.html 定义寄存器地址
#define RHR 0                 // receive holding register (for input bytes)
#define THR 0                 // transmit holding register (for output bytes)
#define IER 1                 // interrupt enable register
#define IER_TX_ENABLE (1<<0)
#define IER_RX_ENABLE (1<<1)
#define FCR 2                 // FIFO control register
#define FCR_FIFO_ENABLE (1<<0)
#define FCR_FIFO_CLEAR (3<<1) // clear the content of the two FIFOs
#define ISR 2                 // interrupt status register
#define LCR 3                 // line control register
#define LCR_EIGHT_BITS (3<<0)
#define LCR_BAUD_LATCH (1<<7) // special mode to set baud rate
#define LSR 5                 // line status register
#define LSR_RX_READY (1<<0)   // input is waiting to be read from RHR
#define LSR_TX_IDLE (1<<5)    // THR can accept another character to send

#define ReadReg(reg) (*(Reg(reg)))
#define WriteReg(reg, v) (*(Reg(reg)) = (v))
...

硬件初始化： consoleinit(kernel/console.c:184)调用 uartinit (kernel/uart.c:53)对每一个接受到的字节生成一个接受中断、发送完成的字节输出一个发送完成中断

kernel/console.c:183

void
consoleinit(void)
{
  initlock(&cons.lock, "cons");

  uartinit();

  // connect read and write system calls
  // to consoleread and consolewrite.
  devsw[CONSOLE].read = consoleread;
  devsw[CONSOLE].write = consolewrite;
}

kernel/uart.c:52

void
uartinit(void)
{
  // disable interrupts.
  WriteReg(IER, 0x00);

  // special mode to set baud rate.
  WriteReg(LCR, LCR_BAUD_LATCH);

  // LSB for baud rate of 38.4K.
  WriteReg(0, 0x03);

  // MSB for baud rate of 38.4K.
  WriteReg(1, 0x00);

  // leave set-baud mode,
  // and set word length to 8 bits, no parity.
  WriteReg(LCR, LCR_EIGHT_BITS);

  // reset and enable FIFOs.
  WriteReg(FCR, FCR_FIFO_ENABLE | FCR_FIFO_CLEAR);

  // enable transmit and receive interrupts.
  WriteReg(IER, IER_TX_ENABLE | IER_RX_ENABLE);

  initlock(&uart_tx_lock, "uart");
}

shell读取通路流程： init.c (user/init.c:19)中打开的文件描述符从控制台读取输入
-> main(user/shell.c:145) -> 调用getcmd(user/shell.c:143) -> 调用gets(user/ulib.c: 56) -> 调用read
->用户键盘输入一个字符
->UART在RHR中接收到该字符，发出中断
->xv6接收到中断，陷入trap->trap handler发现是外部设备中断,进入devintr，设备是UART
->调用uartintr->调用uartgetc读取字符，发现RHR中有字符可读
->调用consoleintr：将输入字符缓冲到cons.buf中，如果读到'\n'或'ctrl+D'，说明用户输入满足一行，就唤醒consoleread
->调用consoleread 读出一整行的用户输入，拷贝到用户空间中

kernel/trap.c devintr

// check if it's an external interrupt or software interrupt,
// and handle it.
// returns 2 if timer interrupt,
// 1 if other device,
// 0 if not recognized.
int
devintr()
{
  uint64 scause = r_scause();

  if((scause & 0x8000000000000000L) &&
	 (scause & 0xff) == 9){
	// this is a supervisor external interrupt, via PLIC.

	// irq indicates which device interrupted.
	int irq = plic_claim();

	if(irq == UART0_IRQ){
	  uartintr();
	} else if(irq == VIRTIO0_IRQ){
	  virtio_disk_intr();
	} else if(irq){
	  printf("unexpected interrupt irq=%d\n", irq);
	}

	// the PLIC allows each device to raise at most one
	// interrupt at a time; tell the PLIC the device is
	// now allowed to interrupt again.
	if(irq)
	  plic_complete(irq);

	return 1;
  } else if(scause == 0x8000000000000001L){
	// software interrupt from a machine-mode timer interrupt,
	// forwarded by timervec in kernelvec.S.

	if(cpuid() == 0){
	  clockintr();
	}

	// acknowledge the software interrupt by clearing
	// the SSIP bit in sip.
	w_sip(r_sip() & ~2);

	return 2;
  } else {
	return 0;
  }
}

kernel/uart.c uartintr

// handle a uart interrupt, raised because input has
// arrived, or the uart is ready for more output, or
// both. called from trap.c.
void
uartintr(void)
{
  // read and process incoming characters.
  while(1){
	int c = uartgetc();
	if(c == -1)
	  break;
	consoleintr(c);
  }

  // send buffered characters.
  acquire(&uart_tx_lock);
  uartstart();
  release(&uart_tx_lock);
}

kernel/uart.c uartgetc

// read one input character from the UART.
// return -1 if none is waiting.
int
uartgetc(void)
{
  if(ReadReg(LSR) & 0x01){
	// input data is ready.
	return ReadReg(RHR);
  } else {
	return -1;
  }
}

kernel/console.c cons

struct {
  struct spinlock lock;

  // input
#define INPUT_BUF 128
  char buf[INPUT_BUF];
  uint r;  // Read index
  uint w;  // Write index
  uint e;  // Edit index
} cons;
...

kernel/console.c consoleintr

...
void
consoleintr(int c)
{
  acquire(&cons.lock);

  switch(c){
  case C('P'):  // Print process list.
	procdump();
	break;
  case C('U'):  // Kill line.
	while(cons.e != cons.w &&
		  cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){
	  cons.e--;
	  consputc(BACKSPACE);
	}
	break;
  case C('H'): // Backspace
  case '\x7f':
	if(cons.e != cons.w){
	  cons.e--;
	  consputc(BACKSPACE);
	}
	break;
  default:
	if(c != 0 && cons.e-cons.r < INPUT_BUF){
	  c = (c == '\r') ? '\n' : c;

	  // echo back to the user.
	  consputc(c);

	  // store for consumption by consoleread().
	  cons.buf[cons.e++ % INPUT_BUF] = c;

	  if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){
		// wake up consoleread() if a whole line (or end-of-file)
		// has arrived.
		cons.w = cons.e;
		wakeup(&cons.r);
	  }
	}
	break;
  }

  release(&cons.lock);
}
...

kernel/console.c consoleread

...
//
// user read()s from the console go here.
// copy (up to) a whole input line to dst.
// user_dist indicates whether dst is a user
// or kernel address.
//
int
consoleread(int user_dst, uint64 dst, int n)
{
  uint target;
  int c;
  char cbuf;

  target = n;
  acquire(&cons.lock);
  while(n > 0){
	// wait until interrupt handler has put some
	// input into cons.buffer.
	while(cons.r == cons.w){
	  if(myproc()->killed){
		release(&cons.lock);
		return -1;
	  }
	  sleep(&cons.r, &cons.lock);
	}

	c = cons.buf[cons.r++ % INPUT_BUF];

	if(c == C('D')){  // end-of-file
	  if(n < target){
		// Save ^D for next time, to make sure
		// caller gets a 0-byte result.
		cons.r--;
	  }
	  break;
	}

	// copy the input byte to the user-space buffer.
	cbuf = c;
	if(either_copyout(user_dst, dst, &cbuf, 1) == -1)
	  break;

	dst++;
	--n;

	if(c == '\n'){
	  // a whole line has arrived, return to
	  // the user-level read().
	  break;
	}
  }
  release(&cons.lock);

  return target - n;
}

//
// the console input interrupt handler.
// uartintr() calls this for input character.
// do erase/kill processing, append to cons.buf,
// wake up consoleread() if a whole line has arrived.
//
...

本质是三层封装 trap -> device -> uart -> cosole

3.代码控制台输出：

控制台输出流程：
用户侧执行write系统调用(往往是使用prinf，经过一系列分类之后调用putc，putc调用write)
-> 系统调用sys_write(kernel/sysfile.c:82) -> 调用filewrite(kernel/file.c:135) ->
->->经过文件系统导向UART驱动程序的上半部分
->调用uartputc(kernel/uart.c:87)向缓冲输入数据
->调用uartstart启动传输（FIFO）注：当发送完成时候，会尝试通知休眠中的uartputc进程
->完成传输触发中段，引起trap，最后跳转进入uartintr ->调用uartstart发送更多字符，直到传输完成。

kernel/uart.c:41 发送缓冲区

// the transmit output buffer.
struct spinlock uart_tx_lock;
#define UART_TX_BUF_SIZE 32
char uart_tx_buf[UART_TX_BUF_SIZE];
int uart_tx_w; // write next to uart_tx_buf[uart_tx_w++]
int uart_tx_r; // read next from uart_tx_buf[uar_tx_r++]

kernel/uart.c uartputc

// add a character to the output buffer and tell the
// UART to start sending if it isn't already.
// blocks if the output buffer is full.
// because it may block, it can't be called
// from interrupts; it's only suitable for use
// by write().
void
uartputc(int c)
{
  acquire(&uart_tx_lock);

  if(panicked){
	for(;;)
	  ;
  }

  while(1){
	if(((uart_tx_w + 1) % UART_TX_BUF_SIZE) == uart_tx_r){
	  // buffer is full.
	  // wait for uartstart() to open up space in the buffer.
	  sleep(&uart_tx_r, &uart_tx_lock);
	} else {
	  uart_tx_buf[uart_tx_w] = c;
	  uart_tx_w = (uart_tx_w + 1) % UART_TX_BUF_SIZE;
	  uartstart();
	  release(&uart_tx_lock);
	  return;
	}
  }
}

kernel/uart.c uartstart

// if the UART is idle, and a character is waiting
// in the transmit buffer, send it.
// caller must hold uart_tx_lock.
// called from both the top- and bottom-half.
void
uartstart()
{
  while(1){
	if(uart_tx_w == uart_tx_r){
	  // transmit buffer is empty.
	  return;
	}

	if((ReadReg(LSR) & LSR_TX_IDLE) == 0){
	  // the UART transmit holding register is full,
	  // so we cannot give it another byte.
	  // it will interrupt when it's ready for a new byte.
	  return;
	}

	int c = uart_tx_buf[uart_tx_r];
	uart_tx_r = (uart_tx_r + 1) % UART_TX_BUF_SIZE;

	// maybe uartputc() is waiting for space in the buffer.
	wakeup(&uart_tx_r);

	WriteReg(THR, c);
  }
}

本质上是通过缓冲区和中断机制将设备活动和进程活动解耦，实现了I/O并发，由于uartstart几乎是无阻塞的，因此实现了uartputc的异步；

同时也存在阻塞版本，这被用于内核的printf中：

kernel/uart.c uartputc_sync

// alternate version of uartputc() that doesn't 
// use interrupts, for use by kernel printf() and
// to echo characters. it spins waiting for the uart's
// output register to be empty.
void
uartputc_sync(int c)
{
  push_off();

  if(panicked){
	for(;;)
	  ;
  }

3.驱动中的并发

在consoleread和consoleintr中，在处理可能存在的并发访问时，使用了锁来保护相应的资源（使用的是自旋锁）
锁所避免的潜在问题：
两个不同CPU上的进程同时调用consoleread。
即使CPU已经在执行consoleread，但硬件要求该CPU响应一个控制台（UART）的中断。
当consoleread执行时，硬件可能会在不同的CPU上响应一个控制台（UART）中断。
中断应存在的机制：处理程序尽可能的少，通过唤醒特定的进程完成之后的工作。

4.定时器中断

xv6的进程调度策略采用了基于时间片的进程切换机制，通过yield来实现进程调度
由于定时其中断在机器模式下运行，因此与trap机制不同

定时器中断： 在timerinit(kerenl/start.c:57)初始化，是实现了硬件编程（设定延时）+（设置scratch、类似于trapframe） + （设置mtvec为timervec）+使能定时器中断。

kerenl/start.c timerinit

// set up to receive timer interrupts in machine mode,
// which arrive at timervec in kernelvec.S,
// which turns them into software interrupts for
// devintr() in trap.c.
void
timerinit()
{
  // each CPU has a separate source of timer interrupts.
  int id = r_mhartid();

  // ask the CLINT for a timer interrupt.
  int interval = 1000000; // cycles; about 1/10th second in qemu.
  *(uint64*)CLINT_MTIMECMP(id) = *(uint64*)CLINT_MTIME + interval;

  // prepare information in scratch[] for timervec.
  // scratch[0..3] : space for timervec to save registers.
  // scratch[4] : address of CLINT MTIMECMP register.
  // scratch[5] : desired interval (in cycles) between timer interrupts.
  uint64 *scratch = &mscratch0[32 * id];
  scratch[4] = CLINT_MTIMECMP(id);
  scratch[5] = interval;
  w_mscratch((uint64)scratch);

  // set the machine-mode trap handler.
  w_mtvec((uint64)timervec);

  // enable machine-mode interrupts.
  w_mstatus(r_mstatus() | MSTATUS_MIE);

  // enable machine-mode timer interrupts.
  w_mie(r_mie() | MIE_MTIE);
}

计时器中断设定核心： 保证不干扰中断的内核代码（透明机制）

计时器中断设计策略： 发出软件中断，立即返回（timervec）保存寄存器->使用寄存器更新计时器（喂狗）并触发软件中断-> 恢复寄存器；

kernel/kernelvec.S timervec

		#
		# machine-mode timer interrupt.
		#
.globl timervec
.align 4
timervec:
		# start.c has set up the memory that mscratch points to:
		# scratch[0,8,16] : register save area.
		# scratch[32] : address of CLINT's MTIMECMP register.
		# scratch[40] : desired interval between interrupts.

		csrrw a0, mscratch, a0
		sd a1, 0(a0)
		sd a2, 8(a0)
		sd a3, 16(a0)

		# schedule the next timer interrupt
		# by adding interval to mtimecmp.
		ld a1, 32(a0) # CLINT_MTIMECMP(hart)
		ld a2, 40(a0) # interval
		ld a3, 0(a1)
		add a3, a3, a2
		sd a3, 0(a1)

		# raise a supervisor software interrupt.
	li a1, 2
		csrw sip, a1

		ld a3, 16(a0)
		ld a2, 8(a0)
		ld a1, 0(a0)
		csrrw a0, mscratch, a0

		mret

5.现实世界：

由于内核代码可能被挂起，然后在不同的CPU上恢复，因此xv6内核代码比较复杂，但如果设备和计时器中断只在用户空间发生，代码可以被简化；
通常来说，驱动程序比核心内核占用了更多的代码量；
UART在xv6中使用的是Programmed I/O，即使用软件来驱动数据传输，但是其难用于高速率传输；现代多使用DMA传输，即在RAM中准备数据，使用对寄存器的单次写入同时设备
中断有着较高的cpu开销，但是对高速设备，其多采用polling机制，即禁用中断，定期检查（但是如果空闲会浪费cpu时间）。

2.xv6 book书籍阅读（Chapter 6 Locking)

1.概述

锁所解决的操作系统问题： 并发；
锁所存在的缺陷： 锁的本质是将并行操作串行化，其有着扼杀性能的缺点
并发控制技术（同步工具）：用于实现线程使用的，一种兼具并发性和正确性的抽象设计，是一种互斥原语；
常用的同步工具：锁Lock，条件变量Conditional Variable，信号量Semaphore等
锁的作用：
使用锁有助于避免更新的丢失；
使用锁使多步操作变为原子性的操作；
使用锁有助于维持一些规则或特性的不变性；

2.竞态条件

基本概念：
临界区Critical Section：访问共享资源的代码段
竞争条件Race Condition：多个执行线程同时进入临界区并尝试更新共享数据结构的情况
锁在竞态条件的作用：保护临界区域，将临界区域串行化
内核设计的其中一个主要挑战： 避免锁冲突
锁的位置也很重要，要在访问/修改临界区域之前。否则会降低性能

3.代码 LOCKS

xv6所提供的锁：自旋锁（spinloack）和睡眠锁（sleeplocks）
-由于需要保证多处理器互斥，因此需要使用原子操作
在xv6中，使用汇编完成相应的原子操作，其使用了可移植的c库进行了包装

acquire通过将锁包装在循环中实现了锁的自旋

kernel/spinloc.h 自旋锁的结构

// Mutual exclusion lock.
struct spinlock {
  uint locked;       // Is the lock held?

  // For debugging:
  char *name;        // Name of lock.
  struct cpu *cpu;   // The cpu holding the lock.
};

kernel/spinlock.c acquire

// Acquire the lock.
// Loops (spins) until the lock is acquired.
void
acquire(struct spinlock *lk)
{
  push_off(); // disable interrupts to avoid deadlock.
  if(holding(lk))
	panic("acquire");

  // On RISC-V, sync_lock_test_and_set turns into an atomic swap:
  //   a5 = 1
  //   s1 = &lk->locked
  //   amoswap.w.aq a5, a5, (s1)
  while(__sync_lock_test_and_set(&lk->locked, 1) != 0)
	;

  // Tell the C compiler and the processor to not move loads or stores
  // past this point, to ensure that the critical section's memory
  // references happen strictly after the lock is acquired.
  // On RISC-V, this emits a fence instruction.
  __sync_synchronize();

  // Record info about lock acquisition for holding() and debugging.
  lk->cpu = mycpu();
}

kernel/spinlock.c release

// Release the lock.
void
release(struct spinlock *lk)
{
  if(!holding(lk))
	panic("release");

  lk->cpu = 0;

  // Tell the C compiler and the CPU to not move loads or stores
  // past this point, to ensure that all the stores in the critical
  // section are visible to other CPUs before the lock is released,
  // and that loads in the critical section occur strictly before
  // the lock is released.
  // On RISC-V, this emits a fence instruction.
  __sync_synchronize();

  // Release the lock, equivalent to lk->locked = 0.
  // This code doesn't use a C assignment, since the C standard
  // implies that an assignment might be implemented with
  // multiple store instructions.
  // On RISC-V, sync_lock_release turns into an atomic swap:
  //   s1 = &lk->locked
  //   amoswap.w zero, zero, (s1)
  __sync_lock_release(&lk->locked);

  pop_off();
}

4.代码使用锁

使用锁的困难：使用多少锁、再哪里使用
锁使用的基本原则： 保护可能被不同cpu使用（读写）的变量；锁保护不变量，即如果不变量涉及多个位置，其都需要通过一个锁来保护；
大内核锁：本质是对内核上锁，保证内核的顺序执行，其牺牲了效率
粗粒度锁的例子（xv6）： kalloc.c分配器，每个进程再获得锁时候需要自旋等待
改进方案：使其存在多个空闲列表，每个列表有着自己单独的锁，允许并行分配
kernel/kalloc.c kmem结构体
```
struct {
  struct spinlock lock; //自旋锁
  struct run *freelist;
} kmem;
```
细粒度锁的例子（xv6）： 对于每一个文件都有单一锁，因此对不同文件可以不需要等待

改进方案：粒度的进一步细化，从而实现进程同时写入一个文件的不同区域

xv6 中的锁

锁	描述
bcache.lock	保护块缓冲区缓存项（block buffer cache entries）的分配
cons.lock	串行化对控制台硬件的访问，避免混合输出
ftable.lock	串行化文件表中文件结构体的分配
icache.lock	保护索引结点缓存项（inode cache entries）的分配
vdisk_lock	串行化对磁盘硬件和DMA描述符队列的访问
kmem.lock	串行化内存分配
log.lock	串行化事务日志操作
管道的pi->lock	串行化每个管道的操作
pid_lock	串行化next_pid的增量
进程的p->lock	串行化进程状态的改变
tickslock	串行化时钟计数操作
索引结点的 ip->lock	串行化索引结点及其内容的操作
缓冲区的b->lock	串行化每个块缓冲区的操作

5.死锁和锁排序

内核在代码中的路径如果需要持有数个锁，其以相同的顺序获取这些锁是很重要的 -> 避免死锁；
由于sleep的工作方式，因此xv6包含很多进程锁在内和锁链；全局避免死锁需要按顺序获取锁
例：创建一个文件需要目录锁、文件inode锁、磁盘缓冲区锁、驱动程序锁和进程锁；
锁涉及的困难： 逻辑结构的冲突、锁的未知性
死锁问题主要是来源于细粒度的锁

6.锁和中断处理函数

锁和中断的交互的潜在风险： 中断和锁的互相作用（如sleep和计时器中断）会形成deadlock；
解决方案： 如果自旋锁被中断使用，CPU保证在启用中断时永远不能持有该锁；
xv6解决方案： CPU获得任何锁时，xv6禁止该CPU上的中断；
xv6使用：acquire调用push_off (kernel/spinlock.c:89) 并且release调用pop_off (kernel/spinlock.c:100)来跟踪当前CPU上锁的嵌套级别，当计数到0时，恢复中断使能状态；

注意： 在设置锁之前关中断是很重要的，防止中断和锁同时出现的窗口期发生；先释放锁再开中断也是相同道理。

kernel/spinlock initlock

// Mutual exclusion spin locks.

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "proc.h"
#include "defs.h"

void
initlock(struct spinlock *lk, char *name)
{
  lk->name = name;
  lk->locked = 0;
  lk->cpu = 0;
}

kernel/spinlock push_off pop_off holding


// Check whether this cpu is holding the lock.
// Interrupts must be off.
int
holding(struct spinlock *lk)
{
  int r;
  r = (lk->locked && lk->cpu == mycpu());
  return r;
}
// push_off/pop_off are like intr_off()/intr_on() except that they are matched:
// it takes two pop_off()s to undo two push_off()s.  Also, if interrupts
// are initially off, then push_off, pop_off leaves them off.

void
push_off(void)
{
  int old = intr_get();

  intr_off(); //关中断
  if(mycpu()->noff == 0)
	mycpu()->intena = old;
  mycpu()->noff += 1;
}

void
pop_off(void)
{
  struct cpu *c = mycpu();
  if(intr_get()) //检查中断状态
	panic("pop_off - interruptible");
  if(c->noff < 1) //计数异常
	panic("pop_off");
  c->noff -= 1;
  if(c->noff == 0 && c->intena)
	intr_on(); //开中断
}

7.指令和内存访问程序

前提：为了实现更高的性能，往往代码不是顺序执行的；
CPU排序规则： 并发模型；
防止编译器对锁进行并发优化：使用 __sync_synchronize();，设置内存障碍，防止优化，强制顺序执行

8.睡眠锁

自旋锁缺点：不能让出CPU；
睡眠锁目的： 等待锁时可以让出CPU、并允许在持有锁时让步（中断）；
睡眠锁的实现： 使用自旋锁保护的锁定自端，使其能够原子的让出cpu并释放自旋锁；
注：由于睡眠锁存在中断使能，因此不能在自旋锁临界区域使用；反过来是可以的；
自旋锁：短临界区域；睡眠锁：冗长操作

使用了自旋锁和睡眠锁相结合的方式

kernel/sleeplock.h

// Long-term locks for processes
struct sleeplock {
  uint locked;       // Is the lock held?
  struct spinlock lk; // spinlock protecting this sleep lock

  // For debugging:
  char *name;        // Name of lock.
  int pid;           // Process holding lock
};

kernel/sleeplock.c

/ Sleeping locks

#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "proc.h"
#include "sleeplock.h"

void
initsleeplock(struct sleeplock *lk, char *name)
{
  initlock(&lk->lk, "sleep lock");
  lk->name = name;
  lk->locked = 0;
  lk->pid = 0;
}

void
acquiresleep(struct sleeplock *lk)
{
  acquire(&lk->lk); //获取小锁
  while (lk->locked) {
	sleep(lk, &lk->lk); //睡眠
  }
  lk->locked = 1; 大锁
  lk->pid = myproc()->pid;
  release(&lk->lk); //释放小锁
}

void
releasesleep(struct sleeplock *lk)
{
  acquire(&lk->lk);获取小锁
  lk->locked = 0; 释放大锁
  lk->pid = 0;
  wakeup(lk);  唤醒
  release(&lk->lk); //释放大锁
}

int
holdingsleep(struct sleeplock *lk)
{
  int r;

  acquire(&lk->lk);
  r = lk->locked && (lk->pid == myproc()->pid);
  release(&lk->lk);
  return r;
}

9.真实世界

在真实世界中，锁往往隐藏在更高级别的结构中，如同步队列；
带锁编程困难：最好使用识别竞争条件的工具，因为很容易错过锁的不变量；
POSIX线程（Pthread）：允许一个用户进程在不同的CPU上同时运行数个线程，其支持用户级锁和障碍等。其需要操作系统的支持，包括调度和同步机制；
无原子指令实现锁是可能的，但是其代价昂贵；
如果不同CPU尝试获取相同的锁，会导致昂贵的开销
-存在使用无锁的数据结构和算法，如使用链表+原子指令，但需要考虑指令和内存的重新排序

10.拓展知识

最基本的锁：互斥锁
要求：互斥、进步（存在机制决定进入临界区的顺序）、有限等待
常用方法：抢占式内核/非抢占式内核
Peterson算法：谦让模型，但是存在其不能直接运行在现代硬件上、且其指令顺序会打乱
自旋锁的缺点：互斥，进步，但是不是有限等待
硬件支持：
锁：Test and Set指令、Compare and Swap指令、Fetch and Add指令
临界区：Load-Linked and Store-Conditional指令
强制更新内存：Memory Barriers内存屏障
原子变量：Atomic Variables
pthread的使用：https://zhuanlan.zhihu.com/p/353400345
条件变量：保证线程的唤醒-睡眠，需要配合互斥锁使用
信号量：互斥锁+条件变量+全局状态变量；
同步问题： 有界缓冲区问题；读—写者问题；哲学家就餐问题
死锁成立的条件： 互斥；占有并等待；非抢占；循环等待；

二、涉及函数

Lec9

kernel/kernelvec.S： 见上文的对计时器中断的分析

kernel/plic.c： 主要通过操作内存，实现了对设备的控制，包括CPU、UART和 VIRTIO；同时也实现了对设备中断和相对应的CPU的查询

kernel/plic.c

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"

//
// the riscv Platform Level Interrupt Controller (PLIC).
//

void
plicinit(void)
{
  // set desired IRQ priorities non-zero (otherwise disabled).
  *(uint32*)(PLIC + UART0_IRQ*4) = 1;
  *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1;
}

void
plicinithart(void)
{
  int hart = cpuid();

  // set uart's enable bit for this hart's S-mode. 
  *(uint32*)PLIC_SENABLE(hart)= (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ);

  // set this hart's S-mode priority threshold to 0.
  *(uint32*)PLIC_SPRIORITY(hart) = 0;
}

// ask the PLIC what interrupt we should serve.
int
plic_claim(void)
{
  int hart = cpuid();
  int irq = *(uint32*)PLIC_SCLAIM(hart);
  return irq;
}

// tell the PLIC we've served this IRQ.
void
plic_complete(int irq)
{
  int hart = cpuid();
  *(uint32*)PLIC_SCLAIM(hart) = irq;
}

kernel/console.c：主要实现了对控制台这一虚拟设备的读写操作，包括非阻塞和阻塞的，其中读函数和写函数已经解释完成了，核心就是其和相应的驱动程序（UART相互调用，实现控制台的输入输出）

kernel/console.c

//
// Console input and output, to the uart.
// Reads are line at a time.
// Implements special input characters:
//   newline -- end of line
//   control-h -- backspace
//   control-u -- kill line
//   control-d -- end of file
//   control-p -- print process list
//

#include <stdarg.h>

#include "types.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "file.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
#include "proc.h"

#define BACKSPACE 0x100
#define C(x)  ((x)-'@')  // Control-x

//
// send one character to the uart.
// called by printf, and to echo input characters,
// but not from write().
//
void
consputc(int c) //处理退格符号
{
  if(c == BACKSPACE){
	// if the user typed backspace, overwrite with a space.
	uartputc_sync('\b'); uartputc_sync(' '); uartputc_sync('\b');
  } else {
	uartputc_sync(c);
  }
}

struct {
  struct spinlock lock;

  // input
#define INPUT_BUF 128
  char buf[INPUT_BUF];
  uint r;  // Read index
  uint w;  // Write index
  uint e;  // Edit index
} cons; //输入缓存

//
// user write()s to the console go here.
//
int
consolewrite(int user_src, uint64 src, int n) //循环输出字符
{
  int i;

  acquire(&cons.lock);
  for(i = 0; i < n; i++){
	char c;
	if(either_copyin(&c, user_src, src+i, 1) == -1)
	  break;
	uartputc(c);
  }
  release(&cons.lock);

  return i;
}

//
// user read()s from the console go here.
// copy (up to) a whole input line to dst.
// user_dist indicates whether dst is a user
// or kernel address.
//
int
consoleread(int user_dst, uint64 dst, int n) //对整行进行数据处理
{
  uint target;
  int c;
  char cbuf;

  target = n;
  acquire(&cons.lock);
  while(n > 0){
	// wait until interrupt handler has put some
	// input into cons.buffer.
	while(cons.r == cons.w){
	  if(myproc()->killed){
		release(&cons.lock);
		return -1;
	  }
	  sleep(&cons.r, &cons.lock);
	}

	c = cons.buf[cons.r++ % INPUT_BUF];

	if(c == C('D')){  // end-of-file
	  if(n < target){
		// Save ^D for next time, to make sure
		// caller gets a 0-byte result.
		cons.r--;
	  }
	  break;
	}

	// copy the input byte to the user-space buffer.
	cbuf = c;
	if(either_copyout(user_dst, dst, &cbuf, 1) == -1)
	  break;

	dst++;
	--n;

	if(c == '\n'){
	  // a whole line has arrived, return to
	  // the user-level read().
	  break;
	}
  }
  release(&cons.lock);

  return target - n;
}

//
// the console input interrupt handler.
// uartintr() calls this for input character.
// do erase/kill processing, append to cons.buf,
// wake up consoleread() if a whole line has arrived.
//
void
consoleintr(int c) //对输入的单字符进行判定
{
  acquire(&cons.lock);

  switch(c){
  case C('P'):  // Print process list.
	procdump();
	break;
  case C('U'):  // Kill line.
	while(cons.e != cons.w &&
		  cons.buf[(cons.e-1) % INPUT_BUF] != '\n'){
	  cons.e--;
	  consputc(BACKSPACE);
	}
	break;
  case C('H'): // Backspace
  case '\x7f':
	if(cons.e != cons.w){
	  cons.e--;
	  consputc(BACKSPACE);
	}
	break;
  default:
	if(c != 0 && cons.e-cons.r < INPUT_BUF){
	  c = (c == '\r') ? '\n' : c;

	  // echo back to the user.
	  consputc(c);

	  // store for consumption by consoleread().
	  cons.buf[cons.e++ % INPUT_BUF] = c;

	  if(c == '\n' || c == C('D') || cons.e == cons.r+INPUT_BUF){
		// wake up consoleread() if a whole line (or end-of-file)
		// has arrived.
		cons.w = cons.e;
		wakeup(&cons.r);
	  }
	}
	break;
  }

  release(&cons.lock);
}

void
consoleinit(void) //初始化结构体和设备
{
  initlock(&cons.lock, "cons");

  uartinit();

  // connect read and write system calls
  // to consoleread and consolewrite.
  devsw[CONSOLE].read = consoleread;
  devsw[CONSOLE].write = consolewrite;
}

kernel/uart.c： uart设备的驱动程序，实现设备的初始化、通过调用console相应函数实现设备的读取，同时通过被console调用实现设备的输出；核心是对设备寄存器在内存映射中的操作。所有函数上文均以给出，这里就不赘述了；

kernel/printf.c： 内核中printf的实现，可以类比linux中的printk；同时实现printf的初始化
核心流程：printf读取所输出字符串，根据字符选择输出类型 -> 调用相应的printint、printptr或者相应的程序对数据进行输出预处理->调用consputc通过控制台输出；

kernel/printf.c

//
// formatted console output -- printf, panic.
//

#include <stdarg.h>

#include "types.h"
#include "param.h"
#include "spinlock.h"
#include "sleeplock.h"
#include "fs.h"
#include "file.h"
#include "memlayout.h"
#include "riscv.h"
#include "defs.h"
#include "proc.h"

volatile int panicked = 0;

// lock to avoid interleaving concurrent printf's. 自旋锁适应并发
static struct {
  struct spinlock lock;
  int locking;
} pr;

static char digits[] = "0123456789abcdef";

static void
printint(int xx, int base, int sign) //对int，10进制和16进制的预处理
{
  char buf[16];
  int i;
  uint x;

  if(sign && (sign = xx < 0))
	x = -xx;
  else
	x = xx;

  i = 0;
  do {
	buf[i++] = digits[x % base];
  } while((x /= base) != 0);

  if(sign)
	buf[i++] = '-';

  while(--i >= 0)
	consputc(buf[i]);
}

static void
printptr(uint64 x) //对地址数据的预处理
{
  int i;
  consputc('0');
  consputc('x');
  for (i = 0; i < (sizeof(uint64) * 2); i++, x <<= 4)
	consputc(digits[x >> (sizeof(uint64) * 8 - 4)]);
}

// Print to the console. only understands %d, %x, %p, %s.
void
printf(char *fmt, ...)
{
  va_list ap;
  int i, c, locking;
  char *s;

  locking = pr.locking;
  if(locking)
	acquire(&pr.lock);

  if (fmt == 0)
	panic("null fmt");

  va_start(ap, fmt);
  for(i = 0; (c = fmt[i] & 0xff) != 0; i++){
	if(c != '%'){ //字符分类1
	  consputc(c);
	  continue;
	}
	c = fmt[++i] & 0xff;
	if(c == 0)
	  break;
	switch(c){ //字符分类2
	case 'd':
	  printint(va_arg(ap, int), 10, 1);
	  break;
	case 'x':
	  printint(va_arg(ap, int), 16, 1);
	  break;
	case 'p':
	  printptr(va_arg(ap, uint64));
	  break;
	case 's': //对字符串的预处理
	  if((s = va_arg(ap, char*)) == 0)
		s = "(null)";
	  for(; *s; s++)
		consputc(*s);
	  break;
	case '%':
	  consputc('%');
	  break;
	default:
	  // Print unknown % sequence to draw attention.
	  consputc('%');
	  consputc(c);
	  break;
	}
  }

  if(locking)
	release(&pr.lock);
}

void
panic(char *s) //panic
{
  pr.locking = 0;
  printf("panic: ");
  printf(s);
  printf("\n");
  panicked = 1; // freeze uart output from other CPUs
  for(;;)
	;
}

void
printfinit(void) //初始化锁
{
  initlock(&pr.lock, "pr");
  pr.locking = 1;
}

LEC 10

kernel/spinlock.h是锁的定义，已经给出了；
kernel/spinlock.c 中也已经在上文中的阅读中给出了；

三、课程视频观看笔记

1.Interrupts

注：在真正的操作系统中，内存总是尽量被完全使用，同时，常驻内存总是比虚拟内存小的多 -> 内存的动态分配
中断过程： 硬件：触发中断；软件：保存工作；处理中断；恢复工作。
中断和trap的区别： 核心：中断是异步的，同时设备和CPU是并行的
外部中断in RISC-V是由外部设备引起的，其被映射到地址的相关位中：其由PLIC管理，其可以将中断交给不同的CPU来处理，如果所有核心的中断被禁用，其会保持并记录中断，直到存在cpu空闲。->内核实现
驱动程序： 用于管理硬件程序；其通常分为两层，及时相应部分和内核部分，其使用队列实现了并行；内核部分和上册应用程序进行交互，保证其能够返回到正确的程序。
设备使用内存映射I/O的方式操作设备的寄存器
shell中的 $ ls实现流程：
放入 $ $ 到 uart->发送完成后，触发中断->发送完成；
ls:序列化字符，接受信息，产生中断->处理中断；
RISC-V对中断的支持：
中断管理位：
SIE程序中断管理器，包括E程序、S外部、T定时器；
SSTATVS：存在1位管理中断；
中断管理寄存器SIP：显示中断类型；
进入中断原因寄存器SCAUSE ：显示中断原因；
STVEC：处理中断函数寄存器，保存trap处理函数；
xv6对寄存器编程：start.c初始化CPU、 main()以下的初始化对相应设备寄存器初始化，包括UART、PLIC（涉及多CPU的中断初始化和由优先级设置，xv6不启用中断优先级），最后在scheduler启用中断（所有cpu都运行调度器程序）
控制台输出流程修正：用户侧执行write系统调用(往往是使用prinf，经过一系列分类之后调用putc，putc调用write)
-> 系统调用sys_write(kernel/sysfile.c:82) -> 调用filewrite(kernel/file.c:135) -> 调用devsw中的write函数->
->-调用consolewrite(kernel/console.c:59)；
->调用uartputc(kernel/uart.c:87)向缓冲（循环队列）输入数据，如果满休眠等待唤醒；
->调用uartstart启动传输（FIFO）注：当发送完成时候，会尝试通知休眠中的uartputc进程；
->完成传输触发中段，引起trap，最后跳转进入uartintr ->调用uartstart发送更多字符，直到传输完成。
中断时系统反应： > 清除SIE -> 保存pc、模式->设置为管理者模式->进入预设的trap处理程序（usertrap）
设备中断反应：在进入设备中断后，PLIC分配反应CPU并获得中断号码->根据中断号码处理中断。
并发与中断：
生产者-消费者并行性->由循环队列缓冲区实现（包括uart和consloe）；
由于内核也存在中断时候，需要对内核的使能进行控制；
驱动的顶部和底部可以并行运行 ->使用锁，保证缓冲区的顺序性
中断的发展；，由于中断要处理的事件越来越多（硬件越来越复杂），因此为了解决超越中断处理速度的数据传输，使用轮询模式，即自旋来实现对高速数据设备的处理；复杂驱动可以在轮询和中断之间切换，以适应动态的负载。

2.Multiprocessors and locking

锁的需求： 多核心带来的处理并发系统调用；
锁的缺陷： 限制性能；
锁的核心： 避免竞态条件，实现串行化；
由于如果将锁自动化，其会的导致死板的错误，尤其是两个锁的中间；
锁的属性：避免丢失更新；使多步操作变为原子操作；维持不变量；
死锁解决：按照固定顺序取锁；
锁与模块化： 锁的顺序要求在一定程度上会损失模块化；
锁与性能： 细粒度锁：系统复杂、难以重构，但性能较好；通常的解决方案是从粗粒度到细粒度进行尝试，查看其对内存的影响；
以UART为例： 对于缓冲区而言锁保证了：保护了写入的数据（缓冲区）为不变量、使读写寄存器变为原子操作、保持了寄存器不会被覆盖写入；
硬件解决方案：test and set实现：通过锁定地址实现，依赖于内存控制器/总线/高速缓存一致性协议等
内存排序： 在并发执行时候，为了防止针对与临界区域的优化，使用内存屏障防止内存对原子操作的内存年重排序；
带锁编程要点：非必须不使用，从粗粒度开始，善用冲突检测器；

四、完成lab及其代码

Implement copy-on write：
声明函数与创建相应表示位

def.h

//kalloc.c
...
void*            kcowalloc(void *);
void            kaddrefpage(void *);

riscv.h

#define PTE_COW (1L << 8) // 1 -> this is a lazy page(cow page)

vm.c:修改相应的uvmcopy和copyout，实现内存的lazy分配，并实现相应的lazy分配函数

vm.c:

int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  // char *mem;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");
    pa = PTE2PA(*pte);
    if(*pte & PTE_W) { //cow只和可写页面有关
      *pte =  (*pte & ~PTE_W) | PTE_COW;
      // printf("uvmcopy pte with PTE_W %p\n", *pte);
    }
    flags = PTE_FLAGS(*pte);
    // // if((mem = kalloc()) == 0)
    // //   goto err;
    // memmove(mem, (char*)pa, PGSIZE);
    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
      // kfree(mem); //在这里不涉及内存分配，因此去掉相关代码
      goto err;
    }
    kaddrefpage((void *) pa);
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

...

int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;
  printf("into copyout\n");
  while(len > 0){
    if((uvmiscowpage(dstva) == 0)) {
      uvmcowcopy(dstva);//检查内存是否为lay分配，并进行分配
    }
    va0 = PGROUNDDOWN(dstva);
    pa0 = walkaddr(pagetable, va0);
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}
...
int
uvmiscowpage(uint64 va) {
  struct proc *p = myproc();
  pte_t *pte;
  
  if (va > p->sz) { //检查大小
    printf("uvmiscowpage(): va over size\n");
    return -1;
  }
  if ((pte = walk(p->pagetable, va, 0)) == 0) { //检查pte有效性
    printf("uvmiscowpage(): pte is zero\n");
    return -1;
  }
  if ((*pte & PTE_V) == 0) {//检查有效性
    printf("uvmiscowpage(): page not present\n");
    return -1;
  }
  if (*pte & PTE_COW) { //检查是否为cow页面
     return 0;
  }
  printf("uvmiscowpage(): pte has something wrong\n");
  return -1;
}

int
uvmcowcopy(uint64 va) {
  pte_t *pte;
  uint64 pa, new;
  uint flags;
  struct proc *p = myproc();

  if ((pte = walk(p->pagetable, va, 0)) == 0) {
    panic("uvmcowcopy: pte should exist");
  }
  pa = PTE2PA(*pte);
  if ((new = (uint64)kcowalloc((void*)pa)) == 0) { //使用另外的函数，避免kfree的重复操作(在计数值 == 1时)
    return -1;
  }

  flags = (PTE_FLAGS(*pte) | PTE_W) & ~PTE_COW;  //调整flags
  uvmunmap(p->pagetable, PGROUNDDOWN(va), 1, 0);
  if (mappages(p ->pagetable, va, 1, new, flags) == -1) { //重新映射
    panic("uvmcowcopy(): mappages"); 
  }
  return 0;
}

实现相应的lazy分配的内存策略，对物理内存的分配次数进行计数,并修改相应初始化函数

kalloc

#define PA2PGREF_ID(p) (((p)-KERNBASE)/PGSIZE)
#define PGREFMAX PA2PGREF_ID(PHYSTOP)
...
struct {
  struct spinlock lock;
  int acon[PGREFMAX];
} kpageref; //计数数组

#define PA2PGREF(p) kpageref.acon[PA2PGREF_ID((uint64)(p))]
...

kinit()
{
  initlock(&kmem.lock, "kmem");
  initlock(&kpageref.lock, "kpageref"); //初始化锁
  freerange(end, (void*)PHYSTOP);
}
...
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  acquire(&kpageref.lock);
  if (--PA2PGREF(pa) <= 0){ //先自减后判断，小于0释放页面

    // Fill with junk to catch dangling refs.
    memset(pa, 1, PGSIZE);

    r = (struct run*)pa;

    acquire(&kmem.lock);
    r->next = kmem.freelist;
    kmem.freelist = r;
    release(&kmem.lock);
  }
release(&kpageref.lock);
}

void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r){
    memset((char*)r, 5, PGSIZE); // fill with junk
    acquire(&kpageref.lock);
    PA2PGREF(r) = 1;
    release(&kpageref.lock);
  }
  return (void*)r;
}

// Allocate one 4096-byte page of physical memory if ref is more than one
// if ref  is less then one return the original one
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void*
kcowalloc(void *pa)
{
  char *newpa;
  // printf("kcowalloc\n"); 
  acquire(&kpageref.lock);

  if (PA2PGREF(pa) <=1) {
    release(&kpageref.lock);
    return pa;
  }

  if((newpa = kalloc()) == 0) {
    release(&kpageref.lock);
    return 0;
  }
  memmove(newpa, (char*)pa, PGSIZE);

  PA2PGREF(pa)--;

  release(&kpageref.lock); //尽量减少锁影响的范围

  printf("%p num of REF is %d", pa, PA2PGREF(pa));

  return (void*)newpa;
}

void
kaddrefpage(void * pa) {
  acquire(&kpageref.lock);
  PA2PGREF(pa)++;
  release(&kpageref.lock);
}

参考文献

2020版xv6手册：http://xv6.dgs.zone/tranlate_books/book-riscv-rev1/c6/s0.html
xv6手册与代码笔记：https://zhuanlan.zhihu.com/p/352699414
xv6手册中文版：http://xv6.dgs.zone/tranlate_books/book-riscv-rev1/c4/s6.html
28天速通MIT 6.S081操作系统公开课：https://zhuanlan.zhihu.com/p/627573247
MIT6.s081操作系统笔记：https://juejin.cn/post/7013843036996108325

posted @ 2024-03-14 14:56 David_Dong 阅读(114) 评论(0) 收藏举报

刷新页面返回顶部

David-Dong

MIT 6.S081入门lab6 cow

MIT 6.S081入门lab6 cow

一、参考资料阅读与总结

1.xv6 book书籍阅读（chapter 5 Interrupts and device drivers)

1. 概述

2.代码：控制台输入

3.代码 控制台输出：

3.驱动中的并发

4.定时器中断

5.现实世界：

2.xv6 book书籍阅读（Chapter 6 Locking)

1.概述

2.竞态条件

3.代码 LOCKS

4.代码 使用锁

5.死锁和锁排序

6.锁和中断处理函数

7.指令和内存访问程序

8.睡眠锁

9.真实世界

10.拓展知识

二、涉及函数

Lec9

LEC 10

三、课程视频观看笔记

1.Interrupts

2.Multiprocessors and locking

四、完成lab及其代码

参考文献

3.代码控制台输出：

4.代码使用锁