理解Linux内核抢占模型（最透彻一篇）

时间 2020-12-05

标签 linux 服务器 app ide 函数 oop 线程调试 code blog 栏目 Linux 繁體版

原文原文链接

原创 宋宝华 Linux阅码场 5月6日

本文原文地址：
https://devarea.com/understanding-linux-kernel-preemption/#.XrKLcfnx05k
作者：Liran B.H
译者：宋宝华

当配置Linux内核的时候，我们可以选择一些参数，这些参数能影响系统的行为。你可以用不同的优先级、调度类和抢占模型来工作。正确地选择这些参数是非常重要的。
本文将论述不同的抢占模型如何影响用户和系统的行为。
当你使用 make menuconfig配置内核的时候，你能看到这样的菜单：

为了深入理解这三个抢占模型的区别，我们将写一个案例：

2个线程，一个高优先级RT（50），一个低优先级RT（30）
高优先级的线程要睡眠3秒
低优先级的线程用CPU来做计算
3秒后高优先级线程唤醒。
如果低优先级的线程陷入系统调用，高优先级的线程睡眠到期，究竟会发生什么？下面我们来一种模型一种模型地看。
No Forced Preemption

这种情况下，上下文切换发生在系统调用返回用户空间的点。案例如下：
2个线程，一个高优先级RT（50），一个低优先级RT（30）
高优先级的线程要睡眠3秒
低优先级的线程进入系统调用计算5秒
5秒后低优先级线程从内核系统调用返回
高优先级线程将醒来（但是比预期迟了2秒）。
内核代码，简单的字符设备：

1   #include <asm/uaccess.h>
2   #include <linux/fs.h>
3   #include <linux/gfp.h>
4   #include <linux/cdev.h>
5   #include <linux/sched.h>
6   #include <linux/kdev_t.h>
7   #include <linux/delay.h>
8   #include <linux/ioctl.h>
9   #include <linux/slab.h>
10  #include <linux/mempool.h>
11  #include <linux/mm.h>
12  #include <asm/io.h>
13
14
15    static dev_t my_dev;
16    static struct cdev *my_cdev;
17
18
19    // callback for read system call on the device
20    static ssize_t my_read(struct file *file, char __user *buf,size_t count,loff_t *ppos)
21  {
22   int len=5;
23   if(*ppos > 0)
24   {
25  return 0;
26   }
27   mdelay(5000); // busy-wait for 5 seconds
28   if (copy_to_user(buf , "hello" , len)) {
29      return -EFAULT;
30   } else {
31       *ppos +=len;
32       return len;
33   }
34  }
35
36
37
38  static struct file_operations my_fops =
39  {
40  .owner = THIS_MODULE,
41  .read = my_read,
42  };
43
44
45
46
47   static int hello_init (void)
48  {
49
50  my_dev = MKDEV(400,0);
51  register_chrdev_region(my_dev,1,"demo");
52
53  my_cdev=cdev_alloc();
54  if(!my_cdev)
55  {
56    printk (KERN_INFO "cdev alloc error.\n");
57     return -1;    
58  }
59  my_cdev->ops = &my_fops;
60  my_cdev->owner = THIS_MODULE;
61
62  if(cdev_add(my_cdev,my_dev,1))
63  {
64    printk (KERN_INFO "cdev add error.\n");
65     return -1;    
66   }
67
68
69     return 0;
70
71   }
72
73
74      static void
75      hello_cleanup (void)
76     {
77      cdev_del(my_cdev);
78      unregister_chrdev_region(my_dev, 1);
79  }
80
81
82   module_init (hello_init);
83   module_exit (hello_cleanup);
84   MODULE_LICENSE("GPL")

read回调里面delay了5秒，注意mdelay是一个计算型的busy-loop。
用户空间代码如下：

/*
 * User-space test for the /dev/demo preemption experiment.
 *
 * Starts two SCHED_RR threads: a high-priority one (50) that sleeps for
 * 3 seconds and a low-priority one (30) that blocks inside read() on
 * /dev/demo. The timestamps printed by the high-priority thread show how
 * late it was woken up.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* High-priority thread: print the time, sleep 3 seconds, print the time again. */
void *hi_prio(void *p)
{
    (void)p;
    printf("thread1 start time=%ld\n", (long)time(NULL));
    sleep(3);
    printf("thread1 stop time=%ld\n", (long)time(NULL));
    return NULL;
}

/* Low-priority thread: after 1 second, issue a read() on /dev/demo. */
void *low_prio(void *p)
{
    char buf[20];
    int fd;

    (void)p;
    sleep(1);
    fd = open("/dev/demo", O_RDWR);  /* # mknod /dev/demo c 400 0 */
    if (fd < 0) {
        perror("open /dev/demo");
        return NULL;
    }
    puts("thread2 start");
    if (read(fd, buf, sizeof buf) < 0)
        perror("read /dev/demo");
    puts("thread2 stop");
    close(fd);
    return NULL;
}

/* Set priority `prio` on `attr` and start `fn` with it; returns 0 or an errno value. */
static int start_rt_thread(pthread_t *tid, pthread_attr_t *attr, int prio,
                           void *(*fn)(void *))
{
    struct sched_param param = { .sched_priority = prio };
    int err = pthread_attr_setschedparam(attr, &param);

    if (err == 0)
        err = pthread_create(tid, attr, fn, NULL);
    if (err != 0)
        fprintf(stderr, "cannot start RT thread (prio %d): %s\n",
                prio, strerror(err));
    return err;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_attr_t attr;
    int have_t1, have_t2;

    pthread_attr_init(&attr);
    /* Without PTHREAD_EXPLICIT_SCHED the policy and priority stored in attr
     * are ignored and new threads inherit the creator's scheduling. */
    pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
    pthread_attr_setschedpolicy(&attr, SCHED_RR);

    /* Creating SCHED_RR threads typically requires root privileges. */
    have_t1 = start_rt_thread(&t1, &attr, 50, hi_prio) == 0;
    have_t2 = have_t1 && start_rt_thread(&t2, &attr, 30, low_prio) == 0;
    pthread_attr_destroy(&attr);

    /* Wait for the threads instead of guessing how long they need. */
    if (have_t1)
        pthread_join(t1, NULL);
    if (have_t2)
        pthread_join(t2, NULL);
    if (!have_t2)
        return 1;

    puts("end test");
    return 0;
}

实验步骤：

高优先级线程开始睡眠3秒
低优先级线程睡眠1秒然后做系统调用
高优先级线程6秒后醒来（stop和start的时间差）

1    # insmod demo.ko 
2    # ./app
3    thread1 start time=182
4    thread2 start
5    thread1 stop time=188
6    thread2 stop
7    end test

Preemptible Kernel

这种情况内核里面也可以抢占，意味着上述程序里面的高优先级线程3秒后可醒来。
这种情况下，系统会有更多的上下文切换，但是实时性更加好。对于要求软实时的嵌入式系统而言，这个选项是最佳的。但是对于服务器而言，通常第一个选项更好——更少的上下文切换，更多的CPU时间用作有用功。
运行结果（stop、start时间差3秒）：

1     # insmod ./demo.ko
2     #./app
3    thread1 start time=234
4     thread2 start
5     thread1 stop time=237
6    thread2 stop
7    end test

Voluntary Kernel Preemption

这种情况和第一种情况"no forced preemption"类似，但是内核开发者可以在进行复杂操作的时候，时不时检查一下是否可以reschedule。他们可以调用might_resched()函数。
在下面的代码中，我们添加了一些检查点（check point）：

1   // callback for read system call on the device
2   static ssize_t my_read(struct file *file, char __user *buf,size_t 
3   {
4    int len=5;
5     if(*ppos > 0)
6     {
7    return 0;
8     }
9   mdelay(4000); // busy-wait for 4 seconds
10   might_resched();
11   delay(3000);  // busy wait for 3 seconds 
12   if (copy_to_user(buf , "hello" , len)) {
13           return -EFAULT;
14        } else {
15           *ppos +=len;
16             return len;
17        }
18   }

如果我们把might_resched()注释掉，它会delay 7秒。
添加might_resched()调用（在CONFIG_PREEMPT_VOLUNTARY配置下，它会像cond_resched()一样检查是否需要重新调度）将导致系统检查是否有高优先级的任务被唤醒，这样高优先级任务5秒可以醒来（其中1秒在system call之前，另外4秒在kernel）。
运行结果：

1   # insmod ./demo.ko
2   #./app
3   thread1 start time=320
4   thread2 start
5   thread1 stop time=325
6   thread2 stop
7  end test

Full Real Time Preemption

如果我们使能RT补丁，我们会得到一个硬实时的kernel。这意味着任何代码可以抢占任何人。比如一个更加紧急的任务可以抢占中断服务程序ISR。这个patch进行了如下改动：

把中断服务程序转化为优先级是50的RT线程
把softIRQ转化为优先级是49的RT线程
把全部的spinlock变成mutex
高精度定时器
其余的细小改动

打补丁后会看到2个新增的菜单：

其中 “Preemptible Kernel (Basic RT)” 是为了调试目的的，为了全面使用RT补丁的功能，我们应该选择最后一项 – Fully Preemptible Kernel。这样我们会有更多的上下文切换，但是可以满足RT的实时要求。

(END)

理解Linux内核抢占模型（最透彻一篇）

No Forced Preemption

Preemptible Kernel

Voluntary Kernel Preemption

Full Real Time Preemption