如何加速背完一套3600题的题库？

时间 2021-08-13

标签 python c++ 数组数据结构 app 测试编码 spa code blog 栏目 Python 繁體版

原文原文链接

刚刚入职就收到了一份3600题的题库。知道自己的脑子不太好使，于是对题库进行了一系列选项分析，减少了800多题的背题量，并掌握了许多蒙题技巧 :)

 1 #include <bits/stdc++.h>
 2 using namespace std;
 3 int i=0;
 4 int main(){
 5     freopen("ansi.txt","r",stdin);
 6     freopen("extract_out.txt","w",stdout);
 7     char c[1200001];
 8     cout<<"T1:";
 9     int flag=2;//状态开关 
10     bool flag1=false;//字母开关 
11     int k=1;
12     while(i<1200000){
13         c[++i]=getchar();
14         //判断题型 
15         if (i>=4&&flag==2&&c[i-3]==-75&&c[i-2]==-91&&c[i-1]==-47&&c[i]==-95){
16             cout<<"(D1)";
17             flag=3;
18         }//单选 
19         if (i>=4&&flag==2&&c[i-3]==-74&&c[i-2]==-32&&c[i-1]==-47&&c[i]==-95) {
20             cout<<"(D2)";
21             flag=3;
22         }//多选 
23         if (i>=4&&flag==2&&c[i-3]==-59&&c[i-2]==-48&&c[i-1]==-74&&c[i]==-49) {
24             cout<<"(D3)";
25             flag=3;
26         }//判断 
27         //记录答案 
28         if (i>=6&&flag==3&&c[i-5]==-76&&c[i-4]==-16&&c[i-3]==-80&&c[i-2]==-72&&c[i-1]==-93&&c[i]==-70){
29             cout<<"ANS:"; flag=1;
30         }//答案 
31         //开题 
32         if (c[i]==10) {
33             cout<<'\n';
34             if (flag==1&&c[i-1]==10){
35                 k++;
36                 cout<<'T'<<k<<':';
37                 flag=2;//换两行且flag状态为1代表一题已经结束 
38             }
39         }
40         //输出价值字母 
41         if (c[i]>='A'&&c[i]<='F'&&flag1) cout<<c[i];
42         else flag1=false;
43         if (c[i]==10||(c[i-3]==-80&&c[i-2]==-72&&c[i-1]==-93&&c[i]==-70)) flag1=true;
44     }
45     return 0;
46 }

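提取完后将关键信息导出至文本文档 extract_out.txt 中，格式大致为：每道题以“T题号:”开头，题型标记为 (D1) 单选、(D2) 多选、(D3) 判断，各选项只保留行首的选项字母，答案写作“ANS:”加正确选项字母，空行之后开始下一题。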

step 2.选项分布及正确率统计

这一步主要统计了各个题型的题目数量，计算了各题型中各个选项正确的概率，以及多选题中正确选项数量的概率分布。（发现多选题全选的概率超过40%hhh）因为多选题通常包含四个选项，所以我单独对四个选项的多选题的各种答案组合的数量进行了汇总。

首先需要为数据建立合适的存储结构。

单选题数据结构：

二维数组dan_count：dan_count_i0表示包含i个选项的题目有几题；dan_count_ij表示包含i个选项的题目中，第j个选项正确的题目有几题。

各选项正确概率：dan_count_ij / dan_count_i0

多选题数据结构：

二维数组duo_count：duo_count_i0表示包含i个选项的题目有几题；duo_count_ij表示包含i个选项的题目中，第j个选项正确的题目有几题。

二维数组duo_numcount：duo_numcount_ij表示包含i个选项的题目中，有j个选项正确的题目有几题。

二维数组duo_enumcount：专门用于统计四个选项的题目：把多选题的11种可能答案（AB AC AD BC BD CD ABC ABD ACD BCD ABCD）编码为二进制数j（A、B、C、D分别对应第0～3位），duo_enumcount_ij表示包含i个选项的题目中，答案组合为j的题目有几题。

判断题数据结构：

pan_a：统计判断题正确数量

pan_b：统计判断题错误数量

源代码：

  1 #include<bits/stdc++.h>
  2 using namespace std;
  3 int pan_a=0,pan_b=0;
  4 int dan_count[10][10]={0};
  5 int duo_count[10][10]={0},duo_numcount[10][10]={0},duo_enumcount[10][10]={0};
  6 int dan=0,duo=0,pan=0;
  7 int k=0; 
  8 char c[21]={0};
  9 void get_char(){
 10     for (int i=1;i<=19;i++) c[i]=c[i+1];
 11     c[20]=getchar();
 12 }
 13 
 14 void danxuan(){
 15     int j=0;
 16     while (c[20]!=':'){
 17         get_char();
 18         if ('A'+j==c[20]) j++;
 19     }
 20     dan_count[j][0]++;
 21     get_char();
 22     dan_count[j][c[20]-'A'+1]++;
 23     return;
 24 }
 25 
 26 void duoxuan(){
 27     int j=0,t=0,x=0;
 28     while (c[20]!=':'){
 29         get_char();
 30         if ('A'+j==c[20]) j++;
 31     }
 32     duo_count[j][0]++;
 33     while (c[20]!='\n'){
 34         get_char();
 35         if (c[20]>='A'&&c[20]<='F'){
 36             duo_count[j][c[20]-'A'+1]++;
 37             t++;
 38             x=x+(int)(pow(2,c[20]-'A'));
 39         }
 40     }
 41     duo_numcount[j][t]++;
 42     duo_enumcount[j][x]++;
 43     return;
 44 }
 45 
 46 void panduan(){
 47     while (1){
 48         get_char();
 49         if (c[19]==':'){
 50             if (c[20]=='A') pan_a++;
 51             else pan_b++;
 52             return;
 53         }
 54     }
 55 }
 56 
 57 void print(){
 58     cout<<"单选题总数："<<dan<<endl; 
 59     for (int i=2;i<=4;i++){
 60         cout<<"有"<<i<<"个选项的题目数量为"<<dan_count[i][0]<<endl;
 61         for (int j=1;j<=i;j++) printf("选%c的几率：%.2f%c ",j+'A'-1,(double)dan_count[i][j]/dan_count[i][0]*100,'%');
 62         printf("\n");
 63         printf("\n");
 64     } 
 65     cout<<endl; 
 66     cout<<"多选题总数："<<duo<<endl; 
 67     for (int i=3;i<=6;i++){
 68         cout<<"有"<<i<<"个选项的题目数量为"<<duo_count[i][0]<<endl;
 69         for (int j=1;j<=i;j++) 
 70         printf("%c正确的几率：%6.2f%c ",j+'A'-1,(double)duo_count[i][j]/duo_count[i][0]*100,'%');
 71         printf("\n");
 72         for (int j=1;j<=i;j++) 
 73         if (duo_numcount[i][j])
 74         printf("%d个选项正确的几率：%6.2f%c ",j,(double)duo_numcount[i][j]/duo_count[i][0]*100,'%');
 75         printf("\n"); 
 76         printf("\n");
 77     } 
 78     cout<<endl;
 79     cout<<"判断题总数："<<pan<<endl; 
 80     cout<<"判断题正确几率：";
 81     printf("%.2f %\n",((double)pan_a/(pan_a+pan_b)*100));
 82     cout<<"判断题错误几率：";
 83     printf("%.2f %\n",((double)pan_b/(pan_a+pan_b)*100));
 84     cout<<endl;
 85     printf("特别统计（四个选项的多选题选项分布）\n");
 86     printf("选项  次数\n");
 87     for (int i=1;i<=15;i++){
 88         int t=0;
 89         for (int j=0;j<4;j++)
 90         if ((i>>j)&1) cout<<(char)('A'+j);
 91         else t++;
 92         t+=2;
 93         while (t){
 94             t--;
 95             cout<<' ';
 96         }
 97         cout<<duo_enumcount[4][i]<<endl;
 98     }
 99     return;
100 }
101 
102 int main(){
103     freopen("extract_out.txt","r",stdin);
104     freopen("选项统计结果.txt","w",stdout); 
105     while (dan+duo+pan<3600){
106         get_char(); 
107         if (c[20]=='1'&&c[19]=='D') danxuan(),dan++;
108         if (c[20]=='2'&&c[19]=='D') duoxuan(),duo++;
109         if (c[20]=='3'&&c[19]=='D') panduan(),pan++;
110         //if (k!=dan+duo+pan) cout<<k<<endl;
111     } 
112     print();
113     return 0;
114 }

step 3.精华选项提取

这一步先后提取了(1)单选题和判断题中的正确选项 (2)非全选的多选题。

这套题库的多选题共计 713 题，其中 309 题答案为全选，另外 404 题答案为非全选。所以说背完404题就掌握了713题……

提取选项需要扫两遍题库：

（1）第一遍扫描每道题的答案，在每道题需要输出的选项上打上标记。

ans[k][c[20]-'A'+1]=1;//第k题的当前选项是正确

（2）第二遍扫描标记，进行输出。

源代码：

 1 #include<bits/stdc++.h>
 2 using namespace std;
 3 int ans[4000][7]={0};
 4 int k=0; 
 5 char c[21]={0};
 6 
 7 void get_char(){
 8     for (int i=1;i<=19;i++) c[i]=c[i+1];
 9     c[20]=getchar();
10 }
11 
12 int main(){
13     freopen("extract_out.txt","r",stdin);
14     freopen("answer.txt","w",stdout);
15     //提取答案 
16     while (k<3600){
17         while (c[20]!='T') get_char(); 
18         k++;
19         while (c[20]!=':'||c[19]!='S') get_char();
20         get_char();
21         while (c[20]>='A'&&c[20]<='F'){
22             ans[k][c[20]-'A'+1]=1;//第k题的该选项是对的 
23             get_char();
24         }
25     }
26     //提取正确选项 
27     freopen("ansi.txt","r",stdin);
28     k=1;
29     int num=0;
30     while (k<=3600){
31         get_char();
32         if (c[20]>='0'&&c[20]<='9'){
33             num=num*10+c[20]-'0';
34         }//读出题号 
35         else {
36             if (num==k){
37                 cout<<'T'<<num<<':';
38                 while (ans[k][1]||ans[k][2]||ans[k][3]||ans[k][4]||ans[k][5]||ans[k][6]){//已经没有可读选项时退出 
39                     get_char();
40                     if (c[20]>='A'&&c[20]<='F'&&c[19]=='\n'){
41                         if (ans[k][c[20]-'A'+1]){
42                             cout<<c[20];
43                             ans[k][c[20]-'A'+1]=0;//该选项已经提取完毕 
44                             char tc=0;
45                             while (tc!='\n'){
46                                 tc=getchar();
47                                 cout<<tc; 
48                             }//输出选项内容 
49                             c[20]='\n';
50                         }
51                     }
52                 } 
53                 k++;
54             }
55             num=0;
56         }
57     }
58     return 0;
59 }

step 4.选项出现词汇正确率统计

最后一步对所有题目的正确选项进行了提取分析。通过对比各个词汇在正确选项中的出现次数与在所有选项中的出现次数，得出了195个正确率100%的高频词汇和366个正确率大于等于80%的高频词汇。

实现这一步需要用到python的jieba分词了（被迫妥协）。用ANSI编码读入所有选项与所有正确选项，根据所有选项的分词结果建立总词汇表dict，分别统计各词汇在两个文件中的出现次数（cnts为所有选项中的次数，cnt为正确选项中的次数），计算二者之比，排序，输出。

 1 import jieba
 2 fs=open("item.txt","r",encoding="ansi")
 3 txts=fs.read()
 4 words=jieba.lcut(txts)
 5 #建立词汇典
 6 dict=[]
 7 k=0
 8 for wrd in words:
 9     if dict.count(wrd)==0:
10         dict.append(wrd)
11 #统计两个文件出现词汇次数至cnt(s)
12 cnts={}
13 for wrd in dict:
14     cnts[wrd]=cnts.get(wrd, txts.count(wrd,0,len(txts)))
15 f=open("answer.txt","r",encoding="ansi")
16 txt=f.read()
17 cnt={}
18 for wrd in dict:
19     cnt[wrd]=cnt.get(wrd, txt.count(wrd,0,len(txt)))
20 #计算出现率
21 com={}
22 for wrd in cnt.keys():
23     com[wrd]=com.get(wrd, cnt[wrd]/cnts[wrd])
24 items=list(com.items())
25 #排序
26 items.sort(key=lambda x:x[1], reverse=True)
27 for wrd,i in items:
28     if i<0.8:
29         break
30     maxwrd=wrd
31     for wrd2,j in items:
32         if (j==i and cnts[wrd2]>cnts[maxwrd]):
33             maxwrd=wrd2
34         if j<i:
35             break
36     if (cnts[maxwrd]>2 and maxwrd[0]!='T' and (maxwrd[0]<'0' or maxwrd[0]>'9')):
37         print(maxwrd," 出现总次数：",cnts[maxwrd],' 正确几率：',i*100,'%')
38     cnts[maxwrd]=0

如何加速背完一套3600题的题库？

目录

step 1.关键信息提取

源代码：

step 2.选项分布及正确率统计

单选题数据结构：

多选题数据结构：

判断题数据结构：

源代码：

step 3.精华选项提取

源代码：

step 4.选项出现词汇正确率统计