59. 为已排序的大数据文件建立索引文件,并借助索引实现大文件的二分查找(百万行数据可秒级读取)

  • 建立索引
    /*
     * Line index for the sorted data file.
     *
     * The code in this file fills pindex so that pindex[i] is the byte offset
     * at which line i starts, and sets length to N (the number of entries).
     * Offsets are plain int, so data files larger than INT_MAX bytes cannot be
     * indexed with this layout.
     */
    struct index
    {
        /* pindex[i]: byte offset of the first character of line i. */
        int *pindex;
        /* Number of entries stored in pindex (not a byte length). */
        int length;
    } allindex; /* the single, global index instance */

     

  • 初始化索引数组并把索引写入到文件
     1 //初始化索引数组,并把索引写入到文件
     2 void init(char *path)  3 {  4     printf("\n索引数组开始分配");  5     allindex.length = N;  6     //分配内存
     7     allindex.pindex = calloc(N, sizeof(int));  8     printf("\n索引数组完成分配");  9 
    10     printf("\n开始读取"); 11     //二进制读取文件 避免/r/n读取成/n
    12     FILE *pf = fopen("filesort.txt", "rb"); 13     if (pf == NULL) 14  { 15         return -1; 16  } 17     else
    18  { 19         int alllength = 0; 20         for (int i = 0; i < N; i++) 21  { 22             char str[50] = { 0 }; 23             fgets(str, 50, pf); 24             //每个首地址的偏移
    25             allindex.pindex[i] = alllength; 26 
    27             int length = strlen(str); 28             alllength += length; 29  } 30  fclose(pf); 31  } 32     printf("\n结束读取"); 33 
    34     printf("\n开始写入"); 35     //二进制方式打开文件,并写入索引
    36     FILE *pfw = fopen("index.txt", "wb"); 37     //写入
    38     fwrite(allindex.pindex, sizeof(int), allindex.length, pfw); 39     //关闭文件
    40  fclose(pfw); 41     printf("\n结束写入"); 42 
    43     //释放内存
    44  free(allindex.pindex); 45 }

     

  • 从文件中读取索引到索引数组中
     1 //从文件中读取索引
     2 void qucik()  3 {  4     printf("\n索引数组开始分配");  5     allindex.length = N;  6     allindex.pindex = calloc(N, sizeof(int));//分配内存
     7     printf("\n索引数组完成分配");  8 
     9 
    10     printf("\n开始读取"); 11     //以二进制读的方式读取索引
    12     FILE *pfw = fopen("index.txt", "rb"); 13     //读取
    14     fread(allindex.pindex, sizeof(int), allindex.length, pfw); 15     //关闭文件
    16  fclose(pfw); 17     printf("\n结束读取"); 18 }

     

  • 测试函数:输入行号,通过索引文件读取对应的行
     1   FILE *pf1 = fopen("index.txt", "rb");  2     FILE *pf2 = fopen("filesort.txt", "rb");  3     while (1)  4  {  5         printf("\n请输入要读取的行数");  6         int num = 0;  7         scanf("%d", &num);  8 
     9         int indexnum = 0; 10         fseek(pf1, num*sizeof(int), SEEK_SET); 11         fread(&indexnum, sizeof(int), 1, pf1);//读索引到indexnum
    12 
    13  fseek(pf2, indexnum, SEEK_SET); 14         char str[128] = { 0 }; 15         fgets(str, 128, pf2);//读取
    16         printf("\n%s", str); 17 
    18  } 19  fclose(pf1); 20     fclose(pf2);

     

     

  • 根据索引文件对已经排序好的文件进行二分查找
     1 void binsearch(char *searchstr)  2 {  3     //头部
     4     int tou = 0;  5     //尾部
     6     int wei = N - 1;  7     //是否找到的标识
     8     int flag = 0;  9     //若是头小于尾
    10     while (tou <= wei) 11  { 12         //获取中部
    13         int zhong = (tou + wei) / 2; 14         //读取中部索引的内容
    15         char zhongstr[256] = { 0 }; 16  { 17             //打开索引文件
    18             FILE *pf1 = fopen("index.txt", "rb"); 19             //打开排序好的文件
    20             FILE *pf2 = fopen("filesort.txt", "rb"); 21 
    22             //读zhong对应的地址存到indexnum中
    23             int indexnum = 0; 24             fseek(pf1, zhong * sizeof(int), SEEK_SET); 25             fread(&indexnum, sizeof(int), 1, pf1); 26 
    27             //根据读取的位置读取文件到zhongstr中
    28  fseek(pf2, indexnum, SEEK_SET); 29             fgets(zhongstr, 128, pf2); 30 
    31  fclose(pf1); 32  fclose(pf2); 33  } 34         //消除'\r或者\n'
    35  eatN(zhongstr); 36         char pnewzhongstr[256] = { 0 }; 37  sprintf(pnewzhongstr, zhongstr); 38         //进行处理,遇到-终止
    39  eatg(pnewzhongstr); 40         //比较是否找到
    41         int res = strcmp(pnewzhongstr, searchstr);//1 0 -1
    42         if (res == 0) 43  { 44             flag = 1; 45             printf("%s", zhongstr); 46             break; 47  } 48         //若是中比searchstr要大
    49         else if (res == 1) 50  { 51             wei = zhong - 1; 52  } 53         //若是中比searchstr小
    54         else
    55  { 56             tou = zhong + 1; 57  } 58 
    59 
    60  } 61     //判断是否找到
    62     if (flag) 63  { 64         printf("\nfind"); 65  } 66     else
    67  { 68         printf("\n not find"); 69  } 70 }

     

  • 遇到'-'结束
     /*
      * Cut a string at '-'.
      *
      * Walks str up to its original terminator and overwrites every '-' with
      * '\0', so the string seen by callers ends just before the first '-'.
      * A string without '-' is left unchanged. A null pointer is ignored.
      *
      * str: NUL-terminated, writable buffer; modified in place.
      */
     void eatg(char *str)
     {
         if (!str)
         {
             return;
         }
         for (; *str != '\0'; str++)
         {
             if (*str == '-')
             {
                 *str = '\0';
             }
         }
     }

     

  • 测试函数:读取关键字并调用二分查找
     /*
      * Entry point: read one whitespace-delimited key from stdin and look it up
      * with binsearch().
      */
     int main(void)
     {
         char str[256] = { 0 };

         /* The width limit keeps the input inside the buffer (255 characters
            plus the terminator). Search only if a key was actually read. */
         if (scanf("%255s", str) == 1)
         {
             binsearch(str);
         }

         /* Keeps the console window open; "pause" is a Windows shell command. */
         system("pause");
         return 0;
     }
相关文章
相关标签/搜索