readdir函数解析

函数原型:

struct dirent *readdir(DIR *dirp);

首先纠正一个很多人都理解错误的事实:这里的readdir不是系统调用,而是glibc的封装函数;不过readdir系统调用确实是存在的,原型如下:

int readdir(unsigned int fd, struct old_linux_dirent *dirp, unsigned int count);

glibc的readdir所调用的系统调用不是readdir而是getdents。此处说明一下为什么封装的是getdents而不是readdir系统调用:最重要的一个理由是readdir系统调用每次只会读入一个目录项,而getdents会一次读入尽量多的目录项至缓冲区。我先分析readdir系统调用的实现,具体的代码如下:

复制代码

 /* Legacy readdir system call: runs iterate_dir() on the file behind fd with
  * fillonedir as the fill callback and the user buffer `dirent` as target.
  * Returns -EBADF when fd has no open file; otherwise buf.result if it is
  * non-zero, else the value returned by iterate_dir().
  * The `count` argument is never read in this function. */
 1 SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
 2                 struct old_linux_dirent __user *, dirent, unsigned int, count)
 3 {
 4         int error;
 5         struct fd f = fdget(fd);        /* released by fdput() below */
 6         struct readdir_callback buf = {
 7                 .ctx.actor = fillonedir,        /* callback handed to iterate_dir() via buf.ctx */
 8                 .dirent = dirent                /* caller-supplied user buffer */
 9         };
10 
11         if (!f.file)
12                 return -EBADF;
13 
14         error = iterate_dir(f.file, &buf.ctx);
15         if (buf.result)         /* a non-zero buf.result overrides iterate_dir()'s return value */
16                 error = buf.result;
17 
18         fdput(f);
19         return error;
20 }

复制代码

6-9行:设置目录项填充函数为fillonedir,fillonedir的具体实现不分析,只需知道每次只填充一个目录项即可(作为替代,后面将会分析更加复杂的filldir函数)

14行:iterate_dir是vfs的封装函数,该函数调用具体文件系统的iterate函数填充目录

    注:3.11之前的内核并不使用iterate作为读目录的函数,而是使用readdir函数

总结:readdir系统调用忽略了count参数,而且每次只读一个目录项。

接下来分析glibc的readdir函数实现,这个过程可能比较复杂,有兴趣的可以看看,首先给出readdir的实现:

复制代码

 /* Return the next directory entry buffered in dirp.  When dirp->offset has
  * reached dirp->size the buffer is refilled with __GETDENTS.  Entries whose
  * d_ino is 0 are skipped.  Returns NULL when __GETDENTS returns <= 0; if
  * that is treated as EOF (0 bytes, or failure with ENOENT) errno is restored
  * to the value it had on entry. */
 1 DIRENT_TYPE *
 2 __READDIR (DIR *dirp)
 3 {
 4   DIRENT_TYPE *dp;
 5   int saved_errno = errno;  /* restored on EOF, see below */
 6 
 7 #ifndef NOT_IN_libc
 8   __libc_lock_lock (dirp->lock);
 9 #endif
10 
11   do
12     {
13       size_t reclen;
14 
15       if (dirp->offset >= dirp->size) 
16         {
17           /* We've emptied out our buffer.  Refill it.  */
18 
19           size_t maxread;
20           ssize_t bytes;
21 
22 #ifndef _DIRENT_HAVE_D_RECLEN
23           /* Fixed-size struct; must read one at a time (see below).  */
24           maxread = sizeof *dp;
25 #else
26           maxread = dirp->allocation;
27 #endif
28 
29           bytes = __GETDENTS (dirp->fd, dirp->data, maxread);
30           if (bytes <= 0)
31             {
32               /* On some systems getdents fails with ENOENT when the
33                  open directory has been rmdir'd already.  POSIX.1
34                  requires that we treat this condition like normal EOF.  */
35               if (bytes < 0 && errno == ENOENT)
36                 bytes = 0;
37 
38               /* Don't modify errno when reaching EOF.  */
39               if (bytes == 0)
40                 __set_errno (saved_errno);
41       dp = NULL;  /* nothing to return: EOF or error */
42               break;  /* leaves the loop without evaluating the while condition */
43             }
44           dirp->size = (size_t) bytes;
45 
46           /* Reset the offset into the buffer.  */
47           dirp->offset = 0;
48         }
49 
50       dp = (DIRENT_TYPE *) &dirp->data[dirp->offset];  /* next unread record in the buffer */
51 
52 #ifdef _DIRENT_HAVE_D_RECLEN
53       reclen = dp->d_reclen;
54 #else
55       /* The only version of `struct dirent*' that lacks `d_reclen'
56          is fixed-size.  */
57       assert (sizeof dp->d_name > 1);
58       reclen = sizeof *dp;
59       /* The name is not terminated if it is the largest possible size.
60          Clobber the following byte to ensure proper null termination.  We
61          read just one entry at a time above so we know that byte will not
62          be used later.  */
63       dp->d_name[sizeof dp->d_name] = '\0';
64 #endif
65 
66       dirp->offset += reclen;  /* advance past the record just taken */
67 
68 #ifdef _DIRENT_HAVE_D_OFF
69       dirp->filepos = dp->d_off;
70 #else
71       dirp->filepos += reclen;
72 #endif
73 
74       /* Skip deleted files.  */
75     } while (dp->d_ino == 0);
76  #ifndef NOT_IN_libc
77   __libc_lock_unlock (dirp->lock);
78 #endif
79 
80   return dp;
81 }

复制代码

7-9行:对互斥量dirp->lock加锁

11-75行:一个do while循环,该循环用于过滤已经删除的目录项

15-48行:具体的读目录项代码,调用getdents系统调用尽量多的读入目录项至dirp->data缓冲区

总结:代码并不是特别复杂,自己阅读应该可以理解。readdir函数的逻辑是:先分配一个缓冲区,然后每次尽量多地读取目录项至缓冲区,之后从缓冲区中逐项读取,读完了再继续调用getdents读目录项至缓冲区

接下来分析最重要的getdents系统调用,代码如下:

复制代码

 /* getdents system call: after access_ok() accepts the user range
  * (dirent, count) for writing, lets iterate_dir() fill it through filldir.
  * Returns -EFAULT if access_ok() rejects the buffer or the final put_user()
  * fails, and -EBADF if fd has no open file.  If buf.previous is non-NULL
  * after iteration, returns count - buf.count; otherwise returns
  * iterate_dir()'s negative error, or buf.error when iterate_dir() returned
  * >= 0. */
 1 SYSCALL_DEFINE3(getdents, unsigned int, fd,
 2                 struct linux_dirent __user *, dirent, unsigned int, count)
 3 {
 4         struct fd f;
 5         struct linux_dirent __user * lastdirent;
 6         struct getdents_callback buf = {        /* members not named here start zeroed */
 7                 .ctx.actor = filldir,
 8                 .count = count,                 /* space remaining in the user buffer */
 9                 .current_dir = dirent           /* where the next record will be written */
10         };
11         int error;
12 
13         if (!access_ok(VERIFY_WRITE, dirent, count))
14                 return -EFAULT;
15 
16         f = fdget(fd);
17         if (!f.file)
18                 return -EBADF;
19 
20         error = iterate_dir(f.file, &buf.ctx);
21         if (error >= 0)
22                 error = buf.error;
23         lastdirent = buf.previous;      /* non-NULL once filldir has written a record */
24         if (lastdirent) {
25                 if (put_user(buf.ctx.pos, &lastdirent->d_off))  /* last record's d_off = current position */
26                         error = -EFAULT;
27                 else
28                         error = count - buf.count;      /* filldir subtracts each record length from buf.count */
29         }
30         fdput(f);
31         return error;
32 }

复制代码

6-9行:设置填充函数为filldir,等会分析该函数

20行:调用iterate_dir函数,该函数会调用具体文件系统中的iterate函数,接下来作为例子给出PFS的实现(PFS是本人设计的一个文件系统,对PFS的linux driver有兴趣的可以去https://sourceforge.net/projects/pfspfs看看)

总结:在分析了iterate和filldir之后,再回头分析getdents系统调用

iterate源码如下:(此处采用pfs的实现)

复制代码

 /* Directory iterate callback for PFS: walks the directory from ctx->pos up
  * to inode->i_size one block at a time and passes every entry with a
  * non-zero d_ino to dir_emit().  Always returns 0; it returns early, without
  * advancing ctx->pos past the current entry, when dir_emit() returns 0.
  * Blocks that cannot be mapped or read are skipped. */
 1 static int
 2 pfs_readdir(struct file *file, struct dir_context *ctx)
 3 {
 4         int64_t dno;
 5         unsigned long off;      /* byte offset of ctx->pos inside the current block */
 6         struct buffer_head *bh;
 7         struct pfs_dir_entry *de;
 8         struct inode *inode = file_inode(file);
 9 
10         if(ctx->pos == 0)       /* first call: presumably jumps over an on-disk hash header -- TODO confirm */
11                 ctx->pos = PFS_DIRHASHSIZ * sizeof(int64_t) + sizeof(int64_t);
12         for(off = ctx->pos & (PFS_BLOCKSIZ - 1); ctx->pos < inode->i_size; off = ctx->pos & (PFS_BLOCKSIZ - 1)){     /* mask assumes PFS_BLOCKSIZ is a power of two */
13                 if(!(dno = pfs_get_block_number(inode, pfs_block_number(ctx->pos), 0)))
14                         goto skip;
15                 if(!(bh = sb_bread(inode->i_sb, dno / PFS_STRS_PER_BLOCK))){
16                         pr_err("pfs: device %s: %s: failed to read block %lld of dir %lld\n",
17                                 inode->i_sb->s_id, "pfs_readdir", pfs_block_number(ctx->pos), PFS_I(inode)->i_ino);
18                         goto skip;
19                 }
20                 do{
21                         de = (struct pfs_dir_entry *)((char *)bh->b_data + off);
22                         if(de->d_ino){  /* entries with d_ino == 0 are not emitted */
23                                 if(!(dir_emit(ctx, pfs_get_de_name(de), de->d_len, (int32_t)le64_to_cpu(de->d_ino), DT_UNKNOWN))){      /* NOTE(review): the int32_t cast narrows the 64-bit on-disk value -- confirm intended */
24                                         brelse(bh);
25                                         return 0;       /* ctx->pos still points at this entry */
26                                 }
27                         }
28                         off += pfs_get_de_size(de);     /* NOTE(review): a size of 0 would keep this loop from advancing -- confirm it cannot happen */
29                         ctx->pos += pfs_get_de_size(de);
30                 }while(off < PFS_BLOCKSIZ && ctx->pos < inode->i_size);
31                 brelse(bh);
32                 continue;
33 skip:
34                 ctx->pos += PFS_BLOCKSIZ - off; /* move ctx->pos to the start of the next block */
35         }
36         return 0;
37 }

复制代码

12-30行:完整地分析这段代码可能需要读者熟悉linux内核,因此此处不给出代码的具体分析,只给出代码的逻辑:pfs_readdir不断地读目录项,然后调用dir_emit填充目录项,直到dir_emit调用失败。dir_emit是一个封装函数,实现为filldir() == 0,因此在filldir成功时dir_emit返回1,在失败时返回0

总结:不同文件系统的目录iterate实现各不相同,不过大体上都差不多,都是读目录项,然后调用dir_emit函数填充至用户空间

filldir函数的代码如下:

复制代码

 /* dir_context actor installed by getdents: writes one linux_dirent record
  * for (name, ino, d_type) at buf->current_dir and stores `offset` into the
  * d_off field of the previously written record, if any.
  * Returns 0 on success, -EINVAL when the record does not fit in buf->count,
  * -EOVERFLOW when ino does not fit in unsigned long, and -EFAULT when a
  * write to user space fails; buf->error holds the same code on failure. */
 1 static int filldir(struct dir_context *ctx, const char *name, int namlen,
 2                    loff_t offset, u64 ino, unsigned int d_type)
 3 {
 4         struct linux_dirent __user * dirent;
 5         struct getdents_callback *buf =
 6                 container_of(ctx, struct getdents_callback, ctx);
 7         unsigned long d_ino;
 8         int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 9                 sizeof(long));  /* +2: the NUL after the name and the trailing d_type byte written below */
10 
11         buf->error = -EINVAL;   /* only used if we fail.. */
12         if (reclen > buf->count)        /* record does not fit in the remaining space */
13                 return -EINVAL;
14         d_ino = ino;
15         if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {      /* value changed when narrowed to unsigned long */
16                 buf->error = -EOVERFLOW;
17                 return -EOVERFLOW;
18         }
19         dirent = buf->previous;
20         if (dirent) {   /* set d_off of the previous record to this entry's offset */
21                 if (__put_user(offset, &dirent->d_off))
22                         goto efault;
23         }
24         dirent = buf->current_dir;
25         if (__put_user(d_ino, &dirent->d_ino))
26                 goto efault;
27         if (__put_user(reclen, &dirent->d_reclen))
28                 goto efault;
29         if (copy_to_user(dirent->d_name, name, namlen))
30                 goto efault;
31         if (__put_user(0, dirent->d_name + namlen))     /* NUL-terminate the name */
32                 goto efault;
33         if (__put_user(d_type, (char __user *) dirent + reclen - 1))    /* d_type goes in the record's last byte */
34                 goto efault;
35       buf->previous = dirent;
36         dirent = (void __user *)dirent + reclen;        /* advance to the slot for the next record */
37         buf->current_dir = dirent;
38         buf->count -= reclen;
39         return 0;
40 efault:
41         buf->error = -EFAULT;
42         return -EFAULT;
43 }

复制代码

函数解释:filldir先把上一个已填充目录项的d_off设置为当前的偏移,然后填充当前的目录项,把buf->previous设置为当前的dirent,最后将buf->current_dir指向下一个可用的空间

总结:最后作为一个贯穿整个过程的示例,给出telldir函数的解释。telldir返回dirp->filepos,而dirp->filepos是在glibc的readdir函数中设置的(见上文readdir实现的68-72行):dirp->filepos = dp->d_off。正如filldir的20到23行所示,dp->d_off是读下一个目录项时的偏移。

相关文章
相关标签/搜索