文件系統源碼分析之普通文件讀寫
文件讀寫主要是通過inode結構里的數據,讀取或者寫入到底層的硬碟中,並更新相應的屬性。
/*
* linux/fs/read_write.c
*
* (C) 1991 Linus Torvalds
*/
#include <sys/stat.h>
#include <errno.h>
#include <sys/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/segment.h>
/*
 * Prototypes for the per-file-type I/O handlers that sys_read() and
 * sys_write() below dispatch to.
 */
/* Character-device transfer; called with READ or WRITE as `rw`. */
extern int rw_char(int rw,int dev, char * buf, int count, off_t * pos);
/* Pipe handlers; chosen when the inode's i_pipe flag is set. */
extern int read_pipe(struct m_inode * inode, char * buf, int count);
extern int write_pipe(struct m_inode * inode, char * buf, int count);
/* Block-device handlers; chosen when S_ISBLK(inode->i_mode) holds. */
extern int block_read(int dev, off_t * pos, char * buf, int count);
extern int block_write(int dev, off_t * pos, char * buf, int count);
/* Regular-file (and, for reads, directory) handlers. */
extern int file_read(struct m_inode * inode, struct file * filp,
char * buf, int count);
extern int file_write(struct m_inode * inode, struct file * filp,
char * buf, int count);
/*
 * sys_lseek - reposition the file offset of descriptor `fd`.
 *
 * origin selects the base the offset is applied to:
 *   0 - absolute position (offset must not be negative)
 *   1 - relative to the current position (offset may be negative, but the
 *       resulting position must not be)
 *   2 - relative to the inode's i_size (result must not be negative)
 *
 * Returns the new file position on success, -EBADF when the descriptor is
 * out of range, unused, has no inode, or its device is not seekable,
 * -ESPIPE for pipes, and -EINVAL for a bad origin or a negative result.
 */
int sys_lseek(unsigned int fd,off_t offset, int origin)
{
	struct file * filp;
	int target;

	if (fd >= NR_OPEN)
		return -EBADF;
	filp = current->filp[fd];
	if (!filp || !(filp->f_inode))
		return -EBADF;
	if (!IS_SEEKABLE(MAJOR(filp->f_inode->i_dev)))
		return -EBADF;
	if (filp->f_inode->i_pipe)
		return -ESPIPE;

	/* 0, 1 and 2 correspond to SEEK_SET, SEEK_CUR and SEEK_END. */
	if (origin == 0) {
		if (offset < 0)
			return -EINVAL;
		filp->f_pos = offset;
	} else if (origin == 1) {
		/* A negative offset is fine as long as it does not move before 0. */
		if (filp->f_pos + offset < 0)
			return -EINVAL;
		filp->f_pos += offset;
	} else if (origin == 2) {
		target = filp->f_inode->i_size + offset;
		if (target < 0)
			return -EINVAL;
		filp->f_pos = target;
	} else {
		return -EINVAL;
	}
	return filp->f_pos;
}
/*
 * sys_read - read up to `count` bytes from descriptor `fd` into `buf`.
 *
 * The request is dispatched on the kind of inode behind the descriptor:
 *   - pipe (i_pipe set):  read_pipe(), but only when bit 0 of f_mode is set
 *                         (the descriptor is the read end); otherwise -EIO
 *   - character device:   rw_char(READ, ...)
 *   - block device:       block_read()
 *   - directory or regular file: file_read(), after `count` has been
 *                         clamped to the bytes left before i_size
 *
 * Returns the handler's result; 0 when `count` is 0 or nothing is left to
 * read; -EINVAL for an out-of-range or unused descriptor, a negative
 * count, or an inode type none of the branches handle.
 */
int sys_read(unsigned int fd,char * buf,int count)
{
	struct file * file;
	struct m_inode * inode;

	if (fd>=NR_OPEN || count<0 || !(file=current->filp[fd]))
		return -EINVAL;
	if (!count)
		return 0;
	verify_area(buf,count);
	inode = file->f_inode;
	/* Pipe: only the read end may read from it. */
	if (inode->i_pipe)
		return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO;
	if (S_ISCHR(inode->i_mode))
		return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos);
	if (S_ISBLK(inode->i_mode))
		return block_read(inode->i_zone[0],&file->f_pos,buf,count);
	if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) {
		/* Never read past the end of the file. */
		if (count+file->f_pos > inode->i_size)
			count = inode->i_size - file->f_pos;
		/* Already at (or beyond) the end: nothing to read. */
		if (count<=0)
			return 0;
		return file_read(inode,file,buf,count);
	}
	/*
	 * The line break in the message is written as an escape sequence; a raw
	 * line break inside a string literal is not valid C.
	 */
	printk("(Read)inode->i_mode=%06o\n",inode->i_mode);
	return -EINVAL;
}
/*
 * sys_write - write up to `count` bytes from `buf` to descriptor `fd`.
 *
 * The request is dispatched on the kind of inode behind the descriptor:
 *   - pipe (i_pipe set):  write_pipe(), but only when bit 1 of f_mode is
 *                         set; otherwise -EIO
 *   - character device:   rw_char(WRITE, ...)
 *   - block device:       block_write()
 *   - regular file:       file_write()
 *
 * Returns the handler's result; 0 when `count` is 0; -EINVAL for an
 * out-of-range or unused descriptor, a negative count, or an inode type
 * none of the branches handle (directories included).
 */
int sys_write(unsigned int fd,char * buf,int count)
{
	struct file * file;
	struct m_inode * inode;

	if (fd>=NR_OPEN || count <0 || !(file=current->filp[fd]))
		return -EINVAL;
	if (!count)
		return 0;
	inode=file->f_inode;
	/* Pipe: only a descriptor opened for writing may write to it. */
	if (inode->i_pipe)
		return (file->f_mode&2)?write_pipe(inode,buf,count):-EIO;
	if (S_ISCHR(inode->i_mode))
		return rw_char(WRITE,inode->i_zone[0],buf,count,&file->f_pos);
	if (S_ISBLK(inode->i_mode))
		return block_write(inode->i_zone[0],&file->f_pos,buf,count);
	if (S_ISREG(inode->i_mode))
		return file_write(inode,file,buf,count);
	/*
	 * The line break in the message is written as an escape sequence; a raw
	 * line break inside a string literal is not valid C.
	 */
	printk("(Write)inode->i_mode=%06o\n",inode->i_mode);
	return -EINVAL;
}
/*
* linux/fs/file_dev.c
*
* (C) 1991 Linus Torvalds
*/
#include <errno.h>
#include <fcntl.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/segment.h>
/*
 * Smaller / larger of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
int file_read(struct m_inode * inode, struct file * filp, char * buf, int count)
{
int left,chars,nr;
struct buffer_head * bh;
if ((left=count)<=0)
return 0;
while (left) {
// bmap取得該文件偏移對應的硬碟塊號,然後讀進來
if (nr = bmap(inode,(filp->f_pos)/BLOCK_SIZE)) {
if (!(bh=bread(inode->i_dev,nr)))
break;
} else
bh = NULL;
// 偏移
nr = filp->f_pos % BLOCK_SIZE;
// 讀進來的數據中,可讀的長度和還需要讀的長度,取小的,如果還沒讀完繼續把塊從硬碟讀進來
chars = MIN( BLOCK_SIZE-nr , left );
filp->f_pos += chars; // 更新偏移指針
left -= chars; // 更新還需葯讀取的長度
if (bh) {
char * p = nr + bh->b_data;
while (chars-->0)
put_fs_byte(*(p++),buf++); //複製到buf里
brelse(bh);
} else {
while (chars-->0)
put_fs_byte(0,buf++);
}
}
// 更新訪問時間
inode->i_atime = CURRENT_TIME;
// 返回讀取的長度,如果一個都沒讀則返回錯誤
return (count-left)?(count-left):-ERROR;
}
/*
 * file_write - copy up to `count` bytes from `buf` into the file.
 *
 * Writing starts at the inode's i_size when O_APPEND is set in
 * filp->f_flags, otherwise at filp->f_pos. For every block touched,
 * create_block() supplies the block number (the loop stops if it returns
 * 0), bread() fetches the block, the buffer is flagged dirty and the bytes
 * are copied in with get_fs_byte(). i_size grows, and the inode is flagged
 * dirty, whenever the write position moves past the current size.
 *
 * On the way out i_mtime is refreshed; f_pos and i_ctime are updated only
 * when O_APPEND is not set. Returns the number of bytes written, or -1
 * when none were.
 */
int file_write(struct m_inode * inode, struct file * filp, char * buf, int count)
{
	struct buffer_head * blk;
	char * dst;
	off_t pos;
	int zone, offset, span;
	int written = 0;

/*
 * ok, append may not work when many processes are writing at the same time
 * but so what. That way leads to madness anyway.
 */
	/* In append mode start at the current end of the file. */
	pos = filp->f_pos;
	if (filp->f_flags & O_APPEND)
		pos = inode->i_size;

	while (written < count) {
		/* Get the block for this offset; stop if none is available. */
		zone = create_block(inode, pos / BLOCK_SIZE);
		if (!zone)
			break;
		blk = bread(inode->i_dev, zone);
		if (!blk)
			break;

		offset = pos % BLOCK_SIZE;
		dst = blk->b_data + offset;	/* where the new data lands */
		blk->b_dirt = 1;		/* buffer has to be written back */

		/* Room left in this block, capped by what is left to write. */
		span = BLOCK_SIZE - offset;
		if (span > count - written)
			span = count - written;

		pos += span;
		/* Writing past the old end grows the file. */
		if (pos > inode->i_size) {
			inode->i_size = pos;
			inode->i_dirt = 1;
		}
		written += span;

		for (; span > 0; span--)
			*(dst++) = get_fs_byte(buf++);
		brelse(blk);
	}

	inode->i_mtime = CURRENT_TIME;
	if (!(filp->f_flags & O_APPEND)) {
		filp->f_pos = pos;
		inode->i_ctime = CURRENT_TIME;
	}

	if (written)
		return written;
	return -1;
}
static void cp_stat(struct m_inode * inode, struct stat * statbuf)
{
struct stat tmp;
int i;
verify_area(statbuf,sizeof (* statbuf));
tmp.st_dev = inode->i_dev;
tmp.st_ino = inode->i_num;
tmp.st_mode = inode->i_mode;
tmp.st_nlink = inode->i_nlinks;
tmp.st_uid = inode->i_uid;
tmp.st_gid = inode->i_gid;
tmp.st_rdev = inode->i_zone[0];
tmp.st_size = inode->i_size;
tmp.st_atime = inode->i_atime;
tmp.st_mtime = inode->i_mtime;
tmp.st_ctime = inode->i_ctime;
for (i=0 ; i<sizeof (tmp) ; i++)
put_fs_byte(((char *) &tmp)[i],&((char *) statbuf)[i]);
}
/*
 * sys_stat - report the attributes of the file named `filename`.
 *
 * Looks the name up with namei(), copies the inode's attributes to
 * `statbuf` through cp_stat(), and releases the inode with iput().
 * Returns 0 on success or -ENOENT when the lookup yields no inode.
 */
int sys_stat(char * filename, struct stat * statbuf)
{
	struct m_inode * inode = namei(filename);

	if (!inode)
		return -ENOENT;

	cp_stat(inode, statbuf);
	iput(inode);
	return 0;
}
/*
 * sys_fstat - report the attributes of the file behind descriptor `fd`.
 *
 * The descriptor is resolved to its inode through the current task's file
 * table, and the attributes are copied to `statbuf` with cp_stat().
 * Returns 0 on success or -EBADF when the descriptor is out of range,
 * unused, or has no inode attached.
 */
int sys_fstat(unsigned int fd, struct stat * statbuf)
{
	struct file * filp;
	struct m_inode * inode;

	if (fd >= NR_OPEN)
		return -EBADF;
	filp = current->filp[fd];
	if (!filp)
		return -EBADF;
	inode = filp->f_inode;
	if (!inode)
		return -EBADF;

	cp_stat(inode, statbuf);
	return 0;
}