iofile的gnu官网介绍 glibc的官方手册里有

https://sourceware.org/glibc/manual/2.37/html_mono/libc.html#I_002fO-on-Streams

FILE结构在程序执行fopen等函数时会创建并分配到堆中

Q&A

如何查看_IO_FILE_plus? 它只是一个结构体,是对_IO_FILE和vtable指针的封装;_IO_list_all则是一个指向链表头部_IO_FILE_plus的全局指针

每一个iofile都有一个vtable吗? 是的,每一个都有,vtable指针紧跟在_IO_FILE后面(见_IO_FILE_plus的定义)

基础知识

​ FILE是Linux系统中 标准IO库用来描述文件的结构,也叫做流(由于历史原因被定义为了FILE),它在程序执行fopen等函数时进行创建,分配到堆中。通常定义一个指向FILE结构的指针来接收这个返回值,然后进行后续操作

​ FILE结构定义在libio/libio.h中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* Stream control block of the stdio implementation.

   The layout is stitched together by the preprocessor:
   - When _IO_USE_OLD_IO_FILE is defined, struct _IO_FILE ends right after
     _lock, and the remaining fields belong to struct _IO_FILE_complete,
     which embeds the old struct as its first member (_file).
   - Otherwise no second struct is opened and every field below is a
     member of struct _IO_FILE itself.  */
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */

struct _IO_marker *_markers;

/* Link to another stream; _IO_link_in stores the previous _IO_list_all
   head here, so the streams form a singly linked list. */
struct _IO_FILE *_chain;

/* NOTE(review): presumably the underlying OS file descriptor (the stderr
   dump in this document shows _fileno = 2) -- confirm. */
int _fileno;
/* _blksize is permanently compiled out; _flags2 takes its slot. */
#if 0
int _blksize;
#else
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */

#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];

/* char* _save_gptr; char* _save_egptr; */

_IO_lock_t *_lock;
/* Old-layout build: close struct _IO_FILE here and continue the field
   list inside a separate struct that embeds it. */
#ifdef _IO_USE_OLD_IO_FILE
};

struct _IO_FILE_complete
{
struct _IO_FILE _file;
#endif
/* Fields present only for the 0x20001 version of the structure. */
#if defined _G_IO_IO_FILE_VERSION && _G_IO_IO_FILE_VERSION == 0x20001
_IO_off64_t _offset;
# if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
/* Wide character stream stuff. */
struct _IO_codecvt *_codecvt;
struct _IO_wide_data *_wide_data;
struct _IO_FILE *_freeres_list;
void *_freeres_buf;
# else
/* Four pointer-sized placeholders standing in for the four wide-character
   fields of the other branch. */
void *__pad1;
void *__pad2;
void *__pad3;
void *__pad4;
# endif
size_t __pad5;
int _mode;
/* Make sure we don't get into trouble again. */
char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
#endif
};

/* In C, make the bare name _IO_FILE usable as a type; C++ already allows
   the struct tag to be used without the keyword. */
#ifndef __cplusplus
typedef struct _IO_FILE _IO_FILE;
#endif

/* Forward declaration only; the full definition appears elsewhere. */
struct _IO_FILE_plus;

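​ 通常是用的_IO_FILE_complete?不是。从上面的预处理条件可以看出:只有定义了_IO_USE_OLD_IO_FILE时,_lock之后的字段才会被拆到struct _IO_FILE_complete里;未定义时这些字段(_offset、_wide_data、_mode等)直接就是struct _IO_FILE的成员,所以代码里找不到把_IO_FILE_complete定义为_IO_FILE类型的地方。下文gdb里 p *(struct _IO_FILE *)_IO_list_all 能直接打印出_offset、_mode等字段也印证了这一点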

1
2
3
4
5
6
/* A stream object bundled with its jump table: the _IO_FILE comes first
   and is followed by a pointer to a const struct _IO_jump_t
   (struct _IO_jump_t itself is not defined in this excerpt).  */
struct _IO_FILE_plus
{
_IO_FILE file; /* The stream state; being the first member, it starts at the same address as the enclosing object. */
const struct _IO_jump_t *vtable; /* Jump table used for this stream; fopen sets it via _IO_JUMPS (&new_f->fp) = &_IO_file_jumps. */
};

​ 初始时分配三个文件流

1
2
3
_IO_2_1_stderr_
_IO_2_1_stdout_
_IO_2_1_stdin_

p _IO_2_1_stdin_

fopen

1
gdb `find ./glibc-2.23 -type d -printf '-d %p '` ./a.out

​ fopen对应函数__fopen_internal 创建FILE结构,初始化结构,从这里可以看出FILE结构是存储在堆上的

1
69   } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

​ 初始化vtable, 设置_IO_file_jumps表

1
2
3
4
5
6
7
  76 #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
77 _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
78 #else
79 _IO_no_init (&new_f->fp.file, 1, 0, NULL, NULL);
80 #endif
81 _IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
82 _IO_file_init (&new_f->fp);

进一步初始化

1
_IO_file_init (&new_f->fp);

​ 在_IO_file_init 函数的初始化操作中,会调用_IO_link_in 把新分配的 FILE 链入_IO_list_all 为起始的 FILE 链表中

1
2
3
4
5
6
7
8
9
10
11
12
/* Insert FP at the head of the global stream list.

   If FP does not yet carry the _IO_LINKED flag, the flag is set, the
   current list head (_IO_list_all) becomes FP's successor through
   file._chain, FP becomes the new head, and _IO_list_all_stamp is
   incremented.  A stream that is already flagged as linked is left
   untouched, so calling this twice on the same stream is harmless.

   The definition uses a prototype instead of the old-style (K&R)
   parameter declaration, which is obsolescent and no longer valid in C23.  */
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
  /* Already on the list: nothing to do.  */
  if ((fp->file._flags & _IO_LINKED) != 0)
    return;

  fp->file._flags |= _IO_LINKED;
  /* Push at the head: the previous head becomes this stream's successor.  */
  fp->file._chain = (_IO_FILE *) _IO_list_all;
  _IO_list_all = fp;
  ++_IO_list_all_stamp;
}

​ 之后调用下面函数打开文件,一路往后会到open系统调用

1
2
3
4
5
6
if (_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);

_IO_file_open

0x7ffff7a86ad9 <_IO_file_open+137> call open64 <open64>

​ 找变量: info var _IO_list_all

​ 如何查看完整的_IO_list_all的链子呢???可以手动找chain连接起来

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
pwndbg> info var _IO_list_all
All variables matching regular expression "_IO_list_all":

File genops.c:
42: static int _IO_list_all_stamp;

File stdfiles.c:
73: struct _IO_FILE_plus *_IO_list_all;
72: struct _IO_FILE_plus *__GI__IO_list_all

pwndbg> p _IO_list_all
$8 = (struct _IO_FILE_plus *) 0x7ffff7dd2540 <_IO_2_1_stderr_>


pwndbg> p *(struct _IO_FILE *)_IO_list_all
$9 = {
_flags = -72540026,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd2620 <_IO_2_1_stdout_>,
_fileno = 2,
_flags2 = 0,
_old_offset = -1,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x7ffff7dd3770 <_IO_stdfile_2_lock>,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x7ffff7dd1660 <_IO_wide_data_2>,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
}

fread

​ 标准IO库函数、从文件流中读取数据, 函数原型如下:

1
size_t fread ( void *buffer, size_t size, size_t count, FILE *stream) ;
  • buffer:存放读取数据的缓冲区。

  • size:指定每个记录的长度。

  • count: 指定记录的个数。

  • stream:目标文件流。

  • 返回值:返回读取到数据缓冲区中的记录个数

  • 函数代码位于libio/iofread.c中, 函数名是_IO_fread,调到 _IO_sgetn

1
38   bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);

​ 然后到_IO_XSGETN, _IO_XSGETN 是 _IO_FILE_plus.vtable 中的函数指针, 指向 _IO_file_xsgetn

1
2
3
4
5
6
7
8
9
  463 _IO_size_t
464 _IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
465 {
466 /* FIXME handle putback buffer here! */
467 return _IO_XSGETN (fp, data, n);
468 }


0x7ffff7a8871b <_IO_sgetn+11> jmp rax <__GI__IO_file_xsgetn>

​ _IO_file_xsgetn

1
08:0040│ 0x7ffff7dd0720 (_IO_file_jumps+64) —▸ 0x7ffff7a85ed0 (__GI__IO_file_xsgetn) ◂— push r14

​ 检查想要的字节数want是否小于缓冲区大小(_IO_buf_end - _IO_buf_base): 若小于, 则调用__underflow填充缓冲区后继续拷贝; 否则直接通过_IO_SYSREAD读入用户缓冲区

1
2
3
4
5
6
7
8
9
10
11
1408              /* If we now want less than a buffer, underflow and repeat
1409 the copy. Otherwise, _IO_SYSREAD directly to
1410 the user buffer. */
1411 if (fp->_IO_buf_base
1412 && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
1413 {
1414 if (__underflow (fp) == EOF)
1415 break;
1416
1417 continue;
1418 }

fwrite

​ 向文件流中写入数据, 函数原型如下

1
size_t fwrite(const void* buffer, size_t size, size_t count, FILE* stream);
  • buffer: 是一个指针,对 fwrite 来说,是要写入数据的地址;
  • size: 每个数据项的字节数;
  • count: 要写入的数据项(每项 size 字节)的个数;
  • stream: 目标文件指针;
  • 返回值:实际成功写入的数据项个数(出错时可能小于 count)。

​ 代码位于libio/iofwrite.c,函数名是_IO_fwrite, 这里面主要是调用 _IO_XSPUTN 实现写入功能

1
2
3
4
5
   38   if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
39 written = _IO_sputn (fp, (const char *) buf, request);


0x7ffff7a7b7c8 <fwrite+216> call qword ptr [rax + 0x38] <_IO_file_xsputn>

​ 然后_IO_new_file_xsputn中又会调用 _IO_OVERFLOW ,对应 _IO_new_file_overflow, 最终会走到write系统调用那

1
2
851     return _IO_do_write (f, f->_IO_write_base,
852 f->_IO_write_ptr - f->_IO_write_base);

fclose

​ 标准IO库中用于关闭已经打开文件的函数,作用与fopen相反,

​ 函数原型: int fclose(FILE *stream)

​ 功能:关闭一个文件流,使用 fclose 就可以把缓冲区内最后剩余的数据输出到磁盘文件中,并释放文件指针和有关的缓冲区

换句话说,调用fclose的时候会调用很多处理函数, 可以被我们利用

​ 首先调用_IO_un_link函数将指定的iofile进行脱链(从以_IO_list_all为头、通过_chain连接的链表中摘除)

1
54     _IO_un_link ((struct _IO_FILE_plus *) fp);

​ 然后调用_IO_file_close_it函数, 进而调用系统调用close关闭文件

1
58     status = _IO_file_close_it (fp);

​ 最后调用vtable中的 ► 62 _IO_FINISH (fp);,对应的是_IO_file_finish 函数,它负责收尾并释放流的缓冲区;FILE结构本身则是在fclose末尾通过free(fp)释放的(stdin/stdout/stderr除外)

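(所以vtable劫持的原理就出来了) 哪出来了? ——fread/fwrite/fclose都不是直接调用具体函数, 而是通过FILE后面的vtable指针间接调用(如 call qword ptr [rax + 0x38]、_IO_XSGETN、_IO_FINISH)。因此只要能伪造FILE结构或改写其vtable指针, 让它指向攻击者可控的函数表, 这些间接调用就会跳转到攻击者指定的地址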

问题

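_IO_OVERFLOW 这个东西是不是宏呢? 还是啥,怎么对应vtable里面的函数的呢 ——是宏。它定义在libio/libioP.h中: #define _IO_OVERFLOW(FP, CH) JUMP1 (__overflow, FP, CH), 展开后就是从FP对应的vtable(struct _IO_jump_t)中取出__overflow这个函数指针并以(FP, CH)为参数调用; 对普通文件流, 该指针在_IO_file_jumps里指向_IO_new_file_overflow。_IO_XSGETN、_IO_XSPUTN、_IO_FINISH同理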