glibc源码逆向——fopen
最近在学习FSOP,但一直没学得很明白,所以自己逆一下几个常用的函数
测试代码
#include<stdio.h> int main() { FILE*fp=fopen("test","wb"); char *ptr=malloc(0x20); return 0; }
通过分析进入fopen函数查看,首先函数会调用__fopen_internal函数
94 _IO_FILE * 95 _IO_new_fopen (const char *filename, const char *mode) ► 96 { 97 return __fopen_internal (filename, mode, 1); 98 }
进入__fopen_internal函数看看,发现其调用的是这一部分
59 _IO_FILE * 60 __fopen_internal (const char *filename, const char *mode, int is32) 61 { 62 struct locked_FILE 63 { 64 struct _IO_FILE_plus fp; 65 #ifdef _IO_MTSAFE_IO 66 _IO_lock_t lock; 67 #endif 68 struct _IO_wide_data wd; 69 } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE)); 70 71 if (new_f == NULL) 72 return NULL; 73 #ifdef _IO_MTSAFE_IO 74 new_f->fp.file._lock = &new_f->lock; 75 #endif 76 #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T 77 _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps); 78 #else 79 _IO_no_init (&new_f->fp.file, 1, 0, NULL, NULL); 80 #endif 81 _IO_JUMPS (&new_f->fp) = &_IO_file_jumps; 82 _IO_file_init (&new_f->fp); 83 #if !_IO_UNIFIED_JUMPTABLES 84 new_f->fp.vtable = NULL; 85 #endif 86 if (_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32) != NULL) 87 return __fopen_maybe_mmap (&new_f->fp.file); 88 89 _IO_un_link (&new_f->fp); 90 free (new_f); 91 return NULL; 92 }
这个函数首先为结构体locked_FILE分配了一段内存空间,其结构体成员有
64 struct _IO_FILE_plus fp; 65 #ifdef _IO_MTSAFE_IO 66 _IO_lock_t lock; 67 #endif 68 struct _IO_wide_data wd;
从上一篇劫持vtable的文章可以知道,_IO_FILE_plus结构体是非常重要的,它包含了vtable的地址
当分配完空间后,就开始执行
_IO_no_init
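跟进去简单地看了看,发现这是一个初始化函数:它先调用_IO_old_init(fp, flags)做基础初始化,再把_mode设置为orientation,并在orientation >= 0时把_wide_data中的各个缓冲区指针置为NULL、把_wide_vtable设置为传入的jmp,最后将_freeres_list置为NULL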
596 void 597 _IO_no_init (_IO_FILE *fp, int flags, int orientation, 598 struct _IO_wide_data *wd, const struct _IO_jump_t *jmp) 599 { 600 _IO_old_init (fp, flags); 601 fp->_mode = orientation; 602 #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T 603 if (orientation >= 0) 604 { 605 fp->_wide_data = wd; 606 fp->_wide_data->_IO_buf_base = NULL; 607 fp->_wide_data->_IO_buf_end = NULL; 608 fp->_wide_data->_IO_read_base = NULL; 609 fp->_wide_data->_IO_read_ptr = NULL; 610 fp->_wide_data->_IO_read_end = NULL; 611 fp->_wide_data->_IO_write_base = NULL; 612 fp->_wide_data->_IO_write_ptr = NULL; 613 fp->_wide_data->_IO_write_end = NULL; 614 fp->_wide_data->_IO_save_base = NULL; 615 fp->_wide_data->_IO_backup_base = NULL; 616 fp->_wide_data->_IO_save_end = NULL; 617 618 fp->_wide_data->_wide_vtable = jmp; 619 } 620 else 621 /* Cause predictable crash when a wide function is called on a byte 622 stream. */ 623 fp->_wide_data = (struct _IO_wide_data *) -1L; 624 #endif 625 fp->_freeres_list = NULL; 626 }
接着往下看,发现有一个虚表的赋值操作,这里也是我们利用FSOP的基础
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
随后会调用_IO_file_init函数,而它内部又会调用_IO_link_in(这个函数我没有继续跟进去)。我们知道,每个新创建的_IO_FILE_plus结构体都会被链入libc里的一个全局变量_IO_list_all(我们可以通过它遍历程序中所有的IO结构体)。这里_IO_link_in
函数的功能是检查FILE结构体是否包含_IO_LINKED
标志,如果不包含则表示这个结构体没有链接进入_IO_list_all
,则在后面把它链接进入_IO_list_all
链表,同时设置FILE结构体的_chain
字段为之前的链表头(即原先_IO_list_all的值),否则直接返回。
143 _IO_new_file_init (struct _IO_FILE_plus *fp) 144 { 145 /* POSIX.1 allows another file handle to be used to change the position 146 of our file descriptor. Hence we actually don't know the actual 147 position before we do the first fseek (and until a following fflush). */ 148 fp->file._offset = _IO_pos_BAD; 149 fp->file._IO_file_flags |= CLOSED_FILEBUF_FLAGS; 150 151 _IO_link_in (fp); 152 fp->file._fileno = -1; 153 }
所以_IO_file_init
主要功能是将FILE结构体链接进入_IO_list_all
链表,在没执行_IO_file_init
函数前_IO_list_all
指向的是stderr
结构体
在执行后,指向的就是我们新申请的堆空间了
此时,我们查看一下new_f->fp,发现其_chain字段已经指向了stderr
接着就是调用_IO_file_fopen来打开文件了
_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32)
进去查看下这个函数
255 #ifdef _LIBC 256 const char *cs; 257 const char *last_recognized; 258 #endif 259 260 if (_IO_file_is_open (fp)) 261 return 0; 262 switch (*mode) 263 { 264 case 'r': 265 omode = O_RDONLY; 266 read_write = _IO_NO_WRITES; 267 break; 268 case 'w': 269 omode = O_WRONLY; 270 oflags = O_CREAT|O_TRUNC; 271 read_write = _IO_NO_READS; 272 break; 273 case 'a': 274 omode = O_WRONLY; 275 oflags = O_CREAT|O_APPEND; 276 read_write = _IO_NO_READS|_IO_IS_APPENDING; 277 break; 278 default: 279 __set_errno (EINVAL); 280 return NULL; 281 } 282 #ifdef _LIBC 283 last_recognized = mode; 284 #endif 285 for (i = 1; i < 7; ++i) 286 { 287 switch (*++mode) 288 { 289 case '\0': 290 break; 291 case '+': 292 omode = O_RDWR; 293 read_write &= _IO_IS_APPENDING; 294 #ifdef _LIBC 295 last_recognized = mode; 296 #endif 297 continue; 298 case 'x': 299 oflags |= O_EXCL; 300 #ifdef _LIBC 301 last_recognized = mode; 302 #endif 303 continue; 304 case 'b': 305 #ifdef _LIBC 306 last_recognized = mode; 307 #endif 308 continue; 309 case 'm': 310 fp->_flags2 |= _IO_FLAGS2_MMAP; 311 continue; 312 case 'c': 313 fp->_flags2 |= _IO_FLAGS2_NOTCANCEL; 314 continue; 315 case 'e': 316 #ifdef O_CLOEXEC 317 oflags |= O_CLOEXEC; 318 #endif 319 fp->_flags2 |= _IO_FLAGS2_CLOEXEC; 320 continue; 321 default: 322 /* Ignore. */ 323 continue; 324 } 325 break; 326 } 327 328 result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write, 329 is32not64); 330 331 if (result != NULL) 332 { 333 #ifndef __ASSUME_O_CLOEXEC 334 if ((fp->_flags2 & _IO_FLAGS2_CLOEXEC) != 0 && __have_o_cloexec <= 0) 335 { 336 int fd = _IO_fileno (fp); 337 if (__have_o_cloexec == 0) 338 { 339 int flags = __fcntl (fd, F_GETFD); 340 __have_o_cloexec = (flags & FD_CLOEXEC) == 0 ? -1 : 1; 341 } 342 if (__have_o_cloexec < 0) 343 __fcntl (fd, F_SETFD, FD_CLOEXEC); 344 } 345 #endif 346 347 /* Test whether the mode string specifies the conversion. */ 348 cs = strstr (last_recognized + 1, ",ccs="); 349 if (cs != NULL) 350 { 351 /* Yep. 
Load the appropriate conversions and set the orientation 352 to wide. */ 353 struct gconv_fcts fcts; 354 struct _IO_codecvt *cc; 355 char *endp = __strchrnul (cs + 5, ','); 356 char *ccs = malloc (endp - (cs + 5) + 3); 357 358 if (ccs == NULL) 359 { 360 int malloc_err = errno; /* Whatever malloc failed with. */ 361 (void) _IO_file_close_it (fp); 362 __set_errno (malloc_err); 363 return NULL; 364 } 365 366 *((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0'; 367 strip (ccs, ccs); 368 369 if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0' 370 ? upstr (ccs, cs + 5) : ccs) != 0) 371 { 372 /* Something went wrong, we cannot load the conversion modules. 373 This means we cannot proceed since the user explicitly asked 374 for these. */ 375 (void) _IO_file_close_it (fp); 376 free (ccs); 377 __set_errno (EINVAL); 378 return NULL; 379 } 380 381 free (ccs); 382 383 assert (fcts.towc_nsteps == 1); 384 assert (fcts.tomb_nsteps == 1); 385 386 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; 387 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base; 388 389 /* Clear the state. We start all over again. */ 390 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t)); 391 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t)); 392 393 cc = fp->_codecvt = &fp->_wide_data->_codecvt; 394 395 /* The functions are always the same. 
*/ 396 *cc = __libio_codecvt; 397 398 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps; 399 cc->__cd_in.__cd.__steps = fcts.towc; 400 401 cc->__cd_in.__cd.__data[0].__invocation_counter = 0; 402 cc->__cd_in.__cd.__data[0].__internal_use = 1; 403 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST; 404 cc->__cd_in.__cd.__data[0].__statep = &result->_wide_data->_IO_state; 405 406 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps; 407 cc->__cd_out.__cd.__steps = fcts.tomb; 408 409 cc->__cd_out.__cd.__data[0].__invocation_counter = 0; 410 cc->__cd_out.__cd.__data[0].__internal_use = 1; 411 cc->__cd_out.__cd.__data[0].__flags 412 = __GCONV_IS_LAST | __GCONV_TRANSLIT; 413 cc->__cd_out.__cd.__data[0].__statep = 414 &result->_wide_data->_IO_state; 415 416 /* From now on use the wide character callback functions. */ 417 _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable; 418 419 /* Set the mode now. */ 420 result->_mode = 1; 421 } 422 } 423 424 return result; 425 }
看到这里,我们很快就可以分析出,函数在解析完mode参数后,又调用了_IO_file_open函数
(这里我也不能分析得很清楚)
不过从下面的源码可以看出,它调用了open打开文件,并将文件描述符赋给了fp->_fileno成员,最后又再次调用了_IO_link_in,确保该结构体被链接进入_IO_list_all
链表。
211 _IO_FILE * 212 _IO_file_open (_IO_FILE *fp, const char *filename, int posix_mode, int prot, 213 int read_write, int is32not64) 214 { 215 int fdesc; 216 #ifdef _LIBC 217 if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL)) 218 fdesc = open_not_cancel (filename, 219 posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot); 220 else 221 fdesc = open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot); 222 #else 223 fdesc = open (filename, posix_mode, prot); 224 #endif 225 if (fdesc < 0) 226 return NULL; 227 fp->_fileno = fdesc; 228 _IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING); 229 /* For append mode, send the file offset to the end of the file. Don't 230 update the offset cache though, since the file handle is not active. */ 231 if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS)) 232 == (_IO_IS_APPENDING | _IO_NO_READS)) 233 { 234 _IO_off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end); 235 if (new_pos == _IO_pos_BAD && errno != ESPIPE) 236 { 237 close_not_cancel (fdesc); 238 return NULL; 239 } 240 } 241 _IO_link_in ((struct _IO_FILE_plus *) fp); 242 return fp; 243 } 244 libc_hidden_def (_IO_file_open)
调用完_IO_file_fopen函数后,new_f->fp的_IO_FILE_plus结构体为
总结
这里我感觉raycp师傅比我总结得好,所以我直接cp了过来
看完代码后,fopen整体的流程可以归纳为:
1. malloc分配内存空间。
2. _IO_no_init对file结构体进行null初始化。
3. _IO_file_init将结构体链接进_IO_list_all链表。
4. _IO_file_fopen执行系统调用打开文件。
整个流程还是比较简单的,fopen返回之后_IO_list_all
链表指向返回的FILE结构体,且FILE结构体的_chain字段指向之前的结构体(没有其他额外打开文件的话,将是指向stderr
),同时其他的字段大多都是默认的null值,vtable
存储的是__GI__IO_file_jumps
函数表
感悟
这次逆向大部分都是自己完成的,不过到后面有许多细节没有注意,比如查看结构体的变化之类的,所以还是有许多的不足
参考:https://ray-cp.github.io/archivers/IO_FILE_fopen_analysis
文章的部分转载:https://ray-cp.github.io/archivers/IO_FILE_fopen_analysis