newlib中printf库函数的实现
一、Newlib
arm-none-eabi-gcc编译器组件中包含了一些库文件,这些库文件的具体信息可以查阅官网(https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads)中相应版本的Release Note得到。
以gcc-arm-none-eabi-9-2019-q4-major为例,其Release Note中标明C库为“newlib and newlib-nano : newlib-3.1.0”,即C库使用的是newlib和newlib-nano,版本为3.1.0。如果想要查看C库函数的具体实现,可以根据该版本信息找到对应的源代码。
Newlib是一个面向嵌入式系统的C运行库。最初是由Cygnus Solutions收集组装的一个源代码集合,取名为newlib,现在由Red Hat维护,官网地址https://sourceware.org/newlib/。
通过Git下载好对应版本的源代码之后,可以看到C库只是newlib的一部分,C库具体路径位于newlib-cygwin-cygwin-3_1_0-release/newlib-cygwin/newlib/libc 。
二、printf实现
printf函数的具体实现位于newlib/libc/stdio/printf.c:
int printf (const char *__restrict fmt, ...) { int ret; va_list ap; struct _reent *ptr = _REENT; _REENT_SMALL_CHECK_INIT (ptr); va_start (ap, fmt); ret = _vfprintf_r (ptr, _stdout_r (ptr), fmt, ap); va_end (ap); return ret; }
其中va_list, va_start, va_end几个宏用于实现可变参数的解析,具体原理这里不再赘述。
struct _reent *ptr = _REENT;
这行代码用于获取printf的关键参数ptr, 该参数是一个指向_reent结构体的指针,_reent结构体具体为:
struct _reent { int _errno; /* local copy of errno */ /* FILE is a big struct and may change over time. To try to achieve binary compatibility with future versions, put stdin,stdout,stderr here. These are pointers into member __sf defined below. */ __FILE *_stdin, *_stdout, *_stderr; int _inc; /* used by tmpnam */ char _emergency[_REENT_EMERGENCY_SIZE]; /* TODO */ int _unspecified_locale_info; /* unused, reserved for locale stuff */ struct __locale_t *_locale;/* per-thread locale */ int __sdidinit; /* 1 means stdio has been init'd */ void (*__cleanup) (struct _reent *); /* used by mprec routines */ struct _Bigint *_result; int _result_k; struct _Bigint *_p5s; struct _Bigint **_freelist; /* used by some fp conversion routines */ int _cvtlen; /* should be size_t */ char *_cvtbuf; union { struct { unsigned int _unused_rand; char * _strtok_last; char _asctime_buf[_REENT_ASCTIME_SIZE]; struct __tm _localtime_buf; int _gamma_signgam; __extension__ unsigned long long _rand_next; struct _rand48 _r48; _mbstate_t _mblen_state; _mbstate_t _mbtowc_state; _mbstate_t _wctomb_state; char _l64a_buf[8]; char _signal_buf[_REENT_SIGNAL_SIZE]; int _getdate_err; _mbstate_t _mbrlen_state; _mbstate_t _mbrtowc_state; _mbstate_t _mbsrtowcs_state; _mbstate_t _wcrtomb_state; _mbstate_t _wcsrtombs_state; int _h_errno; } _reent; /* Two next two fields were once used by malloc. They are no longer used. They are used to preserve the space used before so as to allow addition of new reent fields and keep binary compatibility. 
*/ struct { #define _N_LISTS 30 unsigned char * _nextf[_N_LISTS]; unsigned int _nmalloc[_N_LISTS]; } _unused; } _new; # ifndef _REENT_GLOBAL_ATEXIT /* atexit stuff */ struct _atexit *_atexit; /* points to head of LIFO stack */ struct _atexit _atexit0; /* one guaranteed table, required by ANSI */ # endif /* signal info */ void (**(_sig_func))(int); /* These are here last so that __FILE can grow without changing the offsets of the above members (on the off chance that future binary compatibility would be broken otherwise). */ struct _glue __sglue; /* root of glue chain */ # ifndef _REENT_GLOBAL_STDIO_STREAMS __FILE __sf[3]; /* first three file descriptors */ # endif };
_REENT是一个宏,将其展开后就指向了全局变量_impure_ptr,该变量就是一个(struct _reent *)类型的指针,并具体指向了impure_data结构体:
static struct _reent __ATTRIBUTE_IMPURE_DATA__ impure_data = _REENT_INIT (impure_data); struct _reent *__ATTRIBUTE_IMPURE_PTR__ _impure_ptr = &impure_data;
_REENT_INIT 宏用于impure_data结构体初始化,例如三个文件指针_stdin, _stdout, _stderr分别指向了impure_data结构体中最后的三个文件结构体__FILE __sf[3]。
_REENT_SMALL_CHECK_INIT (ptr);
这行代码用于检查并在需要时初始化ptr所指向的_reent结构体(而不是ptr指针本身),具体行为根据不同的宏定义展开而不同。
ret = _vfprintf_r (ptr, _stdout_r (ptr), fmt, ap);
最后调用_vfprintf_r 进行下一步操作,_stdout_r (ptr)指向_stdout文件指针,即_impure_ptr->_stdout 。
三、_vfprintf_r实现
_vfprintf_r函数实现位于newlib/libc/stdio/vfprintf.c:
int _VFPRINTF_R (struct _reent *data, FILE * fp, const char *fmt0, va_list ap) { register char *fmt; /* format string */ register int ch; /* character from fmt */ register int n, m; /* handy integers (short term usage) */ register char *cp; /* handy char pointer (short term usage) */ register int flags; /* flags as above */ char *fmt_anchor; /* current format spec being processed */ #ifndef _NO_POS_ARGS int N; /* arg number */ int arg_index; /* index into args processed directly */ int numargs; /* number of varargs read */ char *saved_fmt; /* saved fmt pointer */ union arg_val args[MAX_POS_ARGS]; int arg_type[MAX_POS_ARGS]; int is_pos_arg; /* is current format positional? */ int old_is_pos_arg; /* is current format positional? */ #endif int ret; /* return value accumulator */ int width; /* width from format (%8d), or 0 */ int prec; /* precision from format (%.3d), or -1 */ char sign; /* sign prefix (' ', '+', '-', or \0) */
... ...
该函数实现了将可变参数进行格式化并输出到stdout的过程,代码比较庞大复杂,上面只贴了开头的一部分。_vfprintf_r主要调用了以下几个接口:
__sinit
__swsetup_r (通过cantwrite宏调用)
__sbprintf
__sprint_r
__sinit:
通过CHECK_INIT宏调用,用于确保ptr所指向的_reent结构体中的stdio已完成初始化(对应结构体中的__sdidinit标志),根据不同的宏定义进行展开。
__swsetup_r :
__swsetup_r的作用是配置stdout输出文件,具体实现如下:
/* * Various output routines call wsetup to be sure it is safe to write, * because either _flags does not include __SWR, or _buf is NULL. * _wsetup returns 0 if OK to write, nonzero and set errno otherwise. */ int __swsetup_r (struct _reent *ptr, register FILE * fp) { /* Make sure stdio is set up. */ CHECK_INIT (_REENT, fp); /* * If we are not writing, we had better be reading and writing. */ if ((fp->_flags & __SWR) == 0) { if ((fp->_flags & __SRW) == 0) { ptr->_errno = EBADF; fp->_flags |= __SERR; return EOF; } if (fp->_flags & __SRD) { /* clobber any ungetc data */ if (HASUB (fp)) FREEUB (ptr, fp); fp->_flags &= ~(__SRD | __SEOF); fp->_r = 0; fp->_p = fp->_bf._base; } fp->_flags |= __SWR; } /* * Make a buffer if necessary, then set _w. * A string I/O file should not explicitly allocate a buffer * unless asprintf is being used. */ if (fp->_bf._base == NULL && (!(fp->_flags & __SSTR) || (fp->_flags & __SMBF))) __smakebuf_r (ptr, fp); if (fp->_flags & __SLBF) { /* * It is line buffered, so make _lbfsize be -_bufsize * for the putc() macro. We will change _lbfsize back * to 0 whenever we turn off __SWR. */ fp->_w = 0; fp->_lbfsize = -fp->_bf._size; } else fp->_w = fp->_flags & __SNBF ? 0 : fp->_bf._size; if (!fp->_bf._base && (fp->_flags & __SMBF)) { /* __smakebuf_r set errno, but not flag */ fp->_flags |= __SERR; return EOF; } return 0; }
注意在调用该函数之前,stdout->_p指针(指向stdout文件的输出Buffer)还是空的,通过该函数会给这个Buffer分配内存,分配内存的调用路径为:
__smakebuf_r ----> _malloc_r(nano_malloc) ---> sbrk_aligned ---> _sbrk_r ---> _sbrk
_sbrk为malloc的底层实现,用于向系统申请新的堆内存空间。该函数的实现与体系结构相关,ARM体系的实现位于newlib/libc/sys/arm/syscalls.c,函数具体实现如下:
void * __attribute__((weak)) _sbrk (ptrdiff_t incr) { extern char end asm ("end"); /* Defined by the linker. */ static char * heap_end; char * prev_heap_end; if (heap_end == NULL) heap_end = & end; prev_heap_end = heap_end; if ((heap_end + incr > stack_ptr) /* Honour heap limit if it's valid. */ || (__heap_limit != 0xcafedead && heap_end + incr > (char *)__heap_limit)) { /* Some of the libstdc++-v3 tests rely upon detecting out of memory errors, so do not abort here. */ #if 0 extern void abort (void); _write (1, "_sbrk: Heap and stack collision\n", 32); abort (); #else errno = ENOMEM; return (void *) -1; #endif } heap_end += incr; return (void *) prev_heap_end; }
注意函数中有个end符号:
extern char end asm ("end"); /* Defined by the linker. */
这个符号需要在Linker脚本中定义,用于指定_sbrk分配内存(堆)的起始地址,如果未定义则会出现链接错误。
__sbprintf :
这是一个辅助函数,通过一个临时Buffer,实现unbuffered(无缓冲)文件的输出。
/* * Helper function for `fprintf to unbuffered unix file': creates a * temporary buffer. We only work on write-only files; this avoids * worries about ungetc buffers and so forth. * * Make sure to avoid inlining. */ _NOINLINE_STATIC int __sbprintf (struct _reent *rptr, register FILE *fp, const char *fmt, va_list ap) { int ret; FILE fake; unsigned char buf[BUFSIZ]; /* copy the important variables */ fake._flags = fp->_flags & ~__SNBF; fake._flags2 = fp->_flags2; fake._file = fp->_file; fake._cookie = fp->_cookie; fake._write = fp->_write; /* set up the buffer */ fake._bf._base = fake._p = buf; fake._bf._size = fake._w = sizeof (buf); fake._lbfsize = 0; /* not actually used, but Just In Case */ #ifndef __SINGLE_THREAD__ __lock_init_recursive (fake._lock); #endif /* do the work, then copy any error status */ ret = _VFPRINTF_R (rptr, &fake, fmt, ap); if (ret >= 0 && _fflush_r (rptr, &fake)) ret = EOF; if (fake._flags & __SERR) fp->_flags |= __SERR; #ifndef __SINGLE_THREAD__ __lock_close_recursive (fake._lock); #endif return (ret); }
__sprint_r:
执行输出,将格式化完成的字符串输出到stdout->_p指向的Buffer,具体实现如下:
/* * Flush out all the vectors defined by the given uio, * then reset it so that it can be reused. */ int __sprint_r (struct _reent *ptr, FILE *fp, register struct __suio *uio) { register int err = 0; if (uio->uio_resid == 0) { uio->uio_iovcnt = 0; return (0); } #ifdef _WIDE_ORIENT if (fp->_flags2 & __SWID) { struct __siov *iov; wchar_t *p; int i, len; iov = uio->uio_iov; for (; uio->uio_resid != 0; uio->uio_resid -= len * sizeof (wchar_t), iov++) { p = (wchar_t *) iov->iov_base; len = iov->iov_len / sizeof (wchar_t); for (i = 0; i < len; i++) { if (_fputwc_r (ptr, p[i], fp) == WEOF) { err = -1; goto out; } } } } else #endif err = __sfvwrite_r(ptr, fp, uio); out: uio->uio_resid = 0; uio->uio_iovcnt = 0; return (err); }
__sprint_r随后调用__sfvwrite_r做进一步输出。
__sfvwrite_r函数中有两种路径进入下一步输出,分别为:
(1)_fflush_r ---> __sflush_r ---> fp->_write
(2)fp->_write
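两种路径最终都指向了函数指针 fp->_write。对于通过fopen打开的文件,该指针在_fopen_r函数中做了设定(stdin/stdout/stderr这三个标准流并不经过_fopen_r,它们的_write指针是在stdio初始化过程中设定的,可参考newlib/libc/stdio/findfp.c,同样指向__swrite):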
FILE * _fopen_r (struct _reent *ptr, const char *__restrict file, const char *__restrict mode) { register FILE *fp; register int f; int flags, oflags; if ((flags = __sflags (ptr, mode, &oflags)) == 0) return NULL; if ((fp = __sfp (ptr)) == NULL) return NULL; if ((f = _open_r (ptr, file, oflags, 0666)) < 0) { _newlib_sfp_lock_start (); fp->_flags = 0; /* release */ #ifndef __SINGLE_THREAD__ __lock_close_recursive (fp->_lock); #endif _newlib_sfp_lock_end (); return NULL; } _newlib_flockfile_start (fp); fp->_file = f; fp->_flags = flags; fp->_cookie = (void *) fp; fp->_read = __sread; fp->_write = __swrite; fp->_seek = __sseek; fp->_close = __sclose; if (fp->_flags & __SAPP) _fseek_r (ptr, fp, 0, SEEK_END); #ifdef __SCLE if (__stextmode (fp->_file)) fp->_flags |= __SCLE; #endif _newlib_flockfile_end (fp); return fp; }
fp->_write指向了__swrite函数,__swrite函数通过 _write_r ---> _write 路径完成最终输出。如果想要改变printf的输出方式,可以对_write_r或者_write函数进行重定义,替换成自定义函数,即可实现对应的输出效果。