Windows Embedded CE 6.0 Internals (3) Memory Continued
对我来说写一篇博客真的不容易,我是个十足的完美主义者,但是水平很一般,所以我会花上很多时间去修补文章。也许文章并不能让你满意,如果你有任何的建议,任何的,我都非常期待你能告诉我。这篇文章仍然是继续Windows Embedded CE Internals (2)内存部分。
从硬件上看,可作为内存的大体分为RAM、ROM、Nand/Nor Flash(兼具RAM和ROM特性的混合体)。
RAM 内存可以进一步分为静态随机存取存储器(SRAM)和动态随机存取存储器(DRAM)两大类。SRAM具有快速访问的优点,但生产成本较为昂贵,一个典型的应用是高速缓存。而DRAM由于具有较低的单位容量价格,所以被大量地采用作为系统的主存储器。
DDR SDRAM(Double Data Rate SDRAM)
DDRII(Double Data Rate 2 Synchronous DRAM)
2.内存中的代码能够直接被执行的前提是CPU能够随机读取这个内存里面的数据。RAM是满足这个条件的,同样满足这个条件的还有ROM和Nor Flash(也就是XIP)。CPU要执行Nand Flash里面的代码时,必须先把这些代码Copy到RAM里面。
4.关于Nand Flash和Nor Flash的具体区别请看这里。
2.PE文件结构。Windows Embedded CE以及Windows Mobile的PE文件结构与Windows桌面操作系统上的相同。
//------------------------------------------------------------------------------
// Helper function to create a new process. Will NOT return if succeeds
//
// Runs in the context of the new process: it operates on pCurThread and
// pActvProc, loads the EXE image described by pprc->oe into the process's
// address space, builds the initial user stack, signals the creator, and
// finally switches to user code.
//
// Parameters:
//   lppsi - process start info; this function reads pszImageName, pszCmdLine
//           and fdwCreate from it.
//
// Returns:
//   On failure, the error code from LoadE32/LoadO32, ERROR_BAD_EXE_FORMAT, or
//   ERROR_OUTOFMEMORY. On success control is handed to MDSwitchToUserCode and
//   the trailing "return 0" is never reached.
//
// Locking:
//   The loader lock of pprc must be held on entry (checked by DEBUGCHK); it is
//   released with UnlockLoader (pprc) once the image has been set up.
//
// NOTE(review): the failure returns below do not release anything allocated
// earlier in this function (VM reservation, name block, stack) and do not drop
// the loader lock - presumably the caller cleans up the failed process; confirm.
//------------------------------------------------------------------------------
DWORD CreateNewProcHelper(
    LPProcStartInfo lppsi
    )
{
    PTHREAD pth = pCurThread;
    PPROCESS pprc = pActvProc;
    e32_lite *eptr = &pprc->e32;
    LPVOID lpStack = NULL;
    DWORD flags, entry, dwErr = 0, len;
    LPCWSTR procname = GetPlainName (lppsi->pszImageName);
    LPCWSTR pCmdLine;
    LPWSTR uptr;
    PNAME pAlloc;
    LPBYTE pCurSP;
    REGTYPE *pArgs;

    DEBUGCHK (OwnLoaderLock (pprc));

    // NOTE(review): redundant - eptr was already initialized to the same value above.
    eptr = &pprc->e32;

    // read the e32 header; also yields the image flags and the entry point offset
    if (dwErr = LoadE32 (&pprc->oe, eptr, &flags, &entry, 0)) {
        DEBUGMSG(1,(TEXT("ERROR! LoadE32 failed, dwErr = %8.8lx!\r\n"), dwErr));
        return dwErr;
    }

    // a DLL cannot be started as a process
    if (flags & IMAGE_FILE_DLL) {
        DEBUGMSG(1,(TEXT("ERROR! CreateProcess on DLL!\r\n")));
        return ERROR_BAD_EXE_FORMAT;
    }

    // an image without relocations can only be loaded at its preferred base, which
    // is accepted only when it lies within [0x10000, 0x400000]
    if ((flags & IMAGE_FILE_RELOCS_STRIPPED) && ((eptr->e32_vbase < 0x10000) || (eptr->e32_vbase > 0x400000))) {
        DEBUGMSG(1,(TEXT("ERROR! No relocations - can't load/fixup!\r\n")));
        return ERROR_BAD_EXE_FORMAT;
    }

    // relocatable images are always based at 0x10000; fixed images keep their own base
    pprc->BasePtr = (flags & IMAGE_FILE_RELOCS_STRIPPED) ? (LPVOID)eptr->e32_vbase : (LPVOID)0x10000;

    DEBUGMSG(ZONE_LOADER1,(TEXT("BasePtr is %8.8lx\r\n"),pprc->BasePtr));

    // size in bytes of the process name, including the terminating NUL
    len = (NKwcslen (procname) + 1) * sizeof(WCHAR);

    // reserve address space for the EXE (MEM_RESERVE only, PAGE_NOACCESS), and allocate
    // one name block that holds the o32 section table followed by the process name
    if (!VMAlloc (pprc, pprc->BasePtr, eptr->e32_vsize, MEM_RESERVE|MEM_IMAGE, PAGE_NOACCESS)
        || !(pAlloc = AllocName (eptr->e32_objcnt*sizeof(o32_lite)+len + 2))) {     // +2 for DWORD alignment
        return ERROR_OUTOFMEMORY;
    }

    // optimization for resource loading - rebase resource rva
    if (eptr->e32_unit[RES].rva) {
        eptr->e32_unit[RES].rva += (DWORD) pprc->BasePtr;
    }

    pprc->o32_ptr = (o32_lite *) ((DWORD) pAlloc + 4);  // skip header info

    // the process name is stored right after the e32_objcnt o32_lite entries
    uptr = (LPWSTR) (pprc->o32_ptr + eptr->e32_objcnt);
    memcpy (uptr, procname, len);
    pprc->lpszProcName = uptr;

    // load the section (o32) table for the image at BasePtr
    if (dwErr = LoadO32(&pprc->oe,eptr,pprc->o32_ptr,(ulong)pprc->BasePtr)) {
        DEBUGMSG(1,(TEXT("ERROR! LoadO32 failed, dwerr = %8.8lx!\r\n"), dwErr));
        return dwErr;
    }

    // relocate now only if the file is not pre-fixed-up and is not pageable
    if (!(pprc->oe.filetype & FA_PREFIXUP)) {
        if (!PageAble (&pprc->oe) && !Relocate(eptr, pprc->o32_ptr, (ulong)pprc->BasePtr)) {
            return ERROR_OUTOFMEMORY;
        }
    }

    // allocate stack
    eptr->e32_stackmax = CalcStackSize(eptr->e32_stackmax);

    if (!(lpStack = VMCreateStack (pprc, eptr->e32_stackmax))) {
        return ERROR_OUTOFMEMORY;
    }

    pth->tlsNonSecure = TLSPTR (lpStack, eptr->e32_stackmax);

    // fall back to a literal default when the creator supplied no command line
    pCmdLine = lppsi->pszCmdLine? lppsi->pszCmdLine : L" ";

    DEBUGMSG (ZONE_LOADER1, (L"Initializeing user stack cmdline = '%s', program = '%s', stack = %8.8lx\r\n", pCmdLine, pprc->lpszProcName, lpStack));

    // Upon return of InitCmdLineAndPrgName, pCmdLine will be pointing to user accessible command line, and
    // pCurSP is the user accessible program name. pCurSP is also the top of stack in use
    pCurSP = InitCmdLineAndPrgName (&pCmdLine, pprc->lpszProcName, lpStack, eptr->e32_stackmax);

    InitModInProc (pprc, (PMODULE) hCoreDll);

    // LoadMUI is called with the loader lock of the kernel process (g_pprcNK) held
    LockLoader (g_pprcNK);
    // load MUI if it exists
    pprc->pmodResource = LoadMUI (NULL, pprc->BasePtr, eptr);
    UnlockLoader (g_pprcNK);

    AdjustRegions (pprc->BasePtr, &pprc->oe, eptr, pprc->o32_ptr);

    // absolute entry point = image base + entry offset returned by LoadE32
    pth->dwStartAddr = (DWORD) pprc->BasePtr + entry;

    DEBUGMSG(ZONE_LOADER1,(TEXT("Starting up the process!!! IP = %8.8lx\r\n"), pth->dwStartAddr));

    // image setup is done; release the loader lock held since entry
    UnlockLoader (pprc);

    (* HDModLoad) ((DWORD)pprc);

    CELOG_ProcessCreateEx (pprc);

    // setup arguments to MainThreadBaseFunc
    // (8 REGSIZE slots are reserved below pCurSP; six of them are filled in here)
    pArgs = (REGTYPE*) (pCurSP - 8 * REGSIZE);
    pArgs[0] = pth->dwStartAddr;                                // arg1 - entry point
    pArgs[1] = (REGTYPE) pCurSP;                                // arg2 - program name (value returned from InitCmdLineAndPrgName)
    pArgs[2] = (REGTYPE) pCmdLine;                              // arg3 - command line
    pArgs[3] = (REGTYPE) hCoreDll;                              // arg4 - hCoredll
    pArgs[4] = (REGTYPE) pprc->pmodResource;                    // arg5 - MUI DLL of EXE
    pArgs[5] = (REGTYPE) ((PMODULE) hCoreDll)->pmodResource;// arg6 - MUI DLL of coredll

#ifdef x86
    {
        NK_PCR *pcr = TLS2PCR (pth->tlsNonSecure);
        // terminate registration pointer
        pcr->ExceptionList = 0;

        // setup return address on stack
        pArgs --;
        *pArgs = 0;     // return address set to 0
    }
#endif

    // record the stack bound: pArgs rounded down to a page boundary
    pth->tlsNonSecure[PRETLS_STACKBOUND] = (DWORD) pArgs & ~(VM_PAGE_SIZE-1);

    // update thread info
    pth->dwOrigBase = (DWORD) lpStack;
    pth->dwOrigStkSize = eptr->e32_stackmax;

    // notify PSL of process creation
    NKPSLNotify (DLL_PROCESS_ATTACH, pprc->dwId, pth->dwId);

    //
    // if create suspended, we need to suspend ourselves before releasing the creator. Or
    // there can be a race condition where the creator calls "ResumeThread" before we suspend,
    // and the result is that the thread will be suspended forever.
    //
    // It's safe to modify bPendSusp here because no one but the kernel has a handle to this thread,
    // and we know the kernel will not suspend this thread for sure.
    //
    if (CREATE_SUSPENDED & lppsi->fdwCreate) {
        pth->bPendSusp = 1;
    }

    // notify caller process creation complete
    ForceEventModify (GetEventPtr (pprc->phdProcEvt), EVENT_SET);

    // creator signaled, we can suspend here if needed
    if (pth->bPendSusp) {
        KCall ((PKFN) SuspendSelfIfNeeded);
        CLEAR_USERBLOCK (pth);
    }

    // in case we got terminated even before running user code, just go straight to NKExitThread
    if (GET_DYING (pth)) {
        NKExitThread (0);
        // never return
        DEBUGCHK (0);
    }

    // update process state
    pprc->bState = PROCESS_STATE_NORMAL;

    // machine dependent main thread startup
    DEBUGMSG (ZONE_LOADER1,(TEXT("CreateNewProcHelper: Switch to User code at %8.8lx!\r\n"), pth->dwStartAddr));
    MDSwitchToUserCode ((FARPROC) MTBFf, pArgs);
    DEBUGCHK (0);   // never return
    return 0;       // keep compiler happy
}
// Obtain a module handle to toolhelp.dll HINSTANCE hKernel = LoadLibrary(_T("toolhelp.dll")); if (!hKernel) { MessageBox(NULL, L"Toolhelp.dll not found", L"TrayTaskList", MB_OK); return 0; } // all processes currently in the system. hProcessSnap = (HINSTANCE)CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (hProcessSnap == (HANDLE)-1) return 0;
Peering Inside the PE: A Tour of the Win32 Portable Executable File Format
The Common Object File Format (COFF)
An In-Depth Look into the Win32 Portable Executable File Format
为了更方便了解,我们使用map文件。map文件是链接器生成的文本文件,记录了程序中各个Section以及符号的地址。你可以从这里获得如何从map文件定位应用程序Crash的方法,还可以阅读《Debugging Applications》获得更多相关知识。
在Visual Studio的这个位置设置是否生成map文件:
Section 0001 is the text segment containing the executable code of the program. (可执行代码文本)
Section 0002 contains the read-only static data. (只读静态数据)
Section 0003 contains the read/write static data. (可读/写静态数据)
Section 0004 contains the fix-up table to support calls to other DLLs.(调用其它DLL的修正表)
Section 0005 is the resource section containing the application's resources, such as menu and dialog box templates.(资源)
现在再看一下Windows Embedded CE 5.0用户态内存分布图:
我们把上图从0000 0000地址到03FF FFFF地址的分布再放大一下:
现在你应该大体知道EXE文件中各个Section在CE 5.0版本中大体加载到内存的哪里了。CE 6.0系统相对位置与其相似,只是单个进程不再是32M的独立空间,而是1G的独立空间。
最后顺带着再介绍一下命令dumpbin [options] [files]
我们经常需要查看EXE、DLL或者LIB文件的导入或者导出函数,比较方便的一个方法就是在Visual Studio自带的命令行中使用dumpbin,比如查看刚生成的PortableExecute.exe的导入函数:
C:\Program Files\Microsoft Visual Studio 9.0\VC> D:
D:> cd D:\Windows Mobile\PortableExecute\PortableExecute\Windows Mobile 6 Professional SDK (ARMV4I)\Debug
D:\Windows Mobile\PortableExecute\PortableExecute\Windows Mobile 6 Professional SDK (ARMV4I)\Debug> dumpbin /imports PortableExecute.exe
1.为什么Windows Embedded CE是基于页式的虚拟内存系统?(声明:文章提到的虚拟内存指的是虚拟地址空间,不是通常说的Page File。)
2.为什么使用Page Pool?
虚拟内存在1959年就被发明用来隐藏计算机存储系统的分层结构,从而极大地简化后续编程任务。但是它是如此的常见,以至于很少有人会思考它为什么会存在。如果你深入地去了解,你会发现这项发明实在太棒了。更多请看美国乔治梅森大学计算机科学系网上教程《Virtual Memory Module》和Peter J. Denning 1970年发表的论文《Virtual Memory》(我坦白我只看了一点点)。
在文章Windows Embedded CE 6.0 Internals (2)中讲到,应用程序等使用的是虚拟地址,操作系统使用MMU处理虚拟地址到物理地址的实时转换,这样很明显的一个好处是隔离了应用程序等与物理地址的具体实现。与Windows桌面操作系统相比,基于Windows Embedded CE系统设备的物理内存实现更多样化(文章第一部分也可以看到)。就这一点来说Windows Embedded CE更迫切地需要被实现为基于虚拟内存的系统。
当然任何事情都有两面的,带来了物理内存实现抽象化好处的同时降低了寻址的效率。为了尽一切方式去弥补牺牲的效率,微软又使用了静态地址映射、Page Pool等技术(这些技术可不是微软发明的)。这就是为什么微软一些操作系统的内存系统都比较复杂的原因。当然Windows Embedded CE内存系统的设计比Windows桌面系统(比如Windows XP)简洁了很多。毕竟人家是嵌入式系统。
(1). 隔离了不同进程的地址空间,每个进程都有自己独立的页目录,进程切换时CPU切换现场,包括切换页目录。
(2). 提供更细腻的内存数据安全保护和管理,管理的粒度到一个页面大小,这个页面的权限可以是只读、可读可写、可执行、可读可执行、可读可写可执行、Guard(首次访问会导致STATUS_GUARD_PAGE异常)、不允许访问、不允许缓存(CPU不会高速缓存映射到这个区域的RAM页面)。
(3). 为了更有效的使用物理内存。
另外补充一点知识,在Windows Embedded CE 6.0中页面大小是4KB(目前的嵌入式处理器大多是4KB页面的),在早期的CE系统中这个大小是1KB。虚拟内存区域对齐的边界是64KB,也就是说最小的分配粒度是64KB,每次都是以64KB的整数倍分配。提交的粒度是一个页面大小。
//----------------------------------------------------------------------------------------------------------------- // // VMAlloc: main function to allocate VM. // LPVOID VMAlloc ( PPROCESS pprc, // process LPVOID lpvaddr, // starting address DWORD cbSize, // size, in byte, of the allocation DWORD fAllocType, // allocation type DWORD fProtect // protection ) { DWORD dwAddr = (DWORD) lpvaddr; DWORD dwEnd = dwAddr + cbSize; DWORD dwErr = 0; LPVOID lpRet = NULL; // verify arguments if (!IsValidAllocType (dwAddr, fAllocType) // valid fAllocType? || ((int) cbSize <= 0) // valid size? || !IsValidProtect (fProtect)) { // valid fProtect? dwErr = ERROR_INVALID_PARAMETER; DEBUGMSG (ZONE_VIRTMEM, (L"VMAlloc failed, invalid parameter %8.8lx, %8.8lx %8.8lx %8.8lx\r\n", lpvaddr, cbSize, fAllocType, fProtect)); } else { DWORD cPages; // Number of pages // page align start address dwAddr &= ~VM_PAGE_OFST_MASK; cPages = PAGECOUNT (dwEnd - dwAddr); lpRet = DoVMAlloc (pprc, dwAddr, cPages, fAllocType, fProtect, PM_PT_ZEROED, &dwErr); } KSetLastError (pCurThread, dwErr); return lpRet; }
//-----------------------------------------------------------------------------------------------------------------
//
// DoVMAlloc: worker function to allocate VM.
//
// Reserves and/or commits cPages pages starting at dwAddr in process pprc.
//
//  - MEM_RESERVE is forced when dwAddr is 0.
//  - When reserving, the range comes from DoVMReserve; otherwise VMScan checks the
//    existing range and counts, in cPageNeeded, the pages that must be committed.
//  - When MEM_COMMIT is set, HoldPages must succeed for cPageNeeded pages before the
//    entries are updated through Enumerate2ndPT/CommitPages.
//  - If the commit step fails after a successful reserve made by this call, the
//    reservation is released again.
//
// Returns the allocated address, or NULL (0) on failure.
//
// *pdwErr is only written on failure, and the rollback test "*pdwErr && dwRet" reads
// it back, so the caller must pass it in as 0 (VMAlloc does).
//
static LPVOID
DoVMAlloc(
    PPROCESS pprc,          // the process where VM is to be allocated
    DWORD dwAddr,           // starting address
    DWORD cPages,           // # of pages
    DWORD fAllocType,       // allocation type
    DWORD fProtect,         // protection
    DWORD dwPageType,       // when committing, what type of page to use
    LPDWORD pdwErr          // error code if failed
    )
{
    DWORD dwRet = 0;
    DWORD cPageNeeded = cPages;     // default: every page needs committing; VMScan may update this

    // if dwAddr == 0, must reserve first
    if (!dwAddr) {
        fAllocType |= MEM_RESERVE;
    }

    DEBUGMSG (ZONE_VIRTMEM, (L"DoVMAlloc - proc-id: %8.8lx, dwAddr = %8.8lx, cPages = 0x%x, fAllocType = %8.8lx, dwPageType = %8.8lx\r\n", pprc->dwId, dwAddr, cPages, fAllocType, dwPageType));

    if ((MEM_COMMIT & fAllocType)                   // committing page
        && g_nStackCached                           // got some cached stacks
        && WillTriggerPageOut ((long) cPages)) {    // the request will trigger a pageout.
        // free all cached stacks
        VMFreeExcessStacks (0);
    }

    if (LockVM (pprc)) {

        dwRet = (fAllocType & MEM_RESERVE)
            ? DoVMReserve (pprc, dwAddr, cPages, 0, fAllocType)                             // reserving VM
            : VMScan (pprc, dwAddr, cPages, &cPageNeeded, fProtect & VM_READWRITE_PROT);   // not reserving, must be committing. Check if
                                                                                            // the range to be committed is from a single reservation.
                                                                                            // count #of page needed to commit

        if (!dwRet) {
            // a caller-specified address that cannot be used is a parameter error;
            // with no address given, the failure is reported as out of memory
            *pdwErr = dwAddr? ERROR_INVALID_PARAMETER: ERROR_NOT_ENOUGH_MEMORY;

        } else if (fAllocType & MEM_COMMIT) {

            if (cPageNeeded && !HoldPages (cPageNeeded, FALSE)) {
                *pdwErr = ERROR_NOT_ENOUGH_MEMORY;

            } else {
                VMCommitStruct vs = { PageParamFormProtect (fProtect, dwRet), dwPageType, FALSE };

                // got enough pages, update entries
                Enumerate2ndPT (pprc->ppdir, dwRet, cPages, 0, CommitPages, &vs);

                // vs.fFlush starts as FALSE; invalidate the range only if the enumeration set it
                if (vs.fFlush) {
                    InvalidatePages (pprc, dwRet, cPages);
                }
            }
        }

        UnlockVM (pprc);

        if (*pdwErr && dwRet) {
            // fail to commit enough pages
            if (fAllocType & MEM_RESERVE) {
                // undo the reservation made by this call
                VERIFY (!VMRelease (pprc, dwRet, cPages));
            }
            dwRet = 0;
        }

        CELOG_VirtualAlloc(pprc, dwRet, dwAddr, cPages, fAllocType, fProtect);

    } else {
        // could not lock the VM of the target process
        *pdwErr = ERROR_INVALID_PARAMETER;
    }

    DEBUGMSG (ZONE_VIRTMEM||!dwRet, (L"DoVMAlloc - returns %8.8lx\r\n", dwRet));

    return (LPVOID) dwRet;
}
虚拟的页面有3种状态:Free、Reserved、Committed,只有 Committed 的页面才真正映射到物理内存中,这就是为什么你使用 VirtualAlloc 函数时有 MEM_COMMIT 和 MEM_RESERVE 两种分配类型的原因。
来看看虚拟内存的机制图,因为没找到Windows Embedded CE 6.0的,我就拿《Windows Internals》(Windows XP、2003等桌面系统)中的截图说事了。
$2^{10} \times 2^{10} \times 2^{12} = 2^{32}$,即4GB寻址空间。因为页面大小是4KB,所以页内索引(Byte index)需要12比特去记录,剩下的20比特索引分成一个10比特的页目录索引和一个10比特的页表索引,这样有效地降低了页目录和页表的大小(各有1024项,每项4字节,各占4KB,正好是一个页面的大小)。
Page Pool
Pool is a collection of physical pages reserved for a specific purpose.
Loader is kernel function to load code pages.
File is kernel function to load and commit non-executable pages from a file.
Trimming is reducing the page pool size by discarding pages when active pool size is larger than the target size.
Target size is the normal size of the pool the pool manager will maintain.
Maximum size is the upper limit of the pool; the pool will never be allowed to exceed this size.
全部RAM会被分成3个部分:红色部分为Loader pool保留,黄色部分为File pool保留,剩下的为一般分配,比如:Heaps、Stacks、DLL R/W data、Page Pool use above Target level。
那么为什么要Page Pool呢?且听下回分解。(2010.5.26注:惭愧,到现在也没分解,不是不想分解,是分解不出来。)
// Allocate 512 pages with one VirtualAlloc call per page.
// Each call reserves AND commits its own region. As the article notes, the
// reservation granularity is 64KB while the commit granularity is one page, so
// every iteration takes a separate 64KB-aligned region of address space just
// to commit PAGESIZE bytes.
// NOTE(review): the results are stored unchecked; VirtualAlloc returns NULL on failure.
for (i = 0; i < 512; i++) {
    pMem[i] = VirtualAlloc(NULL, PAGESIZE, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
}
另外其它一些API有:VirtualProtect、VirtualQuery、VirtualAllocEx、VirtualProtectEx、VirtualQueryEx、VirtualFreeEx,后面带Ex的扩展版本主要多了一个HANDLE hProcess参数,用于操作另一个进程的内存,当然在此之前你得先调用OpenProcess获得该进程的句柄。你还可以使用 ReadProcessMemory 和 WriteProcessMemory 操作其它进程分配的内存块。
有时需要比页更小的粒度来操作内存,这时虚拟内存API明显满足不了了,你可以考虑堆API。但是使用堆会出现碎片等问题,有一些技巧可以尽量避免这个问题的出现。因为篇幅在此略过。另外注意Windows Embedded CE里面已经没全局堆了,诸如GlobalAlloc、GlobalFree、GlobalRealloc这样的函数都是通过宏映射为LocalAlloc等API了。
Application Verifier是解决这个问题最常用的工具,鉴于这篇博客的篇幅太长,移到下篇文章中介绍了。
