Macho文件部分解析

  • 2017-07-05
  • 1,164


本文章以machoview代码为例讲解解析过程
MachOView Github:https://github.com/gdbinit/MachOView
首先我们来看看Mach-O的文件结构

好了结构看完我们看看MAC下的解析工具machoview工具解析后的结构图

我们来看看具体的代码结构 Header 有3个结构 X32 X64 FAT
所有的结构定义基本都在MachOView/mach-o/loader.h这个文件下 除了fat在fat.h下
其中X32与X64的头结构差别不大,只是X64在末尾多了一个保留字段(reserved)

/* Mach-O file header, 32-bit variant. */
struct mach_header {
    uint32_t      magic;       /* magic number that identifies a Mach-O file */
    cpu_type_t    cputype;     /* CPU specifier */
    cpu_subtype_t cpusubtype;  /* machine specifier */
    uint32_t      filetype;    /* what kind of file this is */
    uint32_t      ncmds;       /* how many load commands there are */
    uint32_t      sizeofcmds;  /* total size of all the load commands */
    uint32_t      flags;       /* flags */
};

/* Mach-O file header, 64-bit variant: the 32-bit fields plus one trailing
 * reserved word. */
struct mach_header_64 {
    uint32_t      magic;       /* magic number that identifies a Mach-O file */
    cpu_type_t    cputype;     /* CPU specifier */
    cpu_subtype_t cpusubtype;  /* machine specifier */
    uint32_t      filetype;    /* what kind of file this is */
    uint32_t      ncmds;       /* how many load commands there are */
    uint32_t      sizeofcmds;  /* total size of all the load commands */
    uint32_t      flags;       /* flags */
    uint32_t      reserved;    /* reserved */
};

不过最近APPLE开始搞事情了 准备去掉X32的了所以X32的头准备报废了
至于FAT的头 比较少见 看看就算了

/* Header of a fat file; it is followed by nfat_arch architecture records. */
struct fat_header {
    uint32_t magic;      /* FAT_MAGIC */
    uint32_t nfat_arch;  /* count of the structs that follow this header */
};

/* One per-architecture record describing an object file inside a fat file. */
struct fat_arch {
    cpu_type_t    cputype;     /* CPU specifier (int) */
    cpu_subtype_t cpusubtype;  /* machine specifier (int) */
    uint32_t      offset;      /* file offset of this object file */
    uint32_t      size;        /* size of this object file */
    uint32_t      align;       /* alignment, expressed as a power of 2 */
};

好了我们来解释下结构成员的意思
uint32_t magic //魔数(magic number) 用于判断文件类型及字节序
取值为

/*
 * Magic numbers found in the first 4 bytes of the file. Each *_CIGAM value is
 * the byte-swapped form of the matching *_MAGIC value, so which one a parser
 * sees depends on whether the file's byte order matches the reading host's.
 * NOTE(review): fat headers are stored big-endian on disk (per Apple's
 * mach-o/fat.h), so a little-endian host reads FAT_CIGAM - confirm against the header.
 */
#define MH_MAGIC 0xfeedface // 32-bit Mach-O, same byte order as the reading host
#define MH_CIGAM 0xcefaedfe // 32-bit Mach-O, byte order opposite to the reading host (swap needed)
#define MH_MAGIC_64 0xfeedfacf // 64-bit Mach-O, same byte order as the reading host
#define MH_CIGAM_64 0xcffaedfe // 64-bit Mach-O, byte order opposite to the reading host (swap needed)
#define FAT_MAGIC 0xcafebabe // fat file, value as read by a big-endian host
#define FAT_CIGAM 0xbebafeca // fat file, value as read by a little-endian host (swap needed)

cpu_type_t cputype; // cpu类型 其实cpu_type_t是int类型(有符号32位整数)
cpu_subtype_t cpusubtype; //cpu说明符
uint32_t filetype; //文件类型

好了接下就来讲重点的

uint32_t ncmds; //load commands 的数量
uint32_t sizeofcmds; //load commands 的大小

根据上面的结构图 load commands是紧接着mach_header的
所以load commands在文件中的范围是:从sizeof(mach_header)偏移处开始(64位文件为sizeof(mach_header_64)),长度为sizeofcmds 我们先看看load commands的结构

/* Common 8-byte prefix shared by every load command. */
struct load_command {
    uint32_t cmd;      /* which type of load command this is */
    uint32_t cmdsize;  /* total size of the command, in bytes */
};

以及MachOView的解析图


值得注意的是 这是一个简化版的load_command结构 为什么这么说呢 因为load_command结构按照cmd的类型分了很多种,所以这个简化的结构是用来预读并判断cmd类型的
先预读这个8字节的load_command,再根据cmd的类型判断是哪一种具体结构
在MachOView里有详细的各种类型解析,比如说

/* Load command that identifies a dynamic library. */
struct dylib_command {
    uint32_t     cmd;      /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB, LC_REEXPORT_DYLIB */
    uint32_t     cmdsize;  /* size of the command; includes the pathname string */
    struct dylib dylib;    /* the library identification */
};
/* Load command that names an umbrella framework. */
struct sub_framework_command {
    uint32_t     cmd;       /* LC_SUB_FRAMEWORK */
    uint32_t     cmdsize;   /* size of the command; includes the umbrella string */
    union lc_str umbrella;  /* name of the umbrella framework */
};

等等..下面我们重点解释一种结构,它是load command里用来获取某个段(segment)及其所对应的块(section)的地址范围的结构

/* Segment load command for 32-bit architectures. */
struct segment_command {
    uint32_t  cmd;          /* LC_SEGMENT */
    uint32_t  cmdsize;      /* size of the command; includes sizeof section structs */
    char      segname[16];  /* name of the segment */
    uint32_t  vmaddr;       /* memory address of the segment */
    uint32_t  vmsize;       /* memory size of the segment */
    uint32_t  fileoff;      /* file offset of the segment */
    uint32_t  filesize;     /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t  nsects;       /* how many sections the segment has */
    uint32_t  flags;        /* flags */
};

/* Segment load command for 64-bit architectures; the address, size and
 * file-offset fields are 64 bits wide. */
struct segment_command_64 {
    uint32_t  cmd;          /* LC_SEGMENT_64 */
    uint32_t  cmdsize;      /* size of the command; includes sizeof section_64 structs */
    char      segname[16];  /* name of the segment */
    uint64_t  vmaddr;       /* memory address of the segment */
    uint64_t  vmsize;       /* memory size of the segment */
    uint64_t  fileoff;      /* file offset of the segment */
    uint64_t  filesize;     /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t  nsects;       /* how many sections the segment has */
    uint32_t  flags;        /* flags */
};

这就是一个完整的load_command结构,32位对应的CMD是LC_SEGMENT,64位对应的CMD是LC_SEGMENT_64。
好了解释下该结构部分信息
segname 段名固定16字节,例子__TEXT

vmaddr 段的虚拟地址 例如X64可执行文件的__TEXT段通常是0x100000000,其他段的值各不相同

vmsize 段在内存中的大小(在文件中的大小是filesize)

nsects该段多少个块

其实 segment_command紧接着的就是该段对应的section的区域(section在MachOView/mach-o/loader.h)

/* Section descriptor for 32-bit architectures. */
struct section {
    char     sectname[16];  /* name of the section */
    char     segname[16];   /* segment the section goes in */
    uint32_t addr;          /* memory address of the section */
    uint32_t size;          /* size of the section, in bytes */
    uint32_t offset;        /* file offset of the section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of the relocation entries */
    uint32_t nreloc;        /* how many relocation entries there are */
    uint32_t flags;         /* flags (section type and attributes) */
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
};

/* Section descriptor for 64-bit architectures; addr and size are 64 bits
 * wide and there is one extra reserved word at the end. */
struct section_64 {
    char     sectname[16];  /* name of the section */
    char     segname[16];   /* segment the section goes in */
    uint64_t addr;          /* memory address of the section */
    uint64_t size;          /* size of the section, in bytes */
    uint32_t offset;        /* file offset of the section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of the relocation entries */
    uint32_t nreloc;        /* how many relocation entries there are */
    uint32_t flags;         /* flags (section type and attributes) */
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
    uint32_t reserved3;     /* reserved */
};

解释下section结构部分信息
sectname 块名 例__text,__cstring
segname 段名 例__TEXT
addr 块对应数据的虚拟地址 对于X64可执行文件的__TEXT段,该值一般为offset+0x100000000(section对应的数据一般不会紧跟在section结构后面,比如cstring、cfstring等等,这些都是存在程序靠后的地方)
size块对应的数据的大小
offset偏移 该偏移为从文件头开始的偏移
flags 表示这个块存的是什么东西(取值定义应在loader.h的441行至540行附近)
好了总结一下大概的解析流程就是
1.先解析mach_header 先判断magic头 如果是大端的 记得每一个结构都得转成小端
2.load commands从sizeof(mach_header)偏移处开始 根据mach_header.sizeofcmds确定其范围 然后根据mach_header.ncmds逐条读取
3.先使用load command简化结构预读取 判断cmd与cmdsize再使用完整结构读取
4.如果解析segment则根据nsects来读取segment下的section
5.section结构跟随在对应的segment结构后面 但section对应的数据不一定跟随在section结构后面
6.根据section的offset,size,flags来解析对应的数据