這是ELF的layout,所謂的Linking View是指以檔案呈現之ELF(左圖),而Execution View則是指被載入到RAM上執行的ELF(右圖)。ELF header會包含整個檔案的"road map",我們可以利用readelf -h檢視elf header到底包含哪些東西?
由readelf -h可看出ELF header包含了許多資訊,這些資訊都可以由定義在elf.h中的Elf32_Ehdr解讀出來。介紹ELF header之前,先介紹ELF的Data Types,這些資訊也都放置在elf.h檔中。
/* Standard ELF types. */ #include <stdint.h> /* Type for a 16-bit quantity. */ typedef uint16_t Elf32_Half; typedef uint16_t Elf64_Half; /* Types for signed and unsigned 32-bit quantities. */ typedef uint32_t Elf32_Word; typedef int32_t Elf32_Sword; typedef uint32_t Elf64_Word; typedef int32_t Elf64_Sword; /* Types for signed and unsigned 64-bit quantities. */ typedef uint64_t Elf32_Xword; typedef int64_t Elf32_Sxword; typedef uint64_t Elf64_Xword; typedef int64_t Elf64_Sxword; /* Type of addresses. */ typedef uint32_t Elf32_Addr; typedef uint64_t Elf64_Addr; /* Type of file offsets. */ typedef uint32_t Elf32_Off; typedef uint64_t Elf64_Off; /* Type for section indices, which are 16-bit quantities. */ typedef uint16_t Elf32_Section; typedef uint16_t Elf64_Section; /* Type for version symbol information. */ typedef Elf32_Half Elf32_Versym; typedef Elf64_Half Elf64_Versym;Elf32和Elf64的data type只有在off和addr這兩種type的資料長度有不同,其餘都相同。您可以在下表中發現Elf32_Ehdr和Elf64_Ehdr的member資料長度只有差e_entry、e_phoff和e_shoff會不相同,其餘都相同,所以,Elf64_Ehdr比Elf32_Ehdr多了12byte。
ELF header如下
/* The ELF file header. This appears at the start of every ELF file. */ #define EI_NIDENT (16) typedef struct { unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ Elf32_Half e_type; /* Object file type */ Elf32_Half e_machine; /* Architecture */ Elf32_Word e_version; /* Object file version */ Elf32_Addr e_entry; /* Entry point virtual address */ Elf32_Off e_phoff; /* Program header table file offset */ Elf32_Off e_shoff; /* Section header table file offset */ Elf32_Word e_flags; /* Processor-specific flags */ Elf32_Half e_ehsize; /* ELF header size in bytes */ Elf32_Half e_phentsize; /* Program header table entry size */ Elf32_Half e_phnum; /* Program header table entry count */ Elf32_Half e_shentsize; /* Section header table entry size */ Elf32_Half e_shnum; /* Section header table entry count */ Elf32_Half e_shstrndx; /* Section header string table index */ } Elf32_Ehdr; typedef struct { unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ Elf64_Half e_type; /* Object file type */ Elf64_Half e_machine; /* Architecture */ Elf64_Word e_version; /* Object file version */ Elf64_Addr e_entry; /* Entry point virtual address */ Elf64_Off e_phoff; /* Program header table file offset */ Elf64_Off e_shoff; /* Section header table file offset */ Elf64_Word e_flags; /* Processor-specific flags */ Elf64_Half e_ehsize; /* ELF header size in bytes */ Elf64_Half e_phentsize; /* Program header table entry size */ Elf64_Half e_phnum; /* Program header table entry count */ Elf64_Half e_shentsize; /* Section header table entry size */ Elf64_Half e_shnum; /* Section header table entry count */ Elf64_Half e_shstrndx; /* Section header string table index */ } Elf64_Ehdr; /* Fields in the e_ident array. The EI_* macros are indices into the * array. The macros under each EI_* macro are the values the byte * may have. 
*/ #define EI_MAG0 0 /* File identification byte 0 index */ #define ELFMAG0 0x7f /* Magic number byte 0 */ #define EI_MAG1 1 /* File identification byte 1 index */ #define ELFMAG1 'E' /* Magic number byte 1 */ #define EI_MAG2 2 /* File identification byte 2 index */ #define ELFMAG2 'L' /* Magic number byte 2 */ #define EI_MAG3 3 /* File identification byte 3 index */ #define ELFMAG3 'F' /* Magic number byte 3 */ /* Conglomeration of the identification bytes, for easy testing as a word. */ #define ELFMAG "\177ELF" #define SELFMAG 4 #define EI_CLASS 4 /* File class byte index */ #define ELFCLASSNONE 0 /* Invalid class */ #define ELFCLASS32 1 /* 32-bit objects */ #define ELFCLASS64 2 /* 64-bit objects */ #define ELFCLASSNUM 3 #define EI_DATA 5 /* Data encoding byte index */ #define ELFDATANONE 0 /* Invalid data encoding */ #define ELFDATA2LSB 1 /* 2's complement, little endian */ #define ELFDATA2MSB 2 /* 2's complement, big endian */ #define ELFDATANUM 3 #define EI_VERSION 6 /* File version byte index */ /* Value must be EV_CURRENT */ #define EI_OSABI 7 /* OS ABI identification */ #define ELFOSABI_NONE 0 /* UNIX System V ABI */ #define ELFOSABI_SYSV 0 /* Alias. */ #define ELFOSABI_HPUX 1 /* HP-UX */ #define ELFOSABI_NETBSD 2 /* NetBSD. */ #define ELFOSABI_LINUX 3 /* Linux. */ #define ELFOSABI_SOLARIS 6 /* Sun Solaris. */ #define ELFOSABI_AIX 7 /* IBM AIX. */ #define ELFOSABI_IRIX 8 /* SGI Irix. */ #define ELFOSABI_FREEBSD 9 /* FreeBSD. */ #define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */ #define ELFOSABI_MODESTO 11 /* Novell Modesto. */ #define ELFOSABI_OPENBSD 12 /* OpenBSD. */ #define ELFOSABI_ARM_AEABI 64 /* ARM EABI */ #define ELFOSABI_ARM 97 /* ARM */ #define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ #define EI_ABIVERSION 8 /* ABI version */ #define EI_PAD 9 /* Byte index of padding bytes */
資料結構大致介紹完畢,接著就可以透過實作readelf -h的程式來了解ELF header了,首先,所有的ELF開頭都會有16 bytes的ELF Identification,也是本章節所要介紹的部份。
EI_MAG0 ~ EI_MAG3
所有的ELF前面4byte為magic number,其內容為{0x7f, 'E', 'L', 'F'},用以判斷是否為ELF檔。static int elf_header(int fd) { int sz; unsigned char e_ident[EI_NIDENT]; sz = read(fd, e_ident, sizeof(e_ident)); if (sz < sizeof(e_ident)) { fprintf(stderr, "invalid elf file\n"); return -1; } // 判斷是否為ELF檔 if (memcmp(ELFMAG, e_ident, SELFMAG)) { fprintf(stderr, "invalid elf file\n"); return -1; } elf_header_magic(e_ident); } /** * 印出ident(前面16byte) */ static int elf_header_magic(unsigned char *c) { int i; printf("%-10s", "Magic: "); for (i = 0; i < EI_NIDENT; i++) { printf("%02X ", c[i]); } printf("\n"); return 0; }
EI_CLASS
EI_CLASS這個byte是用來判斷是32-bit或是64-bit的ELF檔,根據不同的Class就要選擇使用Elf32_Ehdr或是Elf64_Ehdr判讀後面的資料。
/**
 * Report the file class (32-bit or 64-bit objects).
 *
 * The "Class: " label always goes to stdout first. A recognised class is
 * then described on stdout; ELFCLASSNONE and any other value produce a
 * message on stderr instead.
 *
 * @param c  the class byte of the identification array
 * @return   0 for ELFCLASS32 / ELFCLASS64, -1 otherwise
 */
static int elf_header_class(unsigned char c)
{
    const char *text;

    printf("%-36s", "Class: ");

    if (c == ELFCLASS32) {
        text = "32-bit object\n";
    } else if (c == ELFCLASS64) {
        text = "64-bit object\n";
    } else {
        /* The explicit "none" value gets its own wording. */
        fputs(c == ELFCLASSNONE ? "Invalid class\n" : "unknow class\n",
              stderr);
        return -1;
    }

    fputs(text, stdout);
    return 0;
}
EI_DATA
EI_DATA這個byte是用來判斷ELF檔是LSB(Little-endian)還是MSB(Big-endian)。
/**
 * Report the data encoding (little endian or big endian).
 *
 * The "Data: " label always goes to stdout first. A recognised encoding
 * is then described on stdout; ELFDATANONE and any other value produce a
 * message on stderr instead.
 *
 * @param c  the data-encoding byte of the identification array
 * @return   0 for ELFDATA2LSB / ELFDATA2MSB, -1 otherwise
 */
static int elf_header_data(unsigned char c)
{
    const char *text;

    printf("%-36s", "Data: ");

    if (c == ELFDATA2LSB) {
        text = "2's complement, little endian\n";
    } else if (c == ELFDATA2MSB) {
        text = "2's complement, big endian\n";
    } else {
        /* The explicit "none" value gets its own wording. */
        fputs(c == ELFDATANONE ? "Invalid data encoding\n" : "unknow data\n",
              stderr);
        return -1;
    }

    fputs(text, stdout);
    return 0;
}
EI_VERSION
EI_VERSION這個byte是指出這個ELF檔的ELF header的版本是多少?目前這個值必須是EV_CURRENT。
/**
 * Report the ELF header version byte.
 *
 * Prints the "Version: " label, a verdict and the numeric value in
 * parentheses on stdout. Only EV_CURRENT is described as current;
 * EV_NONE and every other value are described as invalid.
 *
 * @param c  the version byte of the identification array
 * @return   always 0, including for an invalid version
 */
static int elf_header_version(unsigned char c)
{
    const char *verdict =
        (c == EV_CURRENT) ? "Current version" : "Invalid ELF version";

    printf("%-36s", "Version: ");
    printf("%s(%d)\n", verdict, c);
    return 0;
}
EI_OSABI
EI_OSABI這個byte是指出這個ELF檔會在那個OS上運行。static int elf_header_osabi(unsigned char c) { printf("%-36s", "OS/ABI: "); switch(c) { case ELFOSABI_SYSV: printf("UNIX System V ABI"); break; case ELFOSABI_HPUX: printf("HP-UX"); break; case ELFOSABI_NETBSD: printf("NetBSD."); break; case ELFOSABI_LINUX: printf("Linux."); break; case ELFOSABI_SOLARIS: printf("Sun Solaris."); break; case ELFOSABI_AIX: printf("IBM AIX."); break; case ELFOSABI_IRIX: printf("SGI Irix."); break; case ELFOSABI_FREEBSD: printf("FreeBSD."); break; case ELFOSABI_TRU64: printf("Compaq TRU64 UNIX."); break; case ELFOSABI_MODESTO: printf("Novell Modesto."); break; case ELFOSABI_OPENBSD: printf("OpenBSD."); break; case ELFOSABI_ARM_AEABI: printf("ARM EABI"); break; case ELFOSABI_ARM: printf("ARM"); break; case ELFOSABI_STANDALONE: printf("Standalone (embedded) application"); break; default: fprintf(stderr, "unknow osabi\n"); return -1; } printf("(%d)\n", c); return 0; }
EI_ABIVERSION
EI_ABIVERSION這個byte是指出這個ELF檔會在哪個ABI版本上運行。一個OS上可能有多個ABI的版本在運行,如SYSV至少就有SVR、Solaris、SCO等ABI。0代表不指定(unspecified)。static int elf_header_abi_version(unsigned char c) { printf("%-36s%d\n", "ABI Version: ", c); return 0; }
EI_PAD
從EI_PAD這個byte開始,一直到e_ident結尾都是padding。到目前為止,僅解釋了ELF header中的e_ident,剩下的部份會在後面繼續探討與研究。