|
|
| Next: [PATCH] kbuild fixes |
| Author |
Message |
Eric W. Biederman External

Since: May 19, 2006 Posts: 1134
|
Posted: Tue Aug 01, 2006 1:10 pm Post subject: [RFC] ELF Relocatable x86 and x86_64 bzImages Archived from groups: linux>kernel (more info?) |
|
|
The problem:
We can't always run the kernel at 1MB or 2MB, and so people who need
different addresses must build multiple kernels. The bzImage format
can't even represent loading a kernel at other than it's default address.
With kexec on panic now starting to be used by distros having a kernel
not running at the default load address is starting to become common.
The goal of this patch series is to build kernels that are relocatable
at run time, and to extend the bzImage format to make it capable of
expressing a relocatable kernel.
In extending the bzImage format I am replacing the existing unused bootsector
with an ELF header. To express what is going on the ELF header will
have type ET_DYN. Just like the kernel loading an ET_DYN executable
bootloaders are not expected to process relocations. But the executable
may be shifted in the address space so long as it's alignment requirements
are met.
The x86_64 kernel is simply built to live at a fixed virtual address
and the boot page tables are relocated. The i386 kernel is built
to process relocations generated with --embedded-relocs (after vmlinux.lds.S)
has been fixed up to sort out static and dynamic relocations.
Currently there are 33 patches in my tree to do this.
The weirdest symptom I have had so far is that page faults did not
trigger the early exception handler on x86_64 (instead I got a reboot).
There is one outstanding issue where I am probably requiring too much alignment
on the arch/i386 kernel.
Can anyone find anything else?
Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.RemoveThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:10 pm Post subject: [PATCH 30/33] x86_64: Remove CONFIG_PHYSICAL_START [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
I am about to add relocatable kernel support which has essentially
no cost so there is no point in retaining CONFIG_PHYSICAL_START
and retaining CONFIG_PHYSICAL_START makes implementation of and
testing of a relocatable kernel more difficult.
Signed-off-by: Eric W. Biederman <ebiederm RemoveThis @xmission.com>
---
arch/x86_64/Kconfig | 19 -------------------
arch/x86_64/boot/compressed/head.S | 6 +++---
arch/x86_64/boot/compressed/misc.c | 6 +++---
arch/x86_64/defconfig | 1 -
arch/x86_64/kernel/vmlinux.lds.S | 2 +-
arch/x86_64/mm/fault.c | 4 ++--
include/asm-x86_64/page.h | 2 --
7 files changed, 9 insertions(+), 31 deletions(-)
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 28df7d8..763b25b 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -486,25 +486,6 @@ config CRASH_DUMP
help
Generate crash dump after being started by kexec.
-config PHYSICAL_START
- hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
- default "0x1000000" if CRASH_DUMP
- default "0x200000"
- help
- This gives the physical address where the kernel is loaded. Normally
- for regular kernels this value is 0x200000 (2MB). But in the case
- of kexec on panic the fail safe kernel needs to run at a different
- address than the panic-ed kernel. This option is used to set the load
- address for kernels used to capture crash dump on being kexec'ed
- after panic. The default value for crash dump kernels is
- 0x1000000 (16MB). This can also be set based on the "X" value as
- specified in the "crashkernel=YM@XM" command line boot parameter
- passed to the panic-ed kernel. Typically this parameter is set as
- crashkernel=64M@16M. Please take a look at
- Documentation/kdump/kdump.txt for more details about crash dumps.
-
- Don't change this unless you know what you are doing.
-
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565..cf55d09 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -76,7 +76,7 @@ startup_32:
jnz 3f
addl $8,%esp
xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
+ ljmp $(__KERNEL_CS), $0x200000
/*
* We come here, if we were loaded high.
@@ -102,7 +102,7 @@ startup_32:
popl %ecx # lcount
popl %edx # high_buffer_start
popl %eax # hcount
- movl $__PHYSICAL_START,%edi
+ movl $0x200000,%edi
cli # make sure we don't get interrupted
ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
@@ -127,7 +127,7 @@ move_routine_start:
movsl
movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
+ ljmp $(__KERNEL_CS), $0x200000
move_routine_end:
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e..259bb05 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -288,7 +288,7 @@ #ifdef STANDARD_MEMORY_BIOS_CALL
#else
if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)0x200000;
free_mem_end_ptr = (long)real_mode;
}
@@ -311,8 +311,8 @@ #endif
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
high_loaded = 1;
free_mem_end_ptr = (long)high_buffer_start;
- if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
+ if ( (0x200000 + low_buffer_size) > ((ulg)high_buffer_start)) {
+ high_buffer_start = (uch *)(0x200000 + low_buffer_size);
mv->hcount = 0; /* say: we need not to move high_buffer */
}
else mv->hcount = -1;
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 840d5d9..06cf378 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -158,7 +158,6 @@ CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31..741487b 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -15,7 +15,7 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
SECTIONS
{
- . = __START_KERNEL;
+ . = __START_KERNEL_map + 0x200000;
phys_startup_64 = startup_64 - LOAD_OFFSET;
_text = .; /* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index ac8ea66..26d315b 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -650,9 +650,9 @@ void vmalloc_sync_all(void)
start = address + PGDIR_SIZE;
}
/* Check that there is no need to do the same for the modules area. */
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL_map));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
- (__START_KERNEL & PGDIR_MASK)));
+ (__START_KERNEL_map & PGDIR_MASK)));
}
static int __init enable_pagefaulttrace(char *str)
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 37f95ca..deed41a 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -75,8 +75,6 @@ #define __pgprot(x) ((pgprot_t) { (x) }
#endif /* !__ASSEMBLY__ */
-#define __PHYSICAL_START _AC(CONFIG_PHYSICAL_START,UL)
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
#define __START_KERNEL_map _AC(0xffffffff80000000,UL)
#define __PAGE_OFFSET _AC(0xffff810000000000,UL)
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo RemoveThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:10 pm Post subject: [PATCH 32/33] x86_64: Relocatable kernel support [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
This patch modifies the x86_64 kernel so that it can be loaded and run
at any 2M aligned address, below 512G. The technique used is to
compile the decompressor with -fPIC and modify it so the decompressor
is fully relocatable. For the main kernel the page tables are
modified so the kernel remains at the same virtual address. In
addition a variable phys_base is kept that holds the physical address
the kernel is loaded at. __pa_symbol is modified to add that when
we take the address of a kernel symbol.
When loaded with a normal bootloader the decompressor will decompress
the kernel to 2M and it will run there. This both ensures the
relocation code is always working, and makes it easier to use 2M
pages for the kernel and the cpu.
Signed-off-by: Eric W. Biederman <ebiederm.DeleteThis@xmission.com>
---
arch/x86_64/boot/compressed/Makefile | 13 +
arch/x86_64/boot/compressed/head.S | 301 ++++++++++++++++++++++---------
arch/x86_64/boot/compressed/misc.c | 248 +++++++++++++-------------
arch/x86_64/boot/compressed/vmlinux.lds | 44 +++++
arch/x86_64/boot/compressed/vmlinux.scr | 5 -
arch/x86_64/kernel/head.S | 226 +++++++++++++----------
arch/x86_64/kernel/vmlinux.lds.S | 2
include/asm-x86_64/page.h | 6 -
8 files changed, 527 insertions(+), 318 deletions(-)
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index 8987c97..3dda50c 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -7,16 +7,15 @@ # Note all the files here are compiled/l
#
targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
-EXTRA_AFLAGS := -traditional -m32
+EXTRA_AFLAGS := -traditional
# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
-# -m32
-CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fno-builtin
-LDFLAGS := -m elf_i386
+CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
+LDFLAGS := -m elf_x86_64
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386
+LDFLAGS_vmlinux := -T
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
$(call if_changed,ld)
@:
@@ -26,7 +25,7 @@ LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
$(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index cf55d09..22c8dc4 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,245 @@
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/msr.h>
+.section ".text.head"
.code32
.globl startup_32
startup_32:
cld
cli
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
-
- lss stack_start,%esp
- xorl %eax,%eax
-1: incl %eax # check that A20 really IS enabled
- movl %eax,0x000000 # loop forever if it isn't
- cmpl %eax,0x100000
- je 1b
+ movl $(__KERNEL_DS), %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x34-0x3f are used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+ leal 0x40(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+ movl %ebp, %ebx
+ addl $(LARGE_PAGE_SIZE -1), %ebx
+ andl $LARGE_PAGE_MASK, %ebx
+
+ /* Replace the compressed data size with the uncompressed size */
+ subl input_len(%ebp), %ebx
+ movl output_len(%ebp), %eax
+ addl %eax, %ebx
+ /* Add 8 bytes for every 32K input block */
+ shrl $12, %eax
+ addl %eax, %ebx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addl $(32768 + 18 + 4095), %ebx
+ andl $~4095, %ebx
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Prepare for entering 64 bit mode
*/
- pushl $0
- popfl
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal gdt(%ebp), %eax
+ movl %eax, gdt+2(%ebp)
+ lgdt gdt(%ebp)
+
+ /* Enable PAE mode */
+ xorl %eax, %eax
+ orl $(1 << 5), %eax
+ movl %eax, %cr4
+
/*
- * Clear BSS
+ * Build early 4G boot pagetable
*/
- xorl %eax,%eax
- movl $_edata,%edi
- movl $_end,%ecx
- subl %edi,%ecx
- cld
- rep
- stosb
+ /* Initialize Page tables to 0*/
+ leal pgtable(%ebx), %edi
+ xorl %eax, %eax
+ movl $((4096*6)/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal pgtable + 0(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+
+ /* Build Level 3 */
+ leal pgtable + 0x1000(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal pgtable + 0x2000(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* Setup for the jump to 64bit mode
+ *
+ * When the jump is performend we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ pushl $__KERNEL_CS
+ leal startup_64(%ebp), %eax
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $0x80000001, %eax /* Enable Paging and Protected mode */
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+
+ /* Be careful here startup_64 needs to be at a predictable
+ * address so I can export it in an ELF header. Bootloaders
+ * should look at the ELF header to find this address, as
+ * it may change in the future.
+ */
+ .code64
+ .org 0x100
+ENTRY(startup_64)
+ /* We come here either from startup_32 or directly from a
+ * 64bit bootloader. If we come here from a bootloader we depend on
+ * an identity mapped page table being provied that maps our
+ * entire text+data+bss and hopefully all of memory.
+ */
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ /* Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary.
+ */
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+ addq $(LARGE_PAGE_SIZE - 1), %rbp
+ andq $LARGE_PAGE_MASK, %rbp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+ /* Start with the delta to where the kernel will run at. */
+ movq %rbp, %rbx
+
+ /* Replace the compressed data size with the uncompressed size */
+ movl input_len(%rip), %eax
+ subq %rax, %rbx
+ movl output_len(%rip), %eax
+ addq %rax, %rbx
+ /* Add 8 bytes for every 32K input block */
+ shrq $12, %rax
+ addq %rax, %rbx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addq $(32768 + 18 + 4095), %rbx
+ andq $~4095, %rbx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ leaq _end(%rip), %r8
+ leaq _end(%rbx), %r9
+ movq $_end /* - $startup_32 */, %rcx
+1: subq $8, %r8
+ subq $8, %r9
+ movq 0(%r , %rax
+ movq %rax, 0(%r9)
+ subq $8, %rcx
+ jnz 1b
+
/*
- * Do the decompression, and jump to the new kernel..
+ * Jump to the relocated address.
*/
- subl $16,%esp # place for structure on the stack
- movl %esp,%eax
- pushl %esi # real mode pointer as second arg
- pushl %eax # address of structure as first arg
- call decompress_kernel
- orl %eax,%eax
- jnz 3f
- addl $8,%esp
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $0x200000
+ leaq relocated(%rbx), %rax
+ jmp *%rax
+
+.section ".text"
+relocated:
/*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
+ * Clear BSS
*/
-3:
- movl %esi,%ebx
- movl $move_routine_start,%esi
- movl $0x1000,%edi
- movl $move_routine_end,%ecx
- subl %esi,%ecx
- addl $3,%ecx
- shrl $2,%ecx
+ xorq %rax, %rax
+ movq $_edata, %rdi
+ movq $_end, %rcx
+ subq %rdi, %rcx
cld
rep
- movsl
-
- popl %esi # discard the address
- addl $4,%esp # real mode pointer
- popl %esi # low_buffer_start
- popl %ecx # lcount
- popl %edx # high_buffer_start
- popl %eax # hcount
- movl $0x200000,%edi
- cli # make sure we don't get interrupted
- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+ stosb
+
+ /* Setup the stack */
+ leaq user_stack_end(%rip), %rsp
+
+ /* zero EFLAGS after setting rsp */
+ pushq $0
+ popfq
/*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Do the decompression, and jump to the new kernel..
*/
-move_routine_start:
- movl %ecx,%ebp
- shrl $2,%ecx
- rep
- movsl
- movl %ebp,%ecx
- andl $3,%ecx
- rep
- movsb
- movl %edx,%esi
- movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
- addl $3,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %ebx,%esi # Restore setup pointer
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $0x200000
-move_routine_end:
+ pushq %rsi # Save the real mode argument
+ movq %rsi, %rdi # real mode address
+ leaq _heap(%rip), %rsi # _heap
+ leaq input_data(%rip), %rdx # input_data
+ movl input_len(%rip), %eax
+ movq %rax, %rcx # input_len
+ movq %rbp, %r8 # output
+ call decompress_kernel
+ popq %rsi
+/*
+ * Jump to the decompressed kernel.
+ */
+ jmp *%rbp
-/* Stack for uncompression */
- .align 32
+ .data
+gdt:
+ .word gdt_end - gdt
+ .long gdt
+ .word 0
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+gdt_end:
+ .bss
+/* Stack for uncompression */
+ .balign 4
user_stack:
.fill 4096,4,0
-stack_start:
- .long user_stack+4096
- .word __KERNEL_DS
-
+user_stack_end:
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 2cbd7cb..0e6c4b7 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,12 +9,97 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+
+#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/serial_reg.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/setup.h>
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provable safe in place decompression is hard.
+ * Worst case behaviours need to be analized.
+ * Background information:
+ *
+ * The file layout is:
+ * magic[2]
+ * method[1]
+ * flags[1]
+ * timestamp[4]
+ * extraflags[1]
+ * os[1]
+ * compressed data blocks[N]
+ * crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files divided into blocks
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ * 32 bits length in bytes.
+ *
+ * fixed:
+ * magic fixed tree.
+ * symbols.
+ *
+ * dynamic:
+ * dynamic tree encoding.
+ * symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer. The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts. Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer. A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be boundined by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe. To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient. To avoind problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed. Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it musht live after all
+ * of the data as well. Last I measured the decompressor is about 14K.
+ * 10K of actuall data and 4K of bss.
+ *
+ */
+
/*
* gzip declarations
*/
@@ -30,15 +115,20 @@ typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;
-#define WSIZE 0x8000 /* Window size must be at least 32k, */
- /* and a power of two */
+#define WSIZE 0x80000000 /* Window size must be at least 32k,
+ * and a power of two
+ * We don't actually have a window just
+ * a huge output buffer so I report
+ * a 2G windows size, as that should
+ * always be larger than our output buffer.
+ */
-static uch *inbuf; /* input buffer */
-static uch window[WSIZE]; /* Sliding window buffer */
+static uch *inbuf; /* input buffer */
+static uch *window; /* Sliding window buffer, (and final output buffer) */
-static unsigned insize = 0; /* valid bytes in inbuf */
-static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0; /* bytes in output buffer */
+static unsigned insize; /* valid bytes in inbuf */
+static unsigned inptr; /* index of next byte to be processed in inbuf */
+static unsigned outcnt; /* bytes in output buffer */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -94,8 +184,6 @@ extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
@@ -109,17 +197,10 @@ static char *strstr(const char *haystack
static void putstr(const char *);
static unsigned simple_strtou(const char *cp, char **endp, unsigned base);
-extern int end;
-static long free_mem_ptr = (long)&end;
+static long free_mem_ptr;
static long free_mem_end_ptr;
-#define INPLACE_MOVE_ROUTINE 0x1000
-#define LOW_BUFFER_START 0x2000
-#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+#define HEAP_SIZE 0x6000
static char *vidmem;
static int vidport;
@@ -444,58 +525,31 @@ static char *strstr(const char *haystack
*/
static int fill_inbuf(void)
{
- if (insize != 0) {
- error("ran out of input data");
- }
-
- inbuf = input_data;
- insize = input_len;
- inptr = 1;
- return inbuf[0];
+ error("ran out of input data");
+ return 0;
}
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
*/
-static void flush_window_low(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, *out, ch;
-
- in = window;
- out = &output_data[output_ptr];
- for (n = 0; n < outcnt; n++) {
- ch = *out++ = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- output_ptr += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *output_data++ = *in++;
- if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
static void flush_window(void)
{
- if (high_loaded) flush_window_high();
- else flush_window_low();
+ /* With my window equal to my output buffer
+ * I only need to compute the crc here.
+ */
+ ulg c = crc; /* temporary variable */
+ unsigned n;
+ uch *in, ch;
+
+ in = window;
+ for (n = 0; n < outcnt; n++) {
+ ch = *in++;
+ c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
+ }
+ crc = c;
+ bytes_out += (ulg)outcnt;
+ outcnt = 0;
}
static void error(char *x)
@@ -507,56 +561,6 @@ static void error(char *x)
while(1); /* Halt */
}
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
- output_data = (unsigned char *)0x200000;
- free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
- uch *low_buffer_start; int lcount;
- uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif
- mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
- low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
- ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
- low_buffer_size = low_buffer_end - LOW_BUFFER_START;
- high_loaded = 1;
- free_mem_end_ptr = (long)high_buffer_start;
- if ( (0x200000 + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(0x200000 + low_buffer_size);
- mv->hcount = 0; /* say: we need not to move high_buffer */
- }
- else mv->hcount = -1;
- mv->high_buffer_start = high_buffer_start;
-}
-
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- if (bytes_out > low_buffer_size) {
- mv->lcount = low_buffer_size;
- if (mv->hcount)
- mv->hcount = bytes_out - low_buffer_size;
- } else {
- mv->lcount = bytes_out;
- mv->hcount = 0;
- }
-}
-
static void save_command_line(void)
{
/* Find the command line */
@@ -571,20 +575,28 @@ static void save_command_line(void)
saved_command_line[COMMAND_LINE_SIZE - 1] = '\0';
}
-int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
+ uch *input_data, unsigned long input_len, uch *output)
{
real_mode = rmode;
-
save_command_line();
console_init(saved_command_line);
- if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
- else setup_output_buffer_if_we_run_high(mv);
+ window = output; /* Output buffer (Normally at 1M) */
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + HEAP_SIZE;
+ inbuf = input_data; /* Input buffer */
+ insize = input_len;
+ inptr = 0;
+
+ if ((ulg)output & 0x1fffffUL)
+ error("Destination address not 2M aligned");
+ if ((ulg)output >= 0xffffffffffUL)
+ error("Destination address too large");
makecrc();
putstr(".\nDecompressing Linux...");
gunzip();
putstr("done.\nBooting the kernel.\n");
- if (high_loaded) close_output_buffer_if_we_run_high(mv);
- return high_loaded;
+ return;
}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 0000000..94c13e5
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+SECTIONS
+{
+ /* Be careful parts of head.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0;
+ .text : {
+ _head = . ;
+ *(.text.head)
+ _ehead = . ;
+ *(.text.compressed)
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN( ;
+ _end = . ;
+ . = ALIGN(4096);
+ pgtable = . ;
+ . = . + 4096 * 6;
+ _heap = .;
+ }
+}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d79..48117cf 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
SECTIONS
{
- .data : {
+ .text.compressed : {
input_len = .;
LONG(input_data_end - input_data) input_data = .;
- *(.data)
+ *(.data)
+ output_len = . - 4;
input_data_end = .;
}
}
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index b0b5618..b821d13 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
* Copyright (C) 2000 Pavel Machek <pavel.DeleteThis@suse.cz>
* Copyright (C) 2000 Karsten Keil <kkeil.DeleteThis@suse.de>
* Copyright (C) 2001,2002 Andi Kleen <ak.DeleteThis@suse.de>
+ * Copyright (C) 2005 Eric Biederman <ebiederm.DeleteThis@xmission.com>
*
* $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
*/
@@ -21,94 +22,121 @@ #include <asm/msr.h>
#include <asm/cache.h>
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
+ * because we need identity-mapped pages.
*
*/
.text
.section .bootstrap.text
- .code32
- .globl startup_32
-/* %bx: 1 if coming from smp trampoline on secondary cpu */
-startup_32:
-
+ .code64
+ .globl startup_64
+startup_64:
+
/*
- * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
- * paging disabled and the point of this file is to switch to 64bit
- * long mode with a kernel mapping for kerneland to jump into the
- * kernel virtual addresses.
- * There is no stack until we set one up.
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded an identity mapped page table
+ * for us. These identity mapped page tables map all of the
+ * kernel pages and possibly all of memory.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either directly from a 64bit bootloader, or from
+ * arch/x86_64/boot/compressed/head.S.
+ *
+ * We only come here initially at boot nothing else comes here.
+ *
+ * Since we may be loaded at an address different from what we were
+ * compiled to run at we first fixup the physical addresses in our page
+ * tables and then reload them.
*/
- /* Initialize the %ds segment register */
- movl $__KERNEL_DS,%eax
- movl %eax,%ds
-
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- lgdt pGDT32 - __START_KERNEL_map
-
- /* If the CPU doesn't support CPUID this will double fault.
- * Unfortunately it is hard to check for CPUID without a stack.
+ /* Compute the delta between the address I am compiled to run at and the
+ * address I am actually running at.
*/
-
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe no_long_mode
- /* Check if long mode is implemented */
- mov $0x80000001, %eax
- cpuid
- btl $29, %edx
- jnc no_long_mode
-
- /*
- * Prepare for entering 64bits mode
+ leaq _text(%rip), %rbp
+ subq $_text - __START_KERNEL_map, %rbp
+
+ /* Is the address not 2M aligned? */
+ movq %rbp, %rax
+ andl $~LARGE_PAGE_MASK, %eax
+ testl %eax, %eax
+ jnz bad_address
+
+ /* Is the address too large? */
+ leaq _text(%rip), %rdx
+ movq $PGDIR_SIZE, %rax
+ cmpq %rax, %rdx
+ jae bad_address
+
+ /* Fixup the physical addresses in the page table
*/
-
- /* Enable PAE mode */
- xorl %eax, %eax
- btsl $5, %eax
- movl %eax, %cr4
-
- /* Setup early boot stage 4 level pagetables */
- movl $(init_level4_pgt - __START_KERNEL_map), %eax
- movl %eax, %cr3
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
-
- /* Enable Long Mode */
- btsl $_EFER_LME, %eax
-
- /* Make changes effective */
- wrmsr
-
- xorl %eax, %eax
- btsl $31, %eax /* Enable paging and in turn activate Long Mode */
- btsl $0, %eax /* Enable protected mode */
- /* Make changes effective */
- movl %eax, %cr0
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ addq %rbp, init_level4_pgt + 0(%rip)
+ addq %rbp, init_level4_pgt + (258* (%rip)
+ addq %rbp, init_level4_pgt + (511* (%rip)
+
+ addq %rbp, level3_ident_pgt + 0(%rip)
+ addq %rbp, level3_kernel_pgt + (510* (%rip)
+
+ /* Add an Identity mapping if I am above 1G */
+ leaq _text(%rip), %rdi
+ andq $LARGE_PAGE_MASK, %rdi
+
+ movq %rdi, %rax
+ shrq $PUD_SHIFT, %rax
+ andq $(PTRS_PER_PUD - 1), %rax
+ jz ident_complete
+
+ leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
+ leaq level3_ident_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax,
+
+ movq %rdi, %rax
+ shrq $PMD_SHIFT, %rax
+ andq $(PTRS_PER_PMD - 1), %rax
+ leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+ leaq level2_spare_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax,
+ident_complete:
+
+ /* Fixup the kernel text+data virtual addresses
*/
- ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map)
+ leaq level2_kernel_pgt(%rip), %rdi
+ leaq 4096(%rdi), %r8
+ /* See if it is a valid page table entry */
+1: testq $1, 0(%rdi)
+ jz 2f
+ addq %rbp, 0(%rdi)
+ /* Go to the next page */
+2: addq $8, %rdi
+ cmp %r8, %rdi
+ jne 1b
+
+ /* Fixup phys_base */
+ addq %rbp, phys_base(%rip)
+
+#ifdef CONFIG_SMP
+ addq %rbp, trampoline_level4_pgt + 0(%rip)
+ addq %rbp, trampoline_level4_pgt + (511* (%rip)
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+ addq %rbp, wakeup_level4_pgt + 0(%rip)
+ addq %rbp, wakeup_level4_pgt + (511* (%rip)
+#endif
- .code64
- .org 0x100
- .globl startup_64
-startup_64:
ENTRY(secondary_startup_64)
- /* We come here either from startup_32
- * or directly from a 64bit bootloader.
- * Since we may have come directly from a bootloader we
- * reload the page tables here.
- */
+ /*
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded a mapped page table.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either from startup_64 (using physical addresses)
+ * or from trampoline.S (using virtual addresses).
+ *
+ * Using virtual addresses from trampoline.S removes the need
+ * to have any identity mapped pages in the kernel page table
+ * after the boot processor executes this code.
+ */
/* Enable PAE mode and PGE */
xorq %rax, %rax
@@ -118,8 +146,14 @@ ENTRY(secondary_startup_64)
/* Setup early boot stage 4 level pagetables. */
movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ addq phys_base(%rip), %rax
movq %rax, %cr3
+ /* Ensure I am executing from virtual addresses */
+ movq $1f, %rax
+ jmp *%rax
+1:
+
/* Check if nx is implemented */
movl $0x80000001, %eax
cpuid
@@ -128,17 +162,11 @@ ENTRY(secondary_startup_64)
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
-
- /* Enable System Call */
- btsl $_EFER_SCE, %eax
-
- /* No Execute supported? */
- btl $20,%edi
+ btsl $_EFER_SCE, %eax /* Enable System Call */
+ btl $20,%edi /* No Execute supported? */
jnc 1f
btsl $_EFER_NX, %eax
-1:
- /* Make changes effective */
- wrmsr
+1: wrmsr /* Make changes effective */
/* Setup cr0 */
#define CR0_PM 1 /* protected mode */
@@ -165,7 +193,7 @@ #define CR0_PAGING (1<<31)
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt cpu_gdt_descr
+ lgdt cpu_gdt_descr(%rip)
/*
* Setup up a dummy PDA. this is just for some early bootup code
@@ -204,6 +232,9 @@ initial_code:
init_rsp:
.quad init_thread_union+THREAD_SIZE-8
+bad_address:
+ jmp bad_address
+
ENTRY(early_idt_handler)
cmpl $2,early_recursion_flag(%rip)
jz 1f
@@ -232,23 +263,7 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
-.code32
-ENTRY(no_long_mode)
- /* This isn't an x86-64 CPU so hang */
-1:
- jmp 1b
-
-.org 0xf00
- .globl pGDT32
-pGDT32:
- .word gdt_end-cpu_gdt_table-1
- .long cpu_gdt_table-__START_KERNEL_map
-
-.org 0xf10
-ljumpvector:
- .long startup_64-__START_KERNEL_map
- .word __KERNEL_CS
-
+.balign PAGE_SIZE
ENTRY(stext)
ENTRY(_stext)
@@ -303,6 +318,9 @@ NEXT_PAGE(level2_kernel_pgt)
/* Module mapping starts here */
.fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+NEXT_PAGE(level2_spare_pgt)
+ .fill 512,8,0
+
#undef PMDS
#undef NEXT_PAGE
@@ -325,6 +343,10 @@ #ifdef CONFIG_SMP
.endr
#endif
+ENTRY(phys_base)
+ /* This must match the first entry in level2_kernel_pgt */
+ .quad 0x0000000000000000
+
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 741487b..456fe8e 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -15,7 +15,7 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
SECTIONS
{
- . = __START_KERNEL_map + 0x200000;
+ . = __START_KERNEL_map;
phys_startup_64 = startup_64 - LOAD_OFFSET;
_text = .; /* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index deed41a..d125c09 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -61,6 +61,8 @@ #define PTE_MASK PHYSICAL_PAGE_MASK
typedef struct { unsigned long pgprot; } pgprot_t;
+extern unsigned long phys_base;
+
#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
@@ -99,14 +101,14 @@ #endif /* __ASSEMBLY__ */
#define PAGE_OFFSET __PAGE_OFFSET
/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
- Otherwise you risk miscompilation. */
+ Otherwise you risk miscompilation. */
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
/* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
#define __pa_symbol(x) \
({unsigned long v; \
asm("" : "=r" (v) : "0" (x)); \
- (v - __START_KERNEL_map); })
+ ((v - __START_KERNEL_map) + phys_base); })
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#ifdef CONFIG_FLATMEM
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.DeleteThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:10 pm Post subject: [PATCH 3/33] i386 setup.c: Reserve kernel memory starting from _text [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Currently when we are reserving the memory the kernel text
resides in we start at __PHYSICAL_START which happens to be
correct but not very obvious. In addition when we start relocating
the kernel __PHYSICAL_START is the wrong value, as it is an
absolute symbol that does not get relocated.
By starting the reservation at __pa_symbol(_text)
the code is clearer and will be correct when relocated.
Signed-off-by: Eric W. Biederman <ebiederm.TakeThisOut@xmission.com>
---
arch/i386/kernel/setup.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f168220..f3a451a 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1219,8 +1219,8 @@ void __init setup_bootmem_allocator(void
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
+ reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.TakeThisOut@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:10 pm Post subject: [PATCH 10/33] i386: Relocatable kernel support. [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
This patch modifies the x86 kernel so that if CONFIG_RELOCATABLE is
selected it will be able to be loaded at any 4K aligned address below
1G. The technique used is to compile the decompressor with -fPIC and
modify it so the decompressor is fully relocatable. For the main
kernel relocations are generated. Resulting in a kernel that is relocatable
with no runtime overhead and no need to modify the source code.
A reserved 32bit word in the parameters has been assigned
to serve as a stack so we figure out where are running.
Signed-off-by: Eric W. Biederman <ebiederm.RemoveThis@xmission.com>
---
arch/i386/Kconfig | 12 +
arch/i386/Makefile | 2
arch/i386/boot/compressed/Makefile | 22 +
arch/i386/boot/compressed/head.S | 184 +++++++----
arch/i386/boot/compressed/misc.c | 263 ++++++++-------
arch/i386/boot/compressed/relocs.c | 563 +++++++++++++++++++++++++++++++++
arch/i386/boot/compressed/vmlinux.lds | 40 ++
arch/i386/boot/compressed/vmlinux.scr | 3
arch/i386/boot/setup.S | 29 +-
include/linux/screen_info.h | 3
10 files changed, 910 insertions(+), 211 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 062fa01..a0707d7 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -761,6 +761,18 @@ config CRASH_DUMP
help
Generate crash dump after being started by kexec.
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ help
+ This build a kernel image that retains relocation information
+ so it can be loaded someplace besides the default 1MB.
+ The relocations tend to the kernel binary about 10% larger,
+ but are discarded at runtime.
+
+ One use is for the kexec on panic case where the recovery kernel
+ must live at a different physical address than the primary
+ kernel.
+
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 3e4adb1..e9d6eac 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -26,7 +26,7 @@ endif
LDFLAGS := -m elf_i386
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-LDFLAGS_vmlinux :=
+LDFLAGS_vmlinux := --emit-relocs
CHECKFLAGS += -D__i386__
CFLAGS += -pipe -msoft-float
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
index 258ea95..1c486d1 100644
--- a/arch/i386/boot/compressed/Makefile
+++ b/arch/i386/boot/compressed/Makefile
@@ -7,19 +7,33 @@ #
targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
EXTRA_AFLAGS := -traditional
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
+LDFLAGS_vmlinux := -T
+CFLAGS_misc.o += -fPIC
+hostprogs-y := relocs
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
$(call if_changed,ld)
@:
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(obj)/relocs $< > $@
+$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+ $(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
+quiet_cmd_relocbin = BUILD $@
+ cmd_relocbin = cat $(filter-out FORCE,$^) > $@
+$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,relocbin)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
$(call if_changed,gzip)
LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
$(call if_changed,ld)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index 8f28ecd..418e425 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -26,9 +26,11 @@
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/page.h>
+.section ".text.head"
.globl startup_32
-
+
startup_32:
cld
cli
@@ -37,93 +39,141 @@ startup_32:
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
+ movl %eax,%ss
- lss stack_start,%esp
- xorl %eax,%eax
-1: incl %eax # check that A20 really IS enabled
- movl %eax,0x000000 # loop forever if it isn't
- cmpl %eax,0x100000
- je 1b
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x34-0x3f are used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+ leal 0x40(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+ /* Start with the delta to where the kernel will run at. If we are
+ * a relocatable kernel this is the delta to our load address otherwise
+ * this is the delta to CONFIG_PHYSICAL start.
+ */
+#ifdef CONFIG_RELOCTABLE
+ movl %ebp, %ebx
+#else
+ movl $(CONFIG_PHYSICAL_START - startup_32), %ebx
+#endif
+
+ /* Replace the compressed data size with the uncompressed size */
+ subl input_len(%ebp), %ebx
+ movl output_len(%ebp), %eax
+ addl %eax, %ebx
+ /* Add 8 bytes for every 32K input block */
+ shrl $12, %eax
+ addl %eax, %ebx
+ /* Add 32K + 18 bytes of extra slack */
+ addl $(32768 + 1 , %ebx
+ /* Align on a 4K boundary */
+ addl $4095, %ebx
+ andl $~4095, %ebx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ pushl %esi
+ leal _end(%ebp), %esi
+ leal _end(%ebx), %edi
+ movl $(_end - startup_32), %ecx
+ std
+ rep
+ movsb
+ cld
+ popl %esi
+
+/* Compute the kernel start address.
+ */
+#ifdef CONFIG_RELOCATABLE
+ leal startup_32(%ebp), %ebp
+#else
+ movl $CONFIG_PHYSICAL_START, %ebp
+#endif
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Jump to the relocated address.
*/
- pushl $0
- popfl
+ leal relocated(%ebx), %eax
+ jmp *%eax
+.section ".text"
+relocated:
+
/*
* Clear BSS
*/
xorl %eax,%eax
- movl $_edata,%edi
- movl $_end,%ecx
+ leal _edata(%ebx),%edi
+ leal _end(%ebx), %ecx
subl %edi,%ecx
cld
rep
stosb
+
+/*
+ * Setup the stack for the decompressor
+ */
+ leal stack_end(%ebx), %esp
+
/*
* Do the decompression, and jump to the new kernel..
*/
- subl $16,%esp # place for structure on the stack
- movl %esp,%eax
+ movl output_len(%ebx), %eax
+ pushl %eax
+ pushl %ebp # output address
+ movl input_len(%ebx), %eax
+ pushl %eax # input_len
+ leal input_data(%ebx), %eax
+ pushl %eax # input_data
+ leal _end(%ebx), %eax
+ pushl %eax # end of the image as third argument
pushl %esi # real mode pointer as second arg
- pushl %eax # address of structure as first arg
call decompress_kernel
- orl %eax,%eax
- jnz 3f
- popl %esi # discard address
- popl %esi # real mode pointer
- xorl %ebx,%ebx
- ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START
+ addl $20, %esp
+ popl %ecx
+
+#if CONFIG_RELOCATABLE
+/* Find the address of the relocations.
+ */
+ movl %ebp, %edi
+ addl %ecx, %edi
+
+/* Calculate the delta between where vmlinux was compiled to run
+ * and where it was actually loaded.
+ */
+ movl %ebp, %ebx
+ subl $CONFIG_PHYSICAL_START, %ebx
/*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
+ * Process relocations.
*/
-3:
- movl $move_routine_start,%esi
- movl $0x1000,%edi
- movl $move_routine_end,%ecx
- subl %esi,%ecx
- addl $3,%ecx
- shrl $2,%ecx
- cld
- rep
- movsl
-
- popl %esi # discard the address
- popl %ebx # real mode pointer
- popl %esi # low_buffer_start
- popl %ecx # lcount
- popl %edx # high_buffer_start
- popl %eax # hcount
- movl $CONFIG_PHYSICAL_START,%edi
- cli # make sure we don't get interrupted
- ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
+
+1: subl $4, %edi
+ movl 0(%edi), %ecx
+ testl %ecx, %ecx
+ jz 2f
+ addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
+ jmp 1b
+2:
+#endif
/*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Jump to the decompressed kernel.
*/
-move_routine_start:
- movl %ecx,%ebp
- shrl $2,%ecx
- rep
- movsl
- movl %ebp,%ecx
- andl $3,%ecx
- rep
- movsb
- movl %edx,%esi
- movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
- addl $3,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
- ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START
-move_routine_end:
+ jmp *%ebp
+
+.bss
+.balign 4
+stack:
+ .fill 4096, 1, 0
+stack_end:
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index fcaa9f0..809eb93 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -17,6 +17,88 @@ #include <linux/serial_reg.h>
#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/setup.h>
+#include <asm/page.h>
+
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provable safe in place decompression is hard.
+ * Worst case behaviours need to be analized.
+ * Background information:
+ *
+ * The file layout is:
+ * magic[2]
+ * method[1]
+ * flags[1]
+ * timestamp[4]
+ * extraflags[1]
+ * os[1]
+ * compressed data blocks[N]
+ * crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files divided into blocks
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ * 32 bits length in bytes.
+ *
+ * fixed:
+ * magic fixed tree.
+ * symbols.
+ *
+ * dynamic:
+ * dynamic tree encoding.
+ * symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer. The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts. Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer. A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be boundined by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe. To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient. To avoind problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed. Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it musht live after all
+ * of the data as well. Last I measured the decompressor is about 14K.
+ * 10K of actuall data and 4K of bss.
+ *
+ */
/*
* gzip declarations
@@ -35,15 +117,20 @@ typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;
-#define WSIZE 0x8000 /* Window size must be at least 32k, */
- /* and a power of two */
+#define WSIZE 0x80000000 /* Window size must be at least 32k,
+ * and a power of two
+ * We don't actually have a window just
+ * a huge output buffer so I report
+ * a 2G windows size, as that should
+ * always be larger than our output buffer.
+ */
-static uch *inbuf; /* input buffer */
-static uch window[WSIZE]; /* Sliding window buffer */
+static uch *inbuf; /* input buffer */
+static uch *window; /* Sliding window buffer, (and final output buffer) */
-static unsigned insize = 0; /* valid bytes in inbuf */
-static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0; /* bytes in output buffer */
+static unsigned insize; /* valid bytes in inbuf */
+static unsigned inptr; /* index of next byte to be processed in inbuf */
+static unsigned outcnt; /* bytes in output buffer */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -99,8 +186,6 @@ extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
@@ -112,17 +197,10 @@ static int memcmp(const void *s1, const
static void putstr(const char *);
static unsigned simple_strtou(const char *cp,char **endp,unsigned base);
-extern int end;
-static long free_mem_ptr = (long)&end;
-static long free_mem_end_ptr;
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
-#define INPLACE_MOVE_ROUTINE 0x1000
-#define LOW_BUFFER_START 0x2000
-#define LOW_BUFFER_MAX 0x90000
#define HEAP_SIZE 0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
static char *vidmem;
static int vidport;
@@ -174,9 +252,9 @@ static void gzip_mark(void **ptr)
static void gzip_release(void **ptr)
{
- free_mem_ptr = (long) *ptr;
+ free_mem_ptr = (unsigned long) *ptr;
}
-
+
/* The early video console */
static void vid_scroll(void)
{
@@ -203,7 +281,7 @@ static void vid_putstr(const char *s)
y--;
}
} else {
- vidmem [ ( x + cols * y ) * 2 ] = c;
+ vidmem [ ( x + cols * y ) * 2 ] = c;
if ( ++x >= cols ) {
x = 0;
if ( ++y >= lines ) {
@@ -443,58 +521,31 @@ char *strstr(const char *haystack, const
*/
static int fill_inbuf(void)
{
- if (insize != 0) {
- error("ran out of input data");
- }
-
- inbuf = input_data;
- insize = input_len;
- inptr = 1;
- return inbuf[0];
+ error("ran out of input data");
+ return 0;
}
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
*/
-static void flush_window_low(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, *out, ch;
-
- in = window;
- out = &output_data[output_ptr];
- for (n = 0; n < outcnt; n++) {
- ch = *out++ = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- output_ptr += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *output_data++ = *in++;
- if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
static void flush_window(void)
{
- if (high_loaded) flush_window_high();
- else flush_window_low();
+ /* With my window equal to my output buffer
+ * I only need to compute the crc here.
+ */
+ ulg c = crc; /* temporary variable */
+ unsigned n;
+ uch *in, ch;
+
+ in = window;
+ for (n = 0; n < outcnt; n++) {
+ ch = *in++;
+ c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> ;
+ }
+ crc = c;
+ bytes_out += (ulg)outcnt;
+ outcnt = 0;
}
static void error(char *x)
@@ -506,65 +557,6 @@ static void error(char *x)
while(1); /* Halt */
}
-#define STACK_SIZE (4096)
-
-long user_stack [STACK_SIZE];
-
-struct {
- long * a;
- short b;
- } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
-
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
- output_data = (unsigned char *)CONFIG_PHYSICAL_START; /* Normally Points to 1M */
- free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
- uch *low_buffer_start; int lcount;
- uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif
- mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
- low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
- ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
- low_buffer_size = low_buffer_end - LOW_BUFFER_START;
- high_loaded = 1;
- free_mem_end_ptr = (long)high_buffer_start;
- if ( (CONFIG_PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(CONFIG_PHYSICAL_START + low_buffer_size);
- mv->hcount = 0; /* say: we need not to move high_buffer */
- }
- else mv->hcount = -1;
- mv->high_buffer_start = high_buffer_start;
-}
-
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- if (bytes_out > low_buffer_size) {
- mv->lcount = low_buffer_size;
- if (mv->hcount)
- mv->hcount = bytes_out - low_buffer_size;
- } else {
- mv->lcount = bytes_out;
- mv->hcount = 0;
- }
-}
-
static void save_command_line(void)
{
/* Find the command line */
@@ -579,19 +571,32 @@ static void save_command_line(void)
saved_command_line[COMMAND_LINE_SIZE - 1] = '\0';
}
-asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long end,
+ uch *input_data, unsigned long input_len, uch *output)
{
real_mode = rmode;
save_command_line();
console_init(saved_command_line);
- if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
- else setup_output_buffer_if_we_run_high(mv);
+ window = output; /* Output buffer (Normally at 1M) */
+ free_mem_ptr = end; /* Heap */
+ free_mem_end_ptr = end + HEAP_SIZE;
+ inbuf = input_data; /* Input buffer */
+ insize = input_len;
+ inptr = 0;
+
+ if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff)
+ error("Destination address not 4M aligned");
+ if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+ error("Destination address too large");
+#ifndef CONFIG_RELOCATABLE
+ if ((u32)output != CONFIG_PHYSICAL_START)
+ error("Wrong destination address");
+#endif
makecrc();
putstr("Uncompressing Linux... ");
gunzip();
putstr("Ok, booting the kernel.\n");
- if (high_loaded) close_output_buffer_if_we_run_high(mv);
- return high_loaded;
+ return;
}
diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c
new file mode 100644
index 0000000..0551ceb
--- /dev/null
+++ b/arch/i386/boot/compressed/relocs.c
@@ -0,0 +1,563 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+
+#define MAX_SHDRS 100
+static Elf32_Ehdr ehdr;
+static Elf32_Shdr shdr[MAX_SHDRS];
+static Elf32_Sym *symtab[MAX_SHDRS];
+static Elf32_Rel *reltab[MAX_SHDRS];
+static char *strtab[MAX_SHDRS];
+static unsigned long reloc_count, reloc_idx;
+static unsigned long *relocs;
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+static const char *sym_type(unsigned type)
+{
+ static const char *type_name[] = {
+#define SYM_TYPE(X) [X] = #X
+ SYM_TYPE(STT_NOTYPE),
+ SYM_TYPE(STT_OBJECT),
+ SYM_TYPE(STT_FUNC),
+ SYM_TYPE(STT_SECTION),
+ SYM_TYPE(STT_FILE),
+ SYM_TYPE(STT_COMMON),
+ SYM_TYPE(STT_TLS),
+#undef SYM_TYPE
+ };
+ const char *name = "unknown sym type name";
+ if (type < sizeof(type_name)/sizeof(type_name[0])) {
+ name = type_name[type];
+ }
+ return name;
+}
+
+static const char *sym_bind(unsigned bind)
+{
+ static const char *bind_name[] = {
+#define SYM_BIND(X) [X] = #X
+ SYM_BIND(STB_LOCAL),
+ SYM_BIND(STB_GLOBAL),
+ SYM_BIND(STB_WEAK),
+#undef SYM_BIND
+ };
+ const char *name = "unknown sym bind name";
+ if (bind < sizeof(bind_name)/sizeof(bind_name[0])) {
+ name = bind_name[bind];
+ }
+ return name;
+}
+
+static const char *sym_visibility(unsigned visibility)
+{
+ static const char *visibility_name[] = {
+#define SYM_VISIBILITY(X) [X] = #X
+ SYM_VISIBILITY(STV_DEFAULT),
+ SYM_VISIBILITY(STV_INTERNAL),
+ SYM_VISIBILITY(STV_HIDDEN),
+ SYM_VISIBILITY(STV_PROTECTED),
+#undef SYM_VISIBILITY
+ };
+ const char *name = "unknown sym visibility name";
+ if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) {
+ name = visibility_name[visibility];
+ }
+ return name;
+}
+
+static const char *rel_type(unsigned type)
+{
+ static const char *type_name[] = {
+#define REL_TYPE(X) [X] = #X
+ REL_TYPE(R_386_NONE),
+ REL_TYPE(R_386_32),
+ REL_TYPE(R_386_PC32),
+ REL_TYPE(R_386_GOT32),
+ REL_TYPE(R_386_PLT32),
+ REL_TYPE(R_386_COPY),
+ REL_TYPE(R_386_GLOB_DAT),
+ REL_TYPE(R_386_JMP_SLOT),
+ REL_TYPE(R_386_RELATIVE),
+ REL_TYPE(R_386_GOTOFF),
+ REL_TYPE(R_386_GOTPC),
+#undef REL_TYPE
+ };
+ const char *name = "unknown type rel type name";
+ if (type < sizeof(type_name)/sizeof(type_name[0])) {
+ name = type_name[type];
+ }
+ return name;
+}
+
+static const char *sec_name(unsigned shndx)
+{
+ const char *sec_strtab;
+ const char *name;
+ sec_strtab = strtab[ehdr.e_shstrndx];
+ name = "<noname>";
+ if (shndx < ehdr.e_shnum) {
+ name = sec_strtab + shdr[shndx].sh_name;
+ }
+ else if (shndx == SHN_ABS) {
+ name = "ABSOLUTE";
+ }
+ else if (shndx == SHN_COMMON) {
+ name = "COMMON";
+ }
+ return name;
+}
+
+static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
+{
+ const char *name;
+ name = "<noname>";
+ if (sym->st_name) {
+ name = sym_strtab + sym->st_name;
+ }
+ else {
+ name = sec_name(shdr[sym->st_shndx].sh_name);
+ }
+ return name;
+}
+
+
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+ return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+ return le32_to_cpu(val);
+}
+
+static void read_ehdr(FILE *fp)
+{
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) {
+ die("Cannot read ELF header: %s\n",
+ strerror(errno));
+ }
+ if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+ die("No ELF magic\n");
+ }
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
+ die("Not a 32 bit executable\n");
+ }
+ if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+ die("Not a LSB ELF executable\n");
+ }
+ if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ /* Convert the fields to native endian */
+ ehdr.e_type = elf16_to_cpu(ehdr.e_type);
+ ehdr.e_machine = elf16_to_cpu(ehdr.e_machine);
+ ehdr.e_version = elf32_to_cpu(ehdr.e_version);
+ ehdr.e_entry = elf32_to_cpu(ehdr.e_entry);
+ ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff);
+ ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff);
+ ehdr.e_flags = elf32_to_cpu(ehdr.e_flags);
+ ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize);
+ ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+ ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum);
+ ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+ ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum);
+ ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx);
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+ die("Unsupported ELF header type\n");
+ }
+ if (ehdr.e_machine != EM_386) {
+ die("Not for x86\n");
+ }
+ if (ehdr.e_version != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) {
+ die("Bad Elf header size\n");
+ }
+ if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) {
+ die("Bad program header entry\n");
+ }
+ if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) {
+ die("Bad section header entry\n");
+ }
+ if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+ die("String table index out of bounds\n");
+ }
+}
+
+static void read_shdrs(FILE *fp)
+{
+ int i;
+ if (ehdr.e_shnum > MAX_SHDRS) {
+ die("%d section headers supported: %d\n",
+ ehdr.e_shnum, MAX_SHDRS);
+ }
+ if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ ehdr.e_shoff, strerror(errno));
+ }
+ if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) {
+ die("Cannot read ELF section headers: %s\n",
+ strerror(errno));
+ }
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name);
+ shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type);
+ shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags);
+ shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr);
+ shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset);
+ shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size);
+ shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link);
+ shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info);
+ shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign);
+ shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize);
+ }
+
+}
+
+static void read_strtabs(FILE *fp)
+{
+ int i;
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ if (shdr[i].sh_type != SHT_STRTAB) {
+ continue;
+ }
+ strtab[i] = malloc(shdr[i].sh_size);
+ if (!strtab[i]) {
+ die("malloc of %d bytes for strtab failed\n",
+ shdr[i].sh_size);
+ }
+ if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ shdr[i].sh_offset, strerror(errno));
+ }
+ if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ die("Cannot read symbol table: %s\n",
+ strerror(errno));
+ }
+ }
+}
+
+static void read_symtabs(FILE *fp)
+{
+ int i,j;
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ if (shdr[i].sh_type != SHT_SYMTAB) {
+ continue;
+ }
+ symtab[i] = malloc(shdr[i].sh_size);
+ if (!symtab[i]) {
+ die("malloc of %d bytes for symtab failed\n",
+ shdr[i].sh_size);
+ }
+ if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ shdr[i].sh_offset, strerror(errno));
+ }
+ if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ die("Cannot read symbol table: %s\n",
+ strerror(errno));
+ }
+ for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) {
+ symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name);
+ symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value);
+ symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size);
+ symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx);
+ }
+ }
+}
+
+
+static void read_relocs(FILE *fp)
+{
+ int i,j;
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ if (shdr[i].sh_type != SHT_REL) {
+ continue;
+ }
+ reltab[i] = malloc(shdr[i].sh_size);
+ if (!reltab[i]) {
+ die("malloc of %d bytes for relocs failed\n",
+ shdr[i].sh_size);
+ }
+ if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ shdr[i].sh_offset, strerror(errno));
+ }
+ if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ die("Cannot read symbol table: %s\n",
+ strerror(errno));
+ }
+ for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+ reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset);
+ reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info);
+ }
+ }
+}
+
+
+static void print_absolute_symbols(void)
+{
+ int i;
+ printf("Absolute symbols\n");
+ printf(" Num: Value Size Type Bind Visibility Name\n");
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ char *sym_strtab;
+ Elf32_Sym *sh_symtab;
+ int j;
+ if (shdr[i].sh_type != SHT_SYMTAB) {
+ continue;
+ }
+ sh_symtab = symtab[i];
+ sym_strtab = strtab[shdr[i].sh_link];
+ for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) {
+ Elf32_Sym *sym;
+ const char *name;
+ sym = &symtab[i][j];
+ name = sym_name(sym_strtab, sym);
+ if (sym->st_shndx != SHN_ABS) {
+ continue;
+ }
+ printf("%5d %08x %5d %10s %10s %12s %s\n",
+ j, sym->st_value, sym->st_size,
+ sym_type(ELF32_ST_TYPE(sym->st_info)),
+ sym_bind(ELF32_ST_BIND(sym->st_info)),
+ sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)),
+ name);
+ }
+ }
+ printf("\n");
+}
+
+static void print_absolute_relocs(void)
+{
+ int i;
+ printf("Absolute relocations\n");
+ printf("Offset Info Type Sym.Value Sym.Name\n");
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ char *sym_strtab;
+ Elf32_Sym *sh_symtab;
+ unsigned sec_applies, sec_symtab;
+ int j;
+ if (shdr[i].sh_type != SHT_REL) {
+ continue;
+ }
+ sec_symtab = shdr[i].sh_link;
+ sec_applies = shdr[i].sh_info;
+ if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+ continue;
+ }
+ sh_symtab = symtab[sec_symtab];
+ sym_strtab = strtab[shdr[sec_symtab].sh_link];
+ for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+ Elf32_Rel *rel;
+ Elf32_Sym *sym;
+ const char *name;
+ rel = &reltab[i][j];
+ sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+ name = sym_name(sym_strtab, sym);
+ if (sym->st_shndx != SHN_ABS) {
+ continue;
+ }
+ printf("%08x %08x %10s %08x %s\n",
+ rel->r_offset,
+ rel->r_info,
+ rel_type(ELF32_R_TYPE(rel->r_info)),
+ sym->st_value,
+ name);
+ }
+ }
+ printf("\n");
+}
+
+static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
+{
+ int i;
+ /* Walk through the relocations */
+ for(i = 0; i < ehdr.e_shnum; i++) {
+ char *sym_strtab;
+ Elf32_Sym *sh_symtab;
+ unsigned sec_applies, sec_symtab;
+ int j;
+ if (shdr[i].sh_type != SHT_REL) {
+ continue;
+ }
+ sec_symtab = shdr[i].sh_link;
+ sec_applies = shdr[i].sh_info;
+ if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+ continue;
+ }
+ sh_symtab = symtab[sec_symtab];
+ sym_strtab = strtab[shdr[sec_symtab].sh_link];
+ for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+ Elf32_Rel *rel;
+ Elf32_Sym *sym;
+ unsigned r_type;
+ rel = &reltab[i][j];
+ sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+ r_type = ELF32_R_TYPE(rel->r_info);
+ /* Don't visit relocations to absolute symbols */
+ if (sym->st_shndx == SHN_ABS) {
+ continue;
+ }
+ if (r_type == R_386_PC32) {
+ /* PC relative relocations don't need to be adjusted */
+ }
+ else if (r_type == R_386_32) {
+ /* Visit relocations that need to be adjusted */
+ visit(rel, sym);
+ }
+ else {
+ die("Unsupported relocation type: %d\n", r_type);
+ }
+ }
+ }
+}
+
+static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+ reloc_count += 1;
+}
+
+static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+ /* Remember the address that needs to be adjusted. */
+ relocs[reloc_idx++] = rel->r_offset;
+}
+
+static int cmp_relocs(const void *va, const void *vb)
+{
+ const unsigned long *a, *b;
+ a = va; b = vb;
+ return (*a == *b)? 0 : (*a > *b)? 1 : -1;
+}
+
+static void emit_relocs(int as_text)
+{
+ int i;
+ /* Count how many relocations I have and allocate space for them. */
+ reloc_count = 0;
+ walk_relocs(count_reloc);
+ relocs = malloc(reloc_count * sizeof(relocs[0]));
+ if (!relocs) {
+ die("malloc of %d entries for relocs failed\n",
+ reloc_count);
+ }
+ /* Collect up the relocations */
+ reloc_idx = 0;
+ walk_relocs(collect_reloc);
+
+ /* Order the relocations for more efficient processing */
+ qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
+
+ /* Print the relocations */
+ if (as_text) {
+ /* Print the relocations in a form suitable that
+ * gas will like.
+ */
+ printf(".section ".data.reloc","a"\n");
+ printf(".balign 4\n");
+ for(i = 0; i < reloc_count; i++) {
+ printf("\t .long 0x%08lx\n", relocs[i]);
+ }
+ printf("\n");
+ }
+ else {
+ unsigned char buf[4];
+ buf[0] = buf[1] = buf[2] = buf[3] = 0;
+ /* Print a stop */
+ printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+ /* Now print each relocation */
+ for(i = 0; i < reloc_count; i++) {
+ buf[0] = (relocs[i] >> 0) & 0xff;
+ buf[1] = (relocs[i] >> & 0xff;
+ buf[2] = (relocs[i] >> 16) & 0xff;
+ buf[3] = (relocs[i] >> 24) & 0xff;
+ printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+ }
+ }
+}
+
+static void usage(void)
+{
+ die("i386_reloc [--abs | --text] vmlinux\n");
+}
+
+int main(int argc, char **argv)
+{
+ int show_absolute;
+ int as_text;
+ const char *fname;
+ FILE *fp;
+ int i;
+
+ show_absolute = 0;
+ as_text = 0;
+ fname = NULL;
+ for(i = 1; i < argc; i++) {
+ char *arg = argv[i];
+ if (*arg == '-') {
+ if (strcmp(argv[1], "--abs") == 0) {
+ show_absolute = 1;
+ continue;
+ }
+ else if (strcmp(argv[1], "--text") == 0) {
+ as_text = 1;
+ continue;
+ }
+ }
+ else if (!fname) {
+ fname = arg;
+ continue;
+ }
+ usage();
+ }
+ if (!fname) {
+ usage();
+ }
+ fp = fopen(fname, "r");
+ if (!fp) {
+ die("Cannot open %s: %s\n",
+ fname, strerror(errno));
+ }
+ read_ehdr(fp);
+ read_shdrs(fp);
+ read_strtabs(fp);
+ read_symtabs(fp);
+ read_relocs(fp);
+ if (show_absolute) {
+ print_absolute_symbols();
+ print_absolute_relocs();
+ return 0;
+ }
+ emit_relocs(as_text);
+ return 0;
+}
diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds
new file mode 100644
index 0000000..973a23e
--- /dev/null
+++ b/arch/i386/boot/compressed/vmlinux.lds
@@ -0,0 +1,40 @@
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+SECTIONS
+{
+ . = 0 ;
+ .text.head : {
+ _head = . ;
+ *(.text.head)
+ _ehead = . ;
+ }
+ .data.compressed : {
+ *(.data.compressed)
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ _end = . ;
+ }
+}
diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr
index 1ed9d79..707a88f 100644
--- a/arch/i386/boot/compressed/vmlinux.scr
+++ b/arch/i386/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
SECTIONS
{
- .data : {
+ .data.compressed : {
input_len = .;
LONG(input_data_end - input_data) input_data = .;
*(.data)
+ output_len = . - 4;
input_data_end = .;
}
}
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index d2b684c..04b6ea8 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -588,11 +588,6 @@ rmodeswtch_normal:
call default_switch
rmodeswtch_end:
-# we get the code32 start address and modify the below 'jmpi'
-# (loader may have changed it)
- movl %cs:code32_start, %eax
- movl %eax, %cs:code32
-
# Now we move the system to its rightful place ... but we check if we have a
# big-kernel. In that case we *must* not move it ...
testb $LOADED_HIGH, %cs:loadflags
@@ -788,11 +783,12 @@ a20_err_msg:
a20_done:
#endif /* CONFIG_X86_VOYAGER */
-# set up gdt and idt
+# set up gdt and idt and 32bit start address
lidt idt_48 # load idt with 0,0
xorl %eax, %eax # Compute gdt_base
movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
shll $4, %eax
+ addl %eax, code32
addl $gdt, %eax
movl %eax, (gdt_48+2)
lgdt gdt_48 # load gdt with whatever is
@@ -851,9 +847,26 @@ # take our 48 bit far pointer. (INTeL 80
# Manual, Mixing 16-bit and 32-bit code, page 16-6)
.byte 0x66, 0xea # prefix + jmpi-opcode
-code32: .long 0x1000 # will be set to 0x100000
- # for big kernels
+code32: .long startup_32 # will be set to %cs+startup_32
.word __BOOT_CS
+.code32
+startup_32:
+ movl $(__BOOT_DS), %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
+ xorl %eax, %eax
+1: incl %eax # check that A20 really IS enabled
+ movl %eax, 0x00000000 # loop forever if it isn't
+ cmpl %eax, 0x00100000
+ je 1b
+
+ # Jump to the 32bit entry point
+ jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi)
+.code16
# Here's a bunch of information about your current kernel..
kernel_version: .ascii UTS_RELEASE
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 2925e66..b02308e 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -42,7 +42,8 @@ struct screen_info {
u16 pages; /* 0x32 */
u16 vesa_attributes; /* 0x34 */
u32 capabilities; /* 0x36 */
- /* 0x3a -- 0x3f reserved for future expansion */
+ /* 0x3a -- 0x3b reserved for future expansion */
+ /* 0x3c -- 0x3f micro stack for relocatable kernels */
};
extern struct screen_info screen_info;
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.RemoveThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 17/33] x86_64: Separate normal memory map initialization from the hotplug case [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Currently initializing the two memory maps are combining into one
set of functions with if(after_bootmem) tests scattered all over
to handle the semantic differences. Just trying to think about
what is supposed to happen when and why makes my head hurt.
In one case we initialize a page but in another we don't because
it has been zeroed by the allocator.
In one case we have to map and unmap pages and in another we
don't because we have a mapping of the pages already.
In one case we care if a page table is partially initialized
and in the other we don't.
It is ugly to reason through and makes maintenance difficult,
because the rules are different in the two cases. So I have
separated these code paths so the can evolve separately. I
think code duplication is the lesser of two evils here.
Signed-off-by: Eric W. Biederman <ebiederm.RemoveThis@xmission.com>
---
arch/x86_64/mm/init.c | 147 +++++++++++++++++++++++++++++++++----------------
1 files changed, 98 insertions(+), 49 deletions(-)
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index d14fb2d..0522c1c 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -179,19 +179,13 @@ static struct temp_map {
{}
};
-static __meminit void *alloc_low_page(int *index, unsigned long *phys)
+static __init void *alloc_low_page(int *index, unsigned long *phys)
{
struct temp_map *ti;
int i;
unsigned long pfn = table_end++, paddr;
void *adr;
- if (after_bootmem) {
- adr = (void *)get_zeroed_page(GFP_ATOMIC);
- *phys = __pa(adr);
- return adr;
- }
-
if (pfn >= end_pfn)
panic("alloc_low_page: ran out of memory");
for (i = 0; temp_mappings[i].allocated; i++) {
@@ -210,13 +204,10 @@ static __meminit void *alloc_low_page(in
return adr;
}
-static __meminit void unmap_low_page(int i)
+static __init void unmap_low_page(int i)
{
struct temp_map *ti;
- if (after_bootmem)
- return;
-
ti = &temp_mappings[i];
set_pmd(ti->pmd, __pmd(0));
ti->allocated = 0;
@@ -249,7 +240,7 @@ __init void early_iounmap(void *addr, un
__flush_tlb();
}
-static void __meminit
+static void __init
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
{
int i;
@@ -258,9 +249,8 @@ phys_pmd_init(pmd_t *pmd, unsigned long
unsigned long entry;
if (address >= end) {
- if (!after_bootmem)
- for (; i < PTRS_PER_PMD; i++, pmd++)
- set_pmd(pmd, __pmd(0));
+ for (; i < PTRS_PER_PMD; i++, pmd++)
+ set_pmd(pmd, __pmd(0));
break;
}
entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -269,30 +259,12 @@ phys_pmd_init(pmd_t *pmd, unsigned long
}
}
-static void __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
-{
- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
-
- if (pmd_none(*pmd)) {
- spin_lock(&init_mm.page_table_lock);
- phys_pmd_init(pmd, address, end);
- spin_unlock(&init_mm.page_table_lock);
- __flush_tlb_all();
- }
-}
-
-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i = pud_index(address);
pud = pud + i;
- if (after_bootmem && pud_val(*pud)) {
- phys_pmd_update(pud, address, end);
- return;
- }
-
for (; i < PTRS_PER_PUD; pud++, i++) {
int map;
unsigned long paddr, pmd_phys;
@@ -302,16 +274,14 @@ static void __meminit phys_pud_init(pud_
if (paddr >= end)
break;
- if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
+ if (!e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
set_pud(pud, __pud(0));
continue;
}
pmd = alloc_low_page(&map, &pmd_phys);
- spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
phys_pmd_init(pmd, paddr, end);
- spin_unlock(&init_mm.page_table_lock);
unmap_low_page(map);
}
__flush_tlb();
@@ -345,7 +315,7 @@ static void __init find_early_table_spac
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
This runs before bootmem is initialized and gets pages directly from the
physical memory. To access them they are temporarily mapped. */
-void __meminit init_memory_mapping(unsigned long start, unsigned long end)
+void __init init_memory_mapping(unsigned long start, unsigned long end)
{
unsigned long next;
@@ -357,8 +327,7 @@ void __meminit init_memory_mapping(unsig
* mapped. Unfortunately this is done currently before the nodes are
* discovered.
*/
- if (!after_bootmem)
- find_early_table_space(end);
+ find_early_table_space(end);
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
@@ -369,22 +338,17 @@ void __meminit init_memory_mapping(unsig
pgd_t *pgd = pgd_offset_k(start);
pud_t *pud;
- if (after_bootmem)
- pud = pud_offset(pgd, start & PGDIR_MASK);
- else
- pud = alloc_low_page(&map, &pud_phys);
+ pud = alloc_low_page(&map, &pud_phys);
next = start + PGDIR_SIZE;
if (next > end)
next = end;
phys_pud_init(pud, __pa(start), __pa(next));
- if (!after_bootmem)
- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+ set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
unmap_low_page(map);
}
- if (!after_bootmem)
- asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+ asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
__flush_tlb_all();
}
@@ -529,6 +493,91 @@ int memory_add_physaddr_to_nid(u64 start
}
#endif
+static void
+late_phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+ unsigned long entry;
+
+ if (address >= end)
+ break;
+ entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+ entry &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(entry));
+ }
+}
+
+static void
+late_phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+ pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+
+ if (pmd_none(*pmd)) {
+ spin_lock(&init_mm.page_table_lock);
+ late_phys_pmd_init(pmd, address, end);
+ spin_unlock(&init_mm.page_table_lock);
+ __flush_tlb_all();
+ }
+}
+
+static void late_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i = pud_index(address);
+
+ pud = pud + i;
+
+ if (pud_val(*pud)) {
+ late_phys_pmd_update(pud, address, end);
+ return;
+ }
+
+ for (; i < PTR_PER_PUD; pud++, i++) {
+ unsigned long paddr, pmd_phys;
+ pmd_t *pmd;
+
+ paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+ if (paddr >= end)
+ break;
+
+ pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
+ phys_pmd = __pa(pmd);
+
+ spin_lock(&init_mm.page_table_lock);
+ set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
+ late_phys_pmd_init(pmd, paddr, end);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+}
+
+/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs after bootmem is initialized and gets pages normally.
+ */
+static void late_init_memory_mapping(unsigned long start, unsigned long end)
+{
+ unsigned long next;
+
+ Dprintk("add_memory_mapping\n");
+
+ start = (unsigned long)__va(start);
+ end = (unsigned long)__va(end);
+
+ for (; start < end; start = next) {
+ unsigned long pud_phys;
+ pgd_t *pgd = pgd_offset_k(start);
+ pud_t *pud;
+
+ pud = pud_offset(pgd, start & PGDIR_MASK);
+
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ late_phys_pud_init(pud, __pa(start), __pa(next));
+ }
+ __flush_tlb_all();
+}
+
/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
@@ -545,7 +594,7 @@ int arch_add_memory(int nid, u64 start,
if (ret)
goto error;
- init_memory_mapping(start, (start + size -1));
+ late_init_memory_mapping(start, (start + size -1));
return ret;
error:
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.RemoveThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 33/33] x86_64: Make bzImage a valid 64bit elf executable. [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Signed-off-by: Eric W. Biederman <ebiederm RemoveThis @xmission.com>
---
arch/x86_64/boot/Makefile | 2
arch/x86_64/boot/bootsect.S | 93 +++++++++++++++-
arch/x86_64/boot/tools/build.c | 232 ++++++++++++++++++++++++++++++++++++----
3 files changed, 297 insertions(+), 30 deletions(-)
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e..80a7492 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -41,7 +41,7 @@ # --------------------------------------
quiet_cmd_image = BUILD $@
cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
- $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+ $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux > $@
$(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE
diff --git a/arch/x86_64/boot/bootsect.S b/arch/x86_64/boot/bootsect.S
index 011b7a4..05bd1f3 100644
--- a/arch/x86_64/boot/bootsect.S
+++ b/arch/x86_64/boot/bootsect.S
@@ -13,6 +13,13 @@
*
*/
+#include <linux/version.h>
+#include <linux/utsrelease.h>
+#include <linux/compile.h>
+#include <linux/elf.h>
+#include <linux/elf-em.h>
+#include <linux/elf_boot.h>
+#include <asm/page.h>
#include <asm/boot.h>
SETUPSECTS = 4 /* default nr of setup-sectors */
@@ -42,10 +49,88 @@ #endif
.global _start
_start:
-
+ehdr:
+ # e_ident is carefully crafted so if this is treated
+ # as an x86 bootsector you will execute through
+ # e_ident and then print the bugger off message.
+ # The 1 store to bx+di is unfortunate it is
+ # unlikely to affect the ability to print
+ # a message and you aren't supposed to be booting a
+ # bzImage directly from a floppy anyway.
+
+ # e_ident
+ .byte ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3
+ .byte ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_STANDALONE
+ .byte 0xeb, 0x3d, 0, 0, 0, 0, 0, 0
+ .word ET_DYN # e_type
+ .word EM_X86_64 # e_machine
+ .int 1 # e_version
+ .quad 0x0000000000000100 # e_entry (startup_64)
+ .quad phdr - _start # e_phoff
+ .quad 0 # e_shoff
+ .int 0 # e_flags
+ .word e_ehdr - ehdr # e_ehsize
+ .word e_phdr1 - phdr # e_phentsize
+ .word (e_phdr - phdr)/(e_phdr1 - phdr) # e_phnum
+ .word 64 # e_shentsize
+ .word 0 # e_shnum
+ .word 0 # e_shstrndx
+e_ehdr:
+
+.org 71
+normalize:
# Normalize the start address
jmpl $BOOTSEG, $start2
+.org 80
+phdr:
+ .int PT_LOAD # p_type
+ .int PF_R | PF_W | PF_X # p_flags
+ .quad (SETUPSECTS+1)*512 # p_offset
+ .quad __START_KERNEL_map # p_vaddr
+ .quad 0x0000000000000000 # p_paddr
+ .quad SYSSIZE*16 # p_filesz
+ .quad 0 # p_memsz
+ .quad 2*1024*1024 # p_align
+e_phdr1:
+
+ .int PT_NOTE # p_type
+ .int 0 # p_flags
+ .quad b_note - _start # p_offset
+ .quad 0 # p_vaddr
+ .quad 0 # p_paddr
+ .quad e_note - b_note # p_filesz
+ .quad 0 # p_memsz
+ .quad 0 # p_align
+e_phdr:
+
+.macro note name, type
+ .balign 4
+ .int 2f - 1f # n_namesz
+ .int 4f - 3f # n_descsz
+ .int \type # n_type
+ .balign 4
+1: .asciz "\name"
+2: .balign 4
+3:
+.endm
+.macro enote
+4: .balign 4
+.endm
+
+ .balign 4
+b_note:
+ note ELF_NOTE_BOOT, EIN_PROGRAM_NAME
+ .asciz "Linux"
+ enote
+ note ELF_NOTE_BOOT, EIN_PROGRAM_VERSION
+ .asciz UTS_RELEASE
+ enote
+ note ELF_NOTE_BOOT, EIN_ARGUMENT_STYLE
+ .asciz "Linux"
+ enote
+e_note:
+
start2:
movw %cs, %ax
movw %ax, %ds
@@ -78,11 +163,11 @@ die:
bugger_off_msg:
- .ascii "Direct booting from floppy is no longer supported.\r\n"
- .ascii "Please use a boot loader program instead.\r\n"
+ .ascii "Booting linux without a boot loader is no longer supported.\r\n"
.ascii "\n"
- .ascii "Remove disk and press any key to reboot . . .\r\n"
+ .ascii "Press any key to reboot . . .\r\n"
.byte 0
+ebugger_off_msg:
# Kernel attributes; used by setup
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index eae8669..fd9bf41 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -27,6 +27,11 @@ #include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
@@ -48,6 +53,10 @@ byte buf[1024];
int fd;
int is_big_kernel;
+#define MAX_PHDRS 100
+static Elf64_Ehdr ehdr;
+static Elf64_Phdr phdr[MAX_PHDRS];
+
void die(const char * str, ...)
{
va_list args;
@@ -57,20 +66,155 @@ void die(const char * str, ...)
exit(1);
}
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#define le64_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#define le64_to_cpu(val) bswap_64(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+ return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+ return le32_to_cpu(val);
+}
+
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+ return le64_to_cpu(val);
+}
+
void file_open(const char *name)
{
if ((fd = open(name, O_RDONLY, 0)) < 0)
die("Unable to open `%s': %m", name);
}
+static void read_ehdr(void)
+{
+ if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+ die("Cannot read ELF header: %s\n",
+ strerror(errno));
+ }
+ if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+ die("No ELF magic\n");
+ }
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
+ die("Not a 64 bit executable\n");
+ }
+ if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+ die("Not a LSB ELF executable\n");
+ }
+ if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ /* Convert the fields to native endian */
+ ehdr.e_type = elf16_to_cpu(ehdr.e_type);
+ ehdr.e_machine = elf16_to_cpu(ehdr.e_machine);
+ ehdr.e_version = elf32_to_cpu(ehdr.e_version);
+ ehdr.e_entry = elf64_to_cpu(ehdr.e_entry);
+ ehdr.e_phoff = elf64_to_cpu(ehdr.e_phoff);
+ ehdr.e_shoff = elf64_to_cpu(ehdr.e_shoff);
+ ehdr.e_flags = elf32_to_cpu(ehdr.e_flags);
+ ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize);
+ ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+ ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum);
+ ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+ ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum);
+ ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx);
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+ die("Unsupported ELF header type\n");
+ }
+ if (ehdr.e_machine != EM_X86_64) {
+ die("Not for x86_64\n");
+ }
+ if (ehdr.e_version != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ if (ehdr.e_ehsize != sizeof(Elf64_Ehdr)) {
+ die("Bad Elf header size\n");
+ }
+ if (ehdr.e_phentsize != sizeof(Elf64_Phdr)) {
+ die("Bad program header entry\n");
+ }
+ if (ehdr.e_shentsize != sizeof(Elf64_Shdr)) {
+ die("Bad section header entry\n");
+ }
+ if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+ die("String table index out of bounds\n");
+ }
+}
+
+static void read_phds(void)
+{
+ int i;
+ size_t size;
+ if (ehdr.e_phnum > MAX_PHDRS) {
+ die("%d program headers supported: %d\n",
+ ehdr.e_phnum, MAX_PHDRS);
+ }
+ if (lseek(fd, ehdr.e_phoff, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ ehdr.e_phoff, strerror(errno));
+ }
+ size = sizeof(phdr[0])*ehdr.e_phnum;
+ if (read(fd, &phdr, size) != size) {
+ die("Cannot read ELF section headers: %s\n",
+ strerror(errno));
+ }
+ for(i = 0; i < ehdr.e_phnum; i++) {
+ phdr[i].p_type = elf32_to_cpu(phdr[i].p_type);
+ phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags);
+ phdr[i].p_offset = elf64_to_cpu(phdr[i].p_offset);
+ phdr[i].p_vaddr = elf64_to_cpu(phdr[i].p_vaddr);
+ phdr[i].p_paddr = elf64_to_cpu(phdr[i].p_paddr);
+ phdr[i].p_filesz = elf64_to_cpu(phdr[i].p_filesz);
+ phdr[i].p_memsz = elf64_to_cpu(phdr[i].p_memsz);
+ phdr[i].p_align = elf64_to_cpu(phdr[i].p_align);
+ }
+}
+
+uint64_t vmlinux_memsz(void)
+{
+ uint64_t min, max, size;
+ int i;
+ max = 0;
+ min = ~max;
+ for(i = 0; i < ehdr.e_phnum; i++) {
+ uint64_t start, end;
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+ start = phdr[i].p_paddr;
+ end = phdr[i].p_paddr + phdr[i].p_memsz;
+ if (start < min)
+ min = start;
+ if (end > max)
+ max = end;
+ }
+ /* Get the reported size by vmlinux */
+ size = max - min;
+ return size;
+}
+
void usage(void)
{
- die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+ die("Usage: build [-b] bootsect setup system rootdev vmlinux [> image]");
}
int main(int argc, char ** argv)
{
- unsigned int i, c, sz, setup_sectors;
+ unsigned int i, sz, setup_sectors;
+ uint64_t kernel_offset, kernel_filesz, kernel_memsz;
+ int c;
u32 sys_size;
byte major_root, minor_root;
struct stat sb;
@@ -80,30 +224,25 @@ int main(int argc, char ** argv)
is_big_kernel = 1;
argc--, argv++;
}
- if ((argc < 4) || (argc > 5))
+ if (argc != 6)
usage();
- if (argc > 4) {
- if (!strcmp(argv[4], "CURRENT")) {
- if (stat("/", &sb)) {
- perror("/");
- die("Couldn't stat /");
- }
- major_root = major(sb.st_dev);
- minor_root = minor(sb.st_dev);
- } else if (strcmp(argv[4], "FLOPPY")) {
- if (stat(argv[4], &sb)) {
- perror(argv[4]);
- die("Couldn't stat root device.");
- }
- major_root = major(sb.st_rdev);
- minor_root = minor(sb.st_rdev);
- } else {
- major_root = 0;
- minor_root = 0;
+ if (!strcmp(argv[4], "CURRENT")) {
+ if (stat("/", &sb)) {
+ perror("/");
+ die("Couldn't stat /");
+ }
+ major_root = major(sb.st_dev);
+ minor_root = minor(sb.st_dev);
+ } else if (strcmp(argv[4], "FLOPPY")) {
+ if (stat(argv[4], &sb)) {
+ perror(argv[4]);
+ die("Couldn't stat root device.");
}
+ major_root = major(sb.st_rdev);
+ minor_root = minor(sb.st_rdev);
} else {
- major_root = DEFAULT_MAJOR_ROOT;
- minor_root = DEFAULT_MINOR_ROOT;
+ major_root = 0;
+ minor_root = 0;
}
fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
@@ -143,10 +282,11 @@ int main(int argc, char ** argv)
i += c;
}
+ kernel_offset = (setup_sectors + 1)*512;
file_open(argv[3]);
if (fstat (fd, &sb))
die("Unable to stat `%s': %m", argv[3]);
- sz = sb.st_size;
+ kernel_filesz = sz = sb.st_size;
fprintf (stderr, "System is %d kB\n", sz/1024);
sys_size = (sz + 15) / 16;
if (!is_big_kernel && sys_size > DEF_SYSSIZE)
@@ -167,7 +307,49 @@ int main(int argc, char ** argv)
}
close(fd);
- if (lseek(1, 497, SEEK_SET) != 497) /* Write sizes to the bootsector */
+ file_open(argv[5]);
+ read_ehdr();
+ read_phds();
+ close(fd);
+ kernel_memsz = vmlinux_memsz();
+
+ if (lseek(1, 88, SEEK_SET) != 8 /* Write sizes to the bootsector */
+ die("Output: seek failed");
+ buf[0] = (kernel_offset >> 0) & 0xff;
+ buf[1] = (kernel_offset >> & 0xff;
+ buf[2] = (kernel_offset >> 16) & 0xff;
+ buf[3] = (kernel_offset >> 24) & 0xff;
+ buf[4] = (kernel_offset >> 32) & 0xff;
+ buf[5] = (kernel_offset >> 40) & 0xff;
+ buf[6] = (kernel_offset >> 4 & 0xff;
+ buf[7] = (kernel_offset >> 56) & 0xff;
+ if (write(1, buf, !=
+ die("Write of kernel file offset failed");
+ if (lseek(1, 112, SEEK_SET) != 112)
+ die("Output: seek failed");
+ buf[0] = (kernel_filesz >> 0) & 0xff;
+ buf[1] = (kernel_filesz >> & 0xff;
+ buf[2] = (kernel_filesz >> 16) & 0xff;
+ buf[3] = (kernel_filesz >> 24) & 0xff;
+ buf[4] = (kernel_filesz >> 32) & 0xff;
+ buf[5] = (kernel_filesz >> 40) & 0xff;
+ buf[6] = (kernel_filesz >> 4 & 0xff;
+ buf[7] = (kernel_filesz >> 56) & 0xff;
+ if (write(1, buf, !=
+ die("Write of kernel file size failed");
+ if (lseek(1, 120, SEEK_SET) != 120)
+ die("Output: seek failed");
+ buf[0] = (kernel_memsz >> 0) & 0xff;
+ buf[1] = (kernel_memsz >> & 0xff;
+ buf[2] = (kernel_memsz >> 16) & 0xff;
+ buf[3] = (kernel_memsz >> 24) & 0xff;
+ buf[4] = (kernel_memsz >> 32) & 0xff;
+ buf[5] = (kernel_memsz >> 40) & 0xff;
+ buf[6] = (kernel_memsz >> 4 & 0xff;
+ buf[7] = (kernel_memsz >> 56) & 0xff;
+ if (write(1, buf, !=
+ die("Write of kernel memory size failed");
+ if (lseek(1, 497, SEEK_SET) != 497)
die("Output: seek failed");
buf[0] = setup_sectors;
if (write(1, buf, 1) != 1)
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo RemoveThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 11/33] i386 boot: Add an ELF header to bzImage [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Increasingly the cobbled together boot protocol that
is bzImage does not have the flexibility to deal
with booting in new situations.
Now that we no longer support the bootsector loader
we have 512 bytes at the very start of a bzImage that
we can use for other things.
Placing an ELF header there allows us to retain
a single binary for all of x86 while at the same
time describing things that bzImage does not allow
us to describe.
The existing bugger off code for warning if we attempt to
boot from the bootsector is kept but the error message is
made more terse so we have a little more room to play with.
Signed-off-by: Eric W. Biederman <ebiederm RemoveThis @xmission.com>
---
arch/i386/boot/Makefile | 2
arch/i386/boot/bootsect.S | 97 ++++++++++++++++++-
arch/i386/boot/tools/build.c | 214 +++++++++++++++++++++++++++++++++++++-----
include/linux/elf_boot.h | 19 ++++
4 files changed, 303 insertions(+), 29 deletions(-)
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index e979466..44ef35c 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -43,7 +43,7 @@ # --------------------------------------
quiet_cmd_image = BUILD $@
cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
- $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+ $(obj)/vmlinux.bin $(ROOT_DEV) vmlinux > $@
$(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE
diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S
index 011b7a4..847dc8f 100644
--- a/arch/i386/boot/bootsect.S
+++ b/arch/i386/boot/bootsect.S
@@ -13,6 +13,13 @@
*
*/
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/utsrelease.h>
+#include <linux/compile.h>
+#include <linux/elf.h>
+#include <linux/elf_boot.h>
+#include <asm/page.h>
#include <asm/boot.h>
SETUPSECTS = 4 /* default nr of setup-sectors */
@@ -42,10 +49,92 @@ #endif
.global _start
_start:
-
+ehdr:
+ # e_ident is carefully crafted so if this is treated
+ # as an x86 bootsector you will execute through
+ # e_ident and then print the bugger off message.
+ # The 1 stores to bx+di is unfortunate it is
+ # unlikely to affect the ability to print
+ # a message and you aren't supposed to be booting a
+ # bzImage directly from a floppy anyway.
+
+ # e_ident
+ .byte ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3
+ .byte ELFCLASS32, ELFDATA2LSB, EV_CURRENT, ELFOSABI_STANDALONE
+ .byte 0xeb, 0x3d, 0, 0, 0, 0, 0, 0
+#ifndef CONFIG_RELOCATABLE
+ .word ET_EXEC # e_type
+#else
+ .word ET_DYN # e_type
+#endif
+ .word EM_386 # e_machine
+ .int 1 # e_version
+ .int CONFIG_PHYSICAL_START # e_entry
+ .int phdr - _start # e_phoff
+ .int 0 # e_shoff
+ .int 0 # e_flags
+ .word e_ehdr - ehdr # e_ehsize
+ .word e_phdr1 - phdr # e_phentsize
+ .word (e_phdr - phdr)/(e_phdr1 - phdr) # e_phnum
+ .word 40 # e_shentsize
+ .word 0 # e_shnum
+ .word 0 # e_shstrndx
+e_ehdr:
+
+.org 71
+normalize:
# Normalize the start address
jmpl $BOOTSEG, $start2
+.org 80
+phdr:
+ .int PT_LOAD # p_type
+ .int (SETUPSECTS+1)*512 # p_offset
+ .int __PAGE_OFFSET + CONFIG_PHYSICAL_START # p_vaddr
+ .int CONFIG_PHYSICAL_START # p_paddr
+ .int SYSSIZE*16 # p_filesz
+ .int 0 # p_memsz
+ .int PF_R | PF_W | PF_X # p_flags
+ .int 4*1024*1024 # p_align
+e_phdr1:
+
+ .int PT_NOTE # p_type
+ .int b_note - _start # p_offset
+ .int 0 # p_vaddr
+ .int 0 # p_paddr
+ .int e_note - b_note # p_filesz
+ .int 0 # p_memsz
+ .int 0 # p_flags
+ .int 0 # p_align
+e_phdr:
+
+.macro note name, type
+ .balign 4
+ .int 2f - 1f # n_namesz
+ .int 4f - 3f # n_descsz
+ .int \type # n_type
+ .balign 4
+1: .asciz "\name"
+2: .balign 4
+3:
+.endm
+.macro enote
+4: .balign 4
+.endm
+
+ .balign 4
+b_note:
+ note ELF_NOTE_BOOT, EIN_PROGRAM_NAME
+ .asciz "Linux"
+ enote
+ note ELF_NOTE_BOOT, EIN_PROGRAM_VERSION
+ .asciz UTS_RELEASE
+ enote
+ note ELF_NOTE_BOOT, EIN_ARGUMENT_STYLE
+ .asciz "Linux"
+ enote
+e_note:
+
start2:
movw %cs, %ax
movw %ax, %ds
@@ -78,11 +167,11 @@ die:
bugger_off_msg:
- .ascii "Direct booting from floppy is no longer supported.\r\n"
- .ascii "Please use a boot loader program instead.\r\n"
+ .ascii "Booting linux without a boot loader is no longer supported.\r\n"
.ascii "\n"
- .ascii "Remove disk and press any key to reboot . . .\r\n"
+ .ascii "Press any key to reboot . . .\r\n"
.byte 0
+ebugger_off_msg:
# Kernel attributes; used by setup
diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c
index 0579841..2daca93 100644
--- a/arch/i386/boot/tools/build.c
+++ b/arch/i386/boot/tools/build.c
@@ -27,6 +27,11 @@ #include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
@@ -48,6 +53,10 @@ byte buf[1024];
int fd;
int is_big_kernel;
+#define MAX_PHDRS 100
+static Elf32_Ehdr ehdr;
+static Elf32_Phdr phdr[MAX_PHDRS];
+
void die(const char * str, ...)
{
va_list args;
@@ -57,20 +66,151 @@ void die(const char * str, ...)
exit(1);
}
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+ return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+ return le32_to_cpu(val);
+}
+
void file_open(const char *name)
{
if ((fd = open(name, O_RDONLY, 0)) < 0)
die("Unable to open `%s': %m", name);
}
+static void read_ehdr(void)
+{
+ if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
+ die("Cannot read ELF header: %s\n",
+ strerror(errno));
+ }
+ if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+ die("No ELF magic\n");
+ }
+ if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
+ die("Not a 32 bit executable\n");
+ }
+ if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+ die("Not a LSB ELF executable\n");
+ }
+ if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ /* Convert the fields to native endian */
+ ehdr.e_type = elf16_to_cpu(ehdr.e_type);
+ ehdr.e_machine = elf16_to_cpu(ehdr.e_machine);
+ ehdr.e_version = elf32_to_cpu(ehdr.e_version);
+ ehdr.e_entry = elf32_to_cpu(ehdr.e_entry);
+ ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff);
+ ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff);
+ ehdr.e_flags = elf32_to_cpu(ehdr.e_flags);
+ ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize);
+ ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+ ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum);
+ ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+ ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum);
+ ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx);
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+ die("Unsupported ELF header type\n");
+ }
+ if (ehdr.e_machine != EM_386) {
+ die("Not for x86\n");
+ }
+ if (ehdr.e_version != EV_CURRENT) {
+ die("Unknown ELF version\n");
+ }
+ if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) {
+ die("Bad Elf header size\n");
+ }
+ if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) {
+ die("Bad program header entry\n");
+ }
+ if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) {
+ die("Bad section header entry\n");
+ }
+ if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+ die("String table index out of bounds\n");
+ }
+}
+
+static void read_phds(void)
+{
+ int i;
+ size_t size;
+ if (ehdr.e_phnum > MAX_PHDRS) {
+ die("%d program headers supported: %d\n",
+ ehdr.e_phnum, MAX_PHDRS);
+ }
+ if (lseek(fd, ehdr.e_phoff, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ ehdr.e_phoff, strerror(errno));
+ }
+ size = sizeof(phdr[0])*ehdr.e_phnum;
+ if (read(fd, &phdr, size) != size) {
+ die("Cannot read ELF section headers: %s\n",
+ strerror(errno));
+ }
+ for(i = 0; i < ehdr.e_phnum; i++) {
+ phdr[i].p_type = elf32_to_cpu(phdr[i].p_type);
+ phdr[i].p_offset = elf32_to_cpu(phdr[i].p_offset);
+ phdr[i].p_vaddr = elf32_to_cpu(phdr[i].p_vaddr);
+ phdr[i].p_paddr = elf32_to_cpu(phdr[i].p_paddr);
+ phdr[i].p_filesz = elf32_to_cpu(phdr[i].p_filesz);
+ phdr[i].p_memsz = elf32_to_cpu(phdr[i].p_memsz);
+ phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags);
+ phdr[i].p_align = elf32_to_cpu(phdr[i].p_align);
+ }
+}
+
+unsigned long vmlinux_memsz(void)
+{
+ unsigned long min, max, size;
+ int i;
+ min = 0xffffffff;
+ max = 0;
+ for(i = 0; i < ehdr.e_phnum; i++) {
+ unsigned long start, end;
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+ start = phdr[i].p_paddr;
+ end = phdr[i].p_paddr + phdr[i].p_memsz;
+ if (start < min)
+ min = start;
+ if (end > max)
+ max = end;
+ }
+ /* Get the reported size by vmlinux */
+ size = max - min;
+ /* Add 128K for the bootmem bitmap */
+ size += 128*1024;
+ /* Add in space for the initial page tables */
+ size = ((size + (((size + 4095) >> 12)*4)) + 4095) & ~4095;
+ return size;
+}
+
void usage(void)
{
- die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+ die("Usage: build [-b] bootsect setup system rootdev vmlinux [> image]");
}
int main(int argc, char ** argv)
{
unsigned int i, sz, setup_sectors;
+ unsigned kernel_offset, kernel_filesz, kernel_memsz;
int c;
u32 sys_size;
byte major_root, minor_root;
@@ -81,30 +221,25 @@ int main(int argc, char ** argv)
is_big_kernel = 1;
argc--, argv++;
}
- if ((argc < 4) || (argc > 5))
+ if (argc != 6)
usage();
- if (argc > 4) {
- if (!strcmp(argv[4], "CURRENT")) {
- if (stat("/", &sb)) {
- perror("/");
- die("Couldn't stat /");
- }
- major_root = major(sb.st_dev);
- minor_root = minor(sb.st_dev);
- } else if (strcmp(argv[4], "FLOPPY")) {
- if (stat(argv[4], &sb)) {
- perror(argv[4]);
- die("Couldn't stat root device.");
- }
- major_root = major(sb.st_rdev);
- minor_root = minor(sb.st_rdev);
- } else {
- major_root = 0;
- minor_root = 0;
+ if (!strcmp(argv[4], "CURRENT")) {
+ if (stat("/", &sb)) {
+ perror("/");
+ die("Couldn't stat /");
+ }
+ major_root = major(sb.st_dev);
+ minor_root = minor(sb.st_dev);
+ } else if (strcmp(argv[4], "FLOPPY")) {
+ if (stat(argv[4], &sb)) {
+ perror(argv[4]);
+ die("Couldn't stat root device.");
}
+ major_root = major(sb.st_rdev);
+ minor_root = minor(sb.st_rdev);
} else {
- major_root = DEFAULT_MAJOR_ROOT;
- minor_root = DEFAULT_MINOR_ROOT;
+ major_root = 0;
+ minor_root = 0;
}
fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
@@ -144,10 +279,11 @@ int main(int argc, char ** argv)
i += c;
}
+ kernel_offset = (setup_sectors + 1)*512;
file_open(argv[3]);
if (fstat (fd, &sb))
die("Unable to stat `%s': %m", argv[3]);
- sz = sb.st_size;
+ kernel_filesz = sz = sb.st_size;
fprintf (stderr, "System is %d kB\n", sz/1024);
sys_size = (sz + 15) / 16;
if (!is_big_kernel && sys_size > DEF_SYSSIZE)
@@ -168,7 +304,37 @@ int main(int argc, char ** argv)
}
close(fd);
- if (lseek(1, 497, SEEK_SET) != 497) /* Write sizes to the bootsector */
+ file_open(argv[5]);
+ read_ehdr();
+ read_phds();
+ close(fd);
+ kernel_memsz = vmlinux_memsz();
+
+ if (lseek(1, 84, SEEK_SET) != 84) /* Write sizes to the bootsector */
+ die("Output: seek failed");
+ buf[0] = (kernel_offset >> 0) & 0xff;
+ buf[1] = (kernel_offset >> & 0xff;
+ buf[2] = (kernel_offset >> 16) & 0xff;
+ buf[3] = (kernel_offset >> 24) & 0xff;
+ if (write(1, buf, 4) != 4)
+ die("Write of kernel file offset failed");
+ if (lseek(1, 96, SEEK_SET) != 96)
+ die("Output: seek failed");
+ buf[0] = (kernel_filesz >> 0) & 0xff;
+ buf[1] = (kernel_filesz >> & 0xff;
+ buf[2] = (kernel_filesz >> 16) & 0xff;
+ buf[3] = (kernel_filesz >> 24) & 0xff;
+ if (write(1, buf, 4) != 4)
+ die("Write of kernel file size failed");
+ if (lseek(1, 100, SEEK_SET) != 100)
+ die("Output: seek failed");
+ buf[0] = (kernel_memsz >> 0) & 0xff;
+ buf[1] = (kernel_memsz >> & 0xff;
+ buf[2] = (kernel_memsz >> 16) & 0xff;
+ buf[3] = (kernel_memsz >> 24) & 0xff;
+ if (write(1, buf, 4) != 4)
+ die("Write of kernel memory size failed");
+ if (lseek(1, 497, SEEK_SET) != 497)
die("Output: seek failed");
buf[0] = setup_sectors;
if (write(1, buf, 1) != 1)
diff --git a/include/linux/elf_boot.h b/include/linux/elf_boot.h
new file mode 100644
index 0000000..09301e5
--- /dev/null
+++ b/include/linux/elf_boot.h
@@ -0,0 +1,19 @@
+#ifndef ELF_BOOT_H
+#define ELF_BOOT_H
+
+/* Elf notes to help bootloaders identify what program they are booting.
+ */
+
+/* Standardized Elf image notes for booting... The name for all of these is ELFBoot */
+#define ELF_NOTE_BOOT "ELFBoot"
+
+#define EIN_PROGRAM_NAME 0x00000001
+/* The program in this ELF file */
+#define EIN_PROGRAM_VERSION 0x00000002
+/* The version of the program in this ELF file */
+#define EIN_PROGRAM_CHECKSUM 0x00000003
+/* ip style checksum of the memory image. */
+#define EIN_ARGUMENT_STYLE 0x00000004
+/* String identifying argument passing style */
+
+#endif /* ELF_BOOT_H */
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo RemoveThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 13/33] x86_64: Remove assumptions about the kernel start address from e820/bad_addr() [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Signed-off-by: Eric W. Biederman <ebiederm DeleteThis @xmission.com>
---
arch/x86_64/kernel/e820.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 61f029f..56dd525 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -69,9 +69,14 @@ #ifdef CONFIG_BLK_DEV_INITRD
return 1;
}
#endif
- /* kernel code + 640k memory hole (later should not be needed, but
+ /* 640k memory hole (later should not be needed, but
be paranoid for now) */
- if (last >= 640*1024 && addr < __pa_symbol(&_end)) {
+ if (last >= 640*1024 && addr < HIGH_MEMORY) {
+ *addrp = HIGH_MEMORY;
+ }
+
+ /* kernel code */
+ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
*addrp = __pa_symbol(&_end);
return 1;
}
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo DeleteThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 7/33] elf: Add ELFOSABI_STANDALONE to elf.h [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Signed-off-by: Eric W. Biederman <ebiederm.RemoveThis@xmission.com>
---
include/linux/elf.h | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/linux/elf.h b/include/linux/elf.h
index c5bf043..6fa8d3d 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -338,8 +338,9 @@ #define EV_NONE 0 /* e_version, EI_VER
#define EV_CURRENT 1
#define EV_NUM 2
-#define ELFOSABI_NONE 0
-#define ELFOSABI_LINUX 3
+#define ELFOSABI_NONE 0
+#define ELFOSABI_LINUX 3
+#define ELFOSABI_STANDALONE 255
#ifndef ELF_OSABI
#define ELF_OSABI ELFOSABI_NONE
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.RemoveThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 28/33] x86_64: Remove the identity mapping as early as possible. [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
With the rewrite of the SMP trampoline and the early page
allocator there is nothing that needs identity mapped pages,
once we start executing C code.
So add zap_identity_mappings into head64.c and remove
zap_low_mappings() from much later in the code. The functions
are subtly different thus the name change.
This also kills boot_level4_pgt which was from an earlier
attempt to move the identity mappings as early as possible,
and is now no longer needed. Essentially I have replaced
boot_level4_pgt with trampoline_level4_pgt in trampoline.S
Signed-off-by: Eric W. Biederman <ebiederm.DeleteThis@xmission.com>
---
arch/x86_64/kernel/head.S | 34 ++++++++++++++--------------------
arch/x86_64/kernel/head64.c | 16 ++++++++++------
arch/x86_64/kernel/setup.c | 2 --
arch/x86_64/kernel/setup64.c | 1 -
arch/x86_64/mm/init.c | 24 ------------------------
include/asm-x86_64/pgtable.h | 1 -
include/asm-x86_64/proto.h | 2 --
7 files changed, 24 insertions(+), 56 deletions(-)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index a624586..b0b5618 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -73,7 +73,7 @@ startup_32:
movl %eax, %cr4
/* Setup early boot stage 4 level pagetables */
- movl $(boot_level4_pgt - __START_KERNEL_map), %eax
+ movl $(init_level4_pgt - __START_KERNEL_map), %eax
movl %eax, %cr3
/* Setup EFER (Extended Feature Enable Register) */
@@ -117,7 +117,7 @@ ENTRY(secondary_startup_64)
movq %rax, %cr4
/* Setup early boot stage 4 level pagetables. */
- movq $(boot_level4_pgt - __START_KERNEL_map), %rax
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
movq %rax, %cr3
/* Check if nx is implemented */
@@ -264,9 +264,19 @@ #define PMDS(START, PERM, COUNT) \
i = i + 1 ; \
.endr
+ /*
+ * This default setting generates an ident mapping at address 0x100000
+ * and a mapping for the kernel that precisely maps virtual address
+ * 0xffffffff80000000 to physical address 0x000000. (always using
+ * 2Mbyte large pages provided by PAE mode)
+ */
NEXT_PAGE(init_level4_pgt)
- /* This gets initialized in x86_64_start_kernel */
- .fill 512,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 257,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 252,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
@@ -301,22 +311,6 @@ #undef NEXT_PAGE
#ifndef CONFIG_HOTPLUG_CPU
__INITDATA
#endif
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
- .align PAGE_SIZE
-ENTRY(boot_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 257,8,0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 252,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
-
- .data
.align 16
.globl cpu_gdt_descr
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index defef4e..99d4463 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@ #include <asm/bootsetup.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#include <asm/sections.h>
+static void __init zap_identity_mappings(void)
+{
+ pgd_t *pgd = pgd_offset_k(0UL);
+ pgd_clear(pgd);
+ __flush_tlb();
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -78,6 +86,8 @@ void __init x86_64_start_kernel(char * r
char *s;
int i;
+ /* Make NULL pointers segfault */
+ zap_identity_mappings();
for (i = 0; i < 256; i++)
set_intr_gate(i, early_idt_handler);
asm volatile("lidt %0" :: "m" (idt_descr));
@@ -88,12 +98,6 @@ void __init x86_64_start_kernel(char * r
*/
lockdep_init();
- /*
- * switch to init_level4_pgt from boot_level4_pgt
- */
- memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 21840ca..7225f61 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -568,8 +568,6 @@ #endif
dmi_scan_machine();
- zap_low_mappings(0);
-
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6fe58a6..a1f3aed 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -197,7 +197,6 @@ void __cpuinit cpu_init (void)
/* CPU 0 is initialised in head64.c */
if (cpu != 0) {
pda_init(cpu);
- zap_low_mappings(cpu);
} else
estacks = boot_exception_stacks;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index b46566a..149f363 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -332,21 +332,6 @@ void __init init_memory_mapping(unsigned
__flush_tlb_all();
}
-void __cpuinit zap_low_mappings(int cpu)
-{
- if (cpu == 0) {
- pgd_t *pgd = pgd_offset_k(0UL);
- pgd_clear(pgd);
- } else {
- /*
- * For AP's, zap the low identity mappings by changing the cr3
- * to init_level4_pgt and doing local flush tlb all
- */
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
- }
- __flush_tlb_all();
-}
-
/* Compute zone sizes for the DMA and DMA32 zones in a node. */
__init void
size_zones(unsigned long *z, unsigned long *h,
@@ -667,15 +652,6 @@ #endif
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
-
-#ifdef CONFIG_SMP
- /*
- * Sync boot_level4_pgt mappings with the init_level4_pgt
- * except for the low identity mappings which are already zapped
- * in init_level4_pgt. This sync-up is essential for AP's bringup
- */
- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
-#endif
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 5528e8b..d0816fb 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -18,7 +18,6 @@ extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pgd_t init_level4_pgt[];
-extern pgd_t boot_level4_pgt[];
extern unsigned long __supported_pte_mask;
#define swapper_pg_dir init_level4_pgt
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 038fe1f..978ea43 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -11,8 +11,6 @@ struct pt_regs;
extern void start_kernel(void);
extern void pda_init(int);
-extern void zap_low_mappings(int cpu);
-
extern void early_idt_handler(void);
extern void mcheck_init(struct cpuinfo_x86 *c);
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.DeleteThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 21/33] x86_64: modify copy_bootdata to use virtual addresses [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Use virtual addresses instead of physical addresses
in copy bootdata. In addition fix the implementation
of the old bootloader convention. Everything is
at real_mode_data always. It is just that sometimes
real_mode_data was relocated by setup.S to not sit at
0x90000.
Signed-off-by: Eric W. Biederman <ebiederm.RemoveThis@xmission.com>
---
arch/x86_64/kernel/head64.c | 17 ++++++++---------
1 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 454498c..defef4e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -29,29 +29,28 @@ static void __init clear_bss(void)
}
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x90020
+#define OLD_CL_MAGIC_ADDR 0x20
#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_BASE_ADDR 0x90000
-#define OLD_CL_OFFSET 0x90022
+#define OLD_CL_OFFSET 0x22
extern char saved_command_line[];
static void __init copy_bootdata(char *real_mode_data)
{
- int new_data;
+ unsigned long new_data;
char * command_line;
memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
printk("so old bootloader that it does not support commandline?!\n");
return;
}
- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
printk("old bootloader convention, maybe loadlin?\n");
}
- command_line = (char *) ((u64)(new_data));
+ command_line = __va(new_data);
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
printk("Bootdata ok (command line is %s)\n", saved_command_line);
}
@@ -99,7 +98,7 @@ void __init x86_64_start_kernel(char * r
cpu_pda(i) = &boot_cpu_pda[i];
pda_init(0);
- copy_bootdata(real_mode_data);
+ copy_bootdata(__va(real_mode_data));
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.RemoveThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 12/33] x86_64: fixup indentation in e820.c [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Signed-off-by: Eric W. Biederman <ebiederm.DeleteThis@xmission.com>
---
arch/x86_64/kernel/e820.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index e56c2ad..61f029f 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -205,8 +205,8 @@ unsigned long __init e820_end_of_ram(voi
if (start >= end)
continue;
if (ei->type == E820_RAM) {
- if (end > end_pfn<<PAGE_SHIFT)
- end_pfn = end>>PAGE_SHIFT;
+ if (end > end_pfn<<PAGE_SHIFT)
+ end_pfn = end>>PAGE_SHIFT;
} else {
if (end > end_pfn_map<<PAGE_SHIFT)
end_pfn_map = end>>PAGE_SHIFT;
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.DeleteThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 25/33] x86_64: 64bit PIC SMP trampoline [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
This modifies the SMP trampoline and all of the associated code so
it can jump to a 64bit kernel loaded at an arbitrary address.
The dependencies on having an idenetity mapped page in the kernel
page tables for SMP bootup have all been removed.
In addition the trampoline has been modified to verify
that long mode is supported. Asking if long mode is implemented is
down right silly but we have traditionally had some of these checks,
and they can't hurt anything. So when the totally ludicrous happens
we just might handle it correctly.
Signed-off-by: Eric W. Biederman <ebiederm.DeleteThis@xmission.com>
---
arch/x86_64/kernel/head.S | 1
arch/x86_64/kernel/setup.c | 9 --
arch/x86_64/kernel/trampoline.S | 168 ++++++++++++++++++++++++++++++++++++---
3 files changed, 156 insertions(+), 22 deletions(-)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index d0e626e..8d1b4a7 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -103,6 +103,7 @@ startup_32:
.org 0x100
.globl startup_64
startup_64:
+ENTRY(secondary_startup_64)
/* We come here either from startup_32
* or directly from a 64bit bootloader.
* Since we may have come directly from a bootloader we
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 11d31ea..66816ba 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -611,15 +611,8 @@ #endif
reserve_bootmem_generic(ebda_addr, ebda_size);
#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
-
/* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index c79b99a..13eee63 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
* Trampoline.S Derived from Setup.S by Linus Torvalds
*
* 4 Jan 1997 Michael Chastain: changed to gnu as.
+ * 15 Sept 2005 Eric Biederman: 64bit PIC support
*
* Entry: CS:IP point to the start of our code, we are
* in real mode with no stack, but the rest of the
@@ -17,15 +18,20 @@
* and IP is zero. Thus, data addresses need to be absolute
* (no relocation) and are taken with regard to r_base.
*
+ * With the addition of trampoline_level4_pgt this code can
+ * now enter a 64bit kernel that lives at arbitrary 64bit
+ * physical addresses.
+ *
* If you work on this file, check the object module with objdump
* --full-contents --reloc to make sure there are no relocation
- * entries. For the GDT entry we do hand relocation in smpboot.c
- * because of 64bit linker limitations.
+ * entries.
*/
#include <linux/linkage.h>
-#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/segment.h>
.data
@@ -33,15 +39,31 @@ #include <asm/page.h>
ENTRY(trampoline_data)
r_base = .
+ cli # We should be safe anyway
wbinvd
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
+ mov %ax, %es
+ mov %ax, %ss
- cli # We should be safe anyway
movl $0xA5A5A5A5, trampoline_data - r_base
# write marker for master knows we're running
+ # Setup stack
+ movw $(trampoline_stack_end - r_base), %sp
+
+ call verify_cpu # Verify the cpu supports long mode
+
+ mov %cs, %ax
+ movzx %ax, %esi # Find the 32bit trampoline location
+ shll $4, %esi
+
+ # Fixup the vectors
+ addl %esi, startup_32_vector - r_base
+ addl %esi, startup_64_vector - r_base
+ addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
+
/*
* GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default
@@ -49,23 +71,141 @@ r_base = .
* to 32 bit.
*/
- lidtl idt_48 - r_base # load idt with 0, 0
- lgdtl gdt_48 - r_base # load gdt with whatever is appropriate
+ lidtl tidt - r_base # load idt with 0, 0
+ lgdtl tgdt - r_base # load gdt with whatever is appropriate
xor %ax, %ax
inc %ax # protected mode (PE) bit
lmsw %ax # into protected mode
- # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S
- ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map)
+
+ # flush prefetch and jump to startup_32
+ ljmpl *(startup_32_vector - r_base)
+
+ .code32
+ .balign 4
+startup_32:
+ movl $__KERNEL_DS, %eax # Initialize the %ds segment register
+ movl %eax, %ds
+
+ xorl %eax, %eax
+ btsl $5, %eax # Enable PAE mode
+ movl %eax, %cr4
+
+ # Setup trampoline 4 level pagetables
+ leal (trampoline_level4_pgt - r_base)(%esi), %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ movl $(1 << _EFER_LME), %eax # Enable Long Mode
+ xorl %edx, %edx
+ wrmsr
+
+ xorl %eax, %eax
+ btsl $31, %eax # Enable paging and in turn activate Long Mode
+ btsl $0, %eax # Enable protected mode
+ movl %eax, %cr0
+
+ /*
+ * At this point we're in long mode but in 32bit compatibility mode
+ * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
+ * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ */
+ ljmp *(startup_64_vector - r_base)(%esi)
+
+ .code64
+ .balign 4
+startup_64:
+ # Now jump into the kernel using virtual addresses
+ movq $secondary_startup_64, %rax
+ jmp *%rax
+
+ .code16
+verify_cpu:
+ pushl $0 # Kill any dangerous flags
+ popfl
+
+ /* minimum CPUID flags for x86-64 */
+ /* see http://www.x86-64.org/lists/discuss/msg02971.html */
+#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<< |\
+ (1<<13)|(1<<15)|(1<<24)|(1<<25)|(1<<26))
+#define REQUIRED_MASK2 (1<<29)
+
+ pushfl # check for cpuid
+ popl %eax
+ movl %eax, %ebx
+ xorl $0x200000,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ pushl %ebx
+ popfl
+ cmpl %eax, %ebx
+ jz no_longmode
+
+ xorl %eax, %eax # See if cpuid 1 is implemented
+ cpuid
+ cmpl $0x1, %eax
+ jb no_longmode
+
+ movl $0x01, %eax # Does the cpu have what it takes?
+ cpuid
+ andl $REQUIRED_MASK1, %edx
+ xorl $REQUIRED_MASK1, %edx
+ jnz no_longmode
+
+ movl $0x80000000, %eax # See if extended cpuid is implemented
+ cpuid
+ cmpl $0x80000001, %eax
+ jb no_longmode
+
+ movl $0x80000001, %eax # Does the cpu have what it takes?
+ cpuid
+ andl $REQUIRED_MASK2, %edx
+ xorl $REQUIRED_MASK2, %edx
+ jnz no_longmode
+
+ ret # The cpu supports long mode
+
+no_longmode:
+ hlt
+ jmp no_longmode
+
# Careful these need to be in the same 64K segment as the above;
-idt_48:
+tidt:
.word 0 # idt limit = 0
.word 0, 0 # idt base = 0L
-gdt_48:
- .short GDT_ENTRIES*8 - 1 # gdt limit
- .long cpu_gdt_table-__START_KERNEL_map
+ # Duplicate the global descriptor table
+ # so the kernel can live anywhere
+ .balign 4
+tgdt:
+ .short tgdt_end - tgdt # gdt limit
+ .long tgdt - r_base
+ .short 0
+ .quad 0x00cf9b000000ffff # __KERNEL32_CS
+ .quad 0x00af9b000000ffff # __KERNEL_CS
+ .quad 0x00cf93000000ffff # __KERNEL_DS
+tgdt_end:
+
+ .balign 4
+startup_32_vector:
+ .long startup_32 - r_base
+ .word __KERNEL32_CS, 0
+
+ .balign 4
+startup_64_vector:
+ .long startup_64 - r_base
+ .word __KERNEL_CS, 0
+
+trampoline_stack:
+ .org 0x1000
+trampoline_stack_end:
+ENTRY(trampoline_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 510,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-.globl trampoline_end
-trampoline_end:
+ENTRY(trampoline_end)
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.DeleteThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 24/33] x86_64: Add EFER to the set registers saved by save_processor_state [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
EFER varies like %cr4 depending on the cpu capabilities, and which cpu
capabilities we want to make use of. So save/restore it make certain
we have the same EFER value when we are done.
Signed-off-by: Eric W. Biederman <ebiederm.DeleteThis@xmission.com>
---
arch/x86_64/kernel/suspend.c | 3 ++-
include/asm-x86_64/suspend.h | 1 +
2 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e67..fe865ea 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -33,7 +33,6 @@ void __save_processor_state(struct saved
asm volatile ("str %0" : "=m" (ctxt->tr));
/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
- /* EFER should be constant for kernel version, no need to handle it. */
/*
* segment registers
*/
@@ -50,6 +49,7 @@ void __save_processor_state(struct saved
/*
* control registers
*/
+ rdmsrl(MSR_EFER, ctxt->efer);
asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +75,7 @@ void __restore_processor_state(struct sa
/*
* control registers
*/
+ wrmsrl(MSR_EFER, ctxt->efer);
asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
index bc7f817..a42306c 100644
--- a/include/asm-x86_64/suspend.h
+++ b/include/asm-x86_64/suspend.h
@@ -17,6 +17,7 @@ struct saved_context {
u16 ds, es, fs, gs, ss;
unsigned long gs_base, gs_kernel_base, fs_base;
unsigned long cr0, cr2, cr3, cr4, cr8;
+ unsigned long efer;
u16 gdt_pad;
u16 gdt_limit;
unsigned long gdt_base;
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.DeleteThis@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 8/33] kallsyms.c: Generate relocatable symbols. [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Print the addresses of non-absolute symbols relative to _text
so that ld will generate relocations. Allowing a relocatable
kernel to relocate them. We can't actually use the symbol names
because kallsyms includes static symbols that are not exported
from their object files.
Signed-off-by: Eric W. Biederman <ebiederm DeleteThis @xmission.com>
---
scripts/kallsyms.c | 20 +++++++++++++++++---
1 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 22d281c..4c1ad0a 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -43,7 +43,7 @@ struct sym_entry {
static struct sym_entry *table;
static unsigned int table_size, table_cnt;
-static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
+static unsigned long long _text, _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
static int all_symbols = 0;
static char symbol_prefix_char = '\0';
@@ -91,7 +91,9 @@ static int read_symbol(FILE *in, struct
sym++;
/* Ignore most absolute/undefined (?) symbols. */
- if (strcmp(sym, "_stext") == 0)
+ if (strcmp(sym, "_text") == 0)
+ _text = s->addr;
+ else if (strcmp(sym, "_stext") == 0)
_stext = s->addr;
else if (strcmp(sym, "_etext") == 0)
_etext = s->addr;
@@ -265,9 +267,21 @@ static void write_src(void)
printf(".data\n");
+ /* Provide proper symbols relocatability by their '_text'
+ * relativeness. The symbol names cannot be used to construct
+ * normal symbol references as the list of symbols contains
+ * symbols that are declared static and are private to their
+ * .o files. This prevents .tmp_kallsyms.o or any other
+ * object from referencing them.
+ */
output_label("kallsyms_addresses");
for (i = 0; i < table_cnt; i++) {
- printf("\tPTR\t%#llx\n", table[i].addr);
+ if (toupper(table[i].sym[0]) != 'A') {
+ printf("\tPTR\t_text + %#llx\n",
+ table[i].addr - _text);
+ } else {
+ printf("\tPTR\t%#llx\n", table[i].addr);
+ }
}
printf("\n");
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo DeleteThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 15/33] x86_64: Fix kernel direct mapping size check [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Instead of using physical addresse of _end which has nothing to
do with knowing if the kernel fits within it's reserved
virtual addresses, verify the virtual address of _end fits
within in the kernel virtual address mapping.
Signed-off-by: Eric W. Biederman <ebiederm DeleteThis @xmission.com>
---
arch/x86_64/kernel/head64.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 36647ce..454498c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -116,7 +116,7 @@ #ifdef CONFIG_X86_IO_APIC
disable_apic = 1;
#endif
/* You need early console to see that */
- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
+ if (((unsigned long)&_end) >= (__START_KERNEL_map + KERNEL_TEXT_SIZE))
panic("Kernel too big for kernel mapping\n");
setup_boot_cpu_data();
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo DeleteThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 14/33] x86_64: Properly report in /proc/iomem the kernel address [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
The code assumed that the kernel was always loaded
at 1M in memory. This fixes that assumption.
Signed-off-by: Eric W. Biederman <ebiederm DeleteThis @xmission.com>
---
arch/x86_64/kernel/setup.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 8a099ff..11d31ea 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -521,7 +521,7 @@ static void discover_ebda(void)
void __init setup_arch(char **cmdline_p)
{
- unsigned long kernel_end;
+ unsigned long kernel_start, kernel_end;
ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
screen_info = SCREEN_INFO;
@@ -596,8 +596,9 @@ #endif
(table_end - table_start) << PAGE_SHIFT);
/* reserve kernel */
+ kernel_start = __pa_symbol(&_text);
kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
- reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
+ reserve_bootmem_generic(kernel_start, kernel_end - kernel_start);
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo DeleteThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 20/33] x86_64: fix early_printk to use the standard ISA mapping [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
Signed-off-by: Eric W. Biederman <ebiederm.TakeThisOut@xmission.com>
---
arch/x86_64/kernel/early_printk.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 140051e..d2b4cfb 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@ #include <asm/fcntl.h>
#ifdef __i386__
#include <asm/setup.h>
-#define VGABASE (__ISA_IO_base + 0xb8000)
#else
#include <asm/bootsetup.h>
-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
#endif
+#define VGABASE (__ISA_IO_base + 0xb8000)
static int max_ypos = 25, max_xpos = 80;
static int current_ypos = 25, current_xpos = 0;
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo.TakeThisOut@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
Eric W. Biederman External

Since: Jun 21, 2006 Posts: 194
|
Posted: Tue Aug 01, 2006 1:20 pm Post subject: [PATCH 16/33] x86_64: Assembly safe page.h and pgtable.h [Login to view extended thread Info.] Archived from groups: per prev. post (more info?) |
|
|
This patch makes pgtable.h and page.h safe to include
in assembly files like head.S. Allowing us to use
symbolic constants instead of hard coded numbers when
refering to the page tables.
This patch copies asm-sparc64/const.h to asm-x86_64 to
get a definition of _AC() a very convinient macro that
allows us to force the type when we are compiling the
code in C and to drop all of the type information when
we are using the constant in assembly. Previously this
was done with multiple definition of the same constant.
const.h was modified slightly so that it works when given
CONFIG options as arguments.
This patch adds #ifndef __ASSEMBLY__ ... #endif
and _AC(1,UL) where appropriate so the assembler won't
choke on the header files. Otherwise nothing
should have changed.
Signed-off-by: Eric W. Biederman <ebiederm RemoveThis @xmission.com>
---
include/asm-x86_64/const.h | 20 ++++++++++++++++++++
include/asm-x86_64/page.h | 34 +++++++++++++---------------------
include/asm-x86_64/pgtable.h | 33 +++++++++++++++++++++------------
3 files changed, 54 insertions(+), 33 deletions(-)
diff --git a/include/asm-x86_64/const.h b/include/asm-x86_64/const.h
new file mode 100644
index 0000000..54fb08f
--- /dev/null
+++ b/include/asm-x86_64/const.h
@@ -0,0 +1,20 @@
+/* const.h: Macros for dealing with constants. */
+
+#ifndef _X86_64_CONST_H
+#define _X86_64_CONST_H
+
+/* Some constant macros are used in both assembler and
+ * C code. Therefore we cannot annotate them always with
+ * 'UL' and other type specificers unilaterally. We
+ * use the following macros to deal with this.
+ */
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y) X
+#else
+#define __AC(X,Y) (X##Y)
+#define _AC(X,Y) __AC(X,Y)
+#endif
+
+
+#endif /* !(_X86_64_CONST_H) */
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index 10f3461..f030260 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -1,14 +1,11 @@
#ifndef _X86_64_PAGE_H
#define _X86_64_PAGE_H
+#include <asm/const.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
-#ifdef __ASSEMBLY__
-#define PAGE_SIZE (0x1 << PAGE_SHIFT)
-#else
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#endif
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
@@ -33,10 +30,10 @@ #define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
#define HPAGE_SHIFT PMD_SHIFT
-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
+#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -76,29 +73,24 @@ #define __pud(x) ((pud_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START)
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map 0xffffffff80000000UL
-#define __PAGE_OFFSET 0xffff810000000000UL
+#endif /* !__ASSEMBLY__ */
-#else
-#define __PHYSICAL_START CONFIG_PHYSICAL_START
+#define __PHYSICAL_START _AC(CONFIG_PHYSICAL_START,UL)
#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
-#define __START_KERNEL_map 0xffffffff80000000
-#define __PAGE_OFFSET 0xffff810000000000
-#endif /* !__ASSEMBLY__ */
+#define __START_KERNEL_map _AC(0xffffffff80000000,UL)
+#define __PAGE_OFFSET _AC(0xffff810000000000,UL)
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
/* See Documentation/x86_64/mm.txt for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 46
-#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
#define __VIRTUAL_MASK_SHIFT 48
-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
-#define KERNEL_TEXT_SIZE (40UL*1024*1024)
-#define KERNEL_TEXT_START 0xffffffff80000000UL
+#define KERNEL_TEXT_SIZE (_AC(40,UL)*1024*1024)
+#define KERNEL_TEXT_START _AC(0xffffffff80000000,UL)
#ifndef __ASSEMBLY__
@@ -106,7 +98,7 @@ #include <asm/bug.h>
#endif /* __ASSEMBLY__ */
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+#define PAGE_OFFSET __PAGE_OFFSET
/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
Otherwise you risk miscompilation. */
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a31ab4e..211a2ca 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -1,6 +1,9 @@
#ifndef _X86_64_PGTABLE_H
#define _X86_64_PGTABLE_H
+#include <asm/const.h>
+#ifndef __ASSEMBLY__
+
/*
* This file contains the functions and defines necessary to modify and use
* the x86-64 page table tree.
@@ -34,6 +37,8 @@ extern unsigned long pgkern_mask;
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#endif /* !__ASSEMBLY__ */
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
@@ -58,6 +63,8 @@ #define PTRS_PER_PMD 512
*/
#define PTRS_PER_PTE 512
+#ifndef __ASSEMBLY__
+
#define pte_ERROR(e) \
printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
#define pmd_ERROR(e) \
@@ -124,22 +131,23 @@ #define pte_same(a, b) ((a).pte == (b).
#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
-#define PMD_SIZE (1UL << PMD_SHIFT)
+#endif /* !__ASSEMBLY__ */
+
+#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
#define FIRST_USER_ADDRESS 0
-#ifndef __ASSEMBLY__
-#define MAXMEM 0x3fffffffffffUL
-#define VMALLOC_START 0xffffc20000000000UL
-#define VMALLOC_END 0xffffe1ffffffffffUL
-#define MODULES_VADDR 0xffffffff88000000UL
-#define MODULES_END 0xfffffffffff00000UL
+#define MAXMEM _AC(0x3fffffffffff,UL)
+#define VMALLOC_START _AC(0xffffc20000000000,UL)
+#define VMALLOC_END _AC(0xffffe1ffffffffff,UL)
+#define MODULES_VADDR _AC(0xffffffff88000000,UL)
+#define MODULES_END _AC(0xfffffffffff00000,UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define _PAGE_BIT_PRESENT 0
@@ -165,7 +173,7 @@ #define _PAGE_FILE 0x040 /* nonlinear fi
#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
#define _PAGE_PROTNONE 0x080 /* If not present */
-#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
+#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -227,6 +235,8 @@ #define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
+#ifndef __ASSEMBLY__
+
static inline unsigned long pgd_bad(pgd_t pgd)
{
unsigned long val = pgd_val(pgd);
@@ -418,8 +428,6 @@ extern spinlock_t pgd_lock;
extern struct page *pgd_list;
void vmalloc_sync_all(void);
-#endif /* !__ASSEMBLY__ */
-
extern int kern_addr_valid(unsigned long addr);
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
@@ -449,5 +457,6 @@ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_F
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>
+#endif /* !__ASSEMBLY__ */
#endif /* _X86_64_PGTABLE_H */
--
1.4.2.rc2.g5209e
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo RemoveThis @vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/ |
|
| Back to top |
|
 |
|
|
|
You can post new topics in this forum You can reply to topics in this forum You cannot edit your posts in this forum You cannot delete your posts in this forum You cannot vote in polls in this forum
|
| |
|
|