arch/i386/kernel/process.c
/*
 * linux/arch/i386/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of
 * process handling..
 */

#define __KERNEL_SYSCALLS__
#include <stdarg.h>

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
#include <linux/apm_bios.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include "irq.h"

spinlock_t semaphore_wake_lock = SPIN_LOCK_UNLOCKED;

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
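
/* ret_from_fork itself is an assembly entry point in entry.S; the
 * __asm__ alias above just gives C code a symbol for it, so that
 * copy_thread() below can install its address as the new child's
 * saved EIP. */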

#ifdef CONFIG_APM
extern int apm_do_idle(void);
extern void apm_do_busy(void);
#endif

static int hlt_counter=0;

#define HARD_IDLE_TIMEOUT (HZ / 3)

void disable_hlt(void)
{
        hlt_counter++;
}

void enable_hlt(void)
{
        hlt_counter--;
}
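
/* Drivers bracket code that must not have the CPU halted under it
 * with disable_hlt()/enable_hlt() pairs; while hlt_counter is
 * nonzero the idle loops below busy-wait instead of executing
 * "hlt". */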

#ifndef __SMP__

static void hard_idle(void)
{
        while (!current->need_resched) {
                if (boot_cpu_data.hlt_works_ok && !hlt_counter) {
#ifdef CONFIG_APM
                        /* If the APM BIOS is not enabled, or there
                         * is an error calling the idle routine, we
                         * should hlt if possible.  We need to check
                         * need_resched again because an interrupt
                         * may have occurred in apm_do_idle(). */
                        start_bh_atomic();
                        if (!apm_do_idle() && !current->need_resched)
                                __asm__("hlt");
                        end_bh_atomic();
#else
                        __asm__("hlt");
#endif
                }
                if (current->need_resched)
                        break;
                schedule();
        }
#ifdef CONFIG_APM
        apm_do_busy();
#endif
}

/* The idle loop on a uniprocessor i386.. */
static int cpu_idle(void *unused)
{
        int work = 1;
        unsigned long start_idle = 0;

        /* endless idle loop with no priority at all */
        current->priority = 0;
        current->counter = -100;
        for (;;) {
                if (work)
                        start_idle = jiffies;

                if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
                        hard_idle();
                else {
                        if (boot_cpu_data.hlt_works_ok && !hlt_counter &&
                            !current->need_resched)
                                __asm__("hlt");
                }

                work = current->need_resched;
                schedule();
                check_pgt_cache();
        }
}

#else

/* This is being executed in task 0 'user space'. */

int cpu_idle(void *unused)
{
        /* endless idle loop with no priority at all */
        current->priority = 0;
        current->counter = -100;
        while (1) {
                if (current_cpu_data.hlt_works_ok && !hlt_counter &&
                    !current->need_resched)
                        __asm__("hlt");
                /* although we are an idle CPU, we do not want to get
                 * into the scheduler unnecessarily. */
                if (current->need_resched) {
                        schedule();
                        check_pgt_cache();
                }
        }
}

#endif
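
/* Either way the idea is the same: halt the CPU until the next
 * interrupt whenever nothing is runnable.  The uniprocessor
 * version additionally escalates to hard_idle() once it has been
 * continuously idle for HARD_IDLE_TIMEOUT (a third of a second),
 * giving the APM BIOS a chance to enter deeper power-saving
 * states than a plain "hlt". */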

asmlinkage int sys_idle(void)
{
        if (current->pid != 0)
                return -EPERM;
        cpu_idle(NULL);
        return 0;
}

/* This routine reboots the machine by asking the keyboard
 * controller to pulse the reset-line low.  We try that for a
 * while, and if it doesn't work, we do some other stupid
 * things. */

static long no_idt[2] = {0, 0};
static int reboot_mode = 0;
static int reboot_thru_bios = 0;

__initfunc(void reboot_setup(char *str, int *ints))
{
        while (1) {
                switch (*str) {
                case 'w':       /* "warm" reboot (no memory testing etc) */
                        reboot_mode = 0x1234;
                        break;
                case 'c':       /* "cold" reboot (w/ memory testing etc) */
                        reboot_mode = 0x0;
                        break;
                case 'b':       /* "bios" reboot by jumping thru the BIOS */
                        reboot_thru_bios = 1;
                        break;
                case 'h':       /* "hard" reboot by toggling RESET and/or crashing the CPU */
                        reboot_thru_bios = 0;
                        break;
                }
                if ((str = strchr(str,',')) != NULL)
                        str++;
                else
                        break;
        }
}
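
/* The string parsed above comes from a comma-separated "reboot="
 * option on the kernel command line; "reboot=w,b", for example,
 * selects a warm reboot performed by jumping through the BIOS. */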

/* The following code and data reboots the machine by switching to
 * real mode and jumping to the BIOS reset entry point, as if the
 * CPU has really been reset.  The previous version asked the
 * keyboard controller to pulse the CPU reset line, which is more
 * thorough, but doesn't work with at least one type of 486
 * motherboard.  It is easy to stop this code working; hence the
 * copious comments. */

static unsigned long long real_mode_gdt_entries [3] =
{
        0x0000000000000000ULL,  /* Null descriptor */
        0x00009a000000ffffULL,  /* 16-bit real-mode 64k code at 0x00000000 */
        0x000092000100ffffULL   /* 16-bit real-mode 64k data at 0x00000100 */
};

static struct
{
        unsigned short       size __attribute__ ((packed));
        unsigned long long * base __attribute__ ((packed));
}
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
real_mode_idt = { 0x3ff, 0 };
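
/* Decoding the code descriptor above: 0x00009a000000ffff packs
 * limit 0xffff, base 0x000000 and access byte 0x9a (present,
 * ring 0, readable code) - a byte-granular 64k code segment at 0.
 * The data descriptor differs only in its access byte (0x92,
 * writable data) and its base (0x100). */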

/* This is 16-bit protected mode code to disable paging and the
   cache, switch to real mode and jump to the BIOS reset code.

   The instruction that switches to real mode by writing to CR0
   must be followed immediately by a far jump instruction, which
   sets CS to a valid value for real mode, and flushes the
   prefetch queue to avoid running instructions that have already
   been decoded in protected mode.

   Clears all the flags except ET, especially PG (paging), PE
   (protected-mode enable) and TS (task switch for coprocessor
   state save).  Flushes the TLB after paging has been disabled.
   Sets CD and NW, to disable the cache on a 486, and invalidates
   the cache.  This is more like the state of a 486 after reset.
   I don't know if something else should be done for other chips.

   More could be done here to set up the registers as if a CPU
   reset had occurred; hopefully real BIOSs don't assume much. */
static unsigned char real_mode_switch [] =
{
        0x66, 0x0f, 0x20, 0xc0,                 /*    movl %cr0,%eax        */
        0x66, 0x83, 0xe0, 0x11,                 /*    andl $0x00000011,%eax */
        0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,     /*    orl  $0x60000000,%eax */
        0x66, 0x0f, 0x22, 0xc0,                 /*    movl %eax,%cr0        */
        0x66, 0x0f, 0x22, 0xd8,                 /*    movl %eax,%cr3        */
        0x66, 0x0f, 0x20, 0xc3,                 /*    movl %cr0,%ebx        */
        0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /*  andl $0x60000000,%ebx */
        0x74, 0x02,                             /*    jz   f                */
        0x0f, 0x08,                             /*    invd                  */
        0x24, 0x10,                             /* f: andb $0x10,al         */
        0x66, 0x0f, 0x22, 0xc0,                 /*    movl %eax,%cr0        */
        0xea, 0x00, 0x00, 0xff, 0xff            /*    ljmp $0xffff,$0x0000  */
};
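
/* The closing bytes, 0xea 0x00 0x00 0xff 0xff, encode a 16-bit
 * far jump to 0xffff:0x0000, i.e. physical address 0xffff0 - the
 * traditional x86 reset vector, where the BIOS keeps its restart
 * entry point. */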

static inline void kb_wait(void)
{
        int i;

        /* Wait (bounded, so we can't hang) until the keyboard
         * controller's input buffer drains (status port 0x64,
         * bit 1) and it can accept another command. */
        for (i=0; i<0x10000; i++)
                if ((inb_p(0x64) & 0x02) == 0)
                        break;
}

void machine_restart(char * __unused)
{
#if __SMP__
        /* turn off the IO-APIC, so we can do a clean reboot */
        init_pic_mode();
#endif

        if (!reboot_thru_bios) {
                /* rebooting needs to touch the page at abs addr 0 */
                *((unsigned short *)__va(0x472)) = reboot_mode;
                for (;;) {
                        int i;
                        for (i=0; i<100; i++) {
                                kb_wait();
                                udelay(50);
                                outb(0xfe,0x64);        /* pulse reset low */
                                udelay(50);
                        }
                        /* That didn't work - force a triple fault.. */
                        __asm__ __volatile__("lidt %0": :"m" (no_idt));
                        __asm__ __volatile__("int3");
                }
        }

        cli();

        /* Write zero to CMOS register number 0x0f, which the BIOS
           POST routine will recognize as telling it to do a proper
           reboot.  (Well, that's what this book in front of me says --
           it may only apply to the Phoenix BIOS though, it's not
           clear.)  At the same time, disable NMIs by setting the top
           bit in the CMOS address register, as we're about to do
           peculiar things to the CPU.  I'm not sure if `outb_p' is
           needed instead of just `outb'.  Use it to be on the safe
           side. */

        outb_p (0x8f, 0x70);
        outb_p (0x00, 0x71);

        /* Remap the kernel at virtual address zero, as well as offset
           zero from the kernel segment.  This assumes the kernel
           segment starts at virtual address PAGE_OFFSET. */

        memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);

        /* Make sure the first page is mapped to the start of physical
           memory.  It is normally not mapped, to trap kernel NULL
           pointer dereferences. */

        pg0[0] = _PAGE_RW | _PAGE_PRESENT;

        /* Use `swapper_pg_dir' as our page directory.  We bother with
           `SET_PAGE_DIR' because, although we are rebooting, if the
           way the root page directory is set ever changes, this
           seldom-used feature won't break. ;) */

        SET_PAGE_DIR(current,swapper_pg_dir);

        /* Write 0x1234 to absolute memory location 0x472.  The BIOS
           reads this on booting to tell it to "Bypass memory test
           (also warm boot)".  This seems like a fairly standard thing
           that gets set by REBOOT.COM programs, and the previous
           reset routine did this too. */

        *((unsigned short *)0x472) = reboot_mode;

        /* For the switch to real mode, copy some code to low memory.
           It has to be in the first 64k because it is running in
           16-bit mode, and it has to have the same physical and
           virtual address, because it turns off paging.  Copy it near
           the end of the first page, out of the way of BIOS
           variables. */

        memcpy ((void *) (0x1000 - sizeof (real_mode_switch)),
                real_mode_switch, sizeof (real_mode_switch));

        /* Set up the IDT for real mode. */

        __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));

        /* Set up a GDT from which we can load segment descriptors for
           real mode.  The GDT is not used in real mode; it is just
           needed here to prepare the descriptors. */

        __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));

        /* Load the data segment registers, and thus the descriptors
           ready for real mode.  The base address of each segment is
           0x100, 16 times the selector value being loaded here.  This
           is so that the segment registers don't have to be reloaded
           after switching to real mode: the values are consistent for
           real mode operation already. */

        __asm__ __volatile__ ("movl $0x0010,%%eax\n"
                                "\tmovl %%ax,%%ds\n"
                                "\tmovl %%ax,%%es\n"
                                "\tmovl %%ax,%%fs\n"
                                "\tmovl %%ax,%%gs\n"
                                "\tmovl %%ax,%%ss" : : : "eax");

        /* Jump to the 16-bit code that we copied earlier.  It
           disables paging and the cache, switches to real mode, and
           jumps to the BIOS reset entry point. */

        __asm__ __volatile__ ("ljmp $0x0008,%0"
                                :
                                : "i" ((void *) (0x1000 - sizeof (real_mode_switch))));
}

void machine_halt(void)
{
}

void machine_power_off(void)
{
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
        apm_power_off();
#endif
}

void show_regs(struct pt_regs * regs)
{
        long cr0 = 0L, cr2 = 0L, cr3 = 0L;

        printk("\n");
        printk("EIP: %04x:[<%08lx>]", 0xffff & regs->xcs, regs->eip);
        if (regs->xcs & 3)
                printk(" ESP: %04x:%08lx", 0xffff & regs->xss, regs->esp);
        printk(" EFLAGS: %08lx\n", regs->eflags);
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->eax, regs->ebx, regs->ecx, regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp);
        printk(" DS: %04x ES: %04x\n", 0xffff & regs->xds, 0xffff & regs->xes);
        __asm__("movl %%cr0, %0": "=r" (cr0));
        __asm__("movl %%cr2, %0": "=r" (cr2));
        __asm__("movl %%cr3, %0": "=r" (cr3));
        printk("CR0: %08lx CR2: %08lx CR3: %08lx\n", cr0, cr2, cr3);
}

/* Allocation and freeing of basic task resources.
 *
 * NOTE! The task struct and the stack go together.
 *
 * The task structure is a two-page thing, and as such cannot be
 * reliably allocated with the basic page alloc functions.  We have
 * a small cache of structures for when the allocations fail..
 *
 * This extra buffer essentially acts to make for less "jitter" in
 * the allocations..
 *
 * On SMP we don't do this right now because:
 * - we aren't holding any locks when called, and we might as well
 *   just depend on the generic memory management to do proper
 *   locking for us instead of complicating it here.
 * - if you use SMP you have a beefy enough machine that this
 *   shouldn't matter.. */
#ifndef __SMP__
#define EXTRA_TASK_STRUCT 16
static struct task_struct * task_struct_stack[EXTRA_TASK_STRUCT];
static int task_struct_stack_ptr = -1;
#endif

struct task_struct * alloc_task_struct(void)
{
#ifndef EXTRA_TASK_STRUCT
        return (struct task_struct *) __get_free_pages(GFP_KERNEL,1);
#else
        int index;
        struct task_struct *ret;

        index = task_struct_stack_ptr;
        if (index >= EXTRA_TASK_STRUCT/2)
                goto use_cache;
        ret = (struct task_struct *) __get_free_pages(GFP_KERNEL,1);
        if (!ret) {
                index = task_struct_stack_ptr;
                if (index >= 0) {
use_cache:
                        ret = task_struct_stack[index];
                        task_struct_stack_ptr = index-1;
                }
        }
        return ret;
#endif
}

void free_task_struct(struct task_struct *p)
{
#ifdef EXTRA_TASK_STRUCT
        int index = task_struct_stack_ptr+1;

        if (index < EXTRA_TASK_STRUCT) {
                task_struct_stack[index] = p;
                task_struct_stack_ptr = index;
        } else
#endif
                free_pages((unsigned long) p, 1);
}
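
/* The cache behaves as a LIFO stack: task_struct_stack_ptr always
 * indexes the most recently freed entry (-1 when empty), so
 * alloc_task_struct() pops from task_struct_stack[index] and
 * free_task_struct() pushes at index+1. */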

void release_segments(struct mm_struct *mm)
{
        if (mm->segments) {
                void * ldt = mm->segments;
                mm->segments = NULL;
                vfree(ldt);
        }
}

void forget_segments(void)
{
        /* forget local segments */
        __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs"
                : /* no outputs */
                : "r" (0));

        /* Get the LDT entry from init_task. */
        current->tss.ldt = _LDT(0);
        load_ldt(0);
}

/* Create a kernel thread */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
        long retval, d0;

        __asm__ __volatile__(
                "movl %%esp,%%esi\n\t"
                "int $0x80\n\t"         /* Linux/i386 system call */
                "cmpl %%esp,%%esi\n\t"  /* child or parent? */
                "je 1f\n\t"             /* parent - jump */
                /* Load the argument into eax, and push it.  That way,
                 * it does not matter whether the called function is
                 * compiled with -mregparm or not. */
                "movl %4,%%eax\n\t"
                "pushl %%eax\n\t"
                "call *%5\n\t"          /* call fn */
                "movl %3,%0\n\t"        /* exit */
                "int $0x80\n"
                "1:\t"
                :"=&a" (retval), "=&S" (d0)
                :"0" (__NR_clone), "i" (__NR_exit),
                 "r" (arg), "r" (fn),
                 "b" (flags | CLONE_VM)
                : "memory");
        return retval;
}
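
/* A typical call looks like the sketch below (my_daemon is a
 * made-up example, not a function in this file):
 *
 *      static int my_daemon(void *unused)
 *      {
 *              for (;;)
 *                      schedule();
 *              return 0;
 *      }
 *
 *      kernel_thread(my_daemon, NULL, 0);
 *
 * CLONE_VM is OR'd in above, so the child always shares the
 * kernel's address space; if fn ever returns, the inline asm
 * falls through into the exit system call. */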

/* Free current thread data structures etc.. */
void exit_thread(void)
{
        /* nothing to do ... */
}

void flush_thread(void)
{
        int i;
        struct task_struct *tsk = current;

        for (i=0 ; i<8 ; i++)
                tsk->tss.debugreg[i] = 0;

        /* Forget coprocessor state.. */
        clear_fpu(tsk);
        tsk->used_math = 0;
}

void release_thread(struct task_struct *dead_task)
{
}

/* If new_mm is NULL, we're being called to set up the LDT
 * descriptor for a clone task.  Each clone must have a separate
 * entry in the GDT. */
void copy_segments(int nr, struct task_struct *p, struct mm_struct *new_mm)
{
        struct mm_struct * old_mm = current->mm;
        void * old_ldt = old_mm->segments, * ldt = old_ldt;

        /* default LDT - use the one from init_task */
        p->tss.ldt = _LDT(0);
        if (old_ldt) {
                if (new_mm) {
                        ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
                        new_mm->segments = ldt;
                        if (!ldt) {
                                printk(KERN_WARNING "ldt allocation failed\n");
                                return;
                        }
                        memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
                }
                p->tss.ldt = _LDT(nr);
                set_ldt_desc(nr, ldt, LDT_ENTRIES);
                return;
        }
}

/* Save a segment. */
#define savesegment(seg,value) \
        asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))

int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
        struct task_struct * p, struct pt_regs * regs)
{
        struct pt_regs * childregs;

        childregs = ((struct pt_regs *) (2*PAGE_SIZE + (unsigned long) p)) - 1;
        *childregs = *regs;
        childregs->eax = 0;
        childregs->esp = esp;

        p->tss.esp = (unsigned long) childregs;
        p->tss.esp0 = (unsigned long) (childregs+1);
        p->tss.ss0 = __KERNEL_DS;

        p->tss.tr = _TSS(nr);
        set_tss_desc(nr,&(p->tss));
        p->tss.eip = (unsigned long) ret_from_fork;

        savesegment(fs,p->tss.fs);
        savesegment(gs,p->tss.gs);

        /* a bitmap offset pointing outside of the TSS limit causes a
         * nicely controllable SIGSEGV.  The first sys_ioperm() call
         * sets up the bitmap properly. */
        p->tss.bitmap = sizeof(struct thread_struct);

        unlazy_fpu(current);
        p->tss.i387 = current->tss.i387;

        return 0;
}
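
/* The child's kernel stack shares its two-page allocation with
 * the task struct: p is the base, p + 2*PAGE_SIZE is the stack
 * top (where esp0 points), and childregs is the pt_regs frame
 * just below that top, which the child unwinds through
 * ret_from_fork on its first run.  Zeroing eax in the copied
 * frame is what makes fork() return 0 in the child. */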

/* fill in the FPU structure for a core dump. */
int dump_fpu(struct pt_regs * regs, struct user_i387_struct * fpu)
{
        int fpvalid;
        struct task_struct *tsk = current;

        fpvalid = tsk->used_math;
        if (fpvalid) {
                unlazy_fpu(tsk);
                memcpy(fpu,&tsk->tss.i387.hard,sizeof(*fpu));
        }

        return fpvalid;
}

/* fill in the user structure for a core dump.. */
void dump_thread(struct pt_regs * regs, struct user * dump)
{
        int i;

        /* changed the size calculations - should hopefully work
           better.  lbt */
        dump->magic = CMAGIC;
        dump->start_code = 0;
        dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
        dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
        dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
        dump->u_dsize -= dump->u_tsize;
        dump->u_ssize = 0;
        for (i = 0; i < 8; i++)
                dump->u_debugreg[i] = current->tss.debugreg[i];

        if (dump->start_stack < TASK_SIZE)
                dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;

        dump->regs.ebx = regs->ebx;
        dump->regs.ecx = regs->ecx;
        dump->regs.edx = regs->edx;
        dump->regs.esi = regs->esi;
        dump->regs.edi = regs->edi;
        dump->regs.ebp = regs->ebp;
        dump->regs.eax = regs->eax;
        dump->regs.ds = regs->xds;
        dump->regs.es = regs->xes;
        savesegment(fs,dump->regs.fs);
        savesegment(gs,dump->regs.gs);
        dump->regs.orig_eax = regs->orig_eax;
        dump->regs.eip = regs->eip;
        dump->regs.cs = regs->xcs;
        dump->regs.eflags = regs->eflags;
        dump->regs.esp = regs->esp;
        dump->regs.ss = regs->xss;

        dump->u_fpvalid = dump_fpu (regs, &dump->i387);
}

/* This special macro can be used to load a debugging register */
#define loaddebug(tsk,register) \
        __asm__("movl %0,%%db" #register \
                : /* no output */ \
                :"r" (tsk->tss.debugreg[register]))
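
/* For example, loaddebug(next,7) expands to an inline
 * "movl %0,%%db7" that loads next->tss.debugreg[7] into DR7.
 * DR7 holds the enable bits for the four breakpoints in DR0-DR3,
 * which is why __switch_to() below tests it to decide whether any
 * debug state needs reloading at all. */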

/* switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process.  Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP and
 * UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching.  The
 * reason for not using it any more becomes apparent when you try
 * to recover gracefully from saved state that is no longer valid
 * (stale segment register values in particular).  With the
 * hardware task-switch, there is no way to fix up bad state in a
 * reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to be
 * slow is a fairly red herring - this code is not noticeably
 * faster.  However, there _is_ some room for improvement here, so
 * the performance issues may eventually be a valid point.  More
 * important, however, is the fact that this allows us much more
 * flexibility. */
void __switch_to(struct task_struct *prev, struct task_struct *next)
{
        /* Save FPU and set TS if it wasn't set before.. */
        unlazy_fpu(prev);

        /* Reload TR, LDT and the page table pointers..
         *
         * We need TR for the IO permission bitmask (and the vm86
         * bitmasks in case we ever use enhanced v86 mode properly).
         *
         * We may want to get rid of the TR register some day, and
         * copy the bitmaps around by hand.  Oh, well.  In the
         * meantime we have to clear the busy bit in the TSS entry,
         * ugh. */
        gdt_table[next->tss.tr >> 3].b &= 0xfffffdff;
        asm volatile("ltr %0": :"g" (*(unsigned short *)&next->tss.tr));

        /* Save away %fs and %gs.  No need to save %es and %ds, as
         * those are always kernel segments while inside the kernel. */
        asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->tss.fs));
        asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->tss.gs));

        /* Re-load LDT if necessary */
        if (next->mm->segments != prev->mm->segments)
                asm volatile("lldt %0": :"g" (*(unsigned short *)&next->tss.ldt));

        /* Re-load page tables */
        {
                unsigned long new_cr3 = next->tss.cr3;
                if (new_cr3 != prev->tss.cr3)
                        asm volatile("movl %0,%%cr3": :"r" (new_cr3));
        }

        /* Restore %fs and %gs. */
        loadsegment(fs,next->tss.fs);
        loadsegment(gs,next->tss.gs);

        /* Now maybe reload the debug registers */
        if (next->tss.debugreg[7]) {
                loaddebug(next,0);
                loaddebug(next,1);
                loaddebug(next,2);
                loaddebug(next,3);
                loaddebug(next,6);
                loaddebug(next,7);
        }
}

asmlinkage int sys_fork(struct pt_regs regs)
{
        return do_fork(SIGCHLD, regs.esp, &regs);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
        unsigned long clone_flags;
        unsigned long newsp;

        clone_flags = regs.ebx;
        newsp = regs.ecx;
        if (!newsp)
                newsp = regs.esp;
        return do_fork(clone_flags, newsp, &regs);
}

/* This is trivial, and on the face of it looks like it could
 * equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.  In
 * user mode vfork() cannot have a stack frame, and if done by
 * calling the "clone()" system call directly, you do not have
 * enough call-clobbered registers to hold all the information you
 * need. */
asmlinkage int sys_vfork(struct pt_regs regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs);
}

/* sys_execve() executes a new program. */
asmlinkage int sys_execve(struct pt_regs regs)
{
        int error;
        char * filename;

        lock_kernel();
        filename = getname((char *) regs.ebx);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                goto out;
        error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
        if (error == 0)
                current->flags &= ~PF_DTRACE;
        putname(filename);
out:
        unlock_kernel();
        return error;
}