mm/mmap.c
33062 /*
33063 * linux/mm/mmap.c
33064 *
33065 * Written by obz.
33066 */
33067 #include <linux/slab.h>
33068 #include <linux/shm.h>
33069 #include <linux/mman.h>
33070 #include <linux/pagemap.h>
33071 #include <linux/swap.h>
33072 #include <linux/swapctl.h>
33073 #include <linux/smp_lock.h>
33074 #include <linux/init.h>
33075 #include <linux/file.h>
33076
33077 #include <asm/uaccess.h>
33078 #include <asm/pgtable.h>
33079
33080 /* description of effects of mapping type and prot in
33081 * current implementation. this is due to the limited
33082 * x86 page protection hardware. The expected behavior
33083 * is in parens (Y = yes, N = no, C = copy):
33084 *
33085 * map_type prot
33086 * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
33087 * MAP_SHARED r: (N) N r: (Y) Y r: (N) Y r: (N) Y
33088 * w: (N) N w: (N) N w: (Y) Y w: (N) N
33089 * x: (N) N x: (N) Y x: (N) Y x: (Y) Y
33090 *
33091 * MAP_PRIVATE r: (N) N r: (Y) Y r: (N) Y r: (N) Y
33092 * w: (N) N w: (N) N w: (C) C w: (N) N
33093 * x: (N) N x: (N) Y x: (N) Y x: (Y) Y
33094 */
33095 pgprot_t protection_map[16] = {
33096 __P000, __P001, __P010, __P011,
33097 __P100, __P101, __P110, __P111,
33098 __S000, __S001, __S010, __S011,
33099 __S100, __S101, __S110, __S111
33100 };
33101
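The table above is indexed by the low four vm_flags bits (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED); do_mmap() below does exactly that with protection_map[vma->vm_flags & 0x0f]. A minimal user-space sketch, assuming the usual 2.2-era bit values (0x1 through 0x8), shows which slot a given combination selects:

/* Sketch only: mirrors how do_mmap() picks a protection_map slot.
 * The VM_* values below are assumed to match the kernel's; this is
 * an illustration, not kernel code. */
#include <stdio.h>

#define VM_READ   0x0001
#define VM_WRITE  0x0002
#define VM_EXEC   0x0004
#define VM_SHARED 0x0008

int main(void)
{
    unsigned long vm_flags = VM_READ | VM_WRITE | VM_SHARED;

    /* Indices 0..7 select a __Pxxx (private) entry,
     * 8..15 select a __Sxxx (shared) entry. */
    printf("protection_map index = %lu\n", vm_flags & 0x0f);  /* 11 -> __S011 */
    return 0;
}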
33102 /* SLAB cache for vm_area_struct's. */
33103 kmem_cache_t *vm_area_cachep;
33104
33105 int sysctl_overcommit_memory;
33106
33107 /* Check that a process has enough memory to allocate a
33108 * new virtual mapping.
33109 */
33110 int vm_enough_memory(long pages)
33111 {
33112 /* Stupid algorithm to decide if we have enough memory:
33113 * while simple, it hopefully works in most obvious
33114 * cases.. Easy to fool it, but this should catch most
33115 * mistakes. */
33116 /* 23/11/98 NJC: Somewhat less stupid version of
33117 * algorithm, which tries to do "TheRightThing".
33118 * Instead of using half of (buffers+cache), use the
33119 * minimum values. Allow an extra 2% of num_physpages
33120 * for safety margin. */
33121
33122 long free;
33123
33124 /* Sometimes we want to use more memory than we
33125 * have. */
33126 if (sysctl_overcommit_memory)
33127 return 1;
33128
33129 free = buffermem >> PAGE_SHIFT;
33130 free += page_cache_size;
33131 free += nr_free_pages;
33132 free += nr_swap_pages;
33133 free -= (page_cache.min_percent +
33134 buffer_mem.min_percent + 2)*num_physpages/100;
33135 return free > pages;
33136 }
33137
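A rough worked example of the heuristic above, with invented figures standing in for the kernel counters it reads; only the arithmetic comes from the function, the numbers do not:

/* Back-of-the-envelope sketch of the vm_enough_memory() heuristic.
 * Variable names mirror the kernel's, values are purely illustrative. */
#include <stdio.h>

int main(void)
{
    long buffer_pages    = 1500;    /* buffermem >> PAGE_SHIFT (assumed)    */
    long page_cache_size = 4000;    /* pages in the page cache (assumed)    */
    long nr_free_pages   = 300;     /* free pages (assumed)                 */
    long nr_swap_pages   = 8000;    /* free swap pages (assumed)            */
    long num_physpages   = 16384;   /* 64 MB of 4 KB pages (assumed)        */
    long min_percent     = 2 + 2;   /* cache + buffer min_percent (assumed) */

    long free = buffer_pages + page_cache_size + nr_free_pages + nr_swap_pages;
    free -= (min_percent + 2) * num_physpages / 100;   /* 2% safety margin */

    long request = 1024;            /* pages the caller wants to map */
    printf("estimate %ld pages free, request of %ld %s\n",
           free, request, free > request ? "granted" : "denied");
    return 0;
}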
33138 /* Remove one vm structure from the inode's i_mmap
33139 * ring. */
33140 static inline void remove_shared_vm_struct(
33141 struct vm_area_struct *vma)
33142 {
33143 struct file * file = vma->vm_file;
33144
33145 if (file) {
33146 if (vma->vm_flags & VM_DENYWRITE)
33147 file->f_dentry->d_inode->i_writecount++;
33148 if(vma->vm_next_share)
33149 vma->vm_next_share->vm_pprev_share =
33150 vma->vm_pprev_share;
33151 *vma->vm_pprev_share = vma->vm_next_share;
33152 }
33153 }
33154
33155 asmlinkage unsigned long sys_brk(unsigned long brk)
33156 {
33157 unsigned long rlim, retval;
33158 unsigned long newbrk, oldbrk;
33159 struct mm_struct *mm = current->mm;
33160
33161 down(&mm->mmap_sem);
33162
33163 /* This lock-kernel is one of the main contention
33164 * points for certain normal loads. And it really
33165 * should not be here: almost everything in
33166 * brk()/mmap()/munmap() is protected sufficiently by
33167 * the mmap semaphore that we got above.
33168 *
33169 * We should move this into the few things that really
33170 * want the lock, namely anything that actually touches
33171 * a file descriptor etc. We can do all the normal
33172 * anonymous mapping cases without ever getting the
33173 * lock at all - the actual memory management code is
33174 * already completely thread-safe. */
33175 lock_kernel();
33176
33177 if (brk < mm->end_code)
33178 goto out;
33179 newbrk = PAGE_ALIGN(brk);
33180 oldbrk = PAGE_ALIGN(mm->brk);
33181 if (oldbrk == newbrk)
33182 goto set_brk;
33183
33184 /* Always allow shrinking brk. */
33185 if (brk <= mm->brk) {
33186 if (!do_munmap(newbrk, oldbrk-newbrk))
33187 goto set_brk;
33188 goto out;
33189 }
33190
33191 /* Check against rlimit and stack.. */
33192 rlim = current->rlim[RLIMIT_DATA].rlim_cur;
33193 if (rlim < RLIM_INFINITY && brk - mm->end_code > rlim)
33194 goto out;
33195
33196 /* Check against existing mmap mappings. */
33197 if (find_vma_intersection(mm, oldbrk,newbrk+PAGE_SIZE))
33198 goto out;
33199
33200 /* Check if we have enough memory.. */
33201 if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
33202 goto out;
33203
33204 /* Ok, looks good - let it rip. */
33205 if (do_mmap(NULL, oldbrk, newbrk-oldbrk,
33206 PROT_READ|PROT_WRITE|PROT_EXEC,
33207 MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
33208 goto out;
33209 set_brk:
33210 mm->brk = brk;
33211 out:
33212 retval = mm->brk;
33213 unlock_kernel();
33214 up(&mm->mmap_sem);
33215 return retval;
33216 }
33217
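From user space this path is normally reached through malloc(), which grows the heap with brk()/sbrk(). A small sketch, assuming the traditional sbrk() interface; the grow step goes through the do_mmap() call above, the shrink through do_munmap():

/* Sketch: exercising sys_brk() from user space via sbrk().
 * Minimal error handling; only traditional sbrk() behaviour is assumed. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    void *before = sbrk(0);                   /* current program break */

    if (sbrk(64 * 1024) == (void *)-1) {      /* grow by 64 KB */
        perror("sbrk grow");
        return 1;
    }
    void *after = sbrk(0);
    printf("break moved from %p to %p\n", before, after);

    sbrk(-64 * 1024);                         /* shrink back again */
    return 0;
}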
33218 /* Combine the mmap "prot" and "flags" argument into one
33219 * "vm_flags" used internally. Essentially, translate the
33220 * "PROT_xxx" and "MAP_xxx" bits into "VM_xxx". */
33221 static inline unsigned long vm_flags(unsigned long prot,
33222 unsigned long flags)
33223 {
33224 #define _trans(x,bit1,bit2) \
33225 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
33226
33227 unsigned long prot_bits, flag_bits;
33228 prot_bits =
33229 _trans(prot, PROT_READ, VM_READ) |
33230 _trans(prot, PROT_WRITE, VM_WRITE) |
33231 _trans(prot, PROT_EXEC, VM_EXEC);
33232 flag_bits =
33233 _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
33234 _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
33235 _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
33236 return prot_bits | flag_bits;
33237 #undef _trans
33238 }
33239
33240 unsigned long do_mmap(struct file * file,
33241 unsigned long addr, unsigned long len,
33242 unsigned long prot, unsigned long flags,
33243 unsigned long off)
33244 {
33245 struct mm_struct * mm = current->mm;
33246 struct vm_area_struct * vma;
33247 int error;
33248
33249 if ((len = PAGE_ALIGN(len)) == 0)
33250 return addr;
33251
33252 if (len > TASK_SIZE || addr > TASK_SIZE-len)
33253 return -EINVAL;
33254
33255 /* offset overflow? */
33256 if (off + len < off)
33257 return -EINVAL;
33258
33259 /* Too many mappings? */
33260 if (mm->map_count > MAX_MAP_COUNT)
33261 return -ENOMEM;
33262
33263 /* mlock MCL_FUTURE? */
33264 if (mm->def_flags & VM_LOCKED) {
33265 unsigned long locked = mm->locked_vm << PAGE_SHIFT;
33266 locked += len;
33267 if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
33268 return -EAGAIN;
33269 }
33270
33271 /* Do simple checking here so the lower-level routines
33272 * won't have to. we assume access permissions have
33273 * been handled by the open of the memory object, so we
33274 * don't do any here. */
33275 if (file != NULL) {
33276 switch (flags & MAP_TYPE) {
33277 case MAP_SHARED:
33278 if ((prot & PROT_WRITE) && !(file->f_mode & 2))
33279 return -EACCES;
33280
33281 /* Make sure we don't allow writing to an
33282 * append-only file.. */
33283 if (IS_APPEND(file->f_dentry->d_inode) &&
33284 (file->f_mode & 2))
33285 return -EACCES;
33286
33287 /* make sure there are no mandatory locks on the
33288 * file. */
33289 if (locks_verify_locked(file->f_dentry->d_inode))
33290 return -EAGAIN;
33291
33292 /* fall through */
33293 case MAP_PRIVATE:
33294 if (!(file->f_mode & 1))
33295 return -EACCES;
33296 break;
33297
33298 default:
33299 return -EINVAL;
33300 }
33301 } else if ((flags & MAP_TYPE) != MAP_PRIVATE)
33302 return -EINVAL;
33303
33304 /* Obtain the address to map to. we verify (or select)
33305 * it and ensure that it represents a valid section of
33306 * the address space. */
33307 if (flags & MAP_FIXED) {
33308 if (addr & ~PAGE_MASK)
33309 return -EINVAL;
33310 } else {
33311 addr = get_unmapped_area(addr, len);
33312 if (!addr)
33313 return -ENOMEM;
33314 }
33315
33316 /* Determine the object being mapped and call the
33317 * appropriate specific mapper. The address has already
33318 * been validated but not yet unmapped; any overlapping
33319 * maps are removed from the list below. */
33320 if (file && (!file->f_op || !file->f_op->mmap))
33321 return -ENODEV;
33322
33323 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
33324 if (!vma)
33325 return -ENOMEM;
33326
33327 vma->vm_mm = mm;
33328 vma->vm_start = addr;
33329 vma->vm_end = addr + len;
33330 vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;
33331
33332 if (file) {
33333 if (file->f_mode & 1)
33334 vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
33335 if (flags & MAP_SHARED) {
33336 vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
33337
33338 /* This looks strange, but when we don't have the
33339 * file open for writing, we can demote the shared
33340 * mapping to a simpler private mapping. That also
33341 * takes care of a security hole with ptrace()
33342 * writing to a shared mapping without write
33343 * permissions.
33344 *
33345 * We leave the VM_MAYSHARE bit on, just to get
33346 * correct output from /proc/xxx/maps.. */
33347 if (!(file->f_mode & 2))
33348 vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
33349 }
33350 } else
33351 vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
33352 vma->vm_page_prot =
33353 protection_map[vma->vm_flags & 0x0f];
33354 vma->vm_ops = NULL;
33355 vma->vm_offset = off;
33356 vma->vm_file = NULL;
33357 vma->vm_pte = 0;
33358
33359 /* Clear old maps */
33360 error = -ENOMEM;
33361 if (do_munmap(addr, len))
33362 goto free_vma;
33363
33364 /* Check against address space limit. */
33365 if ((mm->total_vm << PAGE_SHIFT) + len
33366 > current->rlim[RLIMIT_AS].rlim_cur)
33367 goto free_vma;
33368
33369 /* Private writable mapping? Check memory
33370 * availability.. */
33371 if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) ==
33372 VM_WRITE &&
33373 !(flags & MAP_NORESERVE) &&
33374 !vm_enough_memory(len >> PAGE_SHIFT))
33375 goto free_vma;
33376
33377 if (file) {
33378 int correct_wcount = 0;
33379 if (vma->vm_flags & VM_DENYWRITE) {
33380 if (file->f_dentry->d_inode->i_writecount > 0) {
33381 error = -ETXTBSY;
33382 goto free_vma;
33383 }
33384 /* f_op->mmap might possibly sleep
33385 * (generic_file_mmap doesn't, but other code
33386 * might). In any case, this takes care of any
33387 * race that this might cause.
33388 */
33389 file->f_dentry->d_inode->i_writecount--;
33390 correct_wcount = 1;
33391 }
33392 error = file->f_op->mmap(file, vma);
33393 /* Fix up the count if necessary, then check for an
33394 * error */
33395 if (correct_wcount)
33396 file->f_dentry->d_inode->i_writecount++;
33397 if (error)
33398 goto unmap_and_free_vma;
33399 vma->vm_file = file;
33400 file->f_count++;
33401 }
33402
33403 /* merge_segments may merge our vma, so we can't refer
33404 * to it after the call. Save the values we need now
33405 * ... */
33406 flags = vma->vm_flags;
33407 addr = vma->vm_start; /* can addr have changed?? */
33408 insert_vm_struct(mm, vma);
33409 merge_segments(mm, vma->vm_start, vma->vm_end);
33410
33411 mm->total_vm += len >> PAGE_SHIFT;
33412 if (flags & VM_LOCKED) {
33413 mm->locked_vm += len >> PAGE_SHIFT;
33414 make_pages_present(addr, addr + len);
33415 }
33416 return addr;
33417
33418 unmap_and_free_vma:
33419 /* Undo any partial mapping done by a device driver. */
33420 flush_cache_range(mm, vma->vm_start, vma->vm_end);
33421 zap_page_range(mm, vma->vm_start,
33422 vma->vm_end - vma->vm_start);
33423 flush_tlb_range(mm, vma->vm_start, vma->vm_end);
33424 free_vma:
33425 kmem_cache_free(vm_area_cachep, vma);
33426 return error;
33427 }
33428
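The permission checks near the top of do_mmap() are easy to observe from user space: asking for a writable MAP_SHARED mapping of a descriptor that was not opened for writing fails with EACCES, while MAP_PRIVATE succeeds because writes only ever touch private copies. A sketch, with /etc/hostname standing in for any readable file:

/* Sketch: user-space view of the MAP_SHARED write check in do_mmap().
 * The path is only an example; any file opened O_RDONLY will do. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    int fd = open("/etc/hostname", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* MAP_SHARED + PROT_WRITE on an fd not open for writing: EACCES. */
    void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (p == MAP_FAILED)
        printf("MAP_SHARED refused: %s\n", strerror(errno));

    /* MAP_PRIVATE only needs the file to be readable. */
    p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (p != MAP_FAILED) {
        printf("MAP_PRIVATE granted at %p\n", p);
        munmap(p, 4096);
    }
    close(fd);
    return 0;
}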
33429 /* Get an address range which is currently unmapped. For
33430 * mmap() without MAP_FIXED and shmat() with addr=0.
33431 * Return value 0 means ENOMEM. */
33432 unsigned long get_unmapped_area(unsigned long addr,
33433 unsigned long len)
33434 {
33435 struct vm_area_struct * vmm;
33436
33437 if (len > TASK_SIZE)
33438 return 0;
33439 if (!addr)
33440 addr = TASK_UNMAPPED_BASE;
33441 addr = PAGE_ALIGN(addr);
33442
33443 for (vmm = find_vma(current->mm, addr); ;
33444 vmm = vmm->vm_next) {
33445 /* At this point: (!vmm || addr < vmm->vm_end). */
33446 if (TASK_SIZE - len < addr)
33447 return 0;
33448 if (!vmm || addr + len <= vmm->vm_start)
33449 return addr;
33450 addr = vmm->vm_end;
33451 }
33452 }
33453
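The loop above is a plain first-fit walk: start at the hint (or TASK_UNMAPPED_BASE), then slide past each existing vma until a gap of at least len bytes opens up before the next one. A toy version of the same walk over an invented, sorted list, kept separate from the kernel's structures:

/* Sketch: the first-fit walk of get_unmapped_area() over a toy, sorted
 * list of (start, end) ranges. Data structures are purely illustrative. */
#include <stdio.h>

struct toy_vma {
    unsigned long start, end;
    struct toy_vma *next;
};

static unsigned long toy_unmapped_area(struct toy_vma *vma,
                                       unsigned long addr, unsigned long len)
{
    for (; ; vma = vma->next) {
        if (!vma || addr + len <= vma->start)
            return addr;          /* gap before this vma is big enough */
        addr = vma->end;          /* otherwise skip past it */
    }
}

int main(void)
{
    struct toy_vma b = { 0x50000, 0x60000, NULL };
    struct toy_vma a = { 0x40000, 0x48000, &b };

    /* Asking for 0x10000 bytes at 0x40000: the 0x48000..0x50000 gap is
     * too small, so the walk lands just past the second vma, at 0x60000. */
    printf("found 0x%lx\n", toy_unmapped_area(&a, 0x40000, 0x10000));
    return 0;
}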
33454 #define vm_avl_empty (struct vm_area_struct *) NULL
33455
33456 #include "mmap_avl.c"
33457
33458 /* Look up the first VMA which satisfies addr < vm_end,
33459 * NULL if none. */
33460 struct vm_area_struct * find_vma(struct mm_struct * mm,
33461 unsigned long addr)
33462 {
33463 struct vm_area_struct *vma = NULL;
33464
33465 if (mm) {
33466 /* Check the cache first. */
33467 /* (Cache hit rate is typically around 35%.) */
33468 vma = mm->mmap_cache;
33469 if (!(vma && vma->vm_end > addr &&
33470 vma->vm_start <= addr)) {
33471 if (!mm->mmap_avl) {
33472 /* Go through the linear list. */
33473 vma = mm->mmap;
33474 while (vma && vma->vm_end <= addr)
33475 vma = vma->vm_next;
33476 } else {
33477 /* Then go through the AVL tree quickly. */
33478 struct vm_area_struct * tree = mm->mmap_avl;
33479 vma = NULL;
33480 for (;;) {
33481 if (tree == vm_avl_empty)
33482 break;
33483 if (tree->vm_end > addr) {
33484 vma = tree;
33485 if (tree->vm_start <= addr)
33486 break;
33487 tree = tree->vm_avl_left;
33488 } else
33489 tree = tree->vm_avl_right;
33490 }
33491 }
33492 if (vma)
33493 mm->mmap_cache = vma;
33494 }
33495 }
33496 return vma;
33497 }
33498
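Both branches of find_vma() answer the same question: return the first area whose vm_end lies strictly above addr. The AVL walk is essentially a binary search on that predicate; a toy version over a sorted array of end addresses, for illustration only:

/* Sketch: the "first vm_end > addr" lookup that find_vma() performs,
 * done over a plain sorted array instead of an AVL tree. */
#include <stdio.h>

static int first_end_above(const unsigned long *ends, int n,
                           unsigned long addr)
{
    int lo = 0, hi = n, best = -1;

    while (lo < hi) {
        int mid = (lo + hi) / 2;
        if (ends[mid] > addr) {
            best = mid;           /* candidate; look for an earlier one */
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    return best;                  /* -1 means no such area */
}

int main(void)
{
    unsigned long ends[] = { 0x1000, 0x4000, 0x9000 };

    printf("%d\n", first_end_above(ends, 3, 0x3fff));   /* prints 1  */
    printf("%d\n", first_end_above(ends, 3, 0x9000));   /* prints -1 */
    return 0;
}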
33499 /* Same as find_vma, but also return a pointer to the
33500 * previous VMA in *pprev. */
33501 struct vm_area_struct * find_vma_prev(
33502 struct mm_struct * mm, unsigned long addr,
33503 struct vm_area_struct **pprev)
33504 {
33505 if (mm) {
33506 if (!mm->mmap_avl) {
33507 /* Go through the linear list. */
33508 struct vm_area_struct * prev = NULL;
33509 struct vm_area_struct * vma = mm->mmap;
33510 while (vma && vma->vm_end <= addr) {
33511 prev = vma;
33512 vma = vma->vm_next;
33513 }
33514 *pprev = prev;
33515 return vma;
33516 } else {
33517 /* Go through the AVL tree quickly. */
33518 struct vm_area_struct * vma = NULL;
33519 struct vm_area_struct * last_turn_right = NULL;
33520 struct vm_area_struct * prev = NULL;
33521 struct vm_area_struct * tree = mm->mmap_avl;
33522 for (;;) {
33523 if (tree == vm_avl_empty)
33524 break;
33525 if (tree->vm_end > addr) {
33526 vma = tree;
33527 prev = last_turn_right;
33528 if (tree->vm_start <= addr)
33529 break;
33530 tree = tree->vm_avl_left;
33531 } else {
33532 last_turn_right = tree;
33533 tree = tree->vm_avl_right;
33534 }
33535 }
33536 if (vma) {
33537 if (vma->vm_avl_left != vm_avl_empty) {
33538 prev = vma->vm_avl_left;
33539 while (prev->vm_avl_right != vm_avl_empty)
33540 prev = prev->vm_avl_right;
33541 }
33542 if ((prev ? prev->vm_next : mm->mmap) != vma)
33543 printk("find_vma_prev: tree inconsistent with "
33544 "list\n");
33545 *pprev = prev;
33546 return vma;
33547 }
33548 }
33549 }
33550 *pprev = NULL;
33551 return NULL;
33552 }
33553
33554 /* Normal function to fix up a mapping
33555 * This function is the default for when an area has no
33556 * specific function. This may be used as part of a more
33557 * specific routine. This function works out what part
33558 * of an area is affected and adjusts the mapping
33559 * information. Since the actual page manipulation is
33560 * done in do_mmap(), none need be done here, though it
33561 * would probably be more appropriate.
33562 *
33563 * By the time this function is called, the area struct
33564 * has been removed from the process mapping list, so it
33565 * needs to be reinserted if necessary.
33566 *
33567 * The 4 main cases are:
33568 * Unmapping the whole area
33569 * Unmapping from the start of the seg to a point in it
33570 * Unmapping from an intermediate point to the end
33571 * Unmapping between two intermediate points, making a
33572 * hole.
33573 *
33574 * Case 4 involves the creation of 2 new areas, for each
33575 * side of the hole. If possible, we reuse the existing
33576 * area rather than allocate a new one, and the return
33577 * indicates whether the old area was reused. */
33578 static int unmap_fixup(struct vm_area_struct *area,
33579 unsigned long addr, size_t len,
33580 struct vm_area_struct **extra)
33581 {
33582 struct vm_area_struct *mpnt;
33583 unsigned long end = addr + len;
33584
33585 area->vm_mm->total_vm -= len >> PAGE_SHIFT;
33586 if (area->vm_flags & VM_LOCKED)
33587 area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
33588
33589 /* Unmapping the whole area. */
33590 if (addr == area->vm_start && end == area->vm_end) {
33591 if (area->vm_ops && area->vm_ops->close)
33592 area->vm_ops->close(area);
33593 if (area->vm_file)
33594 fput(area->vm_file);
33595 return 0;
33596 }
33597
33598 /* Work out to one of the ends. */
33599 if (end == area->vm_end)
33600 area->vm_end = addr;
33601 else if (addr == area->vm_start) {
33602 area->vm_offset += (end - area->vm_start);
33603 area->vm_start = end;
33604 } else {
33605 /* Unmapping a hole:
33606 * area->vm_start < addr <= end < area->vm_end */
33607 /* Add end mapping -- leave beginning for below */
33608 mpnt = *extra;
33609 *extra = NULL;
33610
33611 mpnt->vm_mm = area->vm_mm;
33612 mpnt->vm_start = end;
33613 mpnt->vm_end = area->vm_end;
33614 mpnt->vm_page_prot = area->vm_page_prot;
33615 mpnt->vm_flags = area->vm_flags;
33616 mpnt->vm_ops = area->vm_ops;
33617 mpnt->vm_offset =
33618 area->vm_offset + (end - area->vm_start);
33619 mpnt->vm_file = area->vm_file;
33620 mpnt->vm_pte = area->vm_pte;
33621 if (mpnt->vm_file)
33622 mpnt->vm_file->f_count++;
33623 if (mpnt->vm_ops && mpnt->vm_ops->open)
33624 mpnt->vm_ops->open(mpnt);
33625 area->vm_end = addr; /* Truncate area */
33626 insert_vm_struct(current->mm, mpnt);
33627 }
33628
33629 insert_vm_struct(current->mm, area);
33630 return 1;
33631 }
33632
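The hole case is the one that consumes the extra vma handed in by do_munmap(). A small sketch of its arithmetic with invented addresses (a 32 KB area losing 8 KB from its middle); no kernel structures are involved:

/* Sketch: the offset/boundary arithmetic of unmap_fixup()'s hole case,
 * with made-up addresses. */
#include <stdio.h>

int main(void)
{
    unsigned long vm_start = 0x40000000, vm_end = 0x40008000, vm_offset = 0;
    unsigned long addr = 0x40002000, len = 0x2000, end = addr + len;

    /* The old area is truncated to [vm_start, addr)... */
    unsigned long left_start = vm_start, left_end = addr;

    /* ...and the extra vma covers [end, vm_end), with its offset advanced
     * by (end - vm_start), exactly as in the code above. */
    unsigned long right_start = end, right_end = vm_end;
    unsigned long right_offset = vm_offset + (end - vm_start);

    printf("left  [%#lx, %#lx)\n", left_start, left_end);
    printf("right [%#lx, %#lx), offset %#lx\n",
           right_start, right_end, right_offset);
    return 0;
}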
33633 /* Try to free as many page directory entries as we can,
33634 * without having to work very hard at actually scanning
33635 * the page tables themselves.
33636 *
33637 * Right now we try to free page tables if we have a nice
33638 * PGDIR-aligned area that got free'd up. We could be
33639 * more granular if we want to, but this is fast and
33640 * simple, and covers the bad cases.
33641 *
33642 * "prev", if it exists, points to a vma before the one
33643 * we just free'd - but there's no telling how much
33644 * before. */
33645 static void free_pgtables(struct mm_struct * mm,
33646 struct vm_area_struct *prev,
33647 unsigned long start, unsigned long end)
33648 {
33649 unsigned long first = start & PGDIR_MASK;
33650 unsigned long last = (end + PGDIR_SIZE - 1) &
33651 PGDIR_MASK;
33652
33653 if (!prev) {
33654 prev = mm->mmap;
33655 if (!prev)
33656 goto no_mmaps;
33657 if (prev->vm_end > start) {
33658 if (last > prev->vm_end)
33659 last = prev->vm_end;
33660 goto no_mmaps;
33661 }
33662 }
33663 for (;;) {
33664 struct vm_area_struct *next = prev->vm_next;
33665
33666 if (next) {
33667 if (next->vm_start < start) {
33668 prev = next;
33669 continue;
33670 }
33671 if (last > next->vm_start)
33672 last = next->vm_start;
33673 }
33674 if (prev->vm_end > first)
33675 first = prev->vm_end + PGDIR_SIZE - 1;
33676 break;
33677 }
33678 no_mmaps:
33679 first = first >> PGDIR_SHIFT;
33680 last = last >> PGDIR_SHIFT;
33681 if (last > first)
33682 clear_page_tables(mm, first, last-first);
33683 }
33684
33685 /* Munmap is split into 2 main parts -- this part which
33686 * finds what needs doing, and the areas themselves,
33687 * which do the work. This now handles partial
33688 * unmappings. Jeremy Fitzhardinge <jeremy@sw.oz.au> */
33689 int do_munmap(unsigned long addr, size_t len)
33690 {
33691 struct mm_struct * mm;
33692 struct vm_area_struct *mpnt, *prev, **npp, *free,
33693 *extra;
33694
33695 if ((addr & ~PAGE_MASK) || addr > TASK_SIZE ||
33696 len > TASK_SIZE-addr)
33697 return -EINVAL;
33698
33699 if ((len = PAGE_ALIGN(len)) == 0)
33700 return -EINVAL;
33701
33702 /* Check if this memory area is ok - put it on the
33703 * temporary list if so.. The checks here are pretty
33704 * simple -- every area affected in some way (by any
33705 * overlap) is put on the list. If nothing is put on,
33706 * nothing is affected. */
33707 mm = current->mm;
33708 mpnt = find_vma_prev(mm, addr, &prev);
33709 if (!mpnt)
33710 return 0;
33711 /* we have addr < mpnt->vm_end */
33712
33713 if (mpnt->vm_start >= addr+len)
33714 return 0;
33715
33716 /* If we'll make "hole", check the vm areas limit */
33717 if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
33718 && mm->map_count >= MAX_MAP_COUNT)
33719 return -ENOMEM;
33720
33721 /* We may need one additional vma to fix up the
33722 * mappings ... and this is the last chance for an
33723 * easy error exit. */
33724 extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
33725 if (!extra)
33726 return -ENOMEM;
33727
33728 npp = (prev ? &prev->vm_next : &mm->mmap);
33729 free = NULL;
33730 for (; mpnt && mpnt->vm_start < addr+len; mpnt = *npp){
33731 *npp = mpnt->vm_next;
33732 mpnt->vm_next = free;
33733 free = mpnt;
33734 if (mm->mmap_avl)
33735 avl_remove(mpnt, &mm->mmap_avl);
33736 }
33737
33738 /* Ok - we have the memory areas we should free on the
33739 * 'free' list, so release them, and unmap the page
33740 * range.. If one of the segments is only being
33741 * partially unmapped, it will put new
33742 * vm_area_struct(s) into the address space. */
33743 while ((mpnt = free) != NULL) {
33744 unsigned long st, end, size;
33745
33746 free = free->vm_next;
33747
33748 st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
33749 end = addr+len;
33750 end = end > mpnt->vm_end ? mpnt->vm_end : end;
33751 size = end - st;
33752
33753 if (mpnt->vm_ops && mpnt->vm_ops->unmap)
33754 mpnt->vm_ops->unmap(mpnt, st, size);
33755
33756 remove_shared_vm_struct(mpnt);
33757 mm->map_count--;
33758
33759 flush_cache_range(mm, st, end);
33760 zap_page_range(mm, st, size);
33761 flush_tlb_range(mm, st, end);
33762
33763 /* Fix the mapping, and free the old area if it
33764 * wasn't reused. */
33765 if (!unmap_fixup(mpnt, st, size, &extra))
33766 kmem_cache_free(vm_area_cachep, mpnt);
33767 }
33768
33769 /* Release the extra vma struct if it wasn't used */
33770 if (extra)
33771 kmem_cache_free(vm_area_cachep, extra);
33772
33773 free_pgtables(mm, prev, addr, addr+len);
33774
33775 mm->mmap_cache = NULL; /* Kill the cache. */
33776 return 0;
33777 }
33778
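The split case is simple to trigger from user space: map three pages anonymously and unmap the middle one, which is exactly the situation that makes do_munmap() reserve the extra vm_area_struct up front. A sketch, assuming MAP_ANONYMOUS is available:

/* Sketch: punching a hole in an anonymous mapping, the case where one
 * vma must become two. */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* Unmap only the middle page: one vma becomes two, with a hole. */
    if (munmap(p + page, page) != 0)
        perror("munmap");

    p[0] = 1;                     /* first page still mapped */
    p[2 * page] = 1;              /* third page still mapped */
    /* Touching p[page] would now fault: that page is gone. */

    munmap(p, page);
    munmap(p + 2 * page, page);
    return 0;
}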
33779 asmlinkage int sys_munmap(unsigned long addr, size_t len)
33780 {
33781 int ret;
33782
33783 down(&current->mm->mmap_sem);
33784 lock_kernel();
33785 ret = do_munmap(addr, len);
33786 unlock_kernel();
33787 up(&current->mm->mmap_sem);
33788 return ret;
33789 }
33790
33791 /* Build the AVL tree corresponding to the VMA list. */
33792 void build_mmap_avl(struct mm_struct * mm)
33793 {
33794 struct vm_area_struct * vma;
33795
33796 mm->mmap_avl = NULL;
33797 for (vma = mm->mmap; vma; vma = vma->vm_next)
33798 avl_insert(vma, &mm->mmap_avl);
33799 }
33800
33801 /* Release all mmaps. */
33802 void exit_mmap(struct mm_struct * mm)
33803 {
33804 struct vm_area_struct * mpnt;
33805
33806 mpnt = mm->mmap;
33807 mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
33808 mm->rss = 0;
33809 mm->total_vm = 0;
33810 mm->locked_vm = 0;
33811 while (mpnt) {
33812 struct vm_area_struct * next = mpnt->vm_next;
33813 unsigned long start = mpnt->vm_start;
33814 unsigned long end = mpnt->vm_end;
33815 unsigned long size = end - start;
33816
33817 if (mpnt->vm_ops) {
33818 if (mpnt->vm_ops->unmap)
33819 mpnt->vm_ops->unmap(mpnt, start, size);
33820 if (mpnt->vm_ops->close)
33821 mpnt->vm_ops->close(mpnt);
33822 }
33823 mm->map_count--;
33824 remove_shared_vm_struct(mpnt);
33825 zap_page_range(mm, start, size);
33826 if (mpnt->vm_file)
33827 fput(mpnt->vm_file);
33828 kmem_cache_free(vm_area_cachep, mpnt);
33829 mpnt = next;
33830 }
33831
33832 /* This is just debugging */
33833 if (mm->map_count)
33834 printk("exit_mmap: map count is %d\n",
33835 mm->map_count);
33836
33837 clear_page_tables(mm, 0, USER_PTRS_PER_PGD);
33838 }
33839
33840 /* Insert vm structure into process list sorted by
33841 * address and into the inode's i_mmap ring. */
33842 void insert_vm_struct(struct mm_struct *mm,
33843 struct vm_area_struct *vmp)
33844 {
33845 struct vm_area_struct **pprev;
33846 struct file * file;
33847
33848 if (!mm->mmap_avl) {
33849 pprev = &mm->mmap;
33850 while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
33851 pprev = &(*pprev)->vm_next;
33852 } else {
33853 struct vm_area_struct *prev, *next;
33854 avl_insert_neighbours(vmp, &mm->mmap_avl,
33855 &prev, &next);
33856 pprev = (prev ? &prev->vm_next : &mm->mmap);
33857 if (*pprev != next)
33858 printk("insert_vm_struct: tree inconsistent with "
33859 "list\n");
33860 }
33861 vmp->vm_next = *pprev;
33862 *pprev = vmp;
33863
33864 mm->map_count++;
33865 if (mm->map_count >= AVL_MIN_MAP_COUNT &&
33866 !mm->mmap_avl)
33867 build_mmap_avl(mm);
33868
33869 file = vmp->vm_file;
33870 if (file) {
33871 struct inode * inode = file->f_dentry->d_inode;
33872 if (vmp->vm_flags & VM_DENYWRITE)
33873 inode->i_writecount--;
33874
33875 /* insert vmp into inode's share list */
33876 if((vmp->vm_next_share = inode->i_mmap) != NULL)
33877 inode->i_mmap->vm_pprev_share =
33878 &vmp->vm_next_share;
33879 inode->i_mmap = vmp;
33880 vmp->vm_pprev_share = &inode->i_mmap;
33881 }
33882 }
33883
33884 /* Merge the list of memory segments if possible.
33885 * Redundant vm_area_structs are freed. This assumes
33886 * that the list is ordered by address. We don't need to
33887 * traverse the entire list, only those segments which
33888 * intersect or are adjacent to a given interval.
33889 *
33890 * We must already hold the mm semaphore when we get
33891 * here.. */
33892 void merge_segments (struct mm_struct * mm,
33893 unsigned long start_addr, unsigned long end_addr)
33894 {
33895 struct vm_area_struct *prev, *mpnt, *next, *prev1;
33896
33897 mpnt = find_vma_prev(mm, start_addr, &prev1);
33898 if (!mpnt)
33899 return;
33900
33901 if (prev1) {
33902 prev = prev1;
33903 } else {
33904 prev = mpnt;
33905 mpnt = mpnt->vm_next;
33906 }
33907
33908 /* prev and mpnt cycle through the list, as long as
33909 * start_addr < mpnt->vm_end &&
33910 * prev->vm_start < end_addr */
33911 for ( ; mpnt && prev->vm_start < end_addr;
33912 prev = mpnt, mpnt = next) {
33913 next = mpnt->vm_next;
33914
33915 /* To share, we must have the same file,
33916 * operations.. */
33917 if ((mpnt->vm_file != prev->vm_file)||
33918 (mpnt->vm_pte != prev->vm_pte) ||
33919 (mpnt->vm_ops != prev->vm_ops) ||
33920 (mpnt->vm_flags != prev->vm_flags) ||
33921 (prev->vm_end != mpnt->vm_start))
33922 continue;
33923
33924 /* If we have a file or it's a shared memory area the
33925 * offsets must be contiguous.. */
33926 if ((mpnt->vm_file != NULL) ||
33927 (mpnt->vm_flags & VM_SHM)) {
33928 unsigned long off =
33929 prev->vm_offset+prev->vm_end-prev->vm_start;
33930 if (off != mpnt->vm_offset)
33931 continue;
33932 }
33933
33934 /* merge prev with mpnt and set up pointers so the
33935 * new big segment can possibly merge with the next
33936 * one. The old unused mpnt is freed. */
33937 if (mm->mmap_avl)
33938 avl_remove(mpnt, &mm->mmap_avl);
33939 prev->vm_end = mpnt->vm_end;
33940 prev->vm_next = mpnt->vm_next;
33941 if (mpnt->vm_ops && mpnt->vm_ops->close) {
33942 mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
33943 mpnt->vm_start = mpnt->vm_end;
33944 mpnt->vm_ops->close(mpnt);
33945 }
33946 mm->map_count--;
33947 remove_shared_vm_struct(mpnt);
33948 if (mpnt->vm_file)
33949 fput(mpnt->vm_file);
33950 kmem_cache_free(vm_area_cachep, mpnt);
33951 mpnt = prev;
33952 }
33953 mm->mmap_cache = NULL; /* Kill the cache. */
33954 }
33955
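In practice this merging is what keeps repeated anonymous mappings from piling up vm_area_structs: two adjacent areas with identical flags, no file and no vm_pte collapse into one. A sketch that sets up that situation from user space; the MAP_FIXED reuse of the address just past the first mapping is purely illustrative, and the merge itself can only be confirmed by reading /proc/self/maps:

/* Sketch: two adjacent anonymous mappings with identical protection and
 * flags, the situation merge_segments() collapses into a single vma. */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);

    char *a = mmap(NULL, page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (a == MAP_FAILED) {
        perror("mmap a");
        return 1;
    }

    /* Map the page immediately after 'a' with the same protection and
     * flags; both areas then satisfy every condition in the loop above. */
    char *b = mmap(a + page, page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (b == MAP_FAILED) {
        perror("mmap b");
        return 1;
    }

    printf("a=%p b=%p adjacent=%s\n", (void *)a, (void *)b,
           b == a + page ? "yes" : "no");
    getchar();    /* pause here and inspect /proc/self/maps */
    return 0;
}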
33956 void __init vma_init(void)
33957 {
33958 vm_area_cachep = kmem_cache_create("vm_area_struct",
33959 sizeof(struct vm_area_struct),
33960 0, SLAB_HWCACHE_ALIGN,
33961 NULL, NULL);
33962 if(!vm_area_cachep)
33963 panic("vma_init: Cannot alloc vm_area_struct cache.");
33964
33965 mm_cachep = kmem_cache_create("mm_struct",
33966 sizeof(struct mm_struct),
33967 0, SLAB_HWCACHE_ALIGN,
33968 NULL, NULL);
33969 if(!mm_cachep)
33970 panic("vma_init: Cannot alloc mm_struct cache.");
33971 }