
mm/mmap.c

33062 /*
33063  *      linux/mm/mmap.c
33064  *
33065  * Written by obz.
33066  */
33067 #include <linux/slab.h>
33068 #include <linux/shm.h>
33069 #include <linux/mman.h>
33070 #include <linux/pagemap.h>
33071 #include <linux/swap.h>
33072 #include <linux/swapctl.h>
33073 #include <linux/smp_lock.h>
33074 #include <linux/init.h>
33075 #include <linux/file.h>
33076 
33077 #include <asm/uaccess.h>
33078 #include <asm/pgtable.h>
33079 
33080 /* description of effects of mapping type and prot in
33081  * current implementation.  this is due to the limited
33082  * x86 page protection hardware.  The expected behavior
33083  * is in parens (Y = yes, N = no, C = copy):
33084  *
33085  * map_type    prot
33086  *             PROT_NONE  PROT_READ  PROT_WRITE PROT_EXEC
33087  * MAP_SHARED  r: (N) N   r: (Y) Y   r: (N) Y   r: (N) Y
33088  *             w: (N) N   w: (N) N   w: (Y) Y   w: (N) N
33089  *             x: (N) N   x: (N) Y   x: (N) Y   x: (Y) Y
33090  *
33091  * MAP_PRIVATE r: (N) N   r: (Y) Y   r: (N) Y   r: (N) Y
33092  *             w: (N) N   w: (N) N   w: (C) C   w: (N) N
33093  *             x: (N) N   x: (N) Y   x: (N) Y   x: (Y) Y
33094  */
33095 pgprot_t protection_map[16] = {
33096   __P000, __P001, __P010, __P011,
33097   __P100, __P101, __P110, __P111,
33098   __S000, __S001, __S010, __S011,
33099   __S100, __S101, __S110, __S111
33100 };
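
protection_map[] is indexed by the low four bits of vm_flags (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED), as the protection_map[vma->vm_flags & 0x0f] lookup later in do_mmap() shows: entries 0-7 are the private (__P) protections, entries 8-15 the shared (__S) ones. The short user-space sketch below only illustrates how the 0..15 index is formed; the VM_*_ constants are local stand-ins mirroring the 2.2 values, not kernel headers.

/* How the protection_map[] index is formed.  The constants are local
 * stand-ins for the 2.2 kernel values, defined here for illustration. */
#include <stdio.h>

#define VM_READ_   0x1
#define VM_WRITE_  0x2
#define VM_EXEC_   0x4
#define VM_SHARED_ 0x8

int main(void)
{
    unsigned long vm_flags = VM_READ_ | VM_WRITE_ | VM_SHARED_;
    /* index 11 selects __S011: shared, readable, writable, not executable */
    printf("protection_map index = %lu\n", vm_flags & 0x0f);
    return 0;
}
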
33101 
33102 /* SLAB cache for vm_area_struct's. */
33103 kmem_cache_t *vm_area_cachep;
33104 
33105 int sysctl_overcommit_memory;
33106 
33107 /* Check that a process has enough memory to allocate a
33108  * new virtual mapping.
33109  */
33110 int vm_enough_memory(long pages)
33111 {
33112   /* Stupid algorithm to decide if we have enough memory:
33113    * while simple, it hopefully works in most obvious
33114    * cases.. Easy to fool it, but this should catch most
33115    * mistakes.  */
33116   /* 23/11/98 NJC: Somewhat less stupid version of
33117    * algorithm, which tries to do "TheRightThing".
33118    * Instead of using half of (buffers+cache), use the
33119    * minimum values.  Allow an extra 2% of num_physpages
33120    * for safety margin.  */
33121 
33122   long free;
33123 
33124   /* Sometimes we want to use more memory than we
33125    * have. */
33126   if (sysctl_overcommit_memory)
33127       return 1;
33128 
33129   free = buffermem >> PAGE_SHIFT;
33130   free += page_cache_size;
33131   free += nr_free_pages;
33132   free += nr_swap_pages;
33133   free -= (page_cache.min_percent +
33134            buffer_mem.min_percent + 2)*num_physpages/100;
33135   return free > pages;
33136 }
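
The heuristic can be read as plain arithmetic: add up the pages that could plausibly be reclaimed (buffer memory, page cache, free pages, free swap), hold back the minimum buffer/cache share plus a 2% safety margin of physical memory, and compare the rest with the request. The sketch below restates that calculation in user-space C with made-up inputs; none of the identifiers or numbers are kernel symbols.

/* Restatement of the vm_enough_memory() heuristic with hypothetical
 * inputs; all names and numbers here are illustrative only. */
#include <stdio.h>

static int enough_memory(long request_pages,
                         long buffer_pages, long cache_pages,
                         long free_pages, long swap_pages,
                         long phys_pages,
                         long buffer_min_pct, long cache_min_pct)
{
    long free = buffer_pages + cache_pages + free_pages + swap_pages;
    /* hold back the buffer/cache minimums plus a 2% safety margin */
    free -= (cache_min_pct + buffer_min_pct + 2) * phys_pages / 100;
    return free > request_pages;
}

int main(void)
{
    /* e.g. a 128 MB machine: 32768 physical pages of 4 KB */
    printf("%s\n", enough_memory(1000, 2048, 8192, 4096, 16384,
                                 32768, 5, 5) ? "enough" : "not enough");
    return 0;
}
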
33137 
33138 /* Remove one vm structure from the inode's i_mmap
33139  * ring. */
33140 static inline void remove_shared_vm_struct(
33141   struct vm_area_struct *vma)
33142 {
33143   struct file * file = vma->vm_file;
33144 
33145   if (file) {
33146     if (vma->vm_flags & VM_DENYWRITE)
33147       file->f_dentry->d_inode->i_writecount++;
33148     if(vma->vm_next_share)
33149       vma->vm_next_share->vm_pprev_share =
33150         vma->vm_pprev_share;
33151     *vma->vm_pprev_share = vma->vm_next_share;
33152   }
33153 }
33154 
 Комментарий
33155 asmlinkage unsigned long sys_brk(unsigned long brk)
33156 {
33157   unsigned long rlim, retval;
33158   unsigned long newbrk, oldbrk;
33159   struct mm_struct *mm = current->mm;
33160 
33161   down(&mm->mmap_sem);
33162 
33163   /* This lock-kernel is one of the main contention
33164    * points for certain normal loads.  And it really
33165    * should not be here: almost everything in
33166    * brk()/mmap()/munmap() is protected sufficiently by
33167    * the mmap semaphore that we got above.
33168    *
33169    * We should move this into the few things that really
33170    * want the lock, namely anything that actually touches
33171    * a file descriptor etc.  We can do all the normal
33172    * anonymous mapping cases without ever getting the
33173    * lock at all - the actual memory management code is
33174    * already completely thread-safe.  */
33175   lock_kernel();
33176 
33177   if (brk < mm->end_code)
33178     goto out;
33179   newbrk = PAGE_ALIGN(brk);
33180   oldbrk = PAGE_ALIGN(mm->brk);
33181   if (oldbrk == newbrk)
33182     goto set_brk;
33183 
33184   /* Always allow shrinking brk. */
 Комментарий
33185   if (brk <= mm->brk) {
33186     if (!do_munmap(newbrk, oldbrk-newbrk))
33187       goto set_brk;
33188     goto out;
33189   }
33190 
33191   /* Check against rlimit and stack.. */
33192   rlim = current->rlim[RLIMIT_DATA].rlim_cur;
33193   if (rlim < RLIM_INFINITY && brk - mm->end_code > rlim)
33194     goto out;
33195 
33196   /* Check against existing mmap mappings. */
33197   if (find_vma_intersection(mm, oldbrk,newbrk+PAGE_SIZE))
33198     goto out;
33199 
33200   /* Check if we have enough memory.. */
33201   if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
33202     goto out;
33203 
33204   /* Ok, looks good - let it rip. */
33205   if (do_mmap(NULL, oldbrk, newbrk-oldbrk,
33206        PROT_READ|PROT_WRITE|PROT_EXEC,
33207        MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
33208     goto out;
33209 set_brk:
33210   mm->brk = brk;
33211 out:
33212   retval = mm->brk;
33213   unlock_kernel();
33214   up(&mm->mmap_sem);
33215   return retval;
33216 }
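
From user space this path is normally reached through malloc() or sbrk(). The small program below simply grows the program break by four pages and shrinks it back, exercising both the do_mmap() and the do_munmap() branches of sys_brk() above; it is a minimal sketch with only basic error handling.

/* Grow the program break by four pages, then shrink it back. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    void *start = sbrk(0);                    /* current break */
    if (sbrk(4 * 4096) == (void *)-1) {       /* grow: do_mmap() branch */
        perror("sbrk");
        return 1;
    }
    void *grown = sbrk(0);
    sbrk(-(4 * 4096));                        /* shrink: do_munmap() branch */
    printf("break: %p -> %p -> %p\n", start, grown, sbrk(0));
    return 0;
}
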
33217 
33218 /* Combine the mmap "prot" and "flags" argument into one
33219  * "vm_flags" used internally. Essentially, translate the
33220  * "PROT_xxx" and "MAP_xxx" bits into "VM_xxx".  */
33221 static inline unsigned long vm_flags(unsigned long prot,
33222                                      unsigned long flags)
33223 {
33224 #define _trans(x,bit1,bit2)                             \
33225 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
33226 
33227   unsigned long prot_bits, flag_bits;
33228   prot_bits =
33229     _trans(prot, PROT_READ, VM_READ) |
33230     _trans(prot, PROT_WRITE, VM_WRITE) |
33231     _trans(prot, PROT_EXEC, VM_EXEC);
33232   flag_bits =
33233     _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
33234     _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
33235     _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
33236   return prot_bits | flag_bits;
33237 #undef _trans
33238 }
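
The _trans() macro only does real work when a PROT_* bit and its VM_* counterpart differ in value; for the read/write/execute bits the two sets of constants happen to coincide, so the translation collapses to a mask. The stand-alone sketch below reproduces the macro with locally defined stand-in constants so this can be tried in isolation.

/* The _trans() idea in isolation.  The *_ constants are local stand-ins,
 * chosen so that the PROT_* and VM_* values coincide, as they do for the
 * low bits in the kernel. */
#include <stdio.h>

#define PROT_READ_  0x1
#define PROT_WRITE_ 0x2
#define PROT_EXEC_  0x4
#define VM_READ_    0x1
#define VM_WRITE_   0x2
#define VM_EXEC_    0x4

#define _trans(x, bit1, bit2) \
    ((bit1) == (bit2) ? ((x) & (bit1)) : (((x) & (bit1)) ? (bit2) : 0))

int main(void)
{
    unsigned long prot = PROT_READ_ | PROT_WRITE_;
    unsigned long vm = _trans(prot, PROT_READ_,  VM_READ_)  |
                       _trans(prot, PROT_WRITE_, VM_WRITE_) |
                       _trans(prot, PROT_EXEC_,  VM_EXEC_);
    printf("vm bits = 0x%lx\n", vm);          /* 0x3: VM_READ | VM_WRITE */
    return 0;
}
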
33239 
 Комментарий
33240 unsigned long do_mmap(struct file * file,
33241   unsigned long addr, unsigned long len,
33242   unsigned long prot, unsigned long flags,
33243   unsigned long off)
33244 {
33245   struct mm_struct * mm = current->mm;
33246   struct vm_area_struct * vma;
33247   int error;
33248 
33249   if ((len = PAGE_ALIGN(len)) == 0)
33250     return addr;
33251 
33252   if (len > TASK_SIZE || addr > TASK_SIZE-len)
33253     return -EINVAL;
33254 
33255   /* offset overflow? */
33256   if (off + len < off)
33257     return -EINVAL;
33258 
33259   /* Too many mappings? */
33260   if (mm->map_count > MAX_MAP_COUNT)
33261     return -ENOMEM;
33262 
33263   /* mlock MCL_FUTURE? */
33264   if (mm->def_flags & VM_LOCKED) {
33265     unsigned long locked = mm->locked_vm << PAGE_SHIFT;
33266     locked += len;
33267     if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
33268       return -EAGAIN;
33269   }
33270 
33271   /* Do simple checking here so the lower-level routines
33272    * won't have to. we assume access permissions have
33273    * been handled by the open of the memory object, so we
33274    * don't do any here.  */
33275   if (file != NULL) {
33276     switch (flags & MAP_TYPE) {
33277     case MAP_SHARED:
33278       if ((prot & PROT_WRITE) && !(file->f_mode & 2))
33279         return -EACCES;
33280 
33281       /* Make sure we don't allow writing to an
33282        * append-only file.. */
33283       if (IS_APPEND(file->f_dentry->d_inode) &&
33284           (file->f_mode & 2))
33285         return -EACCES;
33286 
33287       /* make sure there are no mandatory locks on the
33288        * file. */
33289       if (locks_verify_locked(file->f_dentry->d_inode))
33290         return -EAGAIN;
33291 
33292       /* fall through */
33293     case MAP_PRIVATE:
33294       if (!(file->f_mode & 1))
33295         return -EACCES;
33296       break;
33297 
33298     default:
33299       return -EINVAL;
33300     }
33301   } else if ((flags & MAP_TYPE) != MAP_PRIVATE)
33302     return -EINVAL;
33303 
33304   /* Obtain the address to map to. we verify (or select)
33305    * it and ensure that it represents a valid section of
33306    * the address space.  */
33307   if (flags & MAP_FIXED) {
33308     if (addr & ~PAGE_MASK)
33309       return -EINVAL;
33310   } else {
33311     addr = get_unmapped_area(addr, len);
33312     if (!addr)
33313       return -ENOMEM;
33314   }
33315 
33316   /* Determine the object being mapped and call the
33317    * appropriate specific mapper. the address has already
33318    * been validated, but not unmapped, but the maps are
33319    * removed from the list.  */
33320   if (file && (!file->f_op || !file->f_op->mmap))
33321     return -ENODEV;
33322 
33323   vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
33324   if (!vma)
33325     return -ENOMEM;
33326 
33327   vma->vm_mm = mm;
33328   vma->vm_start = addr;
33329   vma->vm_end = addr + len;
33330   vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;
33331 
33332   if (file) {
33333     if (file->f_mode & 1)
33334       vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
33335     if (flags & MAP_SHARED) {
33336       vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
33337 
33338       /* This looks strange, but when we don't have the
33339        * file open for writing, we can demote the shared
33340        * mapping to a simpler private mapping. That also
33341        * takes care of a security hole with ptrace()
33342        * writing to a shared mapping without write
33343        * permissions.
33344        *
33345        * We leave the VM_MAYSHARE bit on, just to get
33346        * correct output from /proc/xxx/maps..  */
33347       if (!(file->f_mode & 2))
33348         vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
33349     }
33350   } else
33351     vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
33352   vma->vm_page_prot =
33353     protection_map[vma->vm_flags & 0x0f];
33354   vma->vm_ops = NULL;
33355   vma->vm_offset = off;
33356   vma->vm_file = NULL;
33357   vma->vm_pte = 0;
33358 
33359   /* Clear old maps */
33360   error = -ENOMEM;
33361   if (do_munmap(addr, len))
33362     goto free_vma;
33363 
33364   /* Check against address space limit. */
33365   if ((mm->total_vm << PAGE_SHIFT) + len
33366       > current->rlim[RLIMIT_AS].rlim_cur)
33367     goto free_vma;
33368 
33369   /* Private writable mapping? Check memory
33370    * availability.. */
33371   if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) ==
33372       VM_WRITE &&
33373       !(flags & MAP_NORESERVE)  &&
33374       !vm_enough_memory(len >> PAGE_SHIFT))
33375     goto free_vma;
33376 
33377   if (file) {
33378     int correct_wcount = 0;
33379     if (vma->vm_flags & VM_DENYWRITE) {
33380       if (file->f_dentry->d_inode->i_writecount > 0) {
33381         error = -ETXTBSY;
33382         goto free_vma;
33383       }
33384       /* f_op->mmap might possibly sleep
33385        * (generic_file_mmap doesn't, but other code
33386        * might). In any case, this takes care of any
33387        * race that this might cause.
33388        */
33389       file->f_dentry->d_inode->i_writecount--;
33390       correct_wcount = 1;
33391     }
33392     error = file->f_op->mmap(file, vma);
33393     /* Fix up the count if necessary, then check for an
33394      * error */
33395     if (correct_wcount)
33396       file->f_dentry->d_inode->i_writecount++;
33397     if (error)
33398       goto unmap_and_free_vma;
33399     vma->vm_file = file;
33400     file->f_count++;
33401   }
33402 
33403   /* merge_segments may merge our vma, so we can't refer
33404    * to it after the call.  Save the values we need now
33405    * ...  */
33406   flags = vma->vm_flags;
33407   addr = vma->vm_start; /* can addr have changed?? */
33408   insert_vm_struct(mm, vma);
33409   merge_segments(mm, vma->vm_start, vma->vm_end);
33410 
33411   mm->total_vm += len >> PAGE_SHIFT;
33412   if (flags & VM_LOCKED) {
33413     mm->locked_vm += len >> PAGE_SHIFT;
33414     make_pages_present(addr, addr + len);
33415   }
33416   return addr;
33417 
33418 unmap_and_free_vma:
33419   /* Undo any partial mapping done by a device driver. */
33420   flush_cache_range(mm, vma->vm_start, vma->vm_end);
33421   zap_page_range(mm, vma->vm_start,
33422                  vma->vm_end - vma->vm_start);
33423   flush_tlb_range(mm, vma->vm_start, vma->vm_end);
33424 free_vma:
33425   kmem_cache_free(vm_area_cachep, vma);
33426   return error;
33427 }
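
The checks at the top of do_mmap() correspond directly to what a user-space caller sees: an unreadable descriptor yields EACCES, an unaligned MAP_FIXED address EINVAL, and so on. The snippet below drives the common success path, a private read-only mapping of a file; /etc/hostname is only an example of a small world-readable file.

/* Map a readable file MAP_PRIVATE/PROT_READ and look at its first byte. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    int fd = open("/etc/hostname", O_RDONLY);   /* example path */
    if (fd < 0) { perror("open"); return 1; }
    char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    printf("first byte: %c\n", p[0]);
    munmap(p, 4096);
    close(fd);
    return 0;
}
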
33428 
33429 /* Get an address range which is currently unmapped.  For
33430  * mmap() without MAP_FIXED and shmat() with addr=0.
33431  * Return value 0 means ENOMEM.  */
33432 unsigned long get_unmapped_area(unsigned long addr,
33433                                 unsigned long len)
33434 {
33435   struct vm_area_struct * vmm;
33436 
33437   if (len > TASK_SIZE)
33438     return 0;
33439   if (!addr)
33440     addr = TASK_UNMAPPED_BASE;
33441   addr = PAGE_ALIGN(addr);
33442 
33443   for (vmm = find_vma(current->mm, addr); ;
33444        vmm = vmm->vm_next) {
33445     /* At this point:  (!vmm || addr < vmm->vm_end). */
33446     if (TASK_SIZE - len < addr)
33447       return 0;
33448     if (!vmm || addr + len <= vmm->vm_start)
33449       return addr;
33450     addr = vmm->vm_end;
33451   }
33452 }
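
The search is a first-fit walk: starting at TASK_UNMAPPED_BASE (or the caller's hint), it skips over each existing VMA until it finds a gap of at least len bytes, or runs out of address space. The self-contained sketch below repeats the same loop over a sorted array of hypothetical [start, end) ranges.

/* First-fit search over a sorted array of [start, end) ranges, the same
 * walk get_unmapped_area() performs over the VMA list.  The data and the
 * limit are hypothetical. */
#include <stdio.h>

struct range { unsigned long start, end; };

static unsigned long first_fit(unsigned long addr, unsigned long len,
                               unsigned long limit,
                               const struct range *r, int n)
{
    int i = 0;
    while (i < n && r[i].end <= addr)     /* find_vma(): first end > addr */
        i++;
    for (;; i++) {
        if (limit - len < addr)
            return 0;                     /* no room left below the limit */
        if (i >= n || addr + len <= r[i].start)
            return addr;                  /* the gap here is big enough */
        addr = r[i].end;                  /* otherwise try past this range */
    }
}

int main(void)
{
    struct range used[] = { { 0x1000, 0x3000 }, { 0x5000, 0x6000 } };
    printf("%#lx\n", first_fit(0x1000, 0x1000, 0x10000, used, 2));
    return 0;
}
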
33453 
33454 #define vm_avl_empty    (struct vm_area_struct *) NULL
33455 
33456 #include "mmap_avl.c"
33457 
33458 /* Look up the first VMA which satisfies addr < vm_end,
33459  * NULL if none. */
33460 struct vm_area_struct * find_vma(struct mm_struct * mm,
33461                                  unsigned long addr)
33462 {
33463   struct vm_area_struct *vma = NULL;
33464 
33465   if (mm) {
33466     /* Check the cache first. */
33467     /* (Cache hit rate is typically around 35%.) */
33468     vma = mm->mmap_cache;
33469     if (!(vma && vma->vm_end > addr &&
33470           vma->vm_start <= addr)) {
33471       if (!mm->mmap_avl) {
33472         /* Go through the linear list. */
33473         vma = mm->mmap;
33474         while (vma && vma->vm_end <= addr)
33475           vma = vma->vm_next;
33476       } else {
33477         /* Then go through the AVL tree quickly. */
33478         struct vm_area_struct * tree = mm->mmap_avl;
33479         vma = NULL;
33480         for (;;) {
33481           if (tree == vm_avl_empty)
33482             break;
33483           if (tree->vm_end > addr) {
33484             vma = tree;
33485             if (tree->vm_start <= addr)
33486               break;
33487             tree = tree->vm_avl_left;
33488           } else
33489             tree = tree->vm_avl_right;
33490         }
33491       }
33492       if (vma)
33493         mm->mmap_cache = vma;
33494     }
33495   }
33496   return vma;
33497 }
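
The contract is "first VMA whose vm_end lies above addr", which is also exactly how the sorted /proc/self/maps file can be scanned from user space. The sketch below does that for the current process, using a stack address as the probe; it assumes /proc is mounted and parses only the address field of each line.

/* User-space analogue of find_vma(): scan the (sorted) /proc/self/maps
 * for the first region whose end address exceeds a probe address. */
#include <stdio.h>

int main(void)
{
    int local;
    unsigned long probe = (unsigned long)&local;   /* a stack address */
    FILE *f = fopen("/proc/self/maps", "r");
    if (!f) { perror("fopen"); return 1; }
    char line[512];
    while (fgets(line, sizeof line, f)) {
        unsigned long start, end;
        if (sscanf(line, "%lx-%lx", &start, &end) == 2 && end > probe) {
            printf("first region with end > %#lx: %#lx-%#lx\n",
                   probe, start, end);
            break;
        }
    }
    fclose(f);
    return 0;
}
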
33498 
33499 /* Same as find_vma, but also return a pointer to the
33500  * previous VMA in *pprev. */
33501 struct vm_area_struct * find_vma_prev(
33502   struct mm_struct * mm, unsigned long addr,
33503   struct vm_area_struct **pprev)
33504 {
33505   if (mm) {
33506     if (!mm->mmap_avl) {
33507       /* Go through the linear list. */
33508       struct vm_area_struct * prev = NULL;
33509       struct vm_area_struct * vma = mm->mmap;
33510       while (vma && vma->vm_end <= addr) {
33511         prev = vma;
33512         vma = vma->vm_next;
33513       }
33514       *pprev = prev;
33515       return vma;
33516     } else {
33517       /* Go through the AVL tree quickly. */
33518       struct vm_area_struct * vma = NULL;
33519       struct vm_area_struct * last_turn_right = NULL;
33520       struct vm_area_struct * prev = NULL;
33521       struct vm_area_struct * tree = mm->mmap_avl;
33522       for (;;) {
33523         if (tree == vm_avl_empty)
33524           break;
33525         if (tree->vm_end > addr) {
33526           vma = tree;
33527           prev = last_turn_right;
33528           if (tree->vm_start <= addr)
33529             break;
33530           tree = tree->vm_avl_left;
33531         } else {
33532           last_turn_right = tree;
33533           tree = tree->vm_avl_right;
33534         }
33535       }
33536       if (vma) {
33537         if (vma->vm_avl_left != vm_avl_empty) {
33538           prev = vma->vm_avl_left;
33539           while (prev->vm_avl_right != vm_avl_empty)
33540             prev = prev->vm_avl_right;
33541         }
33542         if ((prev ? prev->vm_next : mm->mmap) != vma)
33543           printk("find_vma_prev: tree inconsistent with "
33544                  "list\n");
33545         *pprev = prev;
33546         return vma;
33547       }
33548     }
33549   }
33550   *pprev = NULL;
33551   return NULL;
33552 }
33553 
33554 /* Normal function to fix up a mapping
33555  * This function is the default for when an area has no
33556  * specific function.  This may be used as part of a more
33557  * specific routine.  This function works out what part
33558  * of an area is affected and adjusts the mapping
33559  * information.  Since the actual page manipulation is
33560  * done in do_mmap(), none need be done here, though it
33561  * would probably be more appropriate.
33562  *
33563  * By the time this function is called, the area struct
33564  * has been removed from the process mapping list, so it
33565  * needs to be reinserted if necessary.
33566  *
33567  * The 4 main cases are:
33568  *   Unmapping the whole area
33569  *   Unmapping from the start of the seg to a point in it
33570  *   Unmapping from an intermediate point to the end
33571  *   Unmapping between two intermediate points, making a
33572  *     hole.
33573  *
33574  * Case 4 involves the creation of 2 new areas, for each
33575  * side of the hole.  If possible, we reuse the existing
33576  * area rather than allocate a new one, and the return
33577  * indicates whether the old area was reused.  */
33578 static int unmap_fixup(struct vm_area_struct *area,
33579   unsigned long addr, size_t len,
33580   struct vm_area_struct **extra)
33581 {
33582   struct vm_area_struct *mpnt;
33583   unsigned long end = addr + len;
33584 
33585   area->vm_mm->total_vm -= len >> PAGE_SHIFT;
33586   if (area->vm_flags & VM_LOCKED)
33587     area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
33588 
33589   /* Unmapping the whole area. */
33590   if (addr == area->vm_start && end == area->vm_end) {
33591     if (area->vm_ops && area->vm_ops->close)
33592       area->vm_ops->close(area);
33593     if (area->vm_file)
33594       fput(area->vm_file);
33595     return 0;
33596   }
33597 
33598   /* Work out to one of the ends. */
33599   if (end == area->vm_end)
33600     area->vm_end = addr;
33601   else if (addr == area->vm_start) {
33602     area->vm_offset += (end - area->vm_start);
33603     area->vm_start = end;
33604   } else {
33605     /* Unmapping a hole:
33606      * area->vm_start < addr <= end < area->vm_end */
33607     /* Add end mapping -- leave beginning for below */
33608     mpnt = *extra;
33609     *extra = NULL;
33610 
33611     mpnt->vm_mm = area->vm_mm;
33612     mpnt->vm_start = end;
33613     mpnt->vm_end = area->vm_end;
33614     mpnt->vm_page_prot = area->vm_page_prot;
33615     mpnt->vm_flags = area->vm_flags;
33616     mpnt->vm_ops = area->vm_ops;
33617     mpnt->vm_offset =
33618       area->vm_offset + (end - area->vm_start);
33619     mpnt->vm_file = area->vm_file;
33620     mpnt->vm_pte = area->vm_pte;
33621     if (mpnt->vm_file)
33622       mpnt->vm_file->f_count++;
33623     if (mpnt->vm_ops && mpnt->vm_ops->open)
33624       mpnt->vm_ops->open(mpnt);
33625     area->vm_end = addr;    /* Truncate area */
33626     insert_vm_struct(current->mm, mpnt);
33627   }
33628 
33629   insert_vm_struct(current->mm, area);
33630   return 1;
33631 }
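
Case 4 (a hole) is easy to provoke from user space: map three pages anonymously and unmap the middle one. The kernel then has to keep two areas where there was one, which is what the *extra argument exists for. A minimal demonstration follows; the page size is queried at run time rather than assumed.

/* Punch a hole in the middle of a three-page anonymous mapping; the
 * pages on either side of the hole must stay usable. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    if (munmap(p + page, page)) { perror("munmap"); return 1; }
    p[0] = 'a';                  /* piece before the hole */
    p[2 * page] = 'b';           /* piece after the hole */
    printf("hole at %p, neighbours still mapped\n", (void *)(p + page));
    return 0;
}
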
33632 
33633 /* Try to free as many page directory entries as we can,
33634  * without having to work very hard at actually scanning
33635  * the page tables themselves.
33636  *
33637  * Right now we try to free page tables if we have a nice
33638  * PGDIR-aligned area that got free'd up. We could be
33639  * more granular if we want to, but this is fast and
33640  * simple, and covers the bad cases.
33641  *
33642  * "prev", if it exists, points to a vma before the one
33643  * we just free'd - but there's no telling how much
33644  * before.  */
33645 static void free_pgtables(struct mm_struct * mm,
33646   struct vm_area_struct *prev,
33647   unsigned long start, unsigned long end)
33648 {
33649   unsigned long first = start & PGDIR_MASK;
33650   unsigned long last = (end + PGDIR_SIZE - 1) &
33651                        PGDIR_MASK;
33652 
33653   if (!prev) {
33654     prev = mm->mmap;
33655     if (!prev)
33656       goto no_mmaps;
33657     if (prev->vm_end > start) {
33658       if (last > prev->vm_end)
33659         last = prev->vm_end;
33660       goto no_mmaps;
33661     }
33662   }
33663   for (;;) {
33664     struct vm_area_struct *next = prev->vm_next;
33665 
33666     if (next) {
33667       if (next->vm_start < start) {
33668         prev = next;
33669         continue;
33670       }
33671       if (last > next->vm_start)
33672         last = next->vm_start;
33673     }
33674     if (prev->vm_end > first)
33675       first = prev->vm_end + PGDIR_SIZE - 1;
33676     break;
33677   }
33678 no_mmaps:
33679   first = first >> PGDIR_SHIFT;
33680   last = last >> PGDIR_SHIFT;
33681   if (last > first)
33682     clear_page_tables(mm, first, last-first);
33683 }
33684 
33685 /* Munmap is split into 2 main parts -- this part which
33686  * finds what needs doing, and the areas themselves,
33687  * which do the work.  This now handles partial
33688  * unmappings.  Jeremy Fitzhardine <jeremy@sw.oz.au> */
33689 int do_munmap(unsigned long addr, size_t len)
33690 {
33691   struct mm_struct * mm;
33692   struct vm_area_struct *mpnt, *prev, **npp, *free,
33693     *extra;
33694 
33695   if ((addr & ~PAGE_MASK) || addr > TASK_SIZE ||
33696       len > TASK_SIZE-addr)
33697     return -EINVAL;
33698 
33699   if ((len = PAGE_ALIGN(len)) == 0)
33700     return -EINVAL;
33701 
33702   /* Check if this memory area is ok - put it on the
33703    * temporary list if so..  The checks here are pretty
33704    * simple -- every area affected in some way (by any
33705    * overlap) is put on the list.  If nothing is put on,
33706    * nothing is affected.  */
33707   mm = current->mm;
33708   mpnt = find_vma_prev(mm, addr, &prev);
33709   if (!mpnt)
33710     return 0;
33711   /* we have  addr < mpnt->vm_end  */
33712 
33713   if (mpnt->vm_start >= addr+len)
33714     return 0;
33715 
33716   /* If we'll make "hole", check the vm areas limit */
33717   if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
33718       && mm->map_count >= MAX_MAP_COUNT)
33719     return -ENOMEM;
33720 
33721   /* We may need one additional vma to fix up the
33722    * mappings ...  and this is the last chance for an
33723    * easy error exit.  */
33724   extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
33725   if (!extra)
33726     return -ENOMEM;
33727 
33728   npp = (prev ? &prev->vm_next : &mm->mmap);
33729   free = NULL;
33730   for (; mpnt && mpnt->vm_start < addr+len; mpnt = *npp){
33731     *npp = mpnt->vm_next;
33732     mpnt->vm_next = free;
33733     free = mpnt;
33734     if (mm->mmap_avl)
33735       avl_remove(mpnt, &mm->mmap_avl);
33736   }
33737 
33738   /* Ok - we have the memory areas we should free on the
33739    * 'free' list, so release them, and unmap the page
33740    * range..  If the one of the segments is only being
33741    * partially unmapped, it will put new
33742    * vm_area_struct(s) into the address space.  */
33743   while ((mpnt = free) != NULL) {
33744     unsigned long st, end, size;
33745 
33746     free = free->vm_next;
33747 
33748     st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
33749     end = addr+len;
33750     end = end > mpnt->vm_end ? mpnt->vm_end : end;
33751     size = end - st;
33752 
33753     if (mpnt->vm_ops && mpnt->vm_ops->unmap)
33754       mpnt->vm_ops->unmap(mpnt, st, size);
33755 
33756     remove_shared_vm_struct(mpnt);
33757     mm->map_count--;
33758 
33759     flush_cache_range(mm, st, end);
33760     zap_page_range(mm, st, size);
33761     flush_tlb_range(mm, st, end);
33762 
33763     /* Fix the mapping, and free the old area if it
33764      * wasn't reused.  */
33765     if (!unmap_fixup(mpnt, st, size, &extra))
33766       kmem_cache_free(vm_area_cachep, mpnt);
33767   }
33768 
33769   /* Release the extra vma struct if it wasn't used */
33770   if (extra)
33771     kmem_cache_free(vm_area_cachep, extra);
33772 
33773   free_pgtables(mm, prev, addr, addr+len);
33774 
33775   mm->mmap_cache = NULL;  /* Kill the cache. */
33776   return 0;
33777 }
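
The argument checks at the top of do_munmap() are visible from user space; for instance, an address that is not page-aligned is rejected before any VMA is touched. A tiny check (errno should come back as EINVAL on Linux):

/* An unaligned address fails do_munmap()'s first test with EINVAL. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    if (munmap((void *)0x1001, 4096) == -1)
        printf("munmap(0x1001, ...): %s\n", strerror(errno));
    return 0;
}
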
33778 
33779 asmlinkage int sys_munmap(unsigned long addr, size_t len)
33780 {
33781   int ret;
33782 
33783   down(&current->mm->mmap_sem);
33784   lock_kernel();
33785   ret = do_munmap(addr, len);
33786   unlock_kernel();
33787   up(&current->mm->mmap_sem);
33788   return ret;
33789 }
33790 
33791 /* Build the AVL tree corresponding to the VMA list. */
33792 void build_mmap_avl(struct mm_struct * mm)
33793 {
33794   struct vm_area_struct * vma;
33795 
33796   mm->mmap_avl = NULL;
33797   for (vma = mm->mmap; vma; vma = vma->vm_next)
33798     avl_insert(vma, &mm->mmap_avl);
33799 }
33800 
33801 /* Release all mmaps. */
33802 void exit_mmap(struct mm_struct * mm)
33803 {
33804   struct vm_area_struct * mpnt;
33805 
33806   mpnt = mm->mmap;
33807   mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
33808   mm->rss = 0;
33809   mm->total_vm = 0;
33810   mm->locked_vm = 0;
33811   while (mpnt) {
33812     struct vm_area_struct * next = mpnt->vm_next;
33813     unsigned long start = mpnt->vm_start;
33814     unsigned long end = mpnt->vm_end;
33815     unsigned long size = end - start;
33816 
33817     if (mpnt->vm_ops) {
33818       if (mpnt->vm_ops->unmap)
33819         mpnt->vm_ops->unmap(mpnt, start, size);
33820       if (mpnt->vm_ops->close)
33821         mpnt->vm_ops->close(mpnt);
33822     }
33823     mm->map_count--;
33824     remove_shared_vm_struct(mpnt);
33825     zap_page_range(mm, start, size);
33826     if (mpnt->vm_file)
33827       fput(mpnt->vm_file);
33828     kmem_cache_free(vm_area_cachep, mpnt);
33829     mpnt = next;
33830   }
33831 
33832   /* This is just debugging */
33833   if (mm->map_count)
33834     printk("exit_mmap: map count is %d\n",
33835            mm->map_count);
33836 
33837   clear_page_tables(mm, 0, USER_PTRS_PER_PGD);
33838 }
33839 
33840 /* Insert vm structure into process list sorted by
33841  * address and into the inode's i_mmap ring.  */
33842 void insert_vm_struct(struct mm_struct *mm,
33843                       struct vm_area_struct *vmp)
33844 {
33845   struct vm_area_struct **pprev;
33846   struct file * file;
33847 
33848   if (!mm->mmap_avl) {
33849     pprev = &mm->mmap;
33850     while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
33851       pprev = &(*pprev)->vm_next;
33852   } else {
33853     struct vm_area_struct *prev, *next;
33854     avl_insert_neighbours(vmp, &mm->mmap_avl,
33855                           &prev, &next);
33856     pprev = (prev ? &prev->vm_next : &mm->mmap);
33857     if (*pprev != next)
33858       printk("insert_vm_struct: tree inconsistent with "
33859              "list\n");
33860   }
33861   vmp->vm_next = *pprev;
33862   *pprev = vmp;
33863 
33864   mm->map_count++;
33865   if (mm->map_count >= AVL_MIN_MAP_COUNT &&
33866       !mm->mmap_avl)
33867     build_mmap_avl(mm);
33868 
33869   file = vmp->vm_file;
33870   if (file) {
33871     struct inode * inode = file->f_dentry->d_inode;
33872     if (vmp->vm_flags & VM_DENYWRITE)
33873       inode->i_writecount--;
33874 
33875     /* insert vmp into inode's share list */
33876     if((vmp->vm_next_share = inode->i_mmap) != NULL)
33877       inode->i_mmap->vm_pprev_share =
33878         &vmp->vm_next_share;
33879     inode->i_mmap = vmp;
33880     vmp->vm_pprev_share = &inode->i_mmap;
33881   }
33882 }
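
The linear-list branch uses the classic pointer-to-pointer idiom: pprev always points at the link that has to be rewritten, so inserting at the head needs no special case. Reduced to a self-contained sketch over a toy sorted list:

/* The pointer-to-pointer insertion idiom used above, over a sorted
 * singly linked list of toy nodes. */
#include <stdio.h>

struct node { unsigned long start; struct node *next; };

static void insert_sorted(struct node **head, struct node *n)
{
    struct node **pprev = head;
    while (*pprev && (*pprev)->start <= n->start)
        pprev = &(*pprev)->next;
    n->next = *pprev;        /* splice in without a separate "prev" pointer */
    *pprev = n;
}

int main(void)
{
    struct node a = {0x3000, NULL}, b = {0x1000, NULL}, c = {0x2000, NULL};
    struct node *head = NULL;
    insert_sorted(&head, &a);
    insert_sorted(&head, &b);
    insert_sorted(&head, &c);
    for (struct node *p = head; p; p = p->next)
        printf("%#lx\n", p->start);
    return 0;
}
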
33883 
33884 /* Merge the list of memory segments if possible.
33885  * Redundant vm_area_structs are freed.  This assumes
33886  * that the list is ordered by address.  We don't need to
33887  * traverse the entire list, only those segments which
33888  * intersect or are adjacent to a given interval.
33889  *
33890  * We must already hold the mm semaphore when we get
33891  * here..  */
33892 void merge_segments (struct mm_struct * mm,
33893   unsigned long start_addr, unsigned long end_addr)
33894 {
33895   struct vm_area_struct *prev, *mpnt, *next, *prev1;
33896 
33897   mpnt = find_vma_prev(mm, start_addr, &prev1);
33898   if (!mpnt)
33899     return;
33900 
33901   if (prev1) {
33902     prev = prev1;
33903   } else {
33904     prev = mpnt;
33905     mpnt = mpnt->vm_next;
33906   }
33907 
33908   /* prev and mpnt cycle through the list, as long as
33909    * start_addr < mpnt->vm_end &&
33910    * prev->vm_start < end_addr */
33911   for ( ; mpnt && prev->vm_start < end_addr;
33912         prev = mpnt, mpnt = next) {
33913     next = mpnt->vm_next;
33914 
33915     /* To share, we must have the same file,
33916      * operations.. */
33917     if ((mpnt->vm_file != prev->vm_file)||
33918         (mpnt->vm_pte != prev->vm_pte)      ||
33919         (mpnt->vm_ops != prev->vm_ops)      ||
33920         (mpnt->vm_flags != prev->vm_flags)  ||
33921         (prev->vm_end != mpnt->vm_start))
33922       continue;
33923 
33924     /* If we have a file or it's a shared memory area the
33925      * offsets must be contiguous..  */
33926     if ((mpnt->vm_file != NULL) ||
33927         (mpnt->vm_flags & VM_SHM)) {
33928       unsigned long off =
33929         prev->vm_offset+prev->vm_end-prev->vm_start;
33930       if (off != mpnt->vm_offset)
33931         continue;
33932     }
33933 
33934     /* merge prev with mpnt and set up pointers so the
33935      * new big segment can possibly merge with the next
33936      * one.  The old unused mpnt is freed.  */
33937     if (mm->mmap_avl)
33938       avl_remove(mpnt, &mm->mmap_avl);
33939     prev->vm_end = mpnt->vm_end;
33940     prev->vm_next = mpnt->vm_next;
33941     if (mpnt->vm_ops && mpnt->vm_ops->close) {
33942       mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
33943       mpnt->vm_start = mpnt->vm_end;
33944       mpnt->vm_ops->close(mpnt);
33945     }
33946     mm->map_count--;
33947     remove_shared_vm_struct(mpnt);
33948     if (mpnt->vm_file)
33949       fput(mpnt->vm_file);
33950     kmem_cache_free(vm_area_cachep, mpnt);
33951     mpnt = prev;
33952   }
33953   mm->mmap_cache = NULL;          /* Kill the cache. */
33954 }
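
The effect of merging can be observed from user space: split an anonymous mapping by unmapping its middle page, then map the hole again with identical protection and flags, and the kernel is free to fold the three pieces back into a single VMA. The sketch below counts the VMAs overlapping the region by scanning /proc/self/maps (assumed to be available); the exact counts may differ between kernel versions, so treat them as indicative.

/* Split an anonymous mapping, refill the hole with identical flags, and
 * count the VMAs overlapping the region before and after the refill. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static int count_vmas(unsigned long lo, unsigned long hi)
{
    FILE *f = fopen("/proc/self/maps", "r");
    char line[512];
    int n = 0;
    while (f && fgets(line, sizeof line, f)) {
        unsigned long s, e;
        if (sscanf(line, "%lx-%lx", &s, &e) == 2 && s < hi && e > lo)
            n++;
    }
    if (f) fclose(f);
    return n;
}

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
    munmap(p + page, page);                       /* split into two VMAs */
    printf("after hole:   %d vma(s)\n",
           count_vmas((unsigned long)p, (unsigned long)p + 3 * page));
    mmap(p + page, page, PROT_READ | PROT_WRITE,  /* refill the hole */
         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    printf("after refill: %d vma(s)\n",
           count_vmas((unsigned long)p, (unsigned long)p + 3 * page));
    return 0;
}
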
33955 
33956 void __init vma_init(void)
33957 {
33958   vm_area_cachep = kmem_cache_create("vm_area_struct",
33959              sizeof(struct vm_area_struct),
33960              0, SLAB_HWCACHE_ALIGN,
33961              NULL, NULL);
33962   if(!vm_area_cachep)
33963    panic("vma_init: Cannot alloc vm_area_struct cache.");
33964 
33965   mm_cachep = kmem_cache_create("mm_struct",
33966               sizeof(struct mm_struct),
33967               0, SLAB_HWCACHE_ALIGN,
33968               NULL, NULL);
33969   if(!mm_cachep)
33970     panic("vma_init: Cannot alloc mm_struct cache.");
33971 }
