mm/page_io.c

34938 /*
34939  *  linux/mm/page_io.c
34940  *
34941  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
34942  *
34943  *  Swap reorganised 29.12.95,
34944  *  Asynchronous swapping added 30.12.95. Stephen Tweedie
34945  *  Removed race in async swapping. 14.4.1996. Bruno
34946  *  Haible
34947  *  Add swap of shared pages through the page
34948  *  cache. 20.2.1998. Stephen Tweedie
34949  *  Always use brw_page, life becomes simpler. 12 May
34950  *  1998 Eric Biederman */
34951 
34952 #include <linux/mm.h>
34953 #include <linux/kernel_stat.h>
34954 #include <linux/swap.h>
34955 #include <linux/locks.h>
34956 #include <linux/swapctl.h>
34957 
34958 #include <asm/pgtable.h>
34959 
34960 static struct wait_queue * lock_queue = NULL;
34961 
34962 /* Reads or writes a swap page.
34963  * wait=1: start I/O and wait for completion.
34964  * wait=0: start asynchronous I/O.
34965  *
34966  * Important prevention of race condition: the caller
34967  * *must* atomically create a unique swap cache entry for
34968  * this swap page before calling rw_swap_page, and must
34969  * lock that page.  By ensuring that there is a single
34970  * page of memory reserved for the swap entry, the normal
34971  * VM page lock on that page also doubles as a lock on
34972  * swap entries.  Having only one lock to deal with per
34973  * swap entry (rather than locking swap and memory
34974  * independently) also makes it easier to make certain
34975  * swapping operations atomic, which is particularly
34976  * important when we are trying to ensure that shared
34977  * pages stay shared while being swapped.  */
34978 
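The comment above states the calling protocol rather than the mechanics, so a rough caller-side sketch may help. It is not part of the kernel source: swap_out_sketch() is hypothetical, and add_to_swap_cache() is assumed to be a helper along the lines of the one used elsewhere in mm/, i.e. one that sets page->inode to &swapper_inode, sets page->offset to the entry, and marks the page PG_swap_cache.

/* Illustrative sketch only (not kernel source): what a write-out path
 * must guarantee before handing a page to rw_swap_page().  */
static void swap_out_sketch(struct page *page, unsigned long entry)
{
        /* Lock the page first: once the swap-cache association is in
         * place, this one lock also serializes access to the swap entry. */
        set_bit(PG_locked, &page->flags);
        add_to_swap_cache(page, entry);   /* assumed helper: page->inode,
                                           * page->offset, PG_swap_cache  */
        rw_swap_page(WRITE, entry,
                     (char *) page_address(page), 0);   /* async write-out */
}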
34979 static void rw_swap_page_base(int rw,
34980   unsigned long entry, struct page *page, int wait)
34981 {
34982   unsigned long type, offset;
34983   struct swap_info_struct * p;
34984   int zones[PAGE_SIZE/512];
34985   int zones_used;
34986   kdev_t dev = 0;
34987   int block_size;
34988 
34989 #ifdef DEBUG_SWAP
34990   printk("DebugVM: %s_swap_page entry %08lx, "
34991          "page %p (count %d), %s\n",
34992          (rw == READ) ? "read" : "write",
34993          entry, (char *) page_address(page),
34994          atomic_read(&page->count),
34995          wait ? "wait" : "nowait");
34996 #endif
34997 
34998   type = SWP_TYPE(entry);
34999   if (type >= nr_swapfiles) {
35000     printk("Internal error: bad swap-device\n");
35001     return;
35002   }
35003 
35004   /* Don't allow too many pending pages in flight.. */
35005   if (atomic_read(&nr_async_pages) >
35006       pager_daemon.swap_cluster)
35007     wait = 1;
35008 
35009   p = &swap_info[type];
35010   offset = SWP_OFFSET(entry);
35011   if (offset >= p->max) {
35012     printk("rw_swap_page: weirdness\n");
35013     return;
35014   }
35015   if (p->swap_map && !p->swap_map[offset]) {
35016     printk(KERN_ERR "rw_swap_page: "
35017       "Trying to %s unallocated swap (%08lx)\n",
35018       (rw == READ) ? "read" : "write", entry);
35019     return;
35020   }
35021   if (!(p->flags & SWP_USED)) {
35022     printk(KERN_ERR "rw_swap_page: "
35023       "Trying to swap to unused swap-device\n");
35024     return;
35025   }
35026 
35027   if (!PageLocked(page)) {
35028     printk(KERN_ERR "VM: swap page is unlocked\n");
35029     return;
35030   }
35031 
35032   if (PageSwapCache(page)) {
35033     /* Make sure we are the only process doing I/O with
35034      * this swap page. */
35035     while (test_and_set_bit(offset,p->swap_lockmap)) {
35036       run_task_queue(&tq_disk);
35037       sleep_on(&lock_queue);
35038     }
35039 
35040     /* Make sure that we have a swap cache association
35041      * for this page.  We need this to find which swap
35042      * page to unlock once the swap IO has completed to
35043      * the physical page.  If the page is not already in
35044      * the cache, just overload the offset entry as if it
35045      * were: we are not allowed to manipulate the inode
35046      * hashing for locked pages.  */
35047     if (page->offset != entry) {
35048       printk ("swap entry mismatch");
35049       return;
35050     }
35051   }
35052   if (rw == READ) {
35053     clear_bit(PG_uptodate, &page->flags);
35054     kstat.pswpin++;
35055   } else
35056     kstat.pswpout++;
35057 
35058   atomic_inc(&page->count);
35059   if (p->swap_device) {
35060     zones[0] = offset;
35061     zones_used = 1;
35062     dev = p->swap_device;
35063     block_size = PAGE_SIZE;
35064   } else if (p->swap_file) {
35065     struct inode *swapf = p->swap_file->d_inode;
35066     int i;
35067     if (swapf->i_op->bmap == NULL
35068       && swapf->i_op->smap != NULL){
35069       /* With MS-DOS, we use msdos_smap which returns a
35070        * sector number (not a cluster or block number).
35071        * It is a patch to enable the UMSDOS project.
35072        * Other people are working on a better solution.
35073        *
35074        * It sounds like ll_rw_swap_file defined its
35075        * operation size (sector size) based on PAGE_SIZE
35076        * and the number of blocks to read.  So using bmap
35077        * or smap should work even if smap will require
35078        * more blocks.  */
35079       int j;
35080       unsigned int block = offset << 3;
35081 
35082       for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
35083         if (!(zones[i] =
35084               swapf->i_op->smap(swapf,block++))) {
35085           printk("rw_swap_page: bad swap file\n");
35086           return;
35087         }
35088       }
35089       block_size = 512;
35090     }else{
35091       int j;
35092       unsigned int block = offset
35093         << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
35094 
35095       block_size = swapf->i_sb->s_blocksize;
35096       for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
35097         if (!(zones[i] = bmap(swapf,block++))) {
35098           printk("rw_swap_page: bad swap file\n");
35099           return;
35100         }
35101       zones_used = i;
35102       dev = swapf->i_dev;
35103     }
35104   } else {
35105     printk(KERN_ERR
35106            "rw_swap_page: no swap file or device\n");
35107     /* Do some cleaning up so if this ever happens we can
35108      * hopefully trigger controlled shutdown.  */
35109     if (PageSwapCache(page)) {
35110       if (!test_and_clear_bit(offset,p->swap_lockmap))
35111         printk("swap_after_unlock_page: lock already "
35112                "cleared\n");
35113       wake_up(&lock_queue);
35114     }
35115     atomic_dec(&page->count);
35116     return;
35117   }
35118   if (!wait) {
35119     set_bit(PG_decr_after, &page->flags);
35120     atomic_inc(&nr_async_pages);
35121   }
35122   if (PageSwapCache(page)) {
35123     /* only lock/unlock swap cache pages! */
35124     set_bit(PG_swap_unlock_after, &page->flags);
35125   }
35126   set_bit(PG_free_after, &page->flags);
35127 
35128   /* block_size == PAGE_SIZE/zones_used */
35129   brw_page(rw, page, dev, zones, block_size, 0);
35130 
35131   /* Note! For consistency we do all of the logic,
35132    * decrementing the page count, and unlocking the page
35133    * in the swap lock map - in the IO completion handler.
35134    */
35135   if (!wait)
35136     return;
35137   wait_on_page(page);
35138   /* This shouldn't happen, but check to be sure. */
35139   if (atomic_read(&page->count) == 0)
35140     printk(KERN_ERR
35141            "rw_swap_page: page unused while waiting!\n");
35142 
35143 #ifdef DEBUG_SWAP
35144   printk("DebugVM: %s_swap_page finished on page %p "
35145          "(count %d)\n",
35146          (rw == READ) ? "read" : "write",
35147          (char *) page_address(page),
35148          atomic_read(&page->count));
35149 #endif
35150 }
35151 
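rw_swap_page_base() boils the swap entry down to a device plus an array of block numbers ("zones"). For a swap partition there is a single page-sized zone at the swap offset; for a swap file each page-sized slot covers PAGE_SIZE/block_size filesystem blocks, starting at file block offset << (PAGE_SHIFT - s_blocksize_bits), with bmap() translating each one to a device block (the UMSDOS smap path works in 512-byte sectors instead). The stand-alone sketch below just reproduces that arithmetic with assumed values (4 KB pages, 1 KB filesystem blocks); it is illustrative, not kernel code.

/* zones_demo.c -- user-space sketch of the swap-file zone arithmetic.
 * PAGE_SHIFT and the block size are assumptions of this sketch. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long offset = 37;            /* swap slot within the swap file */
        unsigned int  s_blocksize_bits = 10;  /* 1 KB filesystem blocks */
        unsigned int  block_size = 1u << s_blocksize_bits;
        unsigned int  block = offset << (PAGE_SHIFT - s_blocksize_bits);
        unsigned int  zones_used = PAGE_SIZE / block_size;

        /* Each page-sized slot maps to PAGE_SIZE/block_size consecutive
         * file blocks; bmap() would translate each of them to a device block. */
        printf("slot %lu -> file blocks %u..%u (%u zones of %u bytes)\n",
               offset, block, block + zones_used - 1, zones_used, block_size);
        return 0;
}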
35152 /* Note: We could remove this totally asynchronous
35153  * function, and improve swap performance, and remove the
35154  * need for the swap lock map, by not removing pages from
35155  * the swap cache until after I/O has been processed and
35156  * letting remove_from_page_cache decrement the swap
35157  * count just before it removes the page from the page
35158  * cache.  */
35159 /* This is run when asynchronous page I/O has
35160  * completed. */
35161 void swap_after_unlock_page (unsigned long entry)
35162 {
35163   unsigned long type, offset;
35164   struct swap_info_struct * p;
35165 
35166   type = SWP_TYPE(entry);
35167   if (type >= nr_swapfiles) {
35168     printk("swap_after_unlock_page: bad swap-device\n");
35169     return;
35170   }
35171   p = &swap_info[type];
35172   offset = SWP_OFFSET(entry);
35173   if (offset >= p->max) {
35174     printk("swap_after_unlock_page: weirdness\n");
35175     return;
35176   }
35177   if (!test_and_clear_bit(offset,p->swap_lockmap))
35178     printk("swap_after_unlock_page: "
35179            "lock already cleared\n");
35180   wake_up(&lock_queue);
35181 }
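Both rw_swap_page_base() and swap_after_unlock_page() take the swap entry apart with SWP_TYPE() and SWP_OFFSET() from asm/pgtable.h. The small user-space demo below mimics the i386 packing of this kernel generation (swap type in bits 1..6, offset from bit 8 upward) just to make the decoding concrete; treat the exact bit layout as illustrative rather than authoritative.

/* swp_entry_demo.c -- sketch of the swap-entry packing undone by
 * SWP_TYPE()/SWP_OFFSET().  Bit layout follows the i386 headers of this
 * kernel generation; assume it is illustrative only. */
#include <stdio.h>

#define SWP_ENTRY(type, offset) (((type) << 1) | ((offset) << 8))
#define SWP_TYPE(entry)         (((entry) >> 1) & 0x3f)
#define SWP_OFFSET(entry)       ((entry) >> 8)

int main(void)
{
        unsigned long entry = SWP_ENTRY(2UL, 1234UL);  /* swap area 2, slot 1234 */

        printf("entry %08lx -> type %lu, offset %lu\n",
               entry, SWP_TYPE(entry), SWP_OFFSET(entry));
        return 0;
}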
35182 
35183 /* A simple wrapper so the base function doesn't need to
35184  * enforce that all swap pages go through the swap cache!
35185  */
35186 void rw_swap_page(int rw, unsigned long entry, char *buf,
35187                   int wait)
35188 {
35189   struct page *page = mem_map + MAP_NR(buf);
35190 
35191   if (page->inode && page->inode != &swapper_inode)
35192     panic("Tried to swap a non-swapper page");
35193 
35194   /* Make sure that we have a swap cache association for
35195    * this page.  We need this to find which swap page to
35196    * unlock once the swap IO has completed to the
35197    * physical page.  If the page is not already in the
35198    * cache, just overload the offset entry as if it were:
35199    * we are not allowed to manipulate the inode hashing
35200    * for locked pages.  */
35201   if (!PageSwapCache(page)) {
35202     printk("VM: swap page is not in swap cache\n");
35203     return;
35204   }
35205   if (page->offset != entry) {
35206     printk ("swap entry mismatch");
35207     return;
35208   }
35209   rw_swap_page_base(rw, entry, page, wait);
35210 }
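The wrapper receives a kernel virtual address (buf) and recovers the corresponding struct page by indexing mem_map with the page frame number, which is what MAP_NR() computes. A minimal user-space sketch of that arithmetic follows; the PAGE_OFFSET and PAGE_SHIFT values are the usual i386 ones and are assumptions of the sketch.

/* map_nr_demo.c -- sketch of the buf -> struct page translation done by
 * "mem_map + MAP_NR(buf)". */
#include <stdio.h>

#define PAGE_SHIFT   12
#define PAGE_OFFSET  0xC0000000UL
#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)

int main(void)
{
        unsigned long buf = 0xC1234000UL;   /* kernel virtual address of the page */

        printf("buf %08lx -> mem_map index (page frame number) %lu\n",
               buf, MAP_NR(buf));
        return 0;
}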
35211 
35212 /* Setting up a new swap file needs a simple wrapper just
35213  * to read the swap signature.  SysV shared memory also
35214  * needs a simple wrapper.  */
35215 void rw_swap_page_nocache(int rw,
35216   unsigned long entry, char *buffer)
35217 {
35218   struct page *page;
35219 
35220   page = mem_map + MAP_NR((unsigned long) buffer);
35221   wait_on_page(page);
35222   set_bit(PG_locked, &page->flags);
35223   if (test_and_set_bit(PG_swap_cache, &page->flags)) {
35224     printk("VM: read_swap_page: "
35225            "page already in swap cache!\n");
35226     return;
35227   }
35228   if (page->inode) {
35229     printk ("VM: read_swap_page: "
35230             "page already in page cache!\n");
35231     return;
35232   }
35233   page->inode = &swapper_inode;
35234   page->offset = entry;
35235   /* Protect from shrink_mmap() */
35236   atomic_inc(&page->count);
35237   rw_swap_page(rw, entry, buffer, 1);
35238   atomic_dec(&page->count);
35239   page->inode = 0;
35240   clear_bit(PG_swap_cache, &page->flags);
35241 }
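As the comment above rw_swap_page_nocache() notes, this entry point exists mainly so that the swapon path can read the swap signature from slot 0 of a new swap area. An illustrative fragment of such a caller (a sketch, not taken verbatim from mm/swapfile.c) might look like this:

/* Illustrative sketch only: reading the header page of swap area 'type'
 * so its signature ("SWAP-SPACE"/"SWAPSPACE2", stored near the end of
 * the first page) can be checked. */
static int read_swap_header_sketch(unsigned int type)
{
        unsigned long buf = get_free_page(GFP_USER);

        if (!buf)
                return -ENOMEM;
        rw_swap_page_nocache(READ, SWP_ENTRY(type, 0), (char *) buf);
        /* ... inspect the magic at the end of the page here ... */
        free_page(buf);
        return 0;
}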
35242 
35243 /* shmfs needs a version that doesn't put the page in the
35244  * page cache!  The swap lock map insists that pages be
35245  * in the page cache!  Therefore we can't use it.  Later,
35246  * when we can remove the need for the lock map, we
35247  * can reduce the number of functions exported.  */
35248 void rw_swap_page_nolock(int rw, unsigned long entry,
35249                          char *buffer, int wait)
35250 {
35251   struct page *page =
35252     mem_map + MAP_NR((unsigned long) buffer);
35253 
35254   if (!PageLocked(page)) {
35255     printk("VM: rw_swap_page_nolock: "
35256            "page not locked!\n");
35257     return;
35258   }
35259   if (PageSwapCache(page)) {
35260     printk("VM: rw_swap_page_nolock: "
35261            "page in swap cache!\n");
35262     return;
35263   }
35264   rw_swap_page_base(rw, entry, page, wait);
35265 }
