mm/page_io.c
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/locks.h>
#include <linux/swapctl.h>

#include <asm/pgtable.h>

static struct wait_queue * lock_queue = NULL;

/*
 * Reads or writes a swap page.
 * wait=1: start I/O and wait for completion.
 * wait=0: start asynchronous I/O.
 *
 * Important prevention of race condition: the caller *must*
 * atomically create a unique swap cache entry for this swap page
 * before calling rw_swap_page, and must lock that page.  By
 * ensuring that there is a single page of memory reserved for the
 * swap entry, the normal VM page lock on that page also doubles as
 * a lock on swap entries.  Having only one lock to deal with per
 * swap entry (rather than locking swap and memory independently)
 * also makes it easier to make certain swapping operations atomic,
 * which is particularly important when we are trying to ensure
 * that shared pages stay shared while being swapped.
 */

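/*
 * A minimal sketch of the calling sequence this implies, assuming
 * the usual 2.2-era helpers (get_swap_page, add_to_swap_cache);
 * illustrative only, not a verbatim caller:
 *
 *      entry = get_swap_page();
 *      add_to_swap_cache(page, entry);
 *      set_bit(PG_locked, &page->flags);
 *      rw_swap_page(WRITE, entry, (char *) page_address(page), 0);
 */
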
static void rw_swap_page_base(int rw, unsigned long entry,
                              struct page *page, int wait)
{
        unsigned long type, offset;
        struct swap_info_struct * p;
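        /* One device block number per sub-page block; sized for the
         * worst case of 512-byte blocks (PAGE_SIZE/512 entries). */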
        int zones[PAGE_SIZE/512];
        int zones_used;
        kdev_t dev = 0;
        int block_size;

#ifdef DEBUG_SWAP
        printk("DebugVM: %s_swap_page entry %08lx, "
               "page %p (count %d), %s\n",
               (rw == READ) ? "read" : "write",
               entry, (char *) page_address(page),
               atomic_read(&page->count),
               wait ? "wait" : "nowait");
#endif

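        /* A swap entry packs the swap area index (SWP_TYPE) together
         * with the page-sized slot within that area (SWP_OFFSET). */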
        type = SWP_TYPE(entry);
        if (type >= nr_swapfiles) {
                printk("Internal error: bad swap-device\n");
                return;
        }

        /* Don't allow too many pending pages in flight.. */
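        /* nr_async_pages counts pages whose swap I/O has been
         * started but not yet completed; past the pager daemon's
         * cluster size, fall back to synchronous I/O so the backlog
         * stays bounded. */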
        if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
                wait = 1;

        p = &swap_info[type];
        offset = SWP_OFFSET(entry);
        if (offset >= p->max) {
                printk("rw_swap_page: weirdness\n");
                return;
        }
        if (p->swap_map && !p->swap_map[offset]) {
                printk(KERN_ERR "rw_swap_page: "
                       "Trying to %s unallocated swap (%08lx)\n",
                       (rw == READ) ? "read" : "write", entry);
                return;
        }
        if (!(p->flags & SWP_USED)) {
                printk(KERN_ERR "rw_swap_page: "
                       "Trying to swap to unused swap-device\n");
                return;
        }

        if (!PageLocked(page)) {
                printk(KERN_ERR "VM: swap page is unlocked\n");
                return;
        }

        if (PageSwapCache(page)) {
                /* Make sure we are the only process doing I/O with
                 * this swap page. */
                while (test_and_set_bit(offset, p->swap_lockmap)) {
                        run_task_queue(&tq_disk);
                        sleep_on(&lock_queue);
                }
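                /* The lockmap bit is cleared again by
                 * swap_after_unlock_page() on the I/O completion
                 * path; kicking tq_disk before sleeping gives the
                 * current holder's I/O a chance to finish. */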

                /* Make sure that we have a swap cache association
                 * for this page.  We need this to find which swap
                 * page to unlock once the swap IO has completed to
                 * the physical page.  If the page is not already in
                 * the cache, just overload the offset entry as if it
                 * were: we are not allowed to manipulate the inode
                 * hashing for locked pages. */
                if (page->offset != entry) {
                        printk("swap entry mismatch");
                        return;
                }
        }
        if (rw == READ) {
                clear_bit(PG_uptodate, &page->flags);
                kstat.pswpin++;
        } else
                kstat.pswpout++;

        atomic_inc(&page->count);
        if (p->swap_device) {
                zones[0] = offset;
                zones_used = 1;
                dev = p->swap_device;
                block_size = PAGE_SIZE;
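                /* Swap partition: the entry's offset is already the
                 * block number on the device, and the whole page is
                 * transferred as a single PAGE_SIZE block. */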
        } else if (p->swap_file) {
                struct inode *swapf = p->swap_file->d_inode;
                int i;
                if (swapf->i_op->bmap == NULL
                    && swapf->i_op->smap != NULL) {
                        /* With MS-DOS, we use msdos_smap which
                         * returns a sector number (not a cluster or
                         * block number).  It is a patch to enable
                         * the UMSDOS project.  Other people are
                         * working on a better solution.
                         *
                         * It sounds like ll_rw_swap_file defined its
                         * operation size (sector size) based on
                         * PAGE_SIZE and the number of blocks to
                         * read.  So using bmap or smap should work
                         * even if smap will require more blocks. */
                        int j;
                        unsigned int block = offset << 3;
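                        /* The hard-coded shift of 3 assumes eight
                         * 512-byte sectors per page, i.e. 4KB pages;
                         * in general this would be PAGE_SHIFT - 9. */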

                        for (i = 0, j = 0; j < PAGE_SIZE; i++, j += 512) {
                                if (!(zones[i] =
                                      swapf->i_op->smap(swapf, block++))) {
                                        printk("rw_swap_page: bad swap file\n");
                                        return;
                                }
                        }
                        block_size = 512;
                } else {
                        int j;
                        unsigned int block = offset
                                << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
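                        /* e.g. 4KB pages on a 1KB-blocksize
                         * filesystem: offset << 2, four file blocks
                         * per page, each mapped to a device block by
                         * bmap() below. */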

                        block_size = swapf->i_sb->s_blocksize;
                        for (i = 0, j = 0; j < PAGE_SIZE; i++, j += block_size)
                                if (!(zones[i] = bmap(swapf, block++))) {
                                        printk("rw_swap_page: bad swap file\n");
                                        return;
                                }
                        zones_used = i;
                        dev = swapf->i_dev;
                }
        } else {
                printk(KERN_ERR
                       "rw_swap_page: no swap file or device\n");
                /* Do some cleaning up so if this ever happens we can
                 * hopefully trigger controlled shutdown. */
                if (PageSwapCache(page)) {
                        if (!test_and_clear_bit(offset, p->swap_lockmap))
                                printk("swap_after_unlock_page: lock already "
                                       "cleared\n");
                        wake_up(&lock_queue);
                }
                atomic_dec(&page->count);
                return;
        }
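
        /* The PG_*_after bits are instructions to the I/O completion
         * handler behind brw_page() (fs/buffer.c): once the last
         * block of the page completes, it decrements nr_async_pages,
         * calls swap_after_unlock_page() and drops the page
         * reference taken above. */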
        if (!wait) {
                set_bit(PG_decr_after, &page->flags);
                atomic_inc(&nr_async_pages);
        }
        if (PageSwapCache(page)) {
                /* only lock/unlock swap cache pages! */
                set_bit(PG_swap_unlock_after, &page->flags);
        }
        set_bit(PG_free_after, &page->flags);

        /* block_size == PAGE_SIZE/zones_used */
        brw_page(rw, page, dev, zones, block_size, 0);

        /* Note! For consistency we do all of the logic, decrementing
         * the page count, and unlocking the page in the swap lock
         * map - in the IO completion handler. */
        if (!wait)
                return;
        wait_on_page(page);
        /* This shouldn't happen, but check to be sure. */
        if (atomic_read(&page->count) == 0)
                printk(KERN_ERR
                       "rw_swap_page: page unused while waiting!\n");

#ifdef DEBUG_SWAP
        printk("DebugVM: %s_swap_page finished on page %p "
               "(count %d)\n",
               (rw == READ) ? "read" : "write",
               (char *) page_address(page),
               atomic_read(&page->count));
#endif
}

/* Note: We could remove this totally asynchronous function, improve
 * swap performance, and remove the need for the swap lock map, by
 * not removing pages from the swap cache until after I/O has been
 * processed and letting remove_from_page_cache decrement the swap
 * count just before it removes the page from the page cache. */
/* This is run when asynchronous page I/O has completed. */
void swap_after_unlock_page(unsigned long entry)
{
        unsigned long type, offset;
        struct swap_info_struct * p;

        type = SWP_TYPE(entry);
        if (type >= nr_swapfiles) {
                printk("swap_after_unlock_page: bad swap-device\n");
                return;
        }
        p = &swap_info[type];
        offset = SWP_OFFSET(entry);
        if (offset >= p->max) {
                printk("swap_after_unlock_page: weirdness\n");
                return;
        }
        if (!test_and_clear_bit(offset, p->swap_lockmap))
                printk("swap_after_unlock_page: "
                       "lock already cleared\n");
        wake_up(&lock_queue);
}

/* A simple wrapper so the base function doesn't need to enforce
 * that all swap pages go through the swap cache! */
void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
{
        struct page *page = mem_map + MAP_NR(buf);

        if (page->inode && page->inode != &swapper_inode)
                panic("Tried to swap a non-swapper page");

        /* Make sure that we have a swap cache association for this
         * page.  We need this to find which swap page to unlock once
         * the swap IO has completed to the physical page.  If the
         * page is not already in the cache, just overload the offset
         * entry as if it were: we are not allowed to manipulate the
         * inode hashing for locked pages. */
        if (!PageSwapCache(page)) {
                printk("VM: swap page is not in swap cache\n");
                return;
        }
        if (page->offset != entry) {
                printk("swap entry mismatch");
                return;
        }
        rw_swap_page_base(rw, entry, page, wait);
}

/* Setting up a new swap file needs a simple wrapper just to read
 * the swap signature.  SysV shared memory also needs a simple
 * wrapper. */
void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
{
        struct page *page;

        page = mem_map + MAP_NR((unsigned long) buffer);
        wait_on_page(page);
        set_bit(PG_locked, &page->flags);
        if (test_and_set_bit(PG_swap_cache, &page->flags)) {
                printk("VM: read_swap_page: "
                       "page already in swap cache!\n");
                return;
        }
        if (page->inode) {
                printk("VM: read_swap_page: "
                       "page already in page cache!\n");
                return;
        }
        page->inode = &swapper_inode;
        page->offset = entry;
        /* Protect from shrink_mmap() */
        atomic_inc(&page->count);
        rw_swap_page(rw, entry, buffer, 1);
        atomic_dec(&page->count);
        page->inode = 0;
        clear_bit(PG_swap_cache, &page->flags);
}
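
/*
 * A minimal usage sketch, assuming the 2.2-era swapon path (the
 * exact call site lives in mm/swapfile.c): the swap signature is
 * read into a private page-sized buffer with something like
 *
 *      rw_swap_page_nocache(READ, SWP_ENTRY(type, 0), (char *) swap_header);
 *
 * The function above temporarily dresses that page up as a swap
 * cache page (swapper_inode + offset) so rw_swap_page's checks
 * pass, then undoes the disguise.
 */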

/* shmfs needs a version that doesn't put the page in the page
 * cache!  The swap lock map insists that pages be in the page
 * cache!  Therefore we can't use it.  Later, when we can remove
 * the need for the lock map, we can reduce the number of functions
 * exported. */
void rw_swap_page_nolock(int rw, unsigned long entry,
                         char *buffer, int wait)
{
        struct page *page =
                mem_map + MAP_NR((unsigned long) buffer);

        if (!PageLocked(page)) {
                printk("VM: rw_swap_page_nolock: "
                       "page not locked!\n");
                return;
        }
        if (PageSwapCache(page)) {
                printk("VM: rw_swap_page_nolock: "
                       "page in swap cache!\n");
                return;
        }
        rw_swap_page_base(rw, entry, page, wait);
}