/* * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ /* */ /* * File: vm/vm_pageout.c * Author: Avadis Tevanian, Jr., Michael Wayne Young * Date: 1985 * * The proverbial page-out daemon. */ #ifdef MACH_BSD /* remove after component merge */ extern int vnode_pager_workaround; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern ipc_port_t memory_manager_default; #ifndef VM_PAGE_LAUNDRY_MAX #define VM_PAGE_LAUNDRY_MAX 10 /* outstanding DMM page cleans */ #endif /* VM_PAGEOUT_LAUNDRY_MAX */ #ifndef VM_PAGEOUT_BURST_MAX #define VM_PAGEOUT_BURST_MAX 32 /* simultaneous EMM page cleans */ #endif /* VM_PAGEOUT_BURST_MAX */ #ifndef VM_PAGEOUT_DISCARD_MAX #define VM_PAGEOUT_DISCARD_MAX 68 /* simultaneous EMM page cleans */ #endif /* VM_PAGEOUT_DISCARD_MAX */ #ifndef VM_PAGEOUT_BURST_WAIT #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */ #endif /* VM_PAGEOUT_BURST_WAIT */ #ifndef VM_PAGEOUT_EMPTY_WAIT #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */ #endif /* VM_PAGEOUT_EMPTY_WAIT */ /* * To obtain a reasonable LRU approximation, the inactive queue * needs to be large enough to give pages on it a chance to be * referenced a second time. This macro defines the fraction * of active+inactive pages that should be inactive. * The pageout daemon uses it to update vm_page_inactive_target. * * If vm_page_free_count falls below vm_page_free_target and * vm_page_inactive_count is below vm_page_inactive_target, * then the pageout daemon starts running. */ #ifndef VM_PAGE_INACTIVE_TARGET #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3) #endif /* VM_PAGE_INACTIVE_TARGET */ /* * Once the pageout daemon starts running, it keeps going * until vm_page_free_count meets or exceeds vm_page_free_target. */ #ifndef VM_PAGE_FREE_TARGET #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80) #endif /* VM_PAGE_FREE_TARGET */ /* * The pageout daemon always starts running once vm_page_free_count * falls below vm_page_free_min. */ #ifndef VM_PAGE_FREE_MIN #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) #endif /* VM_PAGE_FREE_MIN */ /* * When vm_page_free_count falls below vm_page_free_reserved, * only vm-privileged threads can allocate pages. vm-privilege * allows the pageout daemon and default pager (and any other * associated threads needed for default pageout) to continue * operation by dipping into the reserved pool of pages. */ #ifndef VM_PAGE_FREE_RESERVED #define VM_PAGE_FREE_RESERVED \ ((8 * VM_PAGE_LAUNDRY_MAX) + NCPUS) #endif /* VM_PAGE_FREE_RESERVED */ /* * Forward declarations for internal routines. */ extern void vm_pageout_continue(void); extern void vm_pageout_scan(void); extern void vm_pageout_throttle(vm_page_t m); extern vm_page_t vm_pageout_cluster_page( vm_object_t object, vm_object_offset_t offset, boolean_t precious_clean); unsigned int vm_pageout_reserved_internal = 0; unsigned int vm_pageout_reserved_really = 0; unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */ unsigned int vm_page_laundry_min = 0; unsigned int vm_pageout_burst_max = 0; unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */ unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ unsigned int vm_pageout_burst_min = 0; unsigned int vm_pageout_pause_count = 0; unsigned int vm_pageout_pause_max = 0; unsigned int vm_free_page_pause = 100; /* milliseconds */ /* * These variables record the pageout daemon's actions: * how many pages it looks at and what happens to those pages. * No locking needed because only one thread modifies the variables. */ unsigned int vm_pageout_active = 0; /* debugging */ unsigned int vm_pageout_inactive = 0; /* debugging */ unsigned int vm_pageout_inactive_throttled = 0; /* debugging */ unsigned int vm_pageout_inactive_forced = 0; /* debugging */ unsigned int vm_pageout_inactive_nolock = 0; /* debugging */ unsigned int vm_pageout_inactive_avoid = 0; /* debugging */ unsigned int vm_pageout_inactive_busy = 0; /* debugging */ unsigned int vm_pageout_inactive_absent = 0; /* debugging */ unsigned int vm_pageout_inactive_used = 0; /* debugging */ unsigned int vm_pageout_inactive_clean = 0; /* debugging */ unsigned int vm_pageout_inactive_dirty = 0; /* debugging */ unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ unsigned int vm_pageout_inactive_pinned = 0; /* debugging */ unsigned int vm_pageout_inactive_limbo = 0; /* debugging */ unsigned int vm_pageout_setup_limbo = 0; /* debugging */ unsigned int vm_pageout_setup_unprepped = 0; /* debugging */ unsigned int vm_stat_discard = 0; /* debugging */ unsigned int vm_stat_discard_sent = 0; /* debugging */ unsigned int vm_stat_discard_failure = 0; /* debugging */ unsigned int vm_stat_discard_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */ unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */ unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */ unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */ unsigned int vm_pageout_out_of_line = 0; unsigned int vm_pageout_in_place = 0; /* * Routine: vm_pageout_object_allocate * Purpose: * Allocate an object for use as out-of-line memory in a * data_return/data_initialize message. * The page must be in an unlocked object. * * If the page belongs to a trusted pager, cleaning in place * will be used, which utilizes a special "pageout object" * containing private alias pages for the real page frames. * Untrusted pagers use normal out-of-line memory. */ vm_object_t vm_pageout_object_allocate( vm_page_t m, vm_size_t size, vm_object_offset_t offset) { vm_object_t object = m->object; vm_object_t new_object; assert(object->pager_ready); if (object->pager_trusted || object->internal) vm_pageout_throttle(m); new_object = vm_object_allocate(size); if (object->pager_trusted) { assert (offset < object->size); vm_object_lock(new_object); new_object->pageout = TRUE; new_object->shadow = object; new_object->can_persist = FALSE; new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; new_object->shadow_offset = offset; vm_object_unlock(new_object); /* * Take a paging reference on the object. This will be dropped * in vm_pageout_object_terminate() */ vm_object_lock(object); vm_object_paging_begin(object); vm_object_unlock(object); vm_pageout_in_place++; } else vm_pageout_out_of_line++; return(new_object); } #if MACH_CLUSTER_STATS unsigned long vm_pageout_cluster_dirtied = 0; unsigned long vm_pageout_cluster_cleaned = 0; unsigned long vm_pageout_cluster_collisions = 0; unsigned long vm_pageout_cluster_clusters = 0; unsigned long vm_pageout_cluster_conversions = 0; unsigned long vm_pageout_target_collisions = 0; unsigned long vm_pageout_target_page_dirtied = 0; unsigned long vm_pageout_target_page_freed = 0; unsigned long vm_pageout_target_page_pinned = 0; unsigned long vm_pageout_target_page_limbo = 0; #define CLUSTER_STAT(clause) clause #else /* MACH_CLUSTER_STATS */ #define CLUSTER_STAT(clause) #endif /* MACH_CLUSTER_STATS */ /* * Routine: vm_pageout_object_terminate * Purpose: * Destroy the pageout_object allocated by * vm_pageout_object_allocate(), and perform all of the * required cleanup actions. * * In/Out conditions: * The object must be locked, and will be returned locked. */ void vm_pageout_object_terminate( vm_object_t object) { vm_object_t shadow_object; /* * Deal with the deallocation (last reference) of a pageout object * (used for cleaning-in-place) by dropping the paging references/ * freeing pages in the original object. */ assert(object->pageout); shadow_object = object->shadow; vm_object_lock(shadow_object); while (!queue_empty(&object->memq)) { vm_page_t p, m; vm_object_offset_t offset; p = (vm_page_t) queue_first(&object->memq); assert(p->private); assert(p->pageout); p->pageout = FALSE; assert(!p->cleaning); offset = p->offset; VM_PAGE_FREE(p); p = VM_PAGE_NULL; m = vm_page_lookup(shadow_object, offset + object->shadow_offset); if(m == VM_PAGE_NULL) continue; assert(m->cleaning); /* * Account for the paging reference taken when * m->cleaning was set on this page. */ vm_object_paging_end(shadow_object); assert((m->dirty) || (m->precious) || (m->busy && m->cleaning)); /* * Handle the trusted pager throttle. */ vm_page_lock_queues(); if (m->laundry) { vm_page_laundry_count--; m->laundry = FALSE; if (vm_page_laundry_count < vm_page_laundry_min) { vm_page_laundry_min = 0; thread_wakeup((event_t) &vm_page_laundry_count); } } /* * Handle the "target" page(s). These pages are to be freed if * successfully cleaned. Target pages are always busy, and are * wired exactly once. The initial target pages are not mapped, * (so cannot be referenced or modified) but converted target * pages may have been modified between the selection as an * adjacent page and conversion to a target. */ if (m->pageout) { assert(m->busy); assert(m->wire_count == 1); m->cleaning = FALSE; m->pageout = FALSE; #if MACH_CLUSTER_STATS if (m->wanted) vm_pageout_target_collisions++; #endif /* * Revoke all access to the page. Since the object is * locked, and the page is busy, this prevents the page * from being dirtied after the pmap_is_modified() call * returns. */ pmap_page_protect(m->phys_addr, VM_PROT_NONE); /* * Since the page is left "dirty" but "not modifed", we * can detect whether the page was redirtied during * pageout by checking the modify state. */ m->dirty = pmap_is_modified(m->phys_addr); if (m->dirty) { CLUSTER_STAT(vm_pageout_target_page_dirtied++;) vm_page_unwire(m);/* reactivates */ VM_STAT(reactivations++); PAGE_WAKEUP_DONE(m); } else if (m->prep_pin_count != 0) { vm_page_pin_lock(); if (m->pin_count != 0) { /* page is pinned; reactivate */ CLUSTER_STAT( vm_pageout_target_page_pinned++;) vm_page_unwire(m);/* reactivates */ VM_STAT(reactivations++); PAGE_WAKEUP_DONE(m); } else { /* * page is prepped but not pinned; send * it into limbo. Note that * vm_page_free (which will be called * after releasing the pin lock) knows * how to handle a page with limbo set. */ m->limbo = TRUE; CLUSTER_STAT( vm_pageout_target_page_limbo++;) } vm_page_pin_unlock(); if (m->limbo) vm_page_free(m); } else { CLUSTER_STAT(vm_pageout_target_page_freed++;) vm_page_free(m);/* clears busy, etc. */ } vm_page_unlock_queues(); continue; } /* * Handle the "adjacent" pages. These pages were cleaned in * place, and should be left alone. * If prep_pin_count is nonzero, then someone is using the * page, so make it active. */ if (!m->active && !m->inactive) { if (m->reference || m->prep_pin_count != 0) vm_page_activate(m); else vm_page_deactivate(m); } if((m->busy) && (m->cleaning)) { /* the request_page_list case, (COPY_OUT_FROM FALSE) */ m->busy = FALSE; /* We do not re-set m->dirty ! */ /* The page was busy so no extraneous activity */ /* could have occured. COPY_INTO is a read into the */ /* new pages. CLEAN_IN_PLACE does actually write */ /* out the pages but handling outside of this code */ /* will take care of resetting dirty. We clear the */ /* modify however for the Programmed I/O case. */ pmap_clear_modify(m->phys_addr); if(m->absent) { m->absent = FALSE; if(shadow_object->absent_count == 1) vm_object_absent_release(shadow_object); else shadow_object->absent_count--; } m->overwriting = FALSE; } else if (m->overwriting) { /* alternate request page list, write to page_list */ /* case. Occurs when the original page was wired */ /* at the time of the list request */ assert(m->wire_count != 0); vm_page_unwire(m);/* reactivates */ m->overwriting = FALSE; } else { /* * Set the dirty state according to whether or not the page was * modified during the pageout. Note that we purposefully do * NOT call pmap_clear_modify since the page is still mapped. * If the page were to be dirtied between the 2 calls, this * this fact would be lost. This code is only necessary to * maintain statistics, since the pmap module is always * consulted if m->dirty is false. */ #if MACH_CLUSTER_STATS m->dirty = pmap_is_modified(m->phys_addr); if (m->dirty) vm_pageout_cluster_dirtied++; else vm_pageout_cluster_cleaned++; if (m->wanted) vm_pageout_cluster_collisions++; #else m->dirty = 0; #endif } m->cleaning = FALSE; /* * Wakeup any thread waiting for the page to be un-cleaning. */ PAGE_WAKEUP(m); vm_page_unlock_queues(); } /* * Account for the paging reference taken in vm_paging_object_allocate. */ vm_object_paging_end(shadow_object); vm_object_unlock(shadow_object); assert(object->ref_count == 0); assert(object->paging_in_progress == 0); assert(object->resident_page_count == 0); return; } /* * Routine: vm_pageout_setup * Purpose: * Set up a page for pageout (clean & flush). * * Move the page to a new object, as part of which it will be * sent to its memory manager in a memory_object_data_write or * memory_object_initialize message. * * The "new_object" and "new_offset" arguments * indicate where the page should be moved. * * In/Out conditions: * The page in question must not be on any pageout queues, * and must be busy. The object to which it belongs * must be unlocked, and the caller must hold a paging * reference to it. The new_object must not be locked. * * This routine returns a pointer to a place-holder page, * inserted at the same offset, to block out-of-order * requests for the page. The place-holder page must * be freed after the data_write or initialize message * has been sent. * * The original page is put on a paging queue and marked * not busy on exit. */ vm_page_t vm_pageout_setup( register vm_page_t m, register vm_object_t new_object, vm_object_offset_t new_offset) { register vm_object_t old_object = m->object; vm_object_offset_t paging_offset; vm_object_offset_t offset; register vm_page_t holding_page; register vm_page_t new_m; register vm_page_t new_page; boolean_t need_to_wire = FALSE; XPR(XPR_VM_PAGEOUT, "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n", (integer_t)m->object, (integer_t)m->offset, (integer_t)m, (integer_t)new_object, (integer_t)new_offset); assert(m && m->busy && !m->absent && !m->fictitious && !m->error && !m->restart); assert(m->dirty || m->precious); /* * Create a place-holder page where the old one was, to prevent * attempted pageins of this page while we're unlocked. * If the pageout daemon put this page in limbo and we're not * going to clean in place, get another fictitious page to * exchange for it now. */ VM_PAGE_GRAB_FICTITIOUS(holding_page); if (m->limbo) VM_PAGE_GRAB_FICTITIOUS(new_page); vm_object_lock(old_object); offset = m->offset; paging_offset = offset + old_object->paging_offset; if (old_object->pager_trusted) { /* * This pager is trusted, so we can clean this page * in place. Leave it in the old object, and mark it * cleaning & pageout. */ new_m = holding_page; holding_page = VM_PAGE_NULL; /* * If the pageout daemon put this page in limbo, exchange the * identities of the limbo page and the new fictitious page, * and continue with the new page, unless the prep count has * gone to zero in the meantime (which means no one is * interested in the page any more). In that case, just clear * the limbo bit and free the extra fictitious page. */ if (m->limbo) { if (m->prep_pin_count == 0) { /* page doesn't have to be in limbo any more */ m->limbo = FALSE; vm_page_lock_queues(); vm_page_free(new_page); vm_page_unlock_queues(); vm_pageout_setup_unprepped++; } else { vm_page_lock_queues(); VM_PAGE_QUEUES_REMOVE(m); vm_page_remove(m); vm_page_limbo_exchange(m, new_page); vm_pageout_setup_limbo++; vm_page_release_limbo(m); m = new_page; vm_page_insert(m, old_object, offset); vm_page_unlock_queues(); } } /* * Set up new page to be private shadow of real page. */ new_m->phys_addr = m->phys_addr; new_m->fictitious = FALSE; new_m->private = TRUE; new_m->pageout = TRUE; /* * Mark real page as cleaning (indicating that we hold a * paging reference to be released via m_o_d_r_c) and * pageout (indicating that the page should be freed * when the pageout completes). */ pmap_clear_modify(m->phys_addr); vm_page_lock_queues(); vm_page_wire(new_m); m->cleaning = TRUE; m->pageout = TRUE; vm_page_wire(m); assert(m->wire_count == 1); vm_page_unlock_queues(); m->dirty = TRUE; m->precious = FALSE; m->page_lock = VM_PROT_NONE; m->unusual = FALSE; m->unlock_request = VM_PROT_NONE; } else { /* * Cannot clean in place, so rip the old page out of the * object, and stick the holding page in. Set new_m to the * page in the new object. */ vm_page_lock_queues(); VM_PAGE_QUEUES_REMOVE(m); vm_page_remove(m); /* * If the pageout daemon put this page in limbo, exchange the * identities of the limbo page and the new fictitious page, * and continue with the new page, unless the prep count has * gone to zero in the meantime (which means no one is * interested in the page any more). In that case, just clear * the limbo bit and free the extra fictitious page. */ if (m->limbo) { if (m->prep_pin_count == 0) { /* page doesn't have to be in limbo any more */ m->limbo = FALSE; vm_page_free(new_page); vm_pageout_setup_unprepped++; } else { vm_page_limbo_exchange(m, new_page); vm_pageout_setup_limbo++; vm_page_release_limbo(m); m = new_page; } } vm_page_insert(holding_page, old_object, offset); vm_page_unlock_queues(); m->dirty = TRUE; m->precious = FALSE; new_m = m; new_m->page_lock = VM_PROT_NONE; new_m->unlock_request = VM_PROT_NONE; if (old_object->internal) need_to_wire = TRUE; } /* * Record that this page has been written out */ #if MACH_PAGEMAP vm_external_state_set(old_object->existence_map, offset); #endif /* MACH_PAGEMAP */ vm_object_unlock(old_object); vm_object_lock(new_object); /* * Put the page into the new object. If it is a not wired * (if it's the real page) it will be activated. */ vm_page_lock_queues(); vm_page_insert(new_m, new_object, new_offset); if (need_to_wire) vm_page_wire(new_m); else vm_page_activate(new_m); PAGE_WAKEUP_DONE(new_m); vm_page_unlock_queues(); vm_object_unlock(new_object); /* * Return the placeholder page to simplify cleanup. */ return (holding_page); } /* * Routine: vm_pageclean_setup * * Purpose: setup a page to be cleaned (made non-dirty), but not * necessarily flushed from the VM page cache. * This is accomplished by cleaning in place. * * The page must not be busy, and the object and page * queues must be locked. * */ void vm_pageclean_setup( vm_page_t m, vm_page_t new_m, vm_object_t new_object, vm_object_offset_t new_offset) { vm_object_t old_object = m->object; assert(!m->busy); assert(!m->cleaning); XPR(XPR_VM_PAGEOUT, "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n", (integer_t)old_object, m->offset, (integer_t)m, (integer_t)new_m, new_offset); pmap_clear_modify(m->phys_addr); vm_object_paging_begin(old_object); /* * Record that this page has been written out */ #if MACH_PAGEMAP vm_external_state_set(old_object->existence_map, m->offset); #endif /*MACH_PAGEMAP*/ /* * Mark original page as cleaning in place. */ m->cleaning = TRUE; m->dirty = TRUE; m->precious = FALSE; /* * Convert the fictitious page to a private shadow of * the real page. */ assert(new_m->fictitious); new_m->fictitious = FALSE; new_m->private = TRUE; new_m->pageout = TRUE; new_m->phys_addr = m->phys_addr; vm_page_wire(new_m); vm_page_insert(new_m, new_object, new_offset); assert(!new_m->wanted); new_m->busy = FALSE; } void vm_pageclean_copy( vm_page_t m, vm_page_t new_m, vm_object_t new_object, vm_object_offset_t new_offset) { XPR(XPR_VM_PAGEOUT, "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n", m, new_m, new_object, new_offset, 0); assert((!m->busy) && (!m->cleaning)); assert(!new_m->private && !new_m->fictitious); pmap_clear_modify(m->phys_addr); m->busy = TRUE; vm_object_paging_begin(m->object); vm_page_unlock_queues(); vm_object_unlock(m->object); /* * Copy the original page to the new page. */ vm_page_copy(m, new_m); /* * Mark the old page as clean. A request to pmap_is_modified * will get the right answer. */ vm_object_lock(m->object); m->dirty = FALSE; vm_object_paging_end(m->object); vm_page_lock_queues(); if (!m->active && !m->inactive) vm_page_activate(m); PAGE_WAKEUP_DONE(m); vm_page_insert(new_m, new_object, new_offset); vm_page_activate(new_m); new_m->busy = FALSE; /* No other thread can be waiting */ } /* * Routine: vm_pageout_initialize_page * Purpose: * Causes the specified page to be initialized in * the appropriate memory object. This routine is used to push * pages into a copy-object when they are modified in the * permanent object. * * The page is moved to a temporary object and paged out. * * In/out conditions: * The page in question must not be on any pageout queues. * The object to which it belongs must be locked. * The page must be busy, but not hold a paging reference. * * Implementation: * Move this page to a completely new object. */ void vm_pageout_initialize_page( vm_page_t m) { vm_map_copy_t copy; vm_object_t new_object; vm_object_t object; vm_object_offset_t paging_offset; vm_page_t holding_page; XPR(XPR_VM_PAGEOUT, "vm_pageout_initialize_page, page 0x%X\n", (integer_t)m, 0, 0, 0, 0); assert(m->busy); /* * Verify that we really want to clean this page */ assert(!m->absent); assert(!m->error); assert(m->dirty); /* * Create a paging reference to let us play with the object. */ object = m->object; paging_offset = m->offset + object->paging_offset; vm_object_paging_begin(object); vm_object_unlock(object); if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) { VM_PAGE_FREE(m); panic("reservation without pageout?"); /* alan */ return; } /* set the page for future call to vm_fault_list_request */ holding_page = NULL; vm_object_lock(m->object); vm_page_lock_queues(); pmap_clear_modify(m->phys_addr); m->dirty = TRUE; m->busy = TRUE; m->list_req_pending = TRUE; m->cleaning = TRUE; m->pageout = TRUE; vm_page_wire(m); vm_page_unlock_queues(); vm_object_unlock(m->object); vm_pageout_throttle(m); copy = NULL; VM_STAT(pageouts++); /* VM_STAT(pages_pagedout++); */ /* * Write the data to its pager. * Note that the data is passed by naming the new object, * not a virtual address; the pager interface has been * manipulated to use the "internal memory" data type. * [The object reference from its allocation is donated * to the eventual recipient.] */ memory_object_data_initialize(object->pager, object->pager_request, paging_offset, POINTER_T(copy), PAGE_SIZE); vm_object_lock(object); } #if MACH_CLUSTER_STATS #define MAXCLUSTERPAGES 16 struct { unsigned long pages_in_cluster; unsigned long pages_at_higher_offsets; unsigned long pages_at_lower_offsets; } cluster_stats[MAXCLUSTERPAGES]; #endif /* MACH_CLUSTER_STATS */ boolean_t allow_clustered_pageouts = FALSE; /* * vm_pageout_cluster: * * Given a page, page it out, and attempt to clean adjacent pages * in the same operation. * * The page must be busy, and the object unlocked w/ paging reference * to prevent deallocation or collapse. The page must not be on any * pageout queue. */ void vm_pageout_cluster( vm_page_t m) { vm_object_t object = m->object; vm_object_offset_t offset = m->offset; /* from vm_object start */ vm_object_offset_t paging_offset = m->offset + object->paging_offset; vm_object_t new_object; vm_object_offset_t new_offset; vm_size_t cluster_size; vm_object_offset_t cluster_offset; /* from memory_object start */ vm_object_offset_t cluster_lower_bound; /* from vm_object_start */ vm_object_offset_t cluster_upper_bound; /* from vm_object_start */ vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */ vm_object_offset_t offset_within_cluster; vm_size_t length_of_data; vm_page_t friend, holding_page; vm_map_copy_t copy; kern_return_t rc; boolean_t precious_clean = TRUE; int pages_in_cluster; CLUSTER_STAT(int pages_at_higher_offsets = 0;) CLUSTER_STAT(int pages_at_lower_offsets = 0;) XPR(XPR_VM_PAGEOUT, "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", (integer_t)object, offset, (integer_t)m, 0, 0); CLUSTER_STAT(vm_pageout_cluster_clusters++;) /* * Only a certain kind of page is appreciated here. */ assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0)); assert(!m->cleaning && !m->pageout && !m->inactive && !m->active); vm_object_lock(object); cluster_size = object->cluster_size; assert(cluster_size >= PAGE_SIZE); if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE; assert(object->pager_created && object->pager_initialized); assert(object->internal || object->pager_ready); if (m->precious && !m->dirty) precious_clean = TRUE; if (!object->pager_trusted || !allow_clustered_pageouts) cluster_size = PAGE_SIZE; vm_object_unlock(object); cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1); /* bytes from beginning of cluster */ /* * Due to unaligned mappings, we have to be careful * of negative offsets into the VM object. Clip the cluster * boundary to the VM object, not the memory object. */ if (offset > cluster_offset) { cluster_lower_bound = offset - cluster_offset; /* from vm_object */ } else { cluster_lower_bound = 0; } cluster_upper_bound = (offset - cluster_offset) + (vm_object_offset_t)cluster_size; /* set the page for future call to vm_fault_list_request */ holding_page = NULL; vm_object_lock(m->object); vm_page_lock_queues(); m->busy = TRUE; m->list_req_pending = TRUE; m->cleaning = TRUE; m->pageout = TRUE; vm_page_wire(m); vm_page_unlock_queues(); vm_object_unlock(m->object); vm_pageout_throttle(m); /* * Search backward for adjacent eligible pages to clean in * this operation. */ cluster_start = offset; if (offset) { /* avoid wrap-around at zero */ for (cluster_start = offset - PAGE_SIZE_64; cluster_start >= cluster_lower_bound; cluster_start -= PAGE_SIZE_64) { assert(cluster_size > PAGE_SIZE); vm_object_lock(object); vm_page_lock_queues(); if ((friend = vm_pageout_cluster_page(object, cluster_start, precious_clean)) == VM_PAGE_NULL) { vm_page_unlock_queues(); vm_object_unlock(object); break; } new_offset = (cluster_start + object->paging_offset) & (cluster_size - 1); assert(new_offset < cluster_offset); m->list_req_pending = TRUE; m->cleaning = TRUE; /* do nothing except advance the write request, all we really need to */ /* do is push the target page and let the code at the other end decide */ /* what is really the right size */ if (vm_page_free_count <= vm_page_free_reserved) { m->busy = TRUE; m->pageout = TRUE; vm_page_wire(m); } vm_page_unlock_queues(); vm_object_unlock(object); if(m->dirty || m->object->internal) { CLUSTER_STAT(pages_at_lower_offsets++;) } } cluster_start += PAGE_SIZE_64; } assert(cluster_start >= cluster_lower_bound); assert(cluster_start <= offset); /* * Search forward for adjacent eligible pages to clean in * this operation. */ for (cluster_end = offset + PAGE_SIZE_64; cluster_end < cluster_upper_bound; cluster_end += PAGE_SIZE_64) { assert(cluster_size > PAGE_SIZE); vm_object_lock(object); vm_page_lock_queues(); if ((friend = vm_pageout_cluster_page(object, cluster_end, precious_clean)) == VM_PAGE_NULL) { vm_page_unlock_queues(); vm_object_unlock(object); break; } new_offset = (cluster_end + object->paging_offset) & (cluster_size - 1); assert(new_offset < cluster_size); m->list_req_pending = TRUE; m->cleaning = TRUE; /* do nothing except advance the write request, all we really need to */ /* do is push the target page and let the code at the other end decide */ /* what is really the right size */ if (vm_page_free_count <= vm_page_free_reserved) { m->busy = TRUE; m->pageout = TRUE; vm_page_wire(m); } vm_page_unlock_queues(); vm_object_unlock(object); if(m->dirty || m->object->internal) { CLUSTER_STAT(pages_at_higher_offsets++;) } } assert(cluster_end <= cluster_upper_bound); assert(cluster_end >= offset + PAGE_SIZE); /* * (offset - cluster_offset) is beginning of cluster_object * relative to vm_object start. */ offset_within_cluster = cluster_start - (offset - cluster_offset); length_of_data = cluster_end - cluster_start; assert(offset_within_cluster < cluster_size); assert((offset_within_cluster + length_of_data) <= cluster_size); rc = KERN_SUCCESS; assert(rc == KERN_SUCCESS); pages_in_cluster = length_of_data/PAGE_SIZE; if(m->dirty || m->object->internal) { VM_STAT(pageouts++); } /* VM_STAT(pages_pagedout += pages_in_cluster); */ #if MACH_CLUSTER_STATS (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++; (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++; (cluster_stats[pages_in_cluster].pages_in_cluster)++; #endif /* MACH_CLUSTER_STATS */ /* * Send the data to the pager. */ paging_offset = cluster_start + object->paging_offset; #ifdef MACH_BSD if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) == ((rpc_subsystem_t) &vnode_pager_workaround)) { rc = vnode_pager_data_return(object->pager, object->pager_request, paging_offset, POINTER_T(copy), length_of_data, !precious_clean, FALSE); } else { rc = memory_object_data_return(object->pager, object->pager_request, paging_offset, POINTER_T(copy), length_of_data, !precious_clean, FALSE); } #else rc = memory_object_data_return(object->pager, object->pager_request, paging_offset, POINTER_T(copy), length_of_data, !precious_clean, FALSE); #endif vm_object_lock(object); vm_object_paging_end(object); if (holding_page) { assert(!object->pager_trusted); VM_PAGE_FREE(holding_page); vm_object_paging_end(object); } vm_object_unlock(object); } /* * vm_pageout_return_write_pages * Recover pages from an aborted write attempt * */ vm_pageout_return_write_pages( ipc_port_t control_port, vm_object_offset_t object_offset, vm_map_copy_t copy) { vm_object_t object; int offset; int size; int shadow_offset; int copy_offset; int j; vm_page_t m; object = copy->cpy_object; copy_offset = copy->offset; size = copy->size; if((copy->type != VM_MAP_COPY_OBJECT) || (object->shadow == 0)) { object = (vm_object_t)control_port->ip_kobject; shadow_offset = (object_offset - object->paging_offset) - copy->offset; } else { /* get the offset from the copy object */ shadow_offset = object->shadow_offset; /* find the backing object */ object = object->shadow; } vm_object_lock(object); for(offset = 0, j=0; offset < size; offset+=page_size, j++) { m = vm_page_lookup(object, offset + shadow_offset + copy_offset); if((m == VM_PAGE_NULL) || m->fictitious) { vm_page_t p; int i; vm_object_t copy_object; /* m might be fictitious if the original page */ /* was found to be in limbo at the time of */ /* vm_pageout_setup */ if((m != VM_PAGE_NULL) && m->fictitious) { m->cleaning = FALSE; vm_page_remove(m); /* if object is not pager trusted then */ /* this fictitious page will be removed */ /* as the holding page in vm_pageout_cluster */ if (object->pager_trusted) vm_page_free(m); if(vm_page_laundry_count) vm_page_laundry_count--; if (vm_page_laundry_count < vm_page_laundry_min) { vm_page_laundry_min = 0; thread_wakeup((event_t) &vm_page_laundry_count); } } else if ((object->pager_trusted) && (copy->type == VM_MAP_COPY_OBJECT)) { vm_object_paging_end(object); } copy_object = copy->cpy_object; if(copy->type == VM_MAP_COPY_OBJECT) { p = (vm_page_t) queue_first(©_object->memq); for(i = 0; i < copy_object->resident_page_count; i++) { if(p->offset == (offset + copy_offset)) break; p = (vm_page_t) queue_next(&p->listq); } vm_page_remove(p); } else { p = copy->cpy_page_list[j]; copy->cpy_page_list[j] = 0; p->gobbled = FALSE; } vm_page_insert(p, object, offset + shadow_offset + copy_offset); p->busy = TRUE; p->dirty = TRUE; p->laundry = FALSE; if (p->pageout) { p->pageout = FALSE; /*dont throw away target*/ vm_page_unwire(p);/* reactivates */ } } else if(m->pageout) { m->pageout = FALSE; /* dont throw away target pages */ vm_page_unwire(m);/* reactivates */ } } vm_object_unlock(object); vm_map_copy_discard(copy); vm_object_lock(object); for(offset = 0; offset < size; offset+=page_size) { m = vm_page_lookup(object, offset + shadow_offset + copy_offset); m->dirty = TRUE; /* we'll send the pages home later */ m->busy = FALSE; /* allow system access again */ } vm_object_unlock(object); } /* * Trusted pager throttle. * Object must be unlocked, page queues must be unlocked. */ void vm_pageout_throttle( register vm_page_t m) { vm_page_lock_queues(); assert(!m->laundry); m->laundry = TRUE; while (vm_page_laundry_count >= vm_page_laundry_max) { /* * Set the threshold for when vm_page_free() * should wake us up. */ vm_page_laundry_min = vm_page_laundry_max/2; assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT); vm_page_unlock_queues(); /* * Pause to let the default pager catch up. */ thread_block((void (*)(void)) 0); vm_page_lock_queues(); } vm_page_laundry_count++; vm_page_unlock_queues(); } /* * The global variable vm_pageout_clean_active_pages controls whether * active pages are considered valid to be cleaned in place during a * clustered pageout. Performance measurements are necessary to determine * the best policy. */ int vm_pageout_clean_active_pages = 1; /* * vm_pageout_cluster_page: [Internal] * * return a vm_page_t to the page at (object,offset) if it is appropriate * to clean in place. Pages that are non-existent, busy, absent, already * cleaning, or not dirty are not eligible to be cleaned as an adjacent * page in a cluster. * * The object must be locked on entry, and remains locked throughout * this call. */ vm_page_t vm_pageout_cluster_page( vm_object_t object, vm_object_offset_t offset, boolean_t precious_clean) { vm_page_t m; XPR(XPR_VM_PAGEOUT, "vm_pageout_cluster_page, object 0x%X offset 0x%X\n", (integer_t)object, offset, 0, 0, 0); if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL) return(VM_PAGE_NULL); if (m->busy || m->absent || m->cleaning || m->prep_pin_count != 0 || (m->wire_count != 0) || m->error) return(VM_PAGE_NULL); if (vm_pageout_clean_active_pages) { if (!m->active && !m->inactive) return(VM_PAGE_NULL); } else { if (!m->inactive) return(VM_PAGE_NULL); } assert(!m->private); assert(!m->fictitious); if (!m->dirty) m->dirty = pmap_is_modified(m->phys_addr); if (precious_clean) { if (!m->precious || !m->dirty) return(VM_PAGE_NULL); } else { if (!m->dirty) return(VM_PAGE_NULL); } return(m); } /* * vm_pageout_scan does the dirty work for the pageout daemon. * It returns with vm_page_queue_free_lock held and * vm_page_free_wanted == 0. */ extern void vm_pageout_scan_continue(void); /* forward; */ void vm_pageout_scan(void) { unsigned int burst_count; boolean_t now = FALSE; unsigned int laundry_pages; boolean_t need_more_inactive_pages; unsigned int loop_detect; XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); /*???*/ /* * We want to gradually dribble pages from the active queue * to the inactive queue. If we let the inactive queue get * very small, and then suddenly dump many pages into it, * those pages won't get a sufficient chance to be referenced * before we start taking them from the inactive queue. * * We must limit the rate at which we send pages to the pagers. * data_write messages consume memory, for message buffers and * for map-copy objects. If we get too far ahead of the pagers, * we can potentially run out of memory. * * We can use the laundry count to limit directly the number * of pages outstanding to the default pager. A similar * strategy for external pagers doesn't work, because * external pagers don't have to deallocate the pages sent them, * and because we might have to send pages to external pagers * even if they aren't processing writes. So we also * use a burst count to limit writes to external pagers. * * When memory is very tight, we can't rely on external pagers to * clean pages. They probably aren't running, because they * aren't vm-privileged. If we kept sending dirty pages to them, * we could exhaust the free list. However, we can't just ignore * pages belonging to external objects, because there might be no * pages belonging to internal objects. Hence, we get the page * into an internal object and then immediately double-page it, * sending it to the default pager. * * consider_zone_gc should be last, because the other operations * might return memory to zones. */ Restart: mutex_lock(&vm_page_queue_free_lock); now = (vm_page_free_count < vm_page_free_min); mutex_unlock(&vm_page_queue_free_lock); #if THREAD_SWAPPER swapout_threads(now); #endif /* THREAD_SWAPPER */ stack_collect(); consider_task_collect(); consider_thread_collect(); cleanup_limbo_queue(); consider_zone_gc(); consider_machine_collect(); loop_detect = vm_page_active_count + vm_page_inactive_count; #if 0 if (vm_page_free_count <= vm_page_free_reserved) { need_more_inactive_pages = TRUE; } else { need_more_inactive_pages = FALSE; } #else need_more_inactive_pages = FALSE; #endif for (burst_count = 0;;) { register vm_page_t m; register vm_object_t object; unsigned int free_count; /* * Recalculate vm_page_inactivate_target. */ vm_page_lock_queues(); vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + vm_page_inactive_count); /* * Move pages from active to inactive. */ while ((vm_page_inactive_count < vm_page_inactive_target || need_more_inactive_pages) && !queue_empty(&vm_page_queue_active)) { register vm_object_t object; vm_pageout_active++; m = (vm_page_t) queue_first(&vm_page_queue_active); /* * If we're getting really low on memory, * try selecting a page that will go * directly to the default_pager. * If there are no such pages, we have to * page out a page backed by an EMM, * so that the default_pager can recover * it eventually. */ if (need_more_inactive_pages && (IP_VALID(memory_manager_default))) { vm_pageout_scan_active_emm_throttle++; do { assert(m->active && !m->inactive); object = m->object; if (vm_object_lock_try(object)) { #if 0 if (object->pager_trusted || object->internal) { /* found one ! */ vm_pageout_scan_active_emm_throttle_success++; goto object_locked_active; } #else vm_pageout_scan_active_emm_throttle_success++; goto object_locked_active; #endif vm_object_unlock(object); } m = (vm_page_t) queue_next(&m->pageq); } while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)); if (queue_end(&vm_page_queue_active, (queue_entry_t) m)) { vm_pageout_scan_active_emm_throttle_failure++; m = (vm_page_t) queue_first(&vm_page_queue_active); } } assert(m->active && !m->inactive); object = m->object; if (!vm_object_lock_try(object)) { /* * Move page to end and continue. */ queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); vm_page_unlock_queues(); mutex_pause(); vm_page_lock_queues(); continue; } object_locked_active: /* * If the page is busy, then we pull it * off the active queue and leave it alone. */ if (m->busy) { vm_object_unlock(object); queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); m->active = FALSE; if (!m->fictitious) vm_page_active_count--; continue; } /* * Deactivate the page while holding the object * locked, so we know the page is still not busy. * This should prevent races between pmap_enter * and pmap_clear_reference. The page might be * absent or fictitious, but vm_page_deactivate * can handle that. */ vm_page_deactivate(m); vm_object_unlock(object); } /* * We are done if we have met our target *and* * nobody is still waiting for a page. */ mutex_lock(&vm_page_queue_free_lock); free_count = vm_page_free_count; if ((free_count >= vm_page_free_target) && (vm_page_free_wanted == 0)) { vm_page_unlock_queues(); break; } mutex_unlock(&vm_page_queue_free_lock); /* * Sometimes we have to pause: * 1) No inactive pages - nothing to do. * 2) Flow control - wait for untrusted pagers to catch up. */ if (queue_empty(&vm_page_queue_inactive) || ((--loop_detect) == 0) || (burst_count >= vm_pageout_burst_max)) { unsigned int pages, msecs; int wait_result; consider_machine_adjust(); /* * vm_pageout_burst_wait is msecs/page. * If there is nothing for us to do, we wait * at least vm_pageout_empty_wait msecs. */ pages = burst_count; if (loop_detect == 0) { printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n"); msecs = vm_free_page_pause; } else { msecs = burst_count * vm_pageout_burst_wait; } if (queue_empty(&vm_page_queue_inactive) && (msecs < vm_pageout_empty_wait)) msecs = vm_pageout_empty_wait; vm_page_unlock_queues(); assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE); counter(c_vm_pageout_scan_block++); /* * Unfortunately, we don't have call_continuation * so we can't rely on tail-recursion. */ wait_result = thread_block((void (*)(void)) 0); if (wait_result != THREAD_TIMED_OUT) thread_cancel_timer(); vm_pageout_scan_continue(); goto Restart; /*NOTREACHED*/ } vm_pageout_inactive++; m = (vm_page_t) queue_first(&vm_page_queue_inactive); if ((vm_page_free_count <= vm_page_free_reserved) && (IP_VALID(memory_manager_default))) { /* * We're really low on memory. Try to select a page that * would go directly to the default_pager. * If there are no such pages, we have to page out a * page backed by an EMM, so that the default_pager * can recover it eventually. */ vm_pageout_scan_inactive_emm_throttle++; do { assert(!m->active && m->inactive); object = m->object; if (vm_object_lock_try(object)) { #if 0 if (object->pager_trusted || object->internal) { /* found one ! */ vm_pageout_scan_inactive_emm_throttle_success++; goto object_locked_inactive; } #else vm_pageout_scan_inactive_emm_throttle_success++; goto object_locked_inactive; #endif /* 0 */ vm_object_unlock(object); } m = (vm_page_t) queue_next(&m->pageq); } while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)); if (queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) { vm_pageout_scan_inactive_emm_throttle_failure++; /* * We should check the "active" queue * for good candidates to page out. */ need_more_inactive_pages = TRUE; m = (vm_page_t) queue_first(&vm_page_queue_inactive); } } assert(!m->active && m->inactive); object = m->object; /* * Try to lock object; since we've got the * page queues lock, we can only try for this one. */ if (!vm_object_lock_try(object)) { /* * Move page to end and continue. */ queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); vm_page_unlock_queues(); mutex_pause(); vm_pageout_inactive_nolock++; continue; } object_locked_inactive: /* * Paging out pages of objects which pager is being * created by another thread must be avoided, because * this thread may claim for memory, thus leading to a * possible dead lock between it and the pageout thread * which will wait for pager creation, if such pages are * finally chosen. The remaining assumption is that there * will finally be enough available pages in the inactive * pool to page out in order to satisfy all memory claimed * by the thread which concurrently creates the pager. */ if (!object->pager_initialized && object->pager_created) { /* * Move page to end and continue, hoping that * there will be enough other inactive pages to * page out so that the thread which currently * initializes the pager will succeed. */ queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq); vm_page_unlock_queues(); vm_object_unlock(object); vm_pageout_inactive_avoid++; continue; } /* * Remove the page from the inactive list. */ queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); m->inactive = FALSE; if (!m->fictitious) vm_page_inactive_count--; if (m->busy || !object->alive) { /* * Somebody is already playing with this page. * Leave it off the pageout queues. */ vm_page_unlock_queues(); vm_object_unlock(object); vm_pageout_inactive_busy++; continue; } /* * If it's absent or in error, we can reclaim the page. */ if (m->absent || m->error) { vm_pageout_inactive_absent++; reclaim_page: vm_page_free(m); vm_page_unlock_queues(); vm_object_unlock(object); continue; } assert(!m->private); assert(!m->fictitious); /* * If already cleaning this page in place, convert from * "adjacent" to "target". We can leave the page mapped, * and vm_pageout_object_terminate will determine whether * to free or reactivate. */ if (m->cleaning) { #if MACH_CLUSTER_STATS vm_pageout_cluster_conversions++; #endif if (m->prep_pin_count == 0) { m->busy = TRUE; m->pageout = TRUE; vm_page_wire(m); } vm_object_unlock(object); vm_page_unlock_queues(); continue; } /* * If it's being used, reactivate. * (Fictitious pages are either busy or absent.) */ if (m->reference || pmap_is_referenced(m->phys_addr)) { vm_pageout_inactive_used++; reactivate_page: #if ADVISORY_PAGEOUT if (m->discard_request) { m->discard_request = FALSE; } #endif /* ADVISORY_PAGEOUT */ vm_object_unlock(object); vm_page_activate(m); VM_STAT(reactivations++); vm_page_unlock_queues(); continue; } if (m->prep_pin_count != 0) { boolean_t pinned = FALSE; vm_page_pin_lock(); if (m->pin_count != 0) { /* skip and reactivate pinned page */ pinned = TRUE; vm_pageout_inactive_pinned++; } else { /* page is prepped; send it into limbo */ m->limbo = TRUE; vm_pageout_inactive_limbo++; } vm_page_pin_unlock(); if (pinned) goto reactivate_page; } #if ADVISORY_PAGEOUT if (object->advisory_pageout) { boolean_t do_throttle; ipc_port_t port; vm_object_offset_t discard_offset; if (m->discard_request) { vm_stat_discard_failure++; goto mandatory_pageout; } assert(object->pager_initialized); m->discard_request = TRUE; port = object->pager; /* system-wide throttle */ do_throttle = (vm_page_free_count <= vm_page_free_reserved); if (!do_throttle) { /* throttle on this pager */ /* XXX lock ordering ? */ ip_lock(port); do_throttle= imq_full(&port->ip_messages); ip_unlock(port); } if (do_throttle) { vm_stat_discard_throttle++; #if 0 /* ignore this page and skip to next */ vm_page_unlock_queues(); vm_object_unlock(object); continue; #else /* force mandatory pageout */ goto mandatory_pageout; #endif } /* proceed with discard_request */ vm_page_activate(m); vm_stat_discard++; VM_STAT(reactivations++); discard_offset = m->offset + object->paging_offset; vm_stat_discard_sent++; vm_page_unlock_queues(); vm_object_unlock(object); /* memory_object_discard_request(object->pager, object->pager_request, discard_offset, PAGE_SIZE); */ continue; } mandatory_pageout: #endif /* ADVISORY_PAGEOUT */ XPR(XPR_VM_PAGEOUT, "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0); /* * Eliminate all mappings. */ m->busy = TRUE; pmap_page_protect(m->phys_addr, VM_PROT_NONE); if (!m->dirty) m->dirty = pmap_is_modified(m->phys_addr); /* * If it's clean and not precious, we can free the page. */ if (!m->dirty && !m->precious) { vm_pageout_inactive_clean++; goto reclaim_page; } vm_page_unlock_queues(); /* * If there is no memory object for the page, create * one and hand it to the default pager. */ if (!object->pager_initialized) vm_object_collapse(object); if (!object->pager_initialized) vm_object_pager_create(object); if (!object->pager_initialized) { /* * Still no pager for the object. * Reactivate the page. * * Should only happen if there is no * default pager. */ vm_page_lock_queues(); vm_page_activate(m); vm_page_unlock_queues(); /* * And we are done with it. */ PAGE_WAKEUP_DONE(m); vm_object_unlock(object); /* * break here to get back to the preemption * point in the outer loop so that we don't * spin forever if there is no default pager. */ vm_pageout_dirty_no_pager++; /* * Well there's no pager, but we can still reclaim * free pages out of the inactive list. Go back * to top of loop and look for suitable pages. */ continue; } if (object->pager_initialized && object->pager == IP_NULL) { /* * This pager has been destroyed by either * memory_object_destroy or vm_object_destroy, and * so there is nowhere for the page to go. * Just free the page. */ VM_PAGE_FREE(m); vm_object_unlock(object); continue; } vm_pageout_inactive_dirty++; /* if (!object->internal) burst_count++; */ vm_object_paging_begin(object); vm_object_unlock(object); vm_pageout_cluster(m); /* flush it */ } consider_machine_adjust(); } counter(unsigned int c_vm_pageout_scan_continue = 0;) void vm_pageout_scan_continue(void) { /* * We just paused to let the pagers catch up. * If vm_page_laundry_count is still high, * then we aren't waiting long enough. * If we have paused some vm_pageout_pause_max times without * adjusting vm_pageout_burst_wait, it might be too big, * so we decrease it. */ vm_page_lock_queues(); counter(++c_vm_pageout_scan_continue); if (vm_page_laundry_count > vm_pageout_burst_min) { vm_pageout_burst_wait++; vm_pageout_pause_count = 0; } else if (++vm_pageout_pause_count > vm_pageout_pause_max) { vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4; if (vm_pageout_burst_wait < 1) vm_pageout_burst_wait = 1; vm_pageout_pause_count = 0; } vm_page_unlock_queues(); } void vm_page_free_reserve(int pages); int vm_page_free_count_init; void vm_page_free_reserve( int pages) { int free_after_reserve; vm_page_free_reserved += pages; free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; vm_page_free_min = vm_page_free_reserved + VM_PAGE_FREE_MIN(free_after_reserve); vm_page_free_target = vm_page_free_reserved + VM_PAGE_FREE_TARGET(free_after_reserve); if (vm_page_free_target < vm_page_free_min + 5) vm_page_free_target = vm_page_free_min + 5; } /* * vm_pageout is the high level pageout daemon. */ void vm_pageout(void) { thread_t self = current_thread(); /* * Set thread privileges. */ self->vm_privilege = TRUE; stack_privilege(self); thread_swappable(current_act(), FALSE); /* * Initialize some paging parameters. */ if (vm_page_laundry_max == 0) vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX; if (vm_pageout_burst_max == 0) vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX; if (vm_pageout_burst_wait == 0) vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; if (vm_pageout_empty_wait == 0) vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; vm_page_free_count_init = vm_page_free_count; /* * even if we've already called vm_page_free_reserve * call it again here to insure that the targets are * accurately calculated (it uses vm_page_free_count_init) * calling it with an arg of 0 will not change the reserve * but will re-calculate free_min and free_target */ if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED) vm_page_free_reserve(VM_PAGE_FREE_RESERVED - vm_page_free_reserved); else vm_page_free_reserve(0); /* * vm_pageout_scan will set vm_page_inactive_target. * * The pageout daemon is never done, so loop forever. * We should call vm_pageout_scan at least once each * time we are woken, even if vm_page_free_wanted is * zero, to check vm_page_free_target and * vm_page_inactive_target. */ for (;;) { vm_pageout_scan(); /* we hold vm_page_queue_free_lock now */ assert(vm_page_free_wanted == 0); assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); mutex_unlock(&vm_page_queue_free_lock); counter(c_vm_pageout_block++); thread_block((void (*)(void)) 0); } /*NOTREACHED*/ } void upl_dealloc( upl_t upl) { upl->ref_count -= 1; if(upl->ref_count == 0) { upl_destroy(upl); } } /* * Routine: vm_fault_list_request * Purpose: * Cause the population of a portion of a vm_object. * Depending on the nature of the request, the pages * returned may be contain valid data or be uninitialized. * A page list structure, listing the physical pages * will be returned upon request. * This function is called by the file system or any other * supplier of backing store to a pager. * IMPORTANT NOTE: The caller must still respect the relationship * between the vm_object and its backing memory object. The * caller MUST NOT substitute changes in the backing file * without first doing a memory_object_lock_request on the * target range unless it is know that the pages are not * shared with another entity at the pager level. * Copy_in_to: * if a page list structure is present * return the mapped physical pages, where a * page is not present, return a non-initialized * one. If the no_sync bit is turned on, don't * call the pager unlock to synchronize with other * possible copies of the page. Leave pages busy * in the original object, if a page list structure * was specified. When a commit of the page list * pages is done, the dirty bit will be set for each one. * Copy_out_from: * If a page list structure is present, return * all mapped pages. Where a page does not exist * map a zero filled one. Leave pages busy in * the original object. If a page list structure * is not specified, this call is a no-op. * * Note: access of default pager objects has a rather interesting * twist. The caller of this routine, presumably the file system * page cache handling code, will never actually make a request * against a default pager backed object. Only the default * pager will make requests on backing store related vm_objects * In this way the default pager can maintain the relationship * between backing store files (abstract memory objects) and * the vm_objects (cache objects), they support. * */ kern_return_t vm_fault_list_request( vm_object_t object, vm_object_offset_t offset, vm_size_t size, upl_t *upl_ptr, upl_page_info_t **user_page_list_ptr, int page_list_count, int cntrl_flags) { vm_page_t dst_page; vm_object_offset_t dst_offset = offset; upl_page_info_t *user_page_list; vm_size_t xfer_size = size; boolean_t do_m_lock = FALSE; boolean_t dirty; upl_t upl = NULL; int entry; boolean_t encountered_lrp = FALSE; vm_page_t alias_page = NULL; if(cntrl_flags & UPL_SET_INTERNAL) page_list_count = MAX_UPL_TRANSFER; if(((user_page_list_ptr || (cntrl_flags & UPL_SET_INTERNAL)) && !(object->private)) && (page_list_count < (size/page_size))) return KERN_INVALID_ARGUMENT; if((!object->internal) && (object->paging_offset != 0)) panic("vm_fault_list_request: vnode object with non-zero paging offset\n"); if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) { return KERN_SUCCESS; } if(upl_ptr) { if((cntrl_flags & UPL_SET_INTERNAL) && !(object->private)) { upl = upl_create(TRUE); user_page_list = (upl_page_info_t *) (((vm_offset_t)upl) + sizeof(struct upl)); if(user_page_list_ptr) *user_page_list_ptr = user_page_list; upl->flags |= UPL_INTERNAL; } else { upl = upl_create(FALSE); if(user_page_list_ptr) user_page_list = *user_page_list_ptr; else user_page_list = NULL; if(object->private) { upl->size = size; upl->offset = offset; *upl_ptr = upl; if(user_page_list) { user_page_list[0].phys_addr = offset; user_page_list[0].device = TRUE; } upl->flags = UPL_DEVICE_MEMORY; return KERN_SUCCESS; } } upl->map_object = vm_object_allocate(size); vm_object_lock(upl->map_object); upl->map_object->shadow = object; upl->size = size; upl->offset = offset + object->paging_offset; upl->map_object->pageout = TRUE; upl->map_object->can_persist = FALSE; upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; upl->map_object->shadow_offset = offset; vm_object_unlock(upl->map_object); *upl_ptr = upl; } VM_PAGE_GRAB_FICTITIOUS(alias_page); vm_object_lock(object); #ifdef UBC_DEBUG if(upl_ptr) queue_enter(&object->uplq, upl, upl_t, uplq); #endif /* UBC_DEBUG */ vm_object_paging_begin(object); entry = 0; if(cntrl_flags & UPL_COPYOUT_FROM) { upl->flags |= UPL_PAGE_SYNC_DONE; while (xfer_size) { if(alias_page == NULL) { vm_object_unlock(object); VM_PAGE_GRAB_FICTITIOUS(alias_page); vm_object_lock(object); } if(((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || dst_page->fictitious || dst_page->absent || dst_page->error || (dst_page->wire_count != 0 && !dst_page->pageout) || ((!(dst_page->dirty || dst_page->precious || pmap_is_modified(dst_page->phys_addr))) && (cntrl_flags & UPL_RET_ONLY_DIRTY))) { if(user_page_list) user_page_list[entry].phys_addr = 0; } else { if(dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) { if(cntrl_flags & UPL_NOBLOCK) { if(user_page_list) user_page_list[entry] .phys_addr = 0; entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; continue; } /*someone else is playing with the */ /* page. We will have to wait. */ PAGE_ASSERT_WAIT( dst_page, THREAD_UNINT); vm_object_unlock(object); thread_block((void(*)(void))0); vm_object_lock(object); continue; } /* Someone else already cleaning the page? */ if((dst_page->cleaning || dst_page->absent || dst_page->prep_pin_count != 0 || dst_page->wire_count != 0) && !dst_page->list_req_pending) { if(user_page_list) user_page_list[entry].phys_addr = 0; entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; continue; } /* eliminate all mappings from the */ /* original object and its prodigy */ vm_page_lock_queues(); pmap_page_protect(dst_page->phys_addr, VM_PROT_NONE); /* pageout statistics gathering. count */ /* all the pages we will page out that */ /* were not counted in the initial */ /* vm_pageout_scan work */ if(dst_page->list_req_pending) encountered_lrp = TRUE; if((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && (dst_page->list_req_pending == FALSE)) { if(encountered_lrp) { CLUSTER_STAT (pages_at_higher_offsets++;) } else { CLUSTER_STAT (pages_at_lower_offsets++;) } } /* Turn off busy indication on pending */ /* pageout. Note: we can only get here */ /* in the request pending case. */ dst_page->list_req_pending = FALSE; dst_page->busy = FALSE; dst_page->cleaning = FALSE; dirty = pmap_is_modified(dst_page->phys_addr); dirty = dirty ? TRUE : dst_page->dirty; /* use pageclean setup, it is more convenient */ /* even for the pageout cases here */ vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); if(!dirty) { dst_page->dirty = FALSE; dst_page->precious = TRUE; } if(dst_page->pageout) dst_page->busy = TRUE; alias_page->absent = FALSE; alias_page = NULL; if(!(cntrl_flags & UPL_CLEAN_IN_PLACE)) { /* deny access to the target page */ /* while it is being worked on */ if((!dst_page->pageout) && (dst_page->wire_count == 0)) { dst_page->busy = TRUE; dst_page->pageout = TRUE; vm_page_wire(dst_page); } } if(user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_addr; user_page_list[entry].dirty = dst_page->dirty; user_page_list[entry].pageout = dst_page->pageout; user_page_list[entry].absent = dst_page->absent; user_page_list[entry].precious = dst_page->precious; } vm_page_unlock_queues(); } entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; } } else { while (xfer_size) { if(alias_page == NULL) { vm_object_unlock(object); VM_PAGE_GRAB_FICTITIOUS(alias_page); vm_object_lock(object); } dst_page = vm_page_lookup(object, dst_offset); if(dst_page != VM_PAGE_NULL) { if((dst_page->cleaning) && !(dst_page->list_req_pending)) { /*someone else is writing to the */ /* page. We will have to wait. */ PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT); vm_object_unlock(object); thread_block((void(*)(void))0); vm_object_lock(object); continue; } if ((dst_page->fictitious && dst_page->list_req_pending)) { /* dump the fictitious page */ dst_page->list_req_pending = FALSE; dst_page->clustered = FALSE; vm_page_lock_queues(); vm_page_free(dst_page); vm_page_unlock_queues(); } else if ((dst_page->absent && dst_page->list_req_pending)) { /* the default_pager case */ dst_page->list_req_pending = FALSE; dst_page->busy = FALSE; dst_page->clustered = FALSE; } } if((dst_page = vm_page_lookup( object, dst_offset)) == VM_PAGE_NULL) { /* need to allocate a page */ dst_page = vm_page_alloc(object, dst_offset); if (dst_page == VM_PAGE_NULL) { vm_object_unlock(object); VM_PAGE_WAIT(); vm_object_lock(object); continue; } dst_page->busy = FALSE; #if 0 if(cntrl_flags & UPL_NO_SYNC) { dst_page->page_lock = 0; dst_page->unlock_request = 0; } #endif dst_page->absent = TRUE; object->absent_count++; } #if 1 if(cntrl_flags & UPL_NO_SYNC) { dst_page->page_lock = 0; dst_page->unlock_request = 0; } #endif /* 1 */ dst_page->overwriting = TRUE; if(dst_page->fictitious) { panic("need corner case for fictitious page"); } if(dst_page->page_lock) { do_m_lock = TRUE; } if(upl_ptr) { /* eliminate all mappings from the */ /* original object and its prodigy */ if(dst_page->busy) { /*someone else is playing with the */ /* page. We will have to wait. */ PAGE_ASSERT_WAIT( dst_page, THREAD_UNINT); vm_object_unlock(object); thread_block((void(*)(void))0); vm_object_lock(object); continue; } vm_page_lock_queues(); pmap_page_protect(dst_page->phys_addr, VM_PROT_NONE); dirty = pmap_is_modified(dst_page->phys_addr); dirty = dirty ? TRUE : dst_page->dirty; vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); if(cntrl_flags & UPL_CLEAN_IN_PLACE) { /* clean in place for read implies */ /* that a write will be done on all */ /* the pages that are dirty before */ /* a upl commit is done. The caller */ /* is obligated to preserve the */ /* contents of all pages marked */ /* dirty. */ upl->flags |= UPL_CLEAR_DIRTY; } if(!dirty) { dst_page->dirty = FALSE; dst_page->precious = TRUE; } if (dst_page->wire_count == 0) { /* deny access to the target page while */ /* it is being worked on */ dst_page->busy = TRUE; } else { vm_page_wire(dst_page); } /* expect the page to be used */ dst_page->reference = TRUE; dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE; alias_page->absent = FALSE; alias_page = NULL; if(user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_addr; user_page_list[entry].dirty = dst_page->dirty; user_page_list[entry].pageout = dst_page->pageout; user_page_list[entry].absent = dst_page->absent; user_page_list[entry].precious = dst_page->precious; } vm_page_unlock_queues(); } entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; } } if(alias_page != NULL) { vm_page_lock_queues(); vm_page_free(alias_page); vm_page_unlock_queues(); } if(do_m_lock) { vm_prot_t access_required; /* call back all associated pages from other users of the pager */ /* all future updates will be on data which is based on the */ /* changes we are going to make here. Note: it is assumed that */ /* we already hold copies of the data so we will not be seeing */ /* an avalanche of incoming data from the pager */ access_required = (cntrl_flags & UPL_COPYOUT_FROM) ? VM_PROT_READ : VM_PROT_WRITE; while (TRUE) { kern_return_t rc; thread_t thread; if(!object->pager_ready) { thread = current_thread(); vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); vm_object_unlock(object); thread_block((void (*)(void))0); if (thread->wait_result != THREAD_AWAKENED) { return(KERN_FAILURE); } vm_object_lock(object); continue; } vm_object_unlock(object); if (rc = memory_object_data_unlock( object->pager, object->pager_request, dst_offset + object->paging_offset, size, access_required)) { if (rc == MACH_SEND_INTERRUPTED) continue; else return KERN_FAILURE; } break; } /* lets wait on the last page requested */ /* NOTE: we will have to update lock completed routine to signal */ if(dst_page != VM_PAGE_NULL && (access_required & dst_page->page_lock) != access_required) { PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT); thread_block((void (*)(void))0); vm_object_lock(object); } } vm_object_unlock(object); return KERN_SUCCESS; } kern_return_t upl_system_list_request( vm_object_t object, vm_object_offset_t offset, vm_size_t size, vm_size_t super_cluster, upl_t *upl, upl_page_info_t **user_page_list_ptr, int page_list_count, int cntrl_flags) { if(object->paging_offset > offset) return KERN_FAILURE; offset = offset - object->paging_offset; /* turns off super cluster exercised by the default_pager */ /* super_cluster = size; */ if ((super_cluster > size) && (vm_page_free_count > vm_page_free_reserved)) { vm_object_offset_t base_offset; vm_size_t super_size; base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); super_size = (offset+size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size; if(offset > (base_offset + super_size)) panic("upl_system_list_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset); /* apparently there is a case where the vm requests a */ /* page to be written out who's offset is beyond the */ /* object size */ if((offset + size) > (base_offset + super_size)) super_size = (offset + size) - base_offset; offset = base_offset; size = super_size; } vm_fault_list_request(object, offset, size, upl, user_page_list_ptr, page_list_count, cntrl_flags); } kern_return_t uc_upl_map( vm_map_t map, upl_t upl, vm_offset_t *dst_addr) { vm_size_t size; vm_object_offset_t offset; vm_offset_t addr; vm_page_t m; kern_return_t kr; /* check to see if already mapped */ if(UPL_PAGE_LIST_MAPPED & upl->flags) return KERN_FAILURE; offset = 0; /* Always map the entire object */ size = upl->size; vm_object_lock(upl->map_object); upl->map_object->ref_count++; vm_object_res_reference(upl->map_object); vm_object_unlock(upl->map_object); *dst_addr = 0; /* NEED A UPL_MAP ALIAS */ kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE, upl->map_object, offset, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) return(kr); for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) { m = vm_page_lookup(upl->map_object, offset); if(m) { PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, TRUE); } offset+=PAGE_SIZE_64; } upl->flags |= UPL_PAGE_LIST_MAPPED; upl->kaddr = *dst_addr; return KERN_SUCCESS; } kern_return_t uc_upl_un_map( vm_map_t map, upl_t upl) { vm_size_t size; if(upl->flags & UPL_PAGE_LIST_MAPPED) { size = upl->size; vm_deallocate(map, upl->kaddr, size); upl->flags &= ~UPL_PAGE_LIST_MAPPED; upl->kaddr = (vm_offset_t) 0; return KERN_SUCCESS; } else { return KERN_FAILURE; } } kern_return_t uc_upl_commit_range( upl_t upl, vm_offset_t offset, vm_size_t size, int flags, upl_page_info_t *page_list) { vm_size_t xfer_size = size; vm_object_t shadow_object = upl->map_object->shadow; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; vm_object_offset_t page_offset; int entry; if(upl->flags & UPL_DEVICE_MEMORY) { xfer_size = 0; } else if ((offset + size) > upl->size) { return KERN_FAILURE; } vm_object_lock(shadow_object); entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; while(xfer_size) { vm_page_t t,m; upl_page_info_t *p; if((t = vm_page_lookup(object, target_offset)) != NULL) { t->pageout = FALSE; page_offset = t->offset; VM_PAGE_FREE(t); t = VM_PAGE_NULL; m = vm_page_lookup(shadow_object, page_offset + object->shadow_offset); if(m != VM_PAGE_NULL) { vm_object_paging_end(shadow_object); vm_page_lock_queues(); if ((upl->flags & UPL_CLEAR_DIRTY) || (flags & UPL_COMMIT_CLEAR_DIRTY)) { pmap_clear_modify(m->phys_addr); m->dirty = FALSE; } if(page_list) { p = &(page_list[entry]); if(p->phys_addr && p->pageout && !m->pageout) { m->busy = TRUE; m->pageout = TRUE; vm_page_wire(m); } else if (page_list[entry].phys_addr && !p->pageout && m->pageout) { m->pageout = FALSE; m->absent = FALSE; m->overwriting = FALSE; vm_page_unwire(m); PAGE_WAKEUP_DONE(m); } page_list[entry].phys_addr = 0; } if(m->laundry) { vm_page_laundry_count--; m->laundry = FALSE; if (vm_page_laundry_count < vm_page_laundry_min) { vm_page_laundry_min = 0; thread_wakeup((event_t) &vm_page_laundry_count); } } if(m->pageout) { m->cleaning = FALSE; m->pageout = FALSE; #if MACH_CLUSTER_STATS if (m->wanted) vm_pageout_target_collisions++; #endif pmap_page_protect(m->phys_addr, VM_PROT_NONE); m->dirty = pmap_is_modified(m->phys_addr); if(m->dirty) { CLUSTER_STAT( vm_pageout_target_page_dirtied++;) vm_page_unwire(m);/* reactivates */ VM_STAT(reactivations++); PAGE_WAKEUP_DONE(m); } else if (m->prep_pin_count != 0) { vm_page_pin_lock(); if (m->pin_count != 0) { /* page is pinned; reactivate */ CLUSTER_STAT( vm_pageout_target_page_pinned++;) vm_page_unwire(m);/* reactivates */ VM_STAT(reactivations++); PAGE_WAKEUP_DONE(m); } else { /* * page is prepped but not pinned; * send it into limbo. Note that * vm_page_free (which will be * called after releasing the pin * lock) knows how to handle a page * with limbo set. */ m->limbo = TRUE; CLUSTER_STAT( vm_pageout_target_page_limbo++;) } vm_page_pin_unlock(); if (m->limbo) vm_page_free(m); } else { CLUSTER_STAT( vm_pageout_target_page_freed++;) vm_page_free(m);/* clears busy, etc. */ } vm_page_unlock_queues(); target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; continue; } if (flags & UPL_COMMIT_INACTIVATE) { vm_page_deactivate(m); m->reference = FALSE; pmap_clear_reference(m->phys_addr); } else if (!m->active && !m->inactive) { if (m->reference || m->prep_pin_count != 0) vm_page_activate(m); else vm_page_deactivate(m); } #if MACH_CLUSTER_STATS m->dirty = pmap_is_modified(m->phys_addr); if (m->dirty) vm_pageout_cluster_dirtied++; else vm_pageout_cluster_cleaned++; if (m->wanted) vm_pageout_cluster_collisions++; #else m->dirty = 0; #endif if((m->busy) && (m->cleaning)) { /* the request_page_list case */ if(m->absent) { m->absent = FALSE; if(shadow_object->absent_count == 1) vm_object_absent_release(shadow_object); else shadow_object->absent_count--; } m->overwriting = FALSE; m->busy = FALSE; m->dirty = FALSE; } else if (m->overwriting) { /* alternate request page list, write to /* page_list case. Occurs when the original /* page was wired at the time of the list /* request */ assert(m->wire_count != 0); vm_page_unwire(m);/* reactivates */ m->overwriting = FALSE; } m->cleaning = FALSE; /* It is a part of the semantic of COPYOUT_FROM */ /* UPLs that a commit implies cache sync */ /* between the vm page and the backing store */ /* this can be used to strip the precious bit */ /* as well as clean */ if (upl->flags & UPL_PAGE_SYNC_DONE) m->precious = FALSE; if (flags & UPL_COMMIT_SET_DIRTY) { m->dirty = TRUE; } /* * Wakeup any thread waiting for the page to be un-cleaning. */ PAGE_WAKEUP(m); vm_page_unlock_queues(); } } target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; } vm_object_unlock(shadow_object); if(flags & UPL_COMMIT_FREE_ON_EMPTY) { if((upl->flags & UPL_DEVICE_MEMORY) || (queue_empty(&upl->map_object->memq))) { upl_dealloc(upl); } } return KERN_SUCCESS; } uc_upl_abort_range( upl_t upl, vm_offset_t offset, vm_size_t size, int error) { vm_size_t xfer_size = size; vm_object_t shadow_object = upl->map_object->shadow; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; vm_object_offset_t page_offset; int entry; if(upl->flags & UPL_DEVICE_MEMORY) { xfer_size = 0; } else if ((offset + size) > upl->size) { return KERN_FAILURE; } vm_object_lock(shadow_object); entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; while(xfer_size) { vm_page_t t,m; upl_page_info_t *p; if((t = vm_page_lookup(object, target_offset)) != NULL) { t->pageout = FALSE; page_offset = t->offset; VM_PAGE_FREE(t); t = VM_PAGE_NULL; m = vm_page_lookup(shadow_object, page_offset + object->shadow_offset); if(m != VM_PAGE_NULL) { vm_object_paging_end(m->object); vm_page_lock_queues(); if(m->absent) { /* COPYOUT = FALSE case */ /* check for error conditions which must */ /* be passed back to the pages customer */ if(error & UPL_ABORT_RESTART) { m->restart = TRUE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; } else if(error & UPL_ABORT_UNAVAILABLE) { m->restart = FALSE; m->unusual = TRUE; m->clustered = FALSE; } else if(error & UPL_ABORT_ERROR) { m->restart = FALSE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; } else if(error & UPL_ABORT_DUMP_PAGES) { m->clustered = TRUE; } else { m->clustered = TRUE; } m->cleaning = FALSE; m->overwriting = FALSE; PAGE_WAKEUP_DONE(m); if(m->clustered) { vm_page_free(m); } else { vm_page_activate(m); } vm_page_unlock_queues(); target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; continue; } /* * Handle the trusted pager throttle. */ if (m->laundry) { vm_page_laundry_count--; m->laundry = FALSE; if (vm_page_laundry_count < vm_page_laundry_min) { vm_page_laundry_min = 0; thread_wakeup((event_t) &vm_page_laundry_count); } } if(m->pageout) { assert(m->busy); assert(m->wire_count == 1); m->pageout = FALSE; vm_page_unwire(m); } m->cleaning = FALSE; m->busy = FALSE; m->overwriting = FALSE; #if MACH_PAGEMAP vm_external_state_clr( m->object->existence_map, m->offset); #endif /* MACH_PAGEMAP */ if(error & UPL_ABORT_DUMP_PAGES) { vm_page_free(m); pmap_page_protect(m->phys_addr, VM_PROT_NONE); } else { PAGE_WAKEUP(m); } vm_page_unlock_queues(); } } target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; } vm_object_unlock(shadow_object); if(error & UPL_ABORT_FREE_ON_EMPTY) { if((upl->flags & UPL_DEVICE_MEMORY) || (queue_empty(&upl->map_object->memq))) { upl_dealloc(upl); } } return KERN_SUCCESS; } kern_return_t uc_upl_abort( upl_t upl, int error) { vm_object_t object = NULL; vm_object_t shadow_object = NULL; vm_object_offset_t offset; vm_object_offset_t shadow_offset; vm_object_offset_t target_offset; int i; vm_page_t t,m; if(upl->flags & UPL_DEVICE_MEMORY) { upl_dealloc(upl); return KERN_SUCCESS; } object = upl->map_object; if(object == NULL) { panic("upl_abort: upl object is not backed by an object"); return KERN_INVALID_ARGUMENT; } shadow_object = upl->map_object->shadow; shadow_offset = upl->map_object->shadow_offset; offset = 0; vm_object_lock(shadow_object); for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) { if((t = vm_page_lookup(object,offset)) != NULL) { target_offset = t->offset + shadow_offset; if((m = vm_page_lookup(shadow_object, target_offset)) != NULL) { vm_object_paging_end(m->object); vm_page_lock_queues(); if(m->absent) { /* COPYOUT = FALSE case */ /* check for error conditions which must */ /* be passed back to the pages customer */ if(error & UPL_ABORT_RESTART) { m->restart = TRUE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; } else if(error & UPL_ABORT_UNAVAILABLE) { m->restart = FALSE; m->unusual = TRUE; m->clustered = FALSE; } else if(error & UPL_ABORT_ERROR) { m->restart = FALSE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; } else if(error & UPL_ABORT_DUMP_PAGES) { m->clustered = TRUE; } else { m->clustered = TRUE; } m->cleaning = FALSE; m->overwriting = FALSE; PAGE_WAKEUP_DONE(m); if(m->clustered) { vm_page_free(m); } else { vm_page_activate(m); } vm_page_unlock_queues(); continue; } /* * Handle the trusted pager throttle. */ if (m->laundry) { vm_page_laundry_count--; m->laundry = FALSE; if (vm_page_laundry_count < vm_page_laundry_min) { vm_page_laundry_min = 0; thread_wakeup((event_t) &vm_page_laundry_count); } } if(m->pageout) { assert(m->busy); assert(m->wire_count == 1); m->pageout = FALSE; vm_page_unwire(m); } m->cleaning = FALSE; m->busy = FALSE; m->overwriting = FALSE; #if MACH_PAGEMAP vm_external_state_clr( m->object->existence_map, m->offset); #endif /* MACH_PAGEMAP */ if(error & UPL_ABORT_DUMP_PAGES) { vm_page_free(m); pmap_page_protect(m->phys_addr, VM_PROT_NONE); } else { PAGE_WAKEUP(m); } vm_page_unlock_queues(); } } } vm_object_unlock(shadow_object); /* Remove all the pages from the map object so */ /* vm_pageout_object_terminate will work properly. */ while (!queue_empty(&upl->map_object->memq)) { vm_page_t p; p = (vm_page_t) queue_first(&upl->map_object->memq); assert(p->private); assert(p->pageout); p->pageout = FALSE; assert(!p->cleaning); VM_PAGE_FREE(p); } upl_dealloc(upl); return KERN_SUCCESS; } /* an option on commit should be wire */ kern_return_t uc_upl_commit( upl_t upl, upl_page_info_t *page_list) { if (upl->flags & UPL_DEVICE_MEMORY) page_list = NULL; if ((upl->flags & UPL_CLEAR_DIRTY) || (upl->flags & UPL_PAGE_SYNC_DONE)) { vm_object_t shadow_object = upl->map_object->shadow; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; vm_size_t xfer_end; vm_page_t t,m; vm_object_lock(shadow_object); target_offset = object->shadow_offset; xfer_end = upl->size + object->shadow_offset; while(target_offset < xfer_end) { if ((t = vm_page_lookup(object, target_offset - object->shadow_offset)) != NULL) { m = vm_page_lookup( shadow_object, target_offset); if(m != VM_PAGE_NULL) { if (upl->flags & UPL_CLEAR_DIRTY) { pmap_clear_modify(m->phys_addr); m->dirty = FALSE; } /* It is a part of the semantic of */ /* COPYOUT_FROM UPLs that a commit */ /* implies cache sync between the */ /* vm page and the backing store */ /* this can be used to strip the */ /* precious bit as well as clean */ if (upl->flags & UPL_PAGE_SYNC_DONE) m->precious = FALSE; } } target_offset += PAGE_SIZE_64; } vm_object_unlock(shadow_object); } if (page_list) { vm_object_t shadow_object = upl->map_object->shadow; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; vm_size_t xfer_end; int entry; vm_page_t t, m; upl_page_info_t *p; vm_object_lock(shadow_object); entry = 0; target_offset = object->shadow_offset; xfer_end = upl->size + object->shadow_offset; while(target_offset < xfer_end) { if ((t = vm_page_lookup(object, target_offset - object->shadow_offset)) == NULL) { target_offset += PAGE_SIZE_64; entry++; continue; } m = vm_page_lookup(shadow_object, target_offset); if(m != VM_PAGE_NULL) { p = &(page_list[entry]); if(page_list[entry].phys_addr && p->pageout && !m->pageout) { vm_page_lock_queues(); m->busy = TRUE; m->pageout = TRUE; vm_page_wire(m); vm_page_unlock_queues(); } else if (page_list[entry].phys_addr && !p->pageout && m->pageout) { vm_page_lock_queues(); m->pageout = FALSE; m->absent = FALSE; m->overwriting = FALSE; vm_page_unwire(m); PAGE_WAKEUP_DONE(m); vm_page_unlock_queues(); } page_list[entry].phys_addr = 0; } target_offset += PAGE_SIZE_64; entry++; } vm_object_unlock(shadow_object); } upl_dealloc(upl); return KERN_SUCCESS; } upl_t upl_create( boolean_t internal) { upl_t upl; if(internal) { upl = (upl_t)kalloc(sizeof(struct upl) + (sizeof(struct upl_page_info)*MAX_UPL_TRANSFER)); } else { upl = (upl_t)kalloc(sizeof(struct upl)); } upl->flags = 0; upl->src_object = NULL; upl->kaddr = (vm_offset_t)0; upl->size = 0; upl->map_object = NULL; upl->ref_count = 1; upl_lock_init(upl); #ifdef UBC_DEBUG upl->ubc_alias1 = 0; upl->ubc_alias2 = 0; #endif /* UBC_DEBUG */ return(upl); } void upl_destroy( upl_t upl) { #ifdef UBC_DEBUG { upl_t upl_ele; vm_object_lock(upl->map_object->shadow); queue_iterate(&upl->map_object->shadow->uplq, upl_ele, upl_t, uplq) { if(upl_ele == upl) { queue_remove(&upl->map_object->shadow->uplq, upl_ele, upl_t, uplq); break; } } vm_object_unlock(upl->map_object->shadow); } #endif /* UBC_DEBUG */ if(!(upl->flags & UPL_DEVICE_MEMORY)) vm_object_deallocate(upl->map_object); if(upl->flags & UPL_INTERNAL) { kfree((vm_offset_t)upl, sizeof(struct upl) + (sizeof(struct upl_page_info) * MAX_UPL_TRANSFER)); } else { kfree((vm_offset_t)upl, sizeof(struct upl)); } } vm_size_t upl_get_internal_pagelist_offset() { return sizeof(struct upl); } void upl_set_dirty( upl_t upl) { upl->flags |= UPL_CLEAR_DIRTY; } void upl_clear_dirty( upl_t upl) { upl->flags &= ~UPL_CLEAR_DIRTY; } #ifdef MACH_BSD boolean_t upl_page_present(upl_page_info_t *upl, int index); boolean_t upl_dirty_page(upl_page_info_t *upl, int index); boolean_t upl_valid_page(upl_page_info_t *upl, int index); vm_offset_t upl_phys_page(upl_page_info_t *upl, int index); boolean_t upl_page_present(upl_page_info_t *upl, int index) { return(UPL_PAGE_PRESENT(upl, index)); } boolean_t upl_dirty_page(upl_page_info_t *upl, int index) { return(UPL_DIRTY_PAGE(upl, index)); } boolean_t upl_valid_page(upl_page_info_t *upl, int index) { return(UPL_VALID_PAGE(upl, index)); } vm_offset_t upl_phys_page(upl_page_info_t *upl, int index) { return((vm_offset_t)UPL_PHYS_PAGE(upl, index)); } void vm_countdirtypages(void) { vm_page_t m; int dpages; int pgopages; int precpages; dpages=0; pgopages=0; precpages=0; vm_page_lock_queues(); m = (vm_page_t) queue_first(&vm_page_queue_inactive); do { if (m ==(vm_page_t )0) break; if(m->dirty) dpages++; if(m->pageout) pgopages++; if(m->precious) precpages++; m = (vm_page_t) queue_next(&m->pageq); if (m ==(vm_page_t )0) break; } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m)); vm_page_unlock_queues(); printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages); dpages=0; pgopages=0; precpages=0; vm_page_lock_queues(); m = (vm_page_t) queue_first(&vm_page_queue_active); do { if(m == (vm_page_t )0) break; if(m->dirty) dpages++; if(m->pageout) pgopages++; if(m->precious) precpages++; m = (vm_page_t) queue_next(&m->pageq); if(m == (vm_page_t )0) break; } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m)); vm_page_unlock_queues(); printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages); } #endif /* MACH_BSD */ #ifdef UBC_DEBUG kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2) { upl->ubc_alias1 = alias1; upl->ubc_alias2 = alias2; return KERN_SUCCESS; } int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2) { if(al) *al = upl->ubc_alias1; if(al2) *al2 = upl->ubc_alias2; return KERN_SUCCESS; } #endif /* UBC_DEBUG */ #if MACH_KDB #include #include #include #define printf kdbprintf extern int db_indent; void db_pageout(void); void db_vm(void) { extern int vm_page_gobble_count; extern int vm_page_limbo_count, vm_page_limbo_real_count; extern int vm_page_pin_count; iprintf("VM Statistics:\n"); db_indent += 2; iprintf("pages:\n"); db_indent += 2; iprintf("activ %5d inact %5d free %5d", vm_page_active_count, vm_page_inactive_count, vm_page_free_count); printf(" wire %5d gobbl %5d\n", vm_page_wire_count, vm_page_gobble_count); iprintf("laund %5d limbo %5d lim_r %5d pin %5d\n", vm_page_laundry_count, vm_page_limbo_count, vm_page_limbo_real_count, vm_page_pin_count); db_indent -= 2; iprintf("target:\n"); db_indent += 2; iprintf("min %5d inact %5d free %5d", vm_page_free_min, vm_page_inactive_target, vm_page_free_target); printf(" resrv %5d\n", vm_page_free_reserved); db_indent -= 2; iprintf("burst:\n"); db_indent += 2; iprintf("max %5d min %5d wait %5d empty %5d\n", vm_pageout_burst_max, vm_pageout_burst_min, vm_pageout_burst_wait, vm_pageout_empty_wait); db_indent -= 2; iprintf("pause:\n"); db_indent += 2; iprintf("count %5d max %5d\n", vm_pageout_pause_count, vm_pageout_pause_max); #if MACH_COUNTERS iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue); #endif /* MACH_COUNTERS */ db_indent -= 2; db_pageout(); db_indent -= 2; } void db_pageout(void) { extern int c_limbo_page_free; extern int c_limbo_convert; #if MACH_COUNTERS extern int c_laundry_pages_freed; #endif /* MACH_COUNTERS */ iprintf("Pageout Statistics:\n"); db_indent += 2; iprintf("active %5d inactv %5d\n", vm_pageout_active, vm_pageout_inactive); iprintf("nolock %5d avoid %5d busy %5d absent %5d\n", vm_pageout_inactive_nolock, vm_pageout_inactive_avoid, vm_pageout_inactive_busy, vm_pageout_inactive_absent); iprintf("used %5d clean %5d dirty %5d\n", vm_pageout_inactive_used, vm_pageout_inactive_clean, vm_pageout_inactive_dirty); iprintf("pinned %5d limbo %5d setup_limbo %5d setup_unprep %5d\n", vm_pageout_inactive_pinned, vm_pageout_inactive_limbo, vm_pageout_setup_limbo, vm_pageout_setup_unprepped); iprintf("limbo_page_free %5d limbo_convert %5d\n", c_limbo_page_free, c_limbo_convert); #if MACH_COUNTERS iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed); #endif /* MACH_COUNTERS */ #if MACH_CLUSTER_STATS iprintf("Cluster Statistics:\n"); db_indent += 2; iprintf("dirtied %5d cleaned %5d collisions %5d\n", vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned, vm_pageout_cluster_collisions); iprintf("clusters %5d conversions %5d\n", vm_pageout_cluster_clusters, vm_pageout_cluster_conversions); db_indent -= 2; iprintf("Target Statistics:\n"); db_indent += 2; iprintf("collisions %5d page_dirtied %5d page_freed %5d\n", vm_pageout_target_collisions, vm_pageout_target_page_dirtied, vm_pageout_target_page_freed); iprintf("page_pinned %5d page_limbo %5d\n", vm_pageout_target_page_pinned, vm_pageout_target_page_limbo); db_indent -= 2; #endif /* MACH_CLUSTER_STATS */ db_indent -= 2; } #if MACH_CLUSTER_STATS unsigned long vm_pageout_cluster_dirtied = 0; unsigned long vm_pageout_cluster_cleaned = 0; unsigned long vm_pageout_cluster_collisions = 0; unsigned long vm_pageout_cluster_clusters = 0; unsigned long vm_pageout_cluster_conversions = 0; unsigned long vm_pageout_target_collisions = 0; unsigned long vm_pageout_target_page_dirtied = 0; unsigned long vm_pageout_target_page_freed = 0; unsigned long vm_pageout_target_page_pinned = 0; unsigned long vm_pageout_target_page_limbo = 0; #define CLUSTER_STAT(clause) clause #else /* MACH_CLUSTER_STATS */ #define CLUSTER_STAT(clause) #endif /* MACH_CLUSTER_STATS */ #endif /* MACH_KDB */