/* * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* * File: kern/task_swap.c * * Task residency management primitives implementation. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* We use something from in here */ /* * Note: if TASK_SWAPPER is disabled, then this file defines only * a stub version of task_swappable(), so that the service can always * be defined, even if swapping has been configured out of the kernel. */ #if TASK_SWAPPER /* temporary debug flags */ #define TASK_SW_DEBUG 1 #define TASK_SW_STATS 1 int task_swap_debug = 0; int task_swap_stats = 0; int task_swap_enable = 1; int task_swap_on = 1; queue_head_t swapped_tasks; /* completely swapped out tasks */ queue_head_t swapout_thread_q; /* threads to be swapped out */ mutex_t task_swapper_lock; /* protects above queue */ #define task_swapper_lock() mutex_lock(&task_swapper_lock) #define task_swapper_unlock() mutex_unlock(&task_swapper_lock) #define task_swapper_wakeup() thread_wakeup((event_t)&swapout_thread_q) #define task_swapper_sleep() thread_sleep_mutex((event_t)&swapout_thread_q, \ &task_swapper_lock, \ THREAD_UNINT) queue_head_t eligible_tasks; /* tasks eligible for swapout */ mutex_t task_swapout_list_lock; /* protects above queue */ #define task_swapout_lock() mutex_lock(&task_swapout_list_lock) #define task_swapout_unlock() mutex_unlock(&task_swapout_list_lock) /* * The next section of constants and globals are tunable parameters * used in making swapping decisions. They may be changed dynamically * without adversely affecting the robustness of the system; however, * the policy will change, one way or the other. */ #define SHORT_AVG_INTERVAL 5 /* in seconds */ #define LONG_AVG_INTERVAL 30 /* in seconds */ #define AVE_SCALE 1024 unsigned int short_avg_interval = SHORT_AVG_INTERVAL; unsigned int long_avg_interval = LONG_AVG_INTERVAL; #ifndef MIN_SWAP_PAGEOUT_RATE #define MIN_SWAP_PAGEOUT_RATE 10 #endif /* * The following are all stored in fixed-point representation (the actual * value times AVE_SCALE), to allow more accurate computing of decaying * averages. So all variables that end with "avg" must be divided by * AVE_SCALE to convert them or compare them to ints. */ unsigned int vm_grab_rate_avg; unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE; unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE; unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE; unsigned int vm_page_free_avg; /* average free pages over short_avg_interval */ unsigned int vm_page_free_longavg; /* avg free pages over long_avg_interval */ /* * Trigger task swapping when paging activity reaches * SWAP_HIGH_WATER_MARK per cent of the maximum paging activity ever observed. * Turn off task swapping when paging activity goes back down to below * SWAP_PAGEOUT_LOW_WATER_MARK per cent of the maximum. * These numbers have been found empirically and might need some tuning... */ #ifndef SWAP_PAGEOUT_HIGH_WATER_MARK #define SWAP_PAGEOUT_HIGH_WATER_MARK 30 #endif #ifndef SWAP_PAGEOUT_LOW_WATER_MARK #define SWAP_PAGEOUT_LOW_WATER_MARK 10 #endif #ifndef MAX_GRAB_RATE #define MAX_GRAB_RATE ((unsigned int) -1) /* XXX no maximum */ #endif /* * swap_{start,stop}_pageout_rate start at the minimum value, then increase * to adjust to the hardware's performance, following the paging rate peaks. */ unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK; unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK; unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE * SWAP_PAGEOUT_HIGH_WATER_MARK / 100; unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE * SWAP_PAGEOUT_LOW_WATER_MARK / 100; #if TASK_SW_DEBUG unsigned int fixed_swap_start_pageout_rate = 0; /* only for testing purpose */ unsigned int fixed_swap_stop_pageout_rate = 0; /* only for testing purpose */ #endif /* TASK_SW_DEBUG */ unsigned int max_grab_rate = MAX_GRAB_RATE; #ifndef MIN_SWAP_TIME #define MIN_SWAP_TIME 1 #endif int min_swap_time = MIN_SWAP_TIME; /* in seconds */ #ifndef MIN_RES_TIME #define MIN_RES_TIME 6 #endif int min_res_time = MIN_RES_TIME; /* in seconds */ #ifndef MIN_ACTIVE_TASKS #define MIN_ACTIVE_TASKS 4 #endif int min_active_tasks = MIN_ACTIVE_TASKS; #ifndef TASK_SWAP_CYCLE_TIME #define TASK_SWAP_CYCLE_TIME 2 #endif int task_swap_cycle_time = TASK_SWAP_CYCLE_TIME; /* in seconds */ int last_task_swap_cycle = 0; /* temporary statistics */ int task_swapouts = 0; int task_swapins = 0; int task_swaprss_out = 0; /* total rss at swapout time */ int task_swaprss_in = 0; /* total rss at swapin time */ int task_swap_total_time = 0; /* total time spent swapped out */ int tasks_swapped_out = 0; /* number of tasks swapped out now */ #ifdef TASK_SW_STATS #define TASK_STATS_INCR(cnt) (cnt)++ #else #define TASK_STATS_INCR(cnt) #endif /* TASK_SW_STATS */ #if TASK_SW_DEBUG boolean_t on_swapped_list(task_t task); /* forward */ /* * Debug function to determine if a task is already on the * swapped out tasks list. It also checks for tasks on the list * that are in an illegal state (i.e. swapped in). */ boolean_t on_swapped_list(task_t task) { task_t ltask; /* task_swapper_lock is locked. */ if (queue_empty(&swapped_tasks)) { return(FALSE); } ltask = (task_t)queue_first(&swapped_tasks); while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) { /* check for illegal state */ if (ltask->swap_state == TASK_SW_IN) { printf("on_swapped_list and in: 0x%X\n",ltask); Debugger(""); } if (ltask == task) return(TRUE); ltask = (task_t)queue_next(<ask->swapped_tasks); } return(FALSE); } #endif /* TASK_SW_DEBUG */ /* * task_swapper_init: [exported] */ void task_swapper_init() { queue_init(&swapped_tasks); queue_init(&eligible_tasks); queue_init(&swapout_thread_q); mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP); mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT); vm_page_free_avg = vm_page_free_count * AVE_SCALE; vm_page_free_longavg = vm_page_free_count * AVE_SCALE; } #endif /* TASK_SWAPPER */ /* * task_swappable: [exported] * * Make a task swappable or non-swappable. If made non-swappable, * it will be swapped in. * * Locking: task_swapout_lock is taken before task lock. */ kern_return_t task_swappable( host_priv_t host_priv, task_t task, boolean_t make_swappable) { if (host_priv == HOST_PRIV_NULL) return(KERN_INVALID_ARGUMENT); if (task == TASK_NULL) return(KERN_INVALID_ARGUMENT); #if !TASK_SWAPPER /* * If we don't support swapping, this call is purely advisory. */ return(KERN_SUCCESS); #else /* TASK_SWAPPER */ task_lock(task); if (make_swappable) { /* make task swappable */ if (task->swap_state == TASK_SW_UNSWAPPABLE) { task->swap_state = TASK_SW_IN; task_unlock(task); task_swapout_eligible(task); } } else { switch (task->swap_state) { case TASK_SW_IN: task->swap_state = TASK_SW_UNSWAPPABLE; task_unlock(task); task_swapout_ineligible(task); break; case TASK_SW_UNSWAPPABLE: task_unlock(task); break; default: /* * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT, * or TASK_SW_COMING_IN. task_swapin handles all * three, and its default case will catch any bad * states. */ task_unlock(task); task_swapin(task, TRUE); break; } } return(KERN_SUCCESS); #endif /* TASK_SWAPPER */ } #if TASK_SWAPPER /* * task_swapout: * A reference to the task must be held. * * Start swapping out a task by sending an AST_SWAPOUT to each thread. * When the threads reach a clean point, they queue themselves up on the * swapout_thread_q to be swapped out by the task_swap_swapout_thread. * The task can be swapped in at any point in this process. * * A task will not be fully swapped out (i.e. its map residence count * at zero) until all currently-swapped threads run and reach * a clean point, at which time they will be swapped again, * decrementing the swap_ast_waiting count on the task. * * Locking: no locks held upon entry and exit. * Task_lock is held throughout this function. */ kern_return_t task_swapout(task_t task) { thread_act_t thr_act; thread_t thread; queue_head_t *list; int s; task_swapout_lock(); task_lock(task); /* * NOTE: look into turning these into assertions if they * are invariants. */ if ((task->swap_state != TASK_SW_IN) || (!task->active)) { task_unlock(task); task_swapout_unlock(); return(KERN_FAILURE); } if (task->swap_flags & TASK_SW_ELIGIBLE) { queue_remove(&eligible_tasks, task, task_t, swapped_tasks); task->swap_flags &= ~TASK_SW_ELIGIBLE; } task_swapout_unlock(); /* set state to avoid races with task_swappable(FALSE) */ task->swap_state = TASK_SW_GOING_OUT; task->swap_rss = pmap_resident_count(task->map->pmap); task_swaprss_out += task->swap_rss; task->swap_ast_waiting = task->thr_act_count; /* * halt all threads in this task: * We don't need the thread list lock for traversal. */ list = &task->thr_acts; thr_act = (thread_act_t) queue_first(list); while (!queue_end(list, (queue_entry_t) thr_act)) { boolean_t swappable; thread_act_t ract; thread = act_lock_thread(thr_act); s = splsched(); if (!thread) swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE); else { thread_lock(thread); swappable = TRUE; for (ract = thread->top_act; ract; ract = ract->lower) if (ract->swap_state == TH_SW_UNSWAPPABLE) { swappable = FALSE; break; } } if (swappable) thread_ast_set(thr_act, AST_SWAPOUT); if (thread) thread_unlock(thread); splx(s); assert((thr_act->ast & AST_TERMINATE) == 0); act_unlock_thread(thr_act); thr_act = (thread_act_t) queue_next(&thr_act->thr_acts); } task->swap_stamp = sched_tick; task->swap_nswap++; assert((task->swap_flags&TASK_SW_WANT_IN) == 0); /* put task on the queue of swapped out tasks */ task_swapper_lock(); #if TASK_SW_DEBUG if (task_swap_debug && on_swapped_list(task)) { printf("task 0x%X already on list\n", task); Debugger(""); } #endif /* TASK_SW_DEBUG */ queue_enter(&swapped_tasks, task, task_t, swapped_tasks); tasks_swapped_out++; task_swapouts++; task_swapper_unlock(); task_unlock(task); return(KERN_SUCCESS); } #ifdef TASK_SW_STATS int task_sw_race_in = 0; int task_sw_race_coming_in = 0; int task_sw_race_going_out = 0; int task_sw_before_ast = 0; int task_sw_before_swap = 0; int task_sw_after_swap = 0; int task_sw_race_in_won = 0; int task_sw_unswappable = 0; int task_sw_act_inactive = 0; #endif /* TASK_SW_STATS */ /* * thread_swapout_enqueue is called by thread_halt_self when it * processes AST_SWAPOUT to enqueue threads to be swapped out. * It must be called at normal interrupt priority for the * sake of the task_swapper_lock. * * There can be races with task swapin here. * First lock task and decrement swap_ast_waiting count, and if * it's 0, we can decrement the residence count on the task's map * and set the task's swap state to TASK_SW_OUT. */ void thread_swapout_enqueue(thread_act_t thr_act) { task_t task = thr_act->task; task_lock(task); /* * If the swap_state is not TASK_SW_GOING_OUT, then * task_swapin has beaten us to this operation, and * we have nothing to do. */ if (task->swap_state != TASK_SW_GOING_OUT) { task_unlock(task); return; } if (--task->swap_ast_waiting == 0) { vm_map_t map = task->map; task->swap_state = TASK_SW_OUT; task_unlock(task); mutex_lock(&map->s_lock); vm_map_res_deallocate(map); mutex_unlock(&map->s_lock); } else task_unlock(task); task_swapper_lock(); act_lock(thr_act); if (! (thr_act->swap_state & TH_SW_TASK_SWAPPING)) { /* * We lost a race with task_swapin(): don't enqueue. */ } else { queue_enter(&swapout_thread_q, thr_act, thread_act_t, swap_queue); task_swapper_wakeup(); } act_unlock(thr_act); task_swapper_unlock(); } /* * task_swap_swapout_thread: [exported] * * Executes as a separate kernel thread. * Its job is to swap out threads that have been halted by AST_SWAPOUT. */ void task_swap_swapout_thread(void) { thread_act_t thr_act; thread_t thread, nthread; task_t task; int s; thread_swappable(current_act(), FALSE); stack_privilege(current_thread()); spllo(); task_swapper_lock(); while (TRUE) { while (! queue_empty(&swapout_thread_q)) { queue_remove_first(&swapout_thread_q, thr_act, thread_act_t, swap_queue); /* * If we're racing with task_swapin, we need * to make it safe for it to do remque on the * thread, so make its links point to itself. * Allowing this ugliness is cheaper than * making task_swapin search the entire queue. */ act_lock(thr_act); queue_init((queue_t) &thr_act->swap_queue); act_unlock(thr_act); task_swapper_unlock(); /* * Wait for thread's RUN bit to be deasserted. */ thread = act_lock_thread(thr_act); if (thread == THREAD_NULL) act_unlock_thread(thr_act); else { boolean_t r; thread_reference(thread); thread_hold(thr_act); act_unlock_thread(thr_act); r = thread_stop_wait(thread); nthread = act_lock_thread(thr_act); thread_release(thr_act); thread_deallocate(thread); act_unlock_thread(thr_act); if (!r || nthread != thread) { task_swapper_lock(); continue; } } task = thr_act->task; task_lock(task); /* * we can race with swapin, which would set the * state to TASK_SW_IN. */ if ((task->swap_state != TASK_SW_OUT) && (task->swap_state != TASK_SW_GOING_OUT)) { task_unlock(task); task_swapper_lock(); TASK_STATS_INCR(task_sw_race_in_won); if (thread != THREAD_NULL) thread_unstop(thread); continue; } nthread = act_lock_thread(thr_act); if (nthread != thread || thr_act->active == FALSE) { act_unlock_thread(thr_act); task_unlock(task); task_swapper_lock(); TASK_STATS_INCR(task_sw_act_inactive); if (thread != THREAD_NULL) thread_unstop(thread); continue; } s = splsched(); if (thread != THREAD_NULL) thread_lock(thread); /* * Thread cannot have been swapped out yet because * TH_SW_TASK_SWAPPING was set in AST. If task_swapin * beat us here, we either wouldn't have found it on * the queue, or the task->swap_state would have * changed. The synchronization is on the * task's swap_state and the task_lock. * The thread can't be swapped in any other way * because its task has been swapped. */ assert(thr_act->swap_state & TH_SW_TASK_SWAPPING); assert(thread == THREAD_NULL || !(thread->state & (TH_SWAPPED_OUT|TH_RUN))); assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN); /* assert(thread->state & TH_HALTED); */ /* this also clears TH_SW_TASK_SWAPPING flag */ thr_act->swap_state = TH_SW_GOING_OUT; if (thread != THREAD_NULL) { if (thread->top_act == thr_act) { thread->state |= TH_SWAPPED_OUT; /* * Once we unlock the task, things can happen * to the thread, so make sure it's consistent * for thread_swapout. */ } thread->ref_count++; thread_unlock(thread); thread_unstop(thread); } splx(s); act_locked_act_reference(thr_act); act_unlock_thread(thr_act); task_unlock(task); thread_swapout(thr_act); /* do the work */ if (thread != THREAD_NULL) thread_deallocate(thread); act_deallocate(thr_act); task_swapper_lock(); } task_swapper_sleep(); } } /* * task_swapin: * * Make a task resident. * Performs all of the work to make a task resident and possibly * non-swappable. If we race with a competing task_swapin call, * we wait for its completion, then return. * * Locking: no locks held upon entry and exit. * * Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the * state is TASK_SW_COMING_IN. */ kern_return_t task_swapin(task_t task, boolean_t make_unswappable) { register queue_head_t *list; register thread_act_t thr_act, next; thread_t thread; int s; boolean_t swappable = TRUE; task_lock(task); switch (task->swap_state) { case TASK_SW_OUT: { vm_map_t map = task->map; /* * Task has made it all the way out, which means * that vm_map_res_deallocate has been done; set * state to TASK_SW_COMING_IN, then bring map * back in. We could actually be racing with * the thread_swapout_enqueue, which does the * vm_map_res_deallocate, but that race is covered. */ task->swap_state = TASK_SW_COMING_IN; assert(task->swap_ast_waiting == 0); assert(map->res_count >= 0); task_unlock(task); mutex_lock(&map->s_lock); vm_map_res_reference(map); mutex_unlock(&map->s_lock); task_lock(task); assert(task->swap_state == TASK_SW_COMING_IN); } break; case TASK_SW_GOING_OUT: /* * Task isn't all the way out yet. There is * still at least one thread not swapped, and * vm_map_res_deallocate has not been done. */ task->swap_state = TASK_SW_COMING_IN; assert(task->swap_ast_waiting > 0 || (task->swap_ast_waiting == 0 && task->thr_act_count == 0)); assert(task->map->res_count > 0); TASK_STATS_INCR(task_sw_race_going_out); break; case TASK_SW_IN: assert(task->map->res_count > 0); #if TASK_SW_DEBUG task_swapper_lock(); if (task_swap_debug && on_swapped_list(task)) { printf("task 0x%X on list, state is SW_IN\n", task); Debugger(""); } task_swapper_unlock(); #endif /* TASK_SW_DEBUG */ TASK_STATS_INCR(task_sw_race_in); if (make_unswappable) { task->swap_state = TASK_SW_UNSWAPPABLE; task_unlock(task); task_swapout_ineligible(task); } else task_unlock(task); return(KERN_SUCCESS); case TASK_SW_COMING_IN: /* * Raced with another task_swapin and lost; * wait for other one to complete first */ assert(task->map->res_count >= 0); /* * set MAKE_UNSWAPPABLE so that whoever is swapping * the task in will make it unswappable, and return */ if (make_unswappable) task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE; task->swap_flags |= TASK_SW_WANT_IN; assert_wait((event_t)&task->swap_state, THREAD_UNINT); task_unlock(task); thread_block(THREAD_CONTINUE_NULL); TASK_STATS_INCR(task_sw_race_coming_in); return(KERN_SUCCESS); case TASK_SW_UNSWAPPABLE: /* * This can happen, since task_terminate * unconditionally calls task_swapin. */ task_unlock(task); return(KERN_SUCCESS); default: panic("task_swapin bad state"); break; } if (make_unswappable) task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE; assert(task->swap_state == TASK_SW_COMING_IN); task_swapper_lock(); #if TASK_SW_DEBUG if (task_swap_debug && !on_swapped_list(task)) { printf("task 0x%X not on list\n", task); Debugger(""); } #endif /* TASK_SW_DEBUG */ queue_remove(&swapped_tasks, task, task_t, swapped_tasks); tasks_swapped_out--; task_swapins++; task_swapper_unlock(); /* * Iterate through all threads for this task and * release them, as required. They may not have been swapped * out yet. The task remains locked throughout. */ list = &task->thr_acts; thr_act = (thread_act_t) queue_first(list); while (!queue_end(list, (queue_entry_t) thr_act)) { boolean_t need_to_release; next = (thread_act_t) queue_next(&thr_act->thr_acts); /* * Keep task_swapper_lock across thread handling * to synchronize with task_swap_swapout_thread */ task_swapper_lock(); thread = act_lock_thread(thr_act); s = splsched(); if (thr_act->ast & AST_SWAPOUT) { /* thread hasn't gotten the AST yet, just clear it */ thread_ast_clear(thr_act, AST_SWAPOUT); need_to_release = FALSE; TASK_STATS_INCR(task_sw_before_ast); splx(s); act_unlock_thread(thr_act); } else { /* * If AST_SWAPOUT was cleared, then thread_hold, * or equivalent was done. */ need_to_release = TRUE; /* * Thread has hit AST, but it may not have * been dequeued yet, so we need to check. * NOTE: the thread may have been dequeued, but * has not yet been swapped (the task_swapper_lock * has been dropped, but the thread is not yet * locked), and the TH_SW_TASK_SWAPPING flag may * not have been cleared. In this case, we will do * an extra remque, which the task_swap_swapout_thread * has made safe, and clear the flag, which is also * checked by the t_s_s_t before doing the swapout. */ if (thread) thread_lock(thread); if (thr_act->swap_state & TH_SW_TASK_SWAPPING) { /* * hasn't yet been dequeued for swapout, * so clear flags and dequeue it first. */ thr_act->swap_state &= ~TH_SW_TASK_SWAPPING; assert(thr_act->thread == THREAD_NULL || !(thr_act->thread->state & TH_SWAPPED_OUT)); queue_remove(&swapout_thread_q, thr_act, thread_act_t, swap_queue); TASK_STATS_INCR(task_sw_before_swap); } else { TASK_STATS_INCR(task_sw_after_swap); /* * It's possible that the thread was * made unswappable before hitting the * AST, in which case it's still running. */ if (thr_act->swap_state == TH_SW_UNSWAPPABLE) { need_to_release = FALSE; TASK_STATS_INCR(task_sw_unswappable); } } if (thread) thread_unlock(thread); splx(s); act_unlock_thread(thr_act); } task_swapper_unlock(); /* * thread_release will swap in the thread if it's been * swapped out. */ if (need_to_release) { act_lock_thread(thr_act); thread_release(thr_act); act_unlock_thread(thr_act); } thr_act = next; } if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) { task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE; task->swap_state = TASK_SW_UNSWAPPABLE; swappable = FALSE; } else { task->swap_state = TASK_SW_IN; } task_swaprss_in += pmap_resident_count(task->map->pmap); task_swap_total_time += sched_tick - task->swap_stamp; /* note when task came back in */ task->swap_stamp = sched_tick; if (task->swap_flags & TASK_SW_WANT_IN) { task->swap_flags &= ~TASK_SW_WANT_IN; thread_wakeup((event_t)&task->swap_state); } assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0); task_unlock(task); #if TASK_SW_DEBUG task_swapper_lock(); if (task_swap_debug && on_swapped_list(task)) { printf("task 0x%X on list at end of swap in\n", task); Debugger(""); } task_swapper_unlock(); #endif /* TASK_SW_DEBUG */ /* * Make the task eligible to be swapped again */ if (swappable) task_swapout_eligible(task); return(KERN_SUCCESS); } void wake_task_swapper(boolean_t now); /* forward */ /* * wake_task_swapper: [exported] * * Wakes up task swapper if now == TRUE or if at least * task_swap_cycle_time has elapsed since the last call. * * NOTE: this function is not multithreaded, so if there is * more than one caller, it must be modified. */ void wake_task_swapper(boolean_t now) { /* last_task_swap_cycle may require locking */ if (now || (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) { last_task_swap_cycle = sched_tick; if (task_swap_debug) printf("wake_task_swapper: waking swapper\n"); thread_wakeup((event_t)&swapped_tasks); /* poke swapper */ } } task_t pick_intask(void); /* forward */ /* * pick_intask: * returns a task to be swapped in, or TASK_NULL if nothing suitable is found. * * current algorithm: Return the task that has been swapped out the * longest, as long as it is > min_swap_time. It will be dequeued * if actually swapped in. * * NOTE:********************************************** * task->swap_rss (the size when the task was swapped out) could be used to * further refine the selection. Another possibility would be to look at * the state of the thread(s) to see if the task/threads would run if they * were swapped in. * *************************************************** * * Locking: no locks held upon entry and exit. */ task_t pick_intask(void) { register task_t task = TASK_NULL; task_swapper_lock(); /* the oldest task is the first one */ if (!queue_empty(&swapped_tasks)) { task = (task_t) queue_first(&swapped_tasks); assert(task != TASK_NULL); /* Make sure it's been out min_swap_time */ if ((sched_tick - task->swap_stamp) < min_swap_time) task = TASK_NULL; } task_swapper_unlock(); return(task); #if 0 /* * This code looks at the entire list of swapped tasks, but since * it does not yet do anything but look at time swapped, we * can simply use the fact that the queue is ordered, and take * the first one off the queue. */ task = (task_t)queue_first(&swapped_tasks); while (!queue_end(&swapped_tasks, (queue_entry_t)task)) { task_lock(task); tmp_time = sched_tick - task->swap_stamp; if (tmp_time > min_swap_time && tmp_time > time_swapped) { target_task = task; time_swapped = tmp_time; } task_unlock(task); task = (task_t)queue_next(&task->swapped_tasks); } task_swapper_unlock(); return(target_task); #endif } task_t pick_outtask(void); /* forward */ /* * pick_outtask: * returns a task to be swapped out, with a reference on the task, * or NULL if no suitable task is found. * * current algorithm: * * Examine all eligible tasks. While looking, use the first thread in * each task as an indication of the task's activity. Count up * "active" threads (those either runnable or sleeping). If the task * is active (by these criteria), swapped in, and resident * for at least min_res_time, then select the task with the largest * number of pages in memory. If there are less * than min_active_tasks active tasks in the system, then don't * swap anything out (this avoids swapping out the only running task * in the system, for example). * * NOTE: the task selected will not be removed from the eligible list. * This means that it will be selected again if it is not swapped * out, where it is removed from the list. * * Locking: no locks held upon entry and exit. Task_swapout_lock must be * taken before task locks. * * *************************************************** * TBD: * This algorithm only examines the first thread in the task. Currently, since * most swappable tasks in the system are single-threaded, this generalization * works reasonably well. However, the algorithm should be changed * to consider all threads in the task if more multi-threaded tasks were used. * *************************************************** */ #ifdef TASK_SW_STATS int inactive_task_count = 0; int empty_task_count = 0; #endif /* TASK_SW_STATS */ task_t pick_outtask(void) { register task_t task; register task_t target_task = TASK_NULL; unsigned long task_rss; unsigned long target_rss = 0; boolean_t wired; boolean_t active; int nactive = 0; task_swapout_lock(); if (queue_empty(&eligible_tasks)) { /* not likely to happen */ task_swapout_unlock(); return(TASK_NULL); } task = (task_t)queue_first(&eligible_tasks); while (!queue_end(&eligible_tasks, (queue_entry_t)task)) { int s; register thread_act_t thr_act; thread_t th; task_lock(task); /* * Don't swap real-time tasks. * XXX Should we enforce that or can we let really critical * tasks use task_swappable() to make sure they never end up * n the eligible list ? */ if (task->policy & POLICYCLASS_FIXEDPRI) { goto tryagain; } if (!task->active) { TASK_STATS_INCR(inactive_task_count); goto tryagain; } if (task->res_act_count == 0) { TASK_STATS_INCR(empty_task_count); goto tryagain; } assert(!queue_empty(&task->thr_acts)); thr_act = (thread_act_t)queue_first(&task->thr_acts); active = FALSE; th = act_lock_thread(thr_act); s = splsched(); if (th != THREAD_NULL) thread_lock(th); if ((th == THREAD_NULL) || (th->state == TH_RUN) || (th->state & TH_WAIT)) { /* * thread is "active": either runnable * or sleeping. Count it and examine * it further below. */ nactive++; active = TRUE; } if (th != THREAD_NULL) thread_unlock(th); splx(s); act_unlock_thread(thr_act); if (active && (task->swap_state == TASK_SW_IN) && ((sched_tick - task->swap_stamp) > min_res_time)) { long rescount = pmap_resident_count(task->map->pmap); /* * thread must be "active", task must be swapped * in and resident for at least min_res_time */ #if 0 /* DEBUG Test round-robin strategy. Picking biggest task could cause extreme * unfairness to such large interactive programs as xterm. Instead, pick the * first task that has any pages resident: */ if (rescount > 1) { task->ref_count++; target_task = task; task_unlock(task); task_swapout_unlock(); return(target_task); } #else if (rescount > target_rss) { /* * task is not swapped, and it has the * largest rss seen so far. */ task->ref_count++; target_rss = rescount; assert(target_task != task); if (target_task != TASK_NULL) task_deallocate(target_task); target_task = task; } #endif } tryagain: task_unlock(task); task = (task_t)queue_next(&task->swapped_tasks); } task_swapout_unlock(); /* only swap out if there are at least min_active_tasks */ if (nactive < min_active_tasks) { if (target_task != TASK_NULL) { task_deallocate(target_task); target_task = TASK_NULL; } } return(target_task); } #if TASK_SW_DEBUG void print_pid(task_t task, unsigned long n1, unsigned long n2, const char *comp, const char *inout); /* forward */ void print_pid( task_t task, unsigned long n1, unsigned long n2, const char *comp, const char *inout) { long rescount; task_lock(task); rescount = pmap_resident_count(task->map->pmap); task_unlock(task); printf("task_swapper: swapped %s task %x; %d %s %d; res=%d\n", inout, task, n1, comp, n2, rescount); } #endif /* * task_swapper: [exported] * * Executes as a separate kernel thread. */ #define MAX_LOOP 3 void task_swapper(void) { task_t outtask, intask; int timeout; int loopcnt = 0; boolean_t start_swapping; boolean_t stop_swapping; int local_page_free_avg; extern int hz; thread_swappable(current_act(), FALSE); stack_privilege(current_thread()); spllo(); for (;;) { local_page_free_avg = vm_page_free_avg; while (TRUE) { #if 0 if (task_swap_debug) printf("task_swapper: top of loop; cnt = %d\n",loopcnt); #endif intask = pick_intask(); start_swapping = ((vm_pageout_rate_avg > swap_start_pageout_rate) || (vm_grab_rate_avg > max_grab_rate)); stop_swapping = (vm_pageout_rate_avg < swap_stop_pageout_rate); /* * If a lot of paging is going on, or another task should come * in but memory is tight, find something to swap out and start * it. Don't swap any task out if task swapping is disabled. * vm_page_queue_free_lock protects the vm globals. */ outtask = TASK_NULL; if (start_swapping || (!stop_swapping && intask && ((local_page_free_avg / AVE_SCALE) < vm_page_free_target)) ) { if (task_swap_enable && (outtask = pick_outtask()) && (task_swapout(outtask) == KERN_SUCCESS)) { unsigned long rss; #if TASK_SW_DEBUG if (task_swap_debug) print_pid(outtask, local_page_free_avg / AVE_SCALE, vm_page_free_target, "<", "out"); #endif rss = outtask->swap_rss; if (outtask->swap_nswap == 1) rss /= 2; /* divide by 2 if never out */ local_page_free_avg += (rss/short_avg_interval) * AVE_SCALE; } if (outtask != TASK_NULL) task_deallocate(outtask); } /* * If there is an eligible task to bring in and there are at * least vm_page_free_target free pages, swap it in. If task * swapping has been disabled, bring the task in anyway. */ if (intask && ((local_page_free_avg / AVE_SCALE) >= vm_page_free_target || stop_swapping || !task_swap_enable)) { if (task_swapin(intask, FALSE) == KERN_SUCCESS) { unsigned long rss; #if TASK_SW_DEBUG if (task_swap_debug) print_pid(intask, local_page_free_avg / AVE_SCALE, vm_page_free_target, ">=", "in"); #endif rss = intask->swap_rss; if (intask->swap_nswap == 1) rss /= 2; /* divide by 2 if never out */ local_page_free_avg -= (rss/short_avg_interval) * AVE_SCALE; } } /* * XXX * Here we have to decide whether to continue swapping * in and/or out before sleeping. The decision should * be made based on the previous action (swapin/out) and * current system parameters, such as paging rates and * demand. * The function, compute_vm_averages, which does these * calculations, depends on being called every second, * so we can't just do the same thing. */ if (++loopcnt < MAX_LOOP) continue; /* * Arrange to be awakened if paging is still heavy or there are * any tasks partially or completely swapped out. (Otherwise, * the wakeup will come from the external trigger(s).) */ timeout = 0; if (start_swapping) timeout = task_swap_cycle_time; else { task_swapper_lock(); if (!queue_empty(&swapped_tasks)) timeout = min_swap_time; task_swapper_unlock(); } assert_wait((event_t)&swapped_tasks, THREAD_UNINT); if (timeout) { if (task_swap_debug) printf("task_swapper: set timeout of %d\n", timeout); thread_set_timeout(timeout, NSEC_PER_SEC); } if (task_swap_debug) printf("task_swapper: blocking\n"); thread_block(THREAD_CONTINUE_NULL); if (timeout) { thread_cancel_timeout(current_thread()); } /* reset locals */ loopcnt = 0; local_page_free_avg = vm_page_free_avg; } } } /* from BSD */ #define ave(smooth, cnt, time) \ smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time) /* * We estimate the system paging load in more than one metric: * 1) the total number of calls into the function, vm_page_grab, * which allocates all page frames for real pages. * 2) the total number of pages paged in and out of paging files. * This is a measure of page cleaning and faulting from backing * store. * * When either metric passes a threshold, tasks are swapped out. */ long last_grab_count = 0; long last_pageout_count = 0; /* * compute_vm_averages: [exported] * * This function is to be called once a second to calculate average paging * demand and average numbers of free pages for use by the task swapper. * Can also be used to wake up task swapper at desired thresholds. * * NOTE: this function is single-threaded, and requires locking if * ever there are multiple callers. */ void compute_vm_averages(void) { extern unsigned long vm_page_grab_count; long grab_count, pageout_count; int i; ave(vm_page_free_avg, vm_page_free_count, short_avg_interval); ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval); /* * NOTE: the vm_page_grab_count and vm_stat structure are * under control of vm_page_queue_free_lock. We're simply reading * memory here, and the numbers don't depend on each other, so * no lock is taken. */ grab_count = vm_page_grab_count; pageout_count = 0; for (i = 0; i < NCPUS; i++) { pageout_count += vm_stat[i].pageouts; } ave(vm_pageout_rate_avg, pageout_count - last_pageout_count, short_avg_interval); ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count, long_avg_interval); ave(vm_grab_rate_avg, grab_count - last_grab_count, short_avg_interval); last_grab_count = grab_count; last_pageout_count = pageout_count; /* * Adjust swap_{start,stop}_pageout_rate to the paging rate peak. * This is an attempt to find the optimum paging rates at which * to trigger task swapping on or off to regulate paging activity, * depending on the hardware capacity. */ if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) { unsigned int desired_max; vm_pageout_rate_peakavg = vm_pageout_rate_avg; swap_start_pageout_rate = vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100; swap_stop_pageout_rate = vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100; } #if TASK_SW_DEBUG /* * For measurements, allow fixed values. */ if (fixed_swap_start_pageout_rate) swap_start_pageout_rate = fixed_swap_start_pageout_rate; if (fixed_swap_stop_pageout_rate) swap_stop_pageout_rate = fixed_swap_stop_pageout_rate; #endif /* TASK_SW_DEBUG */ #if TASK_SW_DEBUG if (task_swap_stats) printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n", vm_pageout_rate_avg / AVE_SCALE, vm_pageout_rate_longavg / AVE_SCALE, swap_start_pageout_rate / AVE_SCALE, swap_stop_pageout_rate / AVE_SCALE, vm_page_free_avg / AVE_SCALE, vm_page_free_longavg / AVE_SCALE, vm_page_free_target); #endif /* TASK_SW_DEBUG */ if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) { if (task_swap_on) { /* The following is a delicate attempt to balance the * need for reasonably rapid response to system * thrashing, with the equally important desire to * prevent the onset of swapping simply because of a * short burst of paging activity. */ if ((vm_pageout_rate_longavg > swap_stop_pageout_rate) && (vm_pageout_rate_avg > swap_start_pageout_rate) || (vm_pageout_rate_avg > vm_pageout_rate_peakavg) || (vm_grab_rate_avg > max_grab_rate)) wake_task_swapper(FALSE); } } else /* page demand is low; should consider swapin */ { if (tasks_swapped_out != 0) wake_task_swapper(TRUE); } } void task_swapout_eligible(task_t task) { #if TASK_SW_DEBUG task_swapper_lock(); if (task_swap_debug && on_swapped_list(task)) { printf("swapout_eligible: task 0x%X on swapped list\n", task); Debugger(""); } task_swapper_unlock(); #endif task_swapout_lock(); task_lock(task); #if TASK_SW_DEBUG if (task->swap_flags & TASK_SW_ELIGIBLE) { printf("swapout_eligible: task 0x%X already eligible\n", task); } #endif /* TASK_SW_DEBUG */ if ((task->swap_state == TASK_SW_IN) && ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) { queue_enter(&eligible_tasks,task,task_t,swapped_tasks); task->swap_flags |= TASK_SW_ELIGIBLE; } task_unlock(task); task_swapout_unlock(); } void task_swapout_ineligible(task_t task) { #if TASK_SW_DEBUG task_swapper_lock(); if (task_swap_debug && on_swapped_list(task)) { printf("swapout_ineligible: task 0x%X on swapped list\n", task); Debugger(""); } task_swapper_unlock(); #endif task_swapout_lock(); task_lock(task); #if TASK_SW_DEBUG if (!(task->swap_flags & TASK_SW_ELIGIBLE)) printf("swapout_ineligible: task 0x%X already inel.\n", task); #endif /* TASK_SW_DEBUG */ if ((task->swap_state != TASK_SW_IN) && (task->swap_flags & TASK_SW_ELIGIBLE)) { queue_remove(&eligible_tasks, task, task_t, swapped_tasks); task->swap_flags &= ~TASK_SW_ELIGIBLE; } task_unlock(task); task_swapout_unlock(); } int task_swap_ast_aborted = 0; /* * Process an AST_SWAPOUT. */ void swapout_ast() { spl_t s; thread_act_t act; thread_t thread; act = current_act(); /* * Task is being swapped out. First mark it as suspended * and halted, then call thread_swapout_enqueue to put * the thread on the queue for task_swap_swapout_threads * to swap out the thread. */ /* * Don't swap unswappable threads */ thread = act_lock_thread(act); s = splsched(); if (thread) thread_lock(thread); if ((act->ast & AST_SWAPOUT) == 0) { /* * Race with task_swapin. Abort swapout. */ task_swap_ast_aborted++; /* not locked XXX */ if (thread) thread_unlock(thread); splx(s); act_unlock_thread(act); } else if (act->swap_state == TH_SW_IN) { /* * Mark swap_state as TH_SW_TASK_SWAPPING to avoid * race with thread swapper, which will only * swap thread if swap_state is TH_SW_IN. * This way, the thread can only be swapped by * the task swapping mechanism. */ act->swap_state |= TH_SW_TASK_SWAPPING; /* assert(act->suspend_count == 0); XXX ? */ if (thread) thread_unlock(thread); if (act->suspend_count++ == 0) /* inline thread_hold */ install_special_handler(act); /* self->state |= TH_HALTED; */ thread_ast_clear(act, AST_SWAPOUT); /* * Initialize the swap_queue fields to allow an extra * queue_remove() in task_swapin if we lose the race * (task_swapin can be called before we complete * thread_swapout_enqueue). */ queue_init((queue_t) &act->swap_queue); splx(s); act_unlock_thread(act); /* this must be called at normal interrupt level */ thread_swapout_enqueue(act); } else { /* thread isn't swappable; continue running */ assert(act->swap_state == TH_SW_UNSWAPPABLE); if (thread) thread_unlock(thread); thread_ast_clear(act, AST_SWAPOUT); splx(s); act_unlock_thread(act); } } #endif /* TASK_SWAPPER */