/* * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* * @OSF_FREE_COPYRIGHT@ */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. 
*/ /* */ /* * File: sched_prim.c * Author: Avadis Tevanian, Jr. * Date: 1986 * * Scheduling primitives * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*** ??? fix so this can be removed ***/ #include #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ int default_preemption_rate = DEFAULT_PREEMPTION_RATE; #define MAX_UNSAFE_QUANTA 800 int max_unsafe_quanta = MAX_UNSAFE_QUANTA; #define MAX_POLL_QUANTA 2 int max_poll_quanta = MAX_POLL_QUANTA; #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */ int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT; uint32_t std_quantum_us; uint64_t max_unsafe_computation; uint32_t sched_safe_duration; uint64_t max_poll_computation; uint32_t std_quantum; uint32_t min_std_quantum; uint32_t max_rt_quantum; uint32_t min_rt_quantum; static uint32_t sched_tick_interval; unsigned sched_tick; #if SIMPLE_CLOCK int sched_usec; #endif /* SIMPLE_CLOCK */ /* Forwards */ void wait_queues_init(void); static thread_t choose_thread( processor_set_t pset, processor_t processor); static void do_thread_scan(void); #if DEBUG static boolean_t thread_runnable( thread_t thread); #endif /*DEBUG*/ /* * State machine * * states are combinations of: * R running * W waiting (or on wait queue) * N non-interruptible * O swapped out * I being swapped in * * init action * assert_wait thread_block clear_wait swapout swapin * * R RW, RWN R; setrun - - * RN RWN RN; setrun - - * * RW W R - * RWN WN RN - * * W R; setrun WO * WN RN; setrun - * * RO - - R * */ /* * Waiting protocols and implementation: * * Each thread may be waiting for exactly one event; this event * is set using assert_wait(). That thread may be awakened either * by performing a thread_wakeup_prim() on its event, * or by directly waking that thread up with clear_wait(). 
* * The implementation of wait events uses a hash table. Each * bucket is queue of threads having the same hash function * value; the chain for the queue (linked list) is the run queue * field. [It is not possible to be waiting and runnable at the * same time.] * * Locks on both the thread and on the hash buckets govern the * wait event field and the queue chain field. Because wakeup * operations only have the event as an argument, the event hash * bucket must be locked before any thread. * * Scheduling operations may also occur at interrupt level; therefore, * interrupts below splsched() must be prevented when holding * thread or hash bucket locks. * * The wait event hash table declarations are as follows: */ #define NUMQUEUES 59 struct wait_queue wait_queues[NUMQUEUES]; #define wait_hash(event) \ ((((int)(event) < 0)? ~(int)(event): (int)(event)) % NUMQUEUES) void sched_init(void) { /* * Calculate the timeslicing quantum * in us. */ if (default_preemption_rate < 1) default_preemption_rate = DEFAULT_PREEMPTION_RATE; std_quantum_us = (1000 * 1000) / default_preemption_rate; printf("standard timeslicing quantum is %d us\n", std_quantum_us); sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) * (1 << SCHED_TICK_SHIFT); wait_queues_init(); pset_sys_bootstrap(); /* initialize processor mgmt. 
*/ sched_tick = 0; #if SIMPLE_CLOCK sched_usec = 0; #endif /* SIMPLE_CLOCK */ ast_init(); } void sched_timebase_init(void) { uint64_t abstime; clock_interval_to_absolutetime_interval( std_quantum_us, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); std_quantum = abstime; /* 250 us */ clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); min_std_quantum = abstime; /* 50 us */ clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); min_rt_quantum = abstime; /* 50 ms */ clock_interval_to_absolutetime_interval( 50, 1000*NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); max_rt_quantum = abstime; clock_interval_to_absolutetime_interval(1000 >> SCHED_TICK_SHIFT, USEC_PER_SEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); sched_tick_interval = abstime; max_unsafe_computation = max_unsafe_quanta * std_quantum; max_poll_computation = max_poll_quanta * std_quantum; } void wait_queues_init(void) { register int i; for (i = 0; i < NUMQUEUES; i++) { wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO); } } /* * Thread wait timer expiration. */ void thread_timer_expire( timer_call_param_t p0, timer_call_param_t p1) { thread_t thread = p0; spl_t s; s = splsched(); thread_lock(thread); if (--thread->wait_timer_active == 1) { if (thread->wait_timer_is_set) { thread->wait_timer_is_set = FALSE; clear_wait_internal(thread, THREAD_TIMED_OUT); } } thread_unlock(thread); splx(s); } /* * thread_set_timer: * * Set a timer for the current thread, if the thread * is ready to wait. Must be called between assert_wait() * and thread_block(). 
*/ void thread_set_timer( uint32_t interval, uint32_t scale_factor) { thread_t thread = current_thread(); uint64_t deadline; spl_t s; s = splsched(); thread_lock(thread); if ((thread->state & TH_WAIT) != 0) { clock_interval_to_deadline(interval, scale_factor, &deadline); timer_call_enter(&thread->wait_timer, deadline); assert(!thread->wait_timer_is_set); thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } thread_unlock(thread); splx(s); } void thread_set_timer_deadline( uint64_t deadline) { thread_t thread = current_thread(); spl_t s; s = splsched(); thread_lock(thread); if ((thread->state & TH_WAIT) != 0) { timer_call_enter(&thread->wait_timer, deadline); assert(!thread->wait_timer_is_set); thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } thread_unlock(thread); splx(s); } void thread_cancel_timer(void) { thread_t thread = current_thread(); spl_t s; s = splsched(); thread_lock(thread); if (thread->wait_timer_is_set) { if (timer_call_cancel(&thread->wait_timer)) thread->wait_timer_active--; thread->wait_timer_is_set = FALSE; } thread_unlock(thread); splx(s); } /* * Set up thread timeout element when thread is created. 
*/ void thread_timer_setup( thread_t thread) { extern void thread_depress_expire( timer_call_param_t p0, timer_call_param_t p1); timer_call_setup(&thread->wait_timer, thread_timer_expire, thread); thread->wait_timer_is_set = FALSE; thread->wait_timer_active = 1; timer_call_setup(&thread->depress_timer, thread_depress_expire, thread); thread->depress_timer_active = 1; thread->ref_count++; } void thread_timer_terminate(void) { thread_t thread = current_thread(); wait_result_t res; spl_t s; s = splsched(); thread_lock(thread); if (thread->wait_timer_is_set) { if (timer_call_cancel(&thread->wait_timer)) thread->wait_timer_active--; thread->wait_timer_is_set = FALSE; } thread->wait_timer_active--; while (thread->wait_timer_active > 0) { thread_unlock(thread); splx(s); delay(1); s = splsched(); thread_lock(thread); } thread->depress_timer_active--; while (thread->depress_timer_active > 0) { thread_unlock(thread); splx(s); delay(1); s = splsched(); thread_lock(thread); } thread_unlock(thread); splx(s); thread_deallocate(thread); } /* * Routine: thread_go_locked * Purpose: * Start a thread running. * Conditions: * thread lock held, IPC locks may be held. * thread must have been pulled from wait queue under same lock hold. 
* Returns: * KERN_SUCCESS - Thread was set running * KERN_NOT_WAITING - Thread was not waiting */ kern_return_t thread_go_locked( thread_t thread, wait_result_t wresult) { assert(thread->at_safe_point == FALSE); assert(thread->wait_event == NO_EVENT64); assert(thread->wait_queue == WAIT_QUEUE_NULL); if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) { thread_roust_t roust_hint; thread->state &= ~(TH_WAIT|TH_UNINT); _mk_sp_thread_unblock(thread); roust_hint = thread->roust; thread->roust = NULL; if ( roust_hint != NULL && (*roust_hint)(thread, wresult) ) { if (thread->wait_timer_is_set) { if (timer_call_cancel(&thread->wait_timer)) thread->wait_timer_active--; thread->wait_timer_is_set = FALSE; } return (KERN_SUCCESS); } thread->wait_result = wresult; if (!(thread->state & TH_RUN)) { thread->state |= TH_RUN; if (thread->active_callout) call_thread_unblock(); pset_run_incr(thread->processor_set); if (thread->sched_mode & TH_MODE_TIMESHARE) pset_share_incr(thread->processor_set); thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); } KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, (int)thread, (int)thread->sched_pri, 0, 0, 0); return (KERN_SUCCESS); } return (KERN_NOT_WAITING); } /* * Routine: thread_mark_wait_locked * Purpose: * Mark a thread as waiting. If, given the circumstances, * it doesn't want to wait (i.e. already aborted), then * indicate that in the return value. * Conditions: * at splsched() and thread is locked. */ __private_extern__ wait_result_t thread_mark_wait_locked( thread_t thread, wait_interrupt_t interruptible) { boolean_t at_safe_point; /* * The thread may have certain types of interrupts/aborts masked * off. Even if the wait location says these types of interrupts * are OK, we have to honor mask settings (outer-scoped code may * not be able to handle aborts at the moment). 
*/ if (interruptible > thread->interrupt_level) interruptible = thread->interrupt_level; at_safe_point = (interruptible == THREAD_ABORTSAFE); if ( interruptible == THREAD_UNINT || !(thread->state & TH_ABORT) || (!at_safe_point && (thread->state & TH_ABORT_SAFELY)) ) { thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT); thread->at_safe_point = at_safe_point; thread->sleep_stamp = sched_tick; return (thread->wait_result = THREAD_WAITING); } else if (thread->state & TH_ABORT_SAFELY) thread->state &= ~(TH_ABORT|TH_ABORT_SAFELY); return (thread->wait_result = THREAD_INTERRUPTED); } /* * Routine: thread_interrupt_level * Purpose: * Set the maximum interruptible state for the * current thread. The effective value of any * interruptible flag passed into assert_wait * will never exceed this. * * Useful for code that must not be interrupted, * but which calls code that doesn't know that. * Returns: * The old interrupt level for the thread. */ __private_extern__ wait_interrupt_t thread_interrupt_level( wait_interrupt_t new_level) { thread_t thread = current_thread(); wait_interrupt_t result = thread->interrupt_level; thread->interrupt_level = new_level; return result; } /* * Routine: assert_wait_timeout * Purpose: * Assert that the thread intends to block, * waiting for a timeout (no user known event). */ unsigned int assert_wait_timeout_event; wait_result_t assert_wait_timeout( mach_msg_timeout_t msecs, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait((event_t)&assert_wait_timeout_event, interruptible); if (res == THREAD_WAITING) thread_set_timer(msecs, 1000*NSEC_PER_USEC); return res; } /* * Check to see if an assert wait is possible, without actually doing one. * This is used by debug code in locks and elsewhere to verify that it is * always OK to block when trying to take a blocking lock (since waiting * for the actual assert_wait to catch the case may make it hard to detect * this case. 
*/ boolean_t assert_wait_possible(void) { thread_t thread; extern unsigned int debug_mode; #if DEBUG if(debug_mode) return TRUE; /* Always succeed in debug mode */ #endif thread = current_thread(); return (thread == NULL || wait_queue_assert_possible(thread)); } /* * assert_wait: * * Assert that the current thread is about to go to * sleep until the specified event occurs. */ wait_result_t assert_wait( event_t event, wait_interrupt_t interruptible) { register wait_queue_t wq; register int index; assert(event != NO_EVENT); index = wait_hash(event); wq = &wait_queues[index]; return wait_queue_assert_wait(wq, event, interruptible); } __private_extern__ wait_queue_t wait_event_wait_queue( event_t event) { assert(event != NO_EVENT); return (&wait_queues[wait_hash(event)]); } wait_result_t assert_wait_prim( event_t event, thread_roust_t roust_hint, uint64_t deadline, wait_interrupt_t interruptible) { thread_t thread = current_thread(); wait_result_t wresult; wait_queue_t wq; spl_t s; assert(event != NO_EVENT); wq = &wait_queues[wait_hash(event)]; s = splsched(); wait_queue_lock(wq); thread_lock(thread); wresult = wait_queue_assert_wait64_locked(wq, (uint32_t)event, interruptible, thread); if (wresult == THREAD_WAITING) { if (roust_hint != NULL) thread->roust = roust_hint; if (deadline != 0) { timer_call_enter(&thread->wait_timer, deadline); assert(!thread->wait_timer_is_set); thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } } thread_unlock(thread); wait_queue_unlock(wq); splx(s); return (wresult); } /* * thread_sleep_fast_usimple_lock: * * Cause the current thread to wait until the specified event * occurs. The specified simple_lock is unlocked before releasing * the cpu and re-acquired as part of waking up. * * This is the simple lock sleep interface for components that use a * faster version of simple_lock() than is provided by usimple_lock(). 
*/ __private_extern__ wait_result_t thread_sleep_fast_usimple_lock( event_t event, simple_lock_t lock, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { simple_unlock(lock); res = thread_block(THREAD_CONTINUE_NULL); simple_lock(lock); } return res; } /* * thread_sleep_usimple_lock: * * Cause the current thread to wait until the specified event * occurs. The specified usimple_lock is unlocked before releasing * the cpu and re-acquired as part of waking up. * * This is the simple lock sleep interface for components where * simple_lock() is defined in terms of usimple_lock(). */ wait_result_t thread_sleep_usimple_lock( event_t event, usimple_lock_t lock, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { usimple_unlock(lock); res = thread_block(THREAD_CONTINUE_NULL); usimple_lock(lock); } return res; } /* * thread_sleep_mutex: * * Cause the current thread to wait until the specified event * occurs. The specified mutex is unlocked before releasing * the cpu. The mutex will be re-acquired before returning. * * JMM - Add hint to make sure mutex is available before rousting */ wait_result_t thread_sleep_mutex( event_t event, mutex_t *mutex, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { mutex_unlock(mutex); res = thread_block(THREAD_CONTINUE_NULL); mutex_lock(mutex); } return res; } /* * thread_sleep_mutex_deadline: * * Cause the current thread to wait until the specified event * (or deadline) occurs. The specified mutex is unlocked before * releasing the cpu. The mutex will be re-acquired before returning. 
* * JMM - Add hint to make sure mutex is available before rousting */ wait_result_t thread_sleep_mutex_deadline( event_t event, mutex_t *mutex, uint64_t deadline, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { mutex_unlock(mutex); thread_set_timer_deadline(deadline); res = thread_block(THREAD_CONTINUE_NULL); if (res != THREAD_TIMED_OUT) thread_cancel_timer(); mutex_lock(mutex); } return res; } /* * thread_sleep_lock_write: * * Cause the current thread to wait until the specified event * occurs. The specified (write) lock is unlocked before releasing * the cpu. The (write) lock will be re-acquired before returning. * * JMM - Add hint to make sure mutex is available before rousting */ wait_result_t thread_sleep_lock_write( event_t event, lock_t *lock, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { lock_write_done(lock); res = thread_block(THREAD_CONTINUE_NULL); lock_write(lock); } return res; } /* * thread_sleep_funnel: * * Cause the current thread to wait until the specified event * occurs. If the thread is funnelled, the funnel will be released * before giving up the cpu. The funnel will be re-acquired before returning. * * JMM - Right now the funnel is dropped and re-acquired inside * thread_block(). At some point, this may give thread_block() a hint. */ wait_result_t thread_sleep_funnel( event_t event, wait_interrupt_t interruptible) { wait_result_t res; res = assert_wait(event, interruptible); if (res == THREAD_WAITING) { res = thread_block(THREAD_CONTINUE_NULL); } return res; } /* * thread_[un]stop(thread) * Once a thread has blocked interruptibly (via assert_wait) prevent * it from running until thread_unstop. * * If someone else has already stopped the thread, wait for the * stop to be cleared, and then stop it again. * * Return FALSE if interrupted. 
*
 * NOTE: thread_hold/thread_suspend should be called on the activation
 *	before calling thread_stop.  TH_SUSP is only recognized when
 *	a thread blocks and only prevents clear_wait/thread_wakeup
 *	from restarting an interruptible wait.  The wake_active flag is
 *	used to indicate that someone is waiting on the thread.
 *
 *	Returns TRUE once TH_SUSP is set and TH_RUN is observed clear;
 *	FALSE if either wait ends with anything other than
 *	THREAD_AWAKENED.
 */
boolean_t
thread_stop(
	thread_t		thread)
{
	spl_t			s = splsched();

	wake_lock(thread);

	/*
	 * Phase 1: wait for any existing stop (TH_SUSP) to be cleared.
	 * wake_active is set, and the wait asserted, before the wake
	 * lock is dropped.
	 */
	while (thread->state & TH_SUSP) {
		wait_result_t	result;

		thread->wake_active = TRUE;
		result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
		wake_unlock(thread);
		splx(s);

		if (result == THREAD_WAITING)
			result = thread_block(THREAD_CONTINUE_NULL);

		/* Nothing of ours to undo yet: TH_SUSP was not set by us. */
		if (result != THREAD_AWAKENED)
			return (FALSE);

		s = splsched();
		wake_lock(thread);
	}

	thread_lock(thread);
	thread->state |= TH_SUSP;

	/*
	 * Phase 2: our stop is in place; wait for TH_RUN to clear.
	 */
	while (thread->state & TH_RUN) {
		wait_result_t	result;
		processor_t		processor = thread->last_processor;

		/* If the target is the active thread of a running processor, poke that processor. */
		if (	processor != PROCESSOR_NULL					&&
				processor->state == PROCESSOR_RUNNING		&&
				processor->active_thread == thread			)
			cause_ast_check(processor);
		thread_unlock(thread);

		/* Still under the wake lock here. */
		thread->wake_active = TRUE;
		result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
		wake_unlock(thread);
		splx(s);

		if (result == THREAD_WAITING)
			result = thread_block(THREAD_CONTINUE_NULL);

		/* Interrupted: undo the TH_SUSP we set above. */
		if (result != THREAD_AWAKENED) {
			thread_unstop(thread);
			return (FALSE);
		}

		s = splsched();
		wake_lock(thread);
		thread_lock(thread);
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);

	return (TRUE);
}

/*
 *	Clear TH_SUSP and if the thread has been stopped and is now runnable,
 *	put it back on the run queue.
*/
void
thread_unstop(
	thread_t	thread)
{
	spl_t		s = splsched();

	wake_lock(thread);
	thread_lock(thread);

	/*
	 * Case 1: of RUN/WAIT/SUSP only SUSP is set, i.e. the thread is
	 * neither running nor waiting.  Make it runnable again.
	 */
	if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) {
		thread->state &= ~TH_SUSP;
		thread->state |= TH_RUN;

		_mk_sp_thread_unblock(thread);

		pset_run_incr(thread->processor_set);
		if (thread->sched_mode & TH_MODE_TIMESHARE)
			pset_share_incr(thread->processor_set);

		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);

		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
					(int)thread, (int)thread->sched_pri, 0, 0, 0);
	}
	else
	/*
	 * Case 2: stopped but still running or waiting.  Just clear the
	 * stop and wake anyone sleeping on wake_active.
	 */
	if (thread->state & TH_SUSP) {
		thread->state &= ~TH_SUSP;

		if (thread->wake_active) {
			thread->wake_active = FALSE;
			/* Drop all locks before issuing the wakeup. */
			thread_unlock(thread);
			wake_unlock(thread);
			splx(s);

			thread_wakeup(&thread->wake_active);
			return;
		}
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);
}

/*
 * Wait for the thread's RUN bit to clear
 *
 * Returns TRUE once TH_RUN is observed clear, FALSE if the wait
 * ends with anything other than THREAD_AWAKENED.
 */
boolean_t
thread_wait(
	thread_t	thread)
{
	spl_t		s = splsched();

	wake_lock(thread);
	thread_lock(thread);

	while (thread->state & TH_RUN) {
		wait_result_t	result;
		processor_t		processor = thread->last_processor;

		/* If the target is the active thread of a running processor, poke that processor. */
		if (	processor != PROCESSOR_NULL					&&
				processor->state == PROCESSOR_RUNNING		&&
				processor->active_thread == thread			)
			cause_ast_check(processor);
		thread_unlock(thread);

		/* Still under the wake lock: register as a waiter, then sleep. */
		thread->wake_active = TRUE;
		result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE);
		wake_unlock(thread);
		splx(s);

		if (result == THREAD_WAITING)
			result = thread_block(THREAD_CONTINUE_NULL);

		if (result != THREAD_AWAKENED)
			return (FALSE);

		s = splsched();
		wake_lock(thread);
		thread_lock(thread);
	}

	thread_unlock(thread);
	wake_unlock(thread);
	splx(s);

	return (TRUE);
}

/*
 *	Routine: clear_wait_internal
 *
 *		Clear the wait condition for the specified thread.
 *		Start the thread executing if that is appropriate.
 *	Arguments:
 *		thread		thread to awaken
 *		result		Wakeup result the thread should see
 *	Conditions:
 *		At splsched
 *		the thread is locked.
*	Returns:
 *		KERN_SUCCESS		thread was rousted out a wait
 *		KERN_FAILURE		thread was waiting but could not be rousted
 *		KERN_NOT_WAITING	thread was not waiting
 */
__private_extern__ kern_return_t
clear_wait_internal(
	thread_t		thread,
	wait_result_t	wresult)
{
	wait_queue_t	wq = thread->wait_queue;
	int				i = LockTimeOut;	/* retry budget before declaring deadlock */

	do {
		/* An uninterruptible wait cannot be ended by an interrupt. */
		if (wresult == THREAD_INTERRUPTED && (thread->state & TH_UNINT))
			return (KERN_FAILURE);

		if (wq != WAIT_QUEUE_NULL) {
			/*
			 * The thread is already locked, so the wait queue can
			 * only be try-locked here.
			 */
			if (wait_queue_lock_try(wq)) {
				wait_queue_pull_thread_locked(wq, thread, TRUE);
				/* wait queue unlocked, thread still locked */
			}
			else {
				/* Back off: release the thread lock briefly, then retry. */
				thread_unlock(thread);
				delay(1);

				thread_lock(thread);
				/* The thread left this wait queue while we were unlocked. */
				if (wq != thread->wait_queue)
					return (KERN_NOT_WAITING);

				/* In a do/while, continue jumps to the --i test below. */
				continue;
			}
		}

		return (thread_go_locked(thread, wresult));
	} while (--i > 0);

	panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n",
		  thread, wq, cpu_number());

	return (KERN_FAILURE);
}


/*
 *	clear_wait:
 *
 *	Clear the wait condition for the specified thread.  Start the thread
 *	executing if that is appropriate.
 *
 *	parameters:
 *	  thread		thread to awaken
 *	  result		Wakeup result the thread should see
 *
 *	Takes splsched and the thread lock around clear_wait_internal()
 *	and returns its result.
 */
kern_return_t
clear_wait(
	thread_t		thread,
	wait_result_t	result)
{
	kern_return_t ret;
	spl_t		s;

	s = splsched();
	thread_lock(thread);
	ret = clear_wait_internal(thread, result);
	thread_unlock(thread);
	splx(s);
	return ret;
}


/*
 *	thread_wakeup_prim:
 *
 *	Common routine for thread_wakeup, thread_wakeup_with_result,
 *	and thread_wakeup_one.
 *
 *	Hashes the event to its wait queue and wakes either one waiter
 *	(one_thread TRUE) or all waiters on that event with the given
 *	result.
 */
kern_return_t
thread_wakeup_prim(
	event_t			event,
	boolean_t		one_thread,
	wait_result_t	result)
{
	register wait_queue_t	wq;
	register int			index;

	index = wait_hash(event);
	wq = &wait_queues[index];
	if (one_thread)
	    return (wait_queue_wakeup_one(wq, event, result));
	else
	    return (wait_queue_wakeup_all(wq, event, result));
}

/*
 *	thread_bind:
 *
 *	Force a thread to execute on the specified processor.
 *
 *	Returns the previous binding.  PROCESSOR_NULL means
 *	not bound.
* * XXX - DO NOT export this to users - XXX */ processor_t thread_bind( register thread_t thread, processor_t processor) { processor_t prev; run_queue_t runq = RUN_QUEUE_NULL; spl_t s; s = splsched(); thread_lock(thread); prev = thread->bound_processor; if (prev != PROCESSOR_NULL) runq = run_queue_remove(thread); thread->bound_processor = processor; if (runq != RUN_QUEUE_NULL) thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); thread_unlock(thread); splx(s); return (prev); } struct { uint32_t idle_pset_last, idle_pset_any, idle_bound; uint32_t pset_self, pset_last, pset_other, bound_self, bound_other; uint32_t realtime_self, realtime_last, realtime_other; uint32_t missed_realtime, missed_other; } dispatch_counts; /* * Select a thread for the current processor to run. * * May select the current thread, which must be locked. */ thread_t thread_select( register processor_t processor) { register thread_t thread; processor_set_t pset; boolean_t other_runnable; /* * Check for other non-idle runnable threads. 
*/ pset = processor->processor_set; thread = processor->active_thread; /* Update the thread's priority */ if (thread->sched_stamp != sched_tick) update_priority(thread); processor->current_pri = thread->sched_pri; simple_lock(&pset->sched_lock); other_runnable = processor->runq.count > 0 || pset->runq.count > 0; if ( thread->state == TH_RUN && thread->processor_set == pset && (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) ) { if ( thread->sched_pri >= BASEPRI_RTQUEUES && first_timeslice(processor) ) { if (pset->runq.highq >= BASEPRI_RTQUEUES) { register run_queue_t runq = &pset->runq; register queue_t q; q = runq->queues + runq->highq; if (((thread_t)q->next)->realtime.deadline < processor->deadline) { thread = (thread_t)q->next; ((queue_entry_t)thread)->next->prev = q; q->next = ((queue_entry_t)thread)->next; thread->runq = RUN_QUEUE_NULL; assert(thread->sched_mode & TH_MODE_PREEMPT); runq->count--; runq->urgency--; if (queue_empty(q)) { if (runq->highq != IDLEPRI) clrbit(MAXPRI - runq->highq, runq->bitmap); runq->highq = MAXPRI - ffsbit(runq->bitmap); } } } processor->deadline = thread->realtime.deadline; simple_unlock(&pset->sched_lock); return (thread); } if ( (!other_runnable || (processor->runq.highq < thread->sched_pri && pset->runq.highq < thread->sched_pri)) ) { /* I am the highest priority runnable (non-idle) thread */ processor->deadline = UINT64_MAX; simple_unlock(&pset->sched_lock); return (thread); } } if (other_runnable) thread = choose_thread(pset, processor); else { /* * Nothing is runnable, so set this processor idle if it * was running. Return its idle thread. 
*/ if (processor->state == PROCESSOR_RUNNING) { remqueue(&pset->active_queue, (queue_entry_t)processor); processor->state = PROCESSOR_IDLE; enqueue_tail(&pset->idle_queue, (queue_entry_t)processor); pset->idle_count++; } processor->deadline = UINT64_MAX; thread = processor->idle_thread; } simple_unlock(&pset->sched_lock); return (thread); } /* * Perform a context switch and start executing the new thread. * * If continuation is non-zero, resume the old (current) thread * next by executing at continuation on a new stack, in lieu * of returning. * * Returns TRUE if the hand-off succeeds. * * Called at splsched. */ #define funnel_release_check(thread, debug) \ MACRO_BEGIN \ if ((thread)->funnel_state & TH_FN_OWNED) { \ (thread)->funnel_state = TH_FN_REFUNNEL; \ KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, \ (thread)->funnel_lock, (debug), 0, 0, 0); \ funnel_unlock((thread)->funnel_lock); \ } \ MACRO_END #define funnel_refunnel_check(thread, debug) \ MACRO_BEGIN \ if ((thread)->funnel_state & TH_FN_REFUNNEL) { \ kern_return_t result = (thread)->wait_result; \ \ (thread)->funnel_state = 0; \ KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, \ (thread)->funnel_lock, (debug), 0, 0, 0); \ funnel_lock((thread)->funnel_lock); \ KERNEL_DEBUG(0x6032430 | DBG_FUNC_NONE, \ (thread)->funnel_lock, (debug), 0, 0, 0); \ (thread)->funnel_state = TH_FN_OWNED; \ (thread)->wait_result = result; \ } \ MACRO_END static thread_t __current_thread(void) { return (current_thread()); } boolean_t thread_invoke( register thread_t old_thread, register thread_t new_thread, int reason, thread_continue_t old_cont) { thread_continue_t new_cont; processor_t processor; if (get_preemption_level() != 0) panic("thread_invoke: preemption_level %d\n", get_preemption_level()); /* * Mark thread interruptible. */ thread_lock(new_thread); new_thread->state &= ~TH_UNINT; assert(thread_runnable(new_thread)); assert(old_thread->continuation == NULL); /* * Allow time constraint threads to hang onto * a stack. 
*/ if ( (old_thread->sched_mode & TH_MODE_REALTIME) && !old_thread->reserved_stack ) { old_thread->reserved_stack = old_thread->kernel_stack; } if (old_cont != NULL) { if (new_thread->state & TH_STACK_HANDOFF) { /* * If the old thread is using a privileged stack, * check to see whether we can exchange it with * that of the new thread. */ if ( old_thread->kernel_stack == old_thread->reserved_stack && !new_thread->reserved_stack) goto need_stack; new_thread->state &= ~TH_STACK_HANDOFF; new_cont = new_thread->continuation; new_thread->continuation = NULL; /* * Set up ast context of new thread and switch * to its timer. */ processor = current_processor(); processor->active_thread = new_thread; processor->current_pri = new_thread->sched_pri; new_thread->last_processor = processor; ast_context(new_thread->top_act, processor->slot_num); timer_switch(&new_thread->system_timer); thread_unlock(new_thread); current_task()->csw++; old_thread->reason = reason; old_thread->continuation = old_cont; _mk_sp_thread_done(old_thread, new_thread, processor); machine_stack_handoff(old_thread, new_thread); _mk_sp_thread_begin(new_thread, processor); wake_lock(old_thread); thread_lock(old_thread); /* * Inline thread_dispatch but * don't free stack. */ switch (old_thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) { case TH_RUN | TH_UNINT: case TH_RUN: /* * Still running, put back * onto a run queue. */ old_thread->state |= TH_STACK_HANDOFF; _mk_sp_thread_dispatch(old_thread); thread_unlock(old_thread); wake_unlock(old_thread); break; case TH_RUN | TH_WAIT | TH_UNINT: case TH_RUN | TH_WAIT: { boolean_t term, wake, callout; /* * Waiting. */ old_thread->sleep_stamp = sched_tick; old_thread->state |= TH_STACK_HANDOFF; old_thread->state &= ~TH_RUN; term = (old_thread->state & TH_TERMINATE)? 
TRUE: FALSE; callout = old_thread->active_callout; wake = old_thread->wake_active; old_thread->wake_active = FALSE; if (old_thread->sched_mode & TH_MODE_TIMESHARE) pset_share_decr(old_thread->processor_set); pset_run_decr(old_thread->processor_set); thread_unlock(old_thread); wake_unlock(old_thread); if (callout) call_thread_block(); if (wake) thread_wakeup((event_t)&old_thread->wake_active); if (term) thread_reaper_enqueue(old_thread); break; } case TH_RUN | TH_IDLE: /* * The idle threads don't go * onto a run queue. */ old_thread->state |= TH_STACK_HANDOFF; thread_unlock(old_thread); wake_unlock(old_thread); break; default: panic("thread_invoke: state 0x%x\n", old_thread->state); } counter_always(c_thread_invoke_hits++); funnel_refunnel_check(new_thread, 2); (void) spllo(); assert(new_cont); call_continuation(new_cont); /*NOTREACHED*/ return (TRUE); } else if (new_thread->state & TH_STACK_ALLOC) { /* * Waiting for a stack */ counter_always(c_thread_invoke_misses++); thread_unlock(new_thread); return (FALSE); } else if (new_thread == old_thread) { /* same thread but with continuation */ counter(++c_thread_invoke_same); thread_unlock(new_thread); funnel_refunnel_check(new_thread, 3); (void) spllo(); call_continuation(old_cont); /*NOTREACHED*/ } } else { /* * Check that the new thread has a stack */ if (new_thread->state & TH_STACK_HANDOFF) { need_stack: if (!stack_alloc_try(new_thread, thread_continue)) { counter_always(c_thread_invoke_misses++); thread_swapin(new_thread); return (FALSE); } new_thread->state &= ~TH_STACK_HANDOFF; } else if (new_thread->state & TH_STACK_ALLOC) { /* * Waiting for a stack */ counter_always(c_thread_invoke_misses++); thread_unlock(new_thread); return (FALSE); } else if (old_thread == new_thread) { counter(++c_thread_invoke_same); thread_unlock(new_thread); return (TRUE); } } /* * Set up ast context of new thread and switch to its timer. 
*/ processor = current_processor(); processor->active_thread = new_thread; processor->current_pri = new_thread->sched_pri; new_thread->last_processor = processor; ast_context(new_thread->top_act, processor->slot_num); timer_switch(&new_thread->system_timer); assert(thread_runnable(new_thread)); thread_unlock(new_thread); counter_always(c_thread_invoke_csw++); current_task()->csw++; assert(old_thread->runq == RUN_QUEUE_NULL); old_thread->reason = reason; old_thread->continuation = old_cont; _mk_sp_thread_done(old_thread, new_thread, processor); /* * Here is where we actually change register context, * and address space if required. Note that control * will not return here immediately. */ old_thread = machine_switch_context(old_thread, old_cont, new_thread); /* Now on new thread's stack. Set a local variable to refer to it. */ new_thread = __current_thread(); assert(old_thread != new_thread); assert(thread_runnable(new_thread)); _mk_sp_thread_begin(new_thread, new_thread->last_processor); /* * We're back. Now old_thread is the thread that resumed * us, and we have to dispatch it. */ thread_dispatch(old_thread); if (old_cont) { funnel_refunnel_check(new_thread, 3); (void) spllo(); call_continuation(old_cont); /*NOTREACHED*/ } return (TRUE); } /* * thread_continue: * * Called at splsched when a thread first receives * a new stack after a continuation. */ void thread_continue( register thread_t old_thread) { register thread_t self = current_thread(); register thread_continue_t continuation; continuation = self->continuation; self->continuation = NULL; _mk_sp_thread_begin(self, self->last_processor); /* * We must dispatch the old thread and then * call the current thread's continuation. * There might not be an old thread, if we are * the first thread to run on this processor. 
*/
    if (old_thread != THREAD_NULL)
        thread_dispatch(old_thread);

    funnel_refunnel_check(self, 4);
    (void)spllo();

    call_continuation(continuation);
    /*NOTREACHED*/
}

/*
 *  thread_block_reason:
 *
 *  Forces a reschedule, blocking the caller if a wait
 *  has been asserted.
 *
 *  If a continuation is specified, then thread_invoke will
 *  attempt to discard the thread's kernel stack.  When the
 *  thread resumes, it will execute the continuation function
 *  on a new kernel stack.
 *
 *  Parameters:
 *    continuation  handed unchanged to thread_invoke().
 *    reason        AST reason bits.  When AST_PREEMPT is set the
 *                  funnel release check is skipped; when AST_YIELD
 *                  is set the processor's timeslice is zeroed.
 *
 *  Returns the calling thread's wait_result.
 *
 *  Raises to splsched on entry and restores the saved level
 *  with splx() before returning.
 */
counter(mach_counter_t  c_thread_block_calls = 0;)

int
thread_block_reason(
    thread_continue_t   continuation,
    ast_t               reason)
{
    register thread_t       thread = current_thread();
    register processor_t    processor;
    register thread_t       new_thread;
    spl_t                   s;

    counter(++c_thread_block_calls);

    check_simple_locks();

    s = splsched();

    /* Preemption does not go through the funnel release check. */
    if (!(reason & AST_PREEMPT))
        funnel_release_check(thread, 2);

    processor = current_processor();

    /* If we're explicitly yielding, force a subsequent quantum */
    if (reason & AST_YIELD)
        processor->timeslice = 0;

    /* We're handling all scheduling AST's */
    ast_off(AST_SCHEDULING);

    /* The selection is made with the current thread locked. */
    thread_lock(thread);
    new_thread = thread_select(processor);
    assert(new_thread && thread_runnable(new_thread));
    thread_unlock(thread);

    /*
     *  thread_invoke() returns FALSE when the switch could not
     *  be made; select again and retry until it succeeds.
     */
    while (!thread_invoke(thread, new_thread, reason, continuation)) {
        thread_lock(thread);
        new_thread = thread_select(processor);
        assert(new_thread && thread_runnable(new_thread));
        thread_unlock(thread);
    }

    funnel_refunnel_check(thread, 5);
    splx(s);

    return (thread->wait_result);
}

/*
 *  thread_block:
 *
 *  Block the current thread if a wait has been asserted.
 *
 *  Shorthand for thread_block_reason(continuation, AST_NONE);
 *  returns whatever that call returns.
 */
int
thread_block(
    thread_continue_t   continuation)
{
    return thread_block_reason(continuation, AST_NONE);
}

/*
 *  thread_run:
 *
 *  Switch directly from the current (old) thread to the
 *  new thread, handing off our quantum if appropriate.
 *
 *  New thread must be runnable, and not on a run queue.
 *
 *  Called at splsched.
*/
/*
 *  thread_run parameters:
 *    old_thread    must be the current thread (asserted).
 *    continuation  handed unchanged to thread_invoke().
 *    new_thread    the thread to try first.
 *
 *  Returns old_thread's wait_result.
 */
int
thread_run(
    thread_t            old_thread,
    thread_continue_t   continuation,
    thread_t            new_thread)
{
    ast_t               handoff = AST_HANDOFF;

    assert(old_thread == current_thread());

    funnel_release_check(old_thread, 3);

    /*
     *  Only the first attempt is a handoff.  If thread_invoke()
     *  fails, a replacement is chosen with thread_select() and
     *  later attempts are made with AST_NONE.
     */
    while (!thread_invoke(old_thread, new_thread, handoff, continuation)) {
        register processor_t    processor = current_processor();

        thread_lock(old_thread);
        new_thread = thread_select(processor);
        thread_unlock(old_thread);
        handoff = AST_NONE;
    }

    funnel_refunnel_check(old_thread, 6);

    return (old_thread->wait_result);
}

/*
 *  Dispatches a running thread that is not on a
 *  run queue.
 *
 *  Called at splsched.
 *
 *  Takes the thread's wake lock and then its thread lock.  Both
 *  are released before returning; in the waiting case they are
 *  released before the callout / wakeup / reaper work is done.
 */
void
thread_dispatch(
    register thread_t   thread)
{
    wake_lock(thread);
    thread_lock(thread);

    /*
     *  If we are discarding the thread's stack, we must do it
     *  before the thread has a chance to run.
     */
#ifndef i386
    if (thread->continuation != NULL) {
        assert((thread->state & TH_STACK_STATE) == 0);
        thread->state |= TH_STACK_HANDOFF;
        stack_free(thread);
    }
#endif

    switch (thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) {

    case TH_RUN              | TH_UNINT:
    case TH_RUN:
        /*
         *  No reason to stop.  Put back on a run queue.
         */
        _mk_sp_thread_dispatch(thread);
        break;

    case TH_RUN | TH_WAIT    | TH_UNINT:
    case TH_RUN | TH_WAIT:
    {
        boolean_t   term, wake, callout;

        /*
         *  Waiting
         */
        thread->sleep_stamp = sched_tick;
        thread->state &= ~TH_RUN;

        /* Snapshot the flags while locked; they are acted on after unlocking. */
        term = (thread->state & TH_TERMINATE)? TRUE: FALSE;
        callout = thread->active_callout;
        wake = thread->wake_active;
        thread->wake_active = FALSE;

        if (thread->sched_mode & TH_MODE_TIMESHARE)
            pset_share_decr(thread->processor_set);
        pset_run_decr(thread->processor_set);

        thread_unlock(thread);
        wake_unlock(thread);

        if (callout)
            call_thread_block();

        if (wake)
            thread_wakeup((event_t)&thread->wake_active);

        if (term)
            thread_reaper_enqueue(thread);

        /* Locks already dropped above; skip the common unlock below. */
        return;
    }

    case TH_RUN                         | TH_IDLE:
        /*
         *  The idle threads don't go
         *  onto a run queue.
         */
        break;

    default:
        panic("thread_dispatch: state 0x%x\n", thread->state);
    }

    thread_unlock(thread);
    wake_unlock(thread);
}

/*
 *  Enqueue thread on run queue.
Thread must be locked, * and not already be on a run queue. Returns TRUE * if a preemption is indicated based on the state * of the run queue. * * Run queue must be locked, see run_queue_remove() * for more info. */ static boolean_t run_queue_enqueue( register run_queue_t rq, register thread_t thread, integer_t options) { register int whichq = thread->sched_pri; register queue_t queue = &rq->queues[whichq]; boolean_t result = FALSE; assert(whichq >= MINPRI && whichq <= MAXPRI); assert(thread->runq == RUN_QUEUE_NULL); if (queue_empty(queue)) { enqueue_tail(queue, (queue_entry_t)thread); setbit(MAXPRI - whichq, rq->bitmap); if (whichq > rq->highq) { rq->highq = whichq; result = TRUE; } } else if (options & SCHED_HEADQ) enqueue_head(queue, (queue_entry_t)thread); else enqueue_tail(queue, (queue_entry_t)thread); thread->runq = rq; if (thread->sched_mode & TH_MODE_PREEMPT) rq->urgency++; rq->count++; return (result); } /* * Enqueue a thread for realtime execution, similar * to above. Handles preemption directly. 
*/
/*
 *  realtime_schedule_insert notes:
 *
 *  The thread is inserted into pset->runq at its sched_pri level,
 *  ahead of the first queued entry with a later realtime deadline.
 *
 *  Every exit path calls simple_unlock(&pset->sched_lock), so
 *  the caller is expected to hold that lock on entry and must
 *  not release it again.
 */
static void
realtime_schedule_insert(
    register processor_set_t    pset,
    register thread_t           thread)
{
    register run_queue_t    rq = &pset->runq;
    register int            whichq = thread->sched_pri;
    register queue_t        queue = &rq->queues[whichq];
    uint64_t                deadline = thread->realtime.deadline;
    boolean_t               try_preempt = FALSE;

    assert(whichq >= BASEPRI_REALTIME && whichq <= MAXPRI);

    assert(thread->runq == RUN_QUEUE_NULL);
    if (queue_empty(queue)) {
        enqueue_tail(queue, (queue_entry_t)thread);

        setbit(MAXPRI - whichq, rq->bitmap);
        if (whichq > rq->highq)
            rq->highq = whichq;
        try_preempt = TRUE;
    }
    else {
        register thread_t   entry = (thread_t)queue_first(queue);

        /*
         *  Walk forward until the end of the queue or the first
         *  entry with a later deadline, then step back one so
         *  that 'entry' is the element to insert behind.
         */
        while (TRUE) {
            if (    queue_end(queue, (queue_entry_t)entry)  ||
                        deadline < entry->realtime.deadline     ) {
                entry = (thread_t)queue_prev((queue_entry_t)entry);
                break;
            }

            entry = (thread_t)queue_next((queue_entry_t)entry);
        }

        /* Stepping back onto the queue head means the new thread is now first. */
        if ((queue_entry_t)entry == queue)
            try_preempt = TRUE;

        /* NOTE(review): insque presumably links 'thread' directly after 'entry' -- confirm. */
        insque((queue_entry_t)thread, (queue_entry_t)entry);
    }

    thread->runq = rq;
    assert(thread->sched_mode & TH_MODE_PREEMPT);
    rq->count++; rq->urgency++;

    if (try_preempt) {
        register processor_t    processor;

        /*
         *  First choice: the current processor, if it belongs to
         *  this pset and runs at lower priority or has a later
         *  deadline.
         */
        processor = current_processor();
        if (    pset == processor->processor_set                &&
                (thread->sched_pri > processor->current_pri     ||
                    deadline < processor->deadline          )       ) {
            dispatch_counts.realtime_self++;
            simple_unlock(&pset->sched_lock);

            ast_on(AST_PREEMPT | AST_URGENT);
            return;
        }

        if (    pset->processor_count > 1           ||
                pset != processor->processor_set    ) {
            processor_t     myprocessor, lastprocessor;
            queue_entry_t   next;

            /* Second choice: the processor this thread last ran on. */
            myprocessor = processor;
            processor = thread->last_processor;
            if (    processor != myprocessor                        &&
                    processor != PROCESSOR_NULL                     &&
                    processor->processor_set == pset                &&
                    processor->state == PROCESSOR_RUNNING           &&
                    (thread->sched_pri > processor->current_pri     ||
                        deadline < processor->deadline          )       ) {
                dispatch_counts.realtime_last++;
                cause_ast_check(processor);
                simple_unlock(&pset->sched_lock);
                return;
            }

            /* Last choice: any other processor on the pset's active queue. */
            lastprocessor = processor;
            queue = &pset->active_queue;
            processor = (processor_t)queue_first(queue);
            while (!queue_end(queue, (queue_entry_t)processor)) {
                next = queue_next((queue_entry_t)processor);

                if (    processor != myprocessor                        &&
                        processor != lastprocessor                      &&
                        (thread->sched_pri > processor->current_pri     ||
                            deadline < processor->deadline          )       ) {
                    /* Move the chosen processor to the tail unless it is already last. */
                    if (!queue_end(queue, next)) {
                        remqueue(queue, (queue_entry_t)processor);
                        enqueue_tail(queue, (queue_entry_t)processor);
                    }

                    dispatch_counts.realtime_other++;
                    cause_ast_check(processor);
                    simple_unlock(&pset->sched_lock);
                    return;
                }

                processor = (processor_t)next;
            }
        }
    }

    simple_unlock(&pset->sched_lock);
}

/*
 *  thread_setrun:
 *
 *  Dispatch thread for execution, directly onto an idle
 *  processor if possible.  Else put on appropriate run
 *  queue. (local if bound, else processor set)
 *
 *  Thread must be locked.
 *
 *  Parameters:
 *    new_thread    runnable thread that is not on any run queue
 *                  (both conditions are asserted).
 *    options       SCHED_PREEMPT requests a preemption check;
 *                  the value is also passed to run_queue_enqueue().
 *
 *  A thread with TH_MODE_PREEMPT set always requests an urgent
 *  preemption, regardless of options.
 */
void
thread_setrun(
    register thread_t           new_thread,
    integer_t                   options)
{
    register processor_t        processor;
    register processor_set_t    pset;
    register thread_t           thread;
    ast_t                       preempt = (options & SCHED_PREEMPT)?
                                                AST_PREEMPT: AST_NONE;

    assert(thread_runnable(new_thread));

    /*
     *  Update priority if needed.
     */
    if (new_thread->sched_stamp != sched_tick)
        update_priority(new_thread);

    /*
     *  Check for urgent preemption.
     */
    if (new_thread->sched_mode & TH_MODE_PREEMPT)
        preempt = (AST_PREEMPT | AST_URGENT);

    assert(new_thread->runq == RUN_QUEUE_NULL);

    if ((processor = new_thread->bound_processor) == PROCESSOR_NULL) {
        /*
         *  First try to dispatch on
         *  the last processor.
         */
        pset = new_thread->processor_set;
        processor = new_thread->last_processor;
        if (    pset->processor_count > 1               &&
                processor != PROCESSOR_NULL             &&
                processor->state == PROCESSOR_IDLE      ) {
            processor_lock(processor);
            simple_lock(&pset->sched_lock);
            /* Re-check under the locks; the unlocked test above may be stale. */
            if (    processor->processor_set == pset        &&
                    processor->state == PROCESSOR_IDLE      ) {
                remqueue(&pset->idle_queue, (queue_entry_t)processor);
                pset->idle_count--;
                processor->next_thread = new_thread;
                if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
                    processor->deadline = new_thread->realtime.deadline;
                else
                    processor->deadline = UINT64_MAX;
                processor->state = PROCESSOR_DISPATCHING;
                dispatch_counts.idle_pset_last++;
                simple_unlock(&pset->sched_lock);
                processor_unlock(processor);
                if (processor != current_processor())
                    machine_signal_idle(processor);
                return;
            }
            /* Keep the pset scheduling lock; only the processor lock is dropped. */
            processor_unlock(processor);
        }
        else
        simple_lock(&pset->sched_lock);

        /*
         *  Next pick any idle processor
         *  in the processor set.
         */
        if (pset->idle_count > 0) {
            processor = (processor_t)dequeue_head(&pset->idle_queue);
            pset->idle_count--;
            processor->next_thread = new_thread;
            if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
                processor->deadline = new_thread->realtime.deadline;
            else
                processor->deadline = UINT64_MAX;
            processor->state = PROCESSOR_DISPATCHING;
            dispatch_counts.idle_pset_any++;
            simple_unlock(&pset->sched_lock);
            if (processor != current_processor())
                machine_signal_idle(processor);
            return;
        }

        /* realtime_schedule_insert() releases the pset scheduling lock itself. */
        if (new_thread->sched_pri >= BASEPRI_RTQUEUES)
            realtime_schedule_insert(pset, new_thread);
        else {
            /* No preemption unless the enqueue says one is indicated. */
            if (!run_queue_enqueue(&pset->runq, new_thread, options))
                preempt = AST_NONE;

            /*
             *  Update the timesharing quanta.
             */
            timeshare_quanta_update(pset);

            /*
             *  Preempt check.
             */
            if (preempt != AST_NONE) {
                /*
                 *  First try the current processor
                 *  if it is a member of the correct
                 *  processor set.
                 */
                processor = current_processor();
                thread = processor->active_thread;
                if (    pset == processor->processor_set    &&
                        csw_needed(thread, processor)       ) {
                    dispatch_counts.pset_self++;
                    simple_unlock(&pset->sched_lock);

                    ast_on(preempt);
                    return;
                }

                /*
                 *  If that failed and we have other
                 *  processors available keep trying.
                 */
                if (    pset->processor_count > 1           ||
                        pset != processor->processor_set    ) {
                    queue_t         queue = &pset->active_queue;
                    processor_t     myprocessor, lastprocessor;
                    queue_entry_t   next;

                    /*
                     *  Next try the last processor
                     *  dispatched on.
                     */
                    myprocessor = processor;
                    processor = new_thread->last_processor;
                    if (    processor != myprocessor                        &&
                            processor != PROCESSOR_NULL                     &&
                            processor->processor_set == pset                &&
                            processor->state == PROCESSOR_RUNNING           &&
                            new_thread->sched_pri > processor->current_pri  ) {
                        dispatch_counts.pset_last++;
                        cause_ast_check(processor);
                        simple_unlock(&pset->sched_lock);
                        return;
                    }

                    /*
                     *  Lastly, pick any other
                     *  available processor.
                     */
                    lastprocessor = processor;
                    processor = (processor_t)queue_first(queue);
                    while (!queue_end(queue, (queue_entry_t)processor)) {
                        next = queue_next((queue_entry_t)processor);

                        if (    processor != myprocessor                        &&
                                processor != lastprocessor                      &&
                                new_thread->sched_pri >
                                            processor->current_pri              ) {
                            /* Move the chosen processor to the tail unless it is already last. */
                            if (!queue_end(queue, next)) {
                                remqueue(queue, (queue_entry_t)processor);
                                enqueue_tail(queue, (queue_entry_t)processor);
                            }
                            dispatch_counts.pset_other++;
                            cause_ast_check(processor);
                            simple_unlock(&pset->sched_lock);
                            return;
                        }

                        processor = (processor_t)next;
                    }
                }
            }

            simple_unlock(&pset->sched_lock);
        }
    }
    else {
        /*
         *  Bound, can only run on bound processor.  Have to lock
         *  processor here because it may not be the current one.
         */
        processor_lock(processor);
        pset = processor->processor_set;
        if (pset != PROCESSOR_SET_NULL) {
            simple_lock(&pset->sched_lock);
            if (processor->state == PROCESSOR_IDLE) {
                remqueue(&pset->idle_queue, (queue_entry_t)processor);
                pset->idle_count--;
                processor->next_thread = new_thread;
                processor->deadline = UINT64_MAX;
                processor->state = PROCESSOR_DISPATCHING;
                dispatch_counts.idle_bound++;
                simple_unlock(&pset->sched_lock);
                processor_unlock(processor);
                if (processor != current_processor())
                    machine_signal_idle(processor);
                return;
            }
        }

        /* Bound threads go on the processor's local run queue. */
        if (!run_queue_enqueue(&processor->runq, new_thread, options))
            preempt = AST_NONE;

        if (preempt != AST_NONE) {
            if (processor == current_processor()) {
                thread = processor->active_thread;
                if (csw_needed(thread, processor)) {
                    dispatch_counts.bound_self++;
                    ast_on(preempt);
                }
            }
            else
            if (    processor->state == PROCESSOR_RUNNING           &&
                    new_thread->sched_pri > processor->current_pri  ) {
                dispatch_counts.bound_other++;
                cause_ast_check(processor);
            }
        }

        /* The pset lock was only taken when the processor has a pset. */
        if (pset != PROCESSOR_SET_NULL)
            simple_unlock(&pset->sched_lock);

        processor_unlock(processor);
    }
}

/*
 *  Check for a possible preemption point in
 *  the (current) thread.
 *
 *  Called at splsched.
*/
/*
 *  csw_check details:
 *
 *  Examines the processor set's run queue and then the
 *  processor's own run queue.  While the processor is in its
 *  first timeslice a queue only counts if its highq is strictly
 *  above the thread's sched_pri; otherwise an equal highq
 *  counts as well.  A counting queue with urgency > 0 yields
 *  (AST_PREEMPT | AST_URGENT) at once, as does realtime-level
 *  work on the pset queue during the first timeslice.
 *
 *  If neither queue asks for a preemption, a thread with
 *  TH_SUSP set still gets AST_PREEMPT.
 *
 *  Returns AST_NONE, AST_PREEMPT or (AST_PREEMPT | AST_URGENT).
 */
ast_t
csw_check(
    thread_t        thread,
    processor_t     processor)
{
    int             my_pri = thread->sched_pri;
    boolean_t       fresh_slice = first_timeslice(processor) ? TRUE : FALSE;
    run_queue_t     source[2];
    ast_t           pending = AST_NONE;
    int             i;

    /* Same order as always: pset-wide queue first, then local queue. */
    source[0] = &processor->processor_set->runq;
    source[1] = &processor->runq;

    /* Realtime work on the pset queue preempts urgently in a fresh slice. */
    if (fresh_slice && source[0]->highq >= BASEPRI_RTQUEUES)
        return (AST_PREEMPT | AST_URGENT);

    for (i = 0; i < 2; i++) {
        run_queue_t     rq = source[i];
        boolean_t       outranked;

        if (fresh_slice)
            outranked = (rq->highq > my_pri);
        else
            outranked = (rq->highq >= my_pri);

        if (!outranked)
            continue;

        if (rq->urgency > 0)
            return (AST_PREEMPT | AST_URGENT);

        pending |= AST_PREEMPT;
    }

    /* Suspension only matters when no queue asked for a preemption. */
    if (pending == AST_NONE && (thread->state & TH_SUSP))
        pending |= AST_PREEMPT;

    return (pending);
}

/*
 *  set_sched_pri:
 *
 *  Set the scheduled priority of the specified thread.
 *
 *  This may cause the thread to change queues.
 *
 *  Thread must be locked.
*/
/*
 *  set_sched_pri steps:
 *
 *  1. Pull the thread off whatever run queue it is on.
 *  2. Recompute TH_MODE_PREEMPT from the new priority.
 *  3. Store the priority.
 *  4. If the thread had been queued, requeue it through
 *     thread_setrun(); otherwise, if it is running, arrange
 *     for a preemption check on its processor.
 */
void
set_sched_pri(
    thread_t            thread,
    int                 priority)
{
    register struct run_queue   *rq = run_queue_remove(thread);

    if (    !(thread->sched_mode & TH_MODE_TIMESHARE)               &&
            (priority >= BASEPRI_PREEMPT                        ||
             (thread->task_priority < MINPRI_KERNEL         &&
              thread->task_priority >= BASEPRI_BACKGROUND   &&
              priority > thread->task_priority)                 ||
             (thread->sched_mode & TH_MODE_FORCEDPREEMPT)       )   )
        thread->sched_mode |= TH_MODE_PREEMPT;
    else
        thread->sched_mode &= ~TH_MODE_PREEMPT;

    thread->sched_pri = priority;
    if (rq != RUN_QUEUE_NULL)
        thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
    else
    if (thread->state & TH_RUN) {
        processor_t     processor = thread->last_processor;

        if (thread == current_thread()) {
            ast_t       preempt = csw_check(thread, processor);

            if (preempt != AST_NONE)
                ast_on(preempt);
            processor->current_pri = priority;
        }
        else
        if (    processor != PROCESSOR_NULL                     &&
                processor->active_thread == thread  )
            cause_ast_check(processor);
    }
}

/*
 *  run_queue_remove:
 *
 *  Remove a thread from its current run queue and
 *  return the run queue if successful.
 *
 *  Thread must be locked.
 *
 *  Returns RUN_QUEUE_NULL when the thread was not on a run
 *  queue, or left it before the queue could be locked.
 */
run_queue_t
run_queue_remove(
    thread_t            thread)
{
    register run_queue_t    rq = thread->runq;

    /*
     *  If rq is RUN_QUEUE_NULL, the thread will stay out of the
     *  run queues because the caller locked the thread.  Otherwise
     *  the thread is on a run queue, but could be chosen for dispatch
     *  and removed.
     */
    if (rq != RUN_QUEUE_NULL) {
        processor_set_t     pset = thread->processor_set;
        processor_t         processor = thread->bound_processor;

        /*
         *  The run queues are locked by the pset scheduling
         *  lock, except when a processor is off-line the
         *  local run queue is locked by the processor lock.
         */
        if (processor != PROCESSOR_NULL) {
            processor_lock(processor);
            pset = processor->processor_set;
        }

        if (pset != PROCESSOR_SET_NULL)
            simple_lock(&pset->sched_lock);

        /* Re-check now that the lock is held. */
        if (rq == thread->runq) {
            /*
             *  Thread is on a run queue and we have a lock on
             *  that run queue.
             */
            /* NOTE(review): queues[0] is passed whatever the thread's level; presumably remqueue ignores its queue argument -- confirm. */
            remqueue(&rq->queues[0], (queue_entry_t)thread);
            rq->count--;
            if (thread->sched_mode & TH_MODE_PREEMPT)
                rq->urgency--;
            assert(rq->urgency >= 0);

            if (queue_empty(rq->queues + thread->sched_pri)) {
                /* update run queue status */
                if (thread->sched_pri != IDLEPRI)
                    clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
                rq->highq = MAXPRI - ffsbit(rq->bitmap);
            }

            thread->runq = RUN_QUEUE_NULL;
        }
        else {
            /*
             *  The thread left the run queue before we could
             *  lock the run queue.
             */
            assert(thread->runq == RUN_QUEUE_NULL);
            rq = RUN_QUEUE_NULL;
        }

        if (pset != PROCESSOR_SET_NULL)
            simple_unlock(&pset->sched_lock);

        if (processor != PROCESSOR_NULL)
            processor_unlock(processor);
    }

    return (rq);
}

/*
 *  choose_thread:
 *
 *  Remove a thread to execute from the run queues
 *  and return it.
 *
 *  Called with pset scheduling lock held.
 *
 *  The processor's local run queue is used when it is non-empty
 *  and its highq is at least the pset queue's highq; otherwise
 *  the thread comes from the pset queue, which is asserted to
 *  be non-empty.  processor->deadline is set to the chosen
 *  thread's realtime deadline only for a pset-queue pick at
 *  BASEPRI_RTQUEUES or above, and to UINT64_MAX otherwise.
 */
static thread_t
choose_thread(
    processor_set_t     pset,
    processor_t         processor)
{
    register run_queue_t    runq;
    register thread_t       thread;
    register queue_t        q;

    runq = &processor->runq;

    if (runq->count > 0 && runq->highq >= pset->runq.highq) {
        q = runq->queues + runq->highq;

        /* Unlink the first element of the highest non-empty level by hand. */
        thread = (thread_t)q->next;
        ((queue_entry_t)thread)->next->prev = q;
        q->next = ((queue_entry_t)thread)->next;
        thread->runq = RUN_QUEUE_NULL;
        runq->count--;
        if (thread->sched_mode & TH_MODE_PREEMPT)
            runq->urgency--;
        assert(runq->urgency >= 0);
        if (queue_empty(q)) {
            if (runq->highq != IDLEPRI)
                clrbit(MAXPRI - runq->highq, runq->bitmap);
            runq->highq = MAXPRI - ffsbit(runq->bitmap);
        }

        processor->deadline = UINT64_MAX;

        return (thread);
    }

    runq = &pset->runq;

    assert(runq->count > 0);
    q = runq->queues + runq->highq;

    /* Same manual unlink as above, this time from the pset queue. */
    thread = (thread_t)q->next;
    ((queue_entry_t)thread)->next->prev = q;
    q->next = ((queue_entry_t)thread)->next;
    thread->runq = RUN_QUEUE_NULL;
    runq->count--;
    if (runq->highq >= BASEPRI_RTQUEUES)
        processor->deadline = thread->realtime.deadline;
    else
        processor->deadline = UINT64_MAX;
    if (thread->sched_mode & TH_MODE_PREEMPT)
        runq->urgency--;
    assert(runq->urgency >= 0);
    if (queue_empty(q)) {
        if (runq->highq != IDLEPRI)
            clrbit(MAXPRI - runq->highq, runq->bitmap);
        runq->highq = MAXPRI - ffsbit(runq->bitmap);
    }

    timeshare_quanta_update(pset);

    return (thread);
}

/*
 *  no_dispatch_count counts number of times processors go non-idle
 *  without being dispatched.  This should be very rare.
 */
int no_dispatch_count = 0;

/*
 *  This is the idle thread, which just looks for other threads
 *  to execute.
 *
 *  Spins until a thread has been handed to this processor
 *  (processor->next_thread) or either run queue count becomes
 *  non-zero, then acts on the processor state.  Every handled
 *  state ends in thread_run() or thread_block() with this
 *  function as the continuation; an unexpected state panics.
 */
void
idle_thread_continue(void)
{
    register processor_t        processor;
    register volatile thread_t  *threadp;
    register volatile int       *gcount;
    register volatile int       *lcount;
    register thread_t           new_thread;
    register int                state;
    register processor_set_t    pset;
    int                         mycpu;

    mycpu = cpu_number();
    processor = cpu_to_processor(mycpu);
    /* The polled locations are accessed through volatile pointers so each loop pass re-reads them. */
    threadp = (volatile thread_t *) &processor->next_thread;
    lcount = (volatile int *) &processor->runq.count;

    gcount = (volatile int *)&processor->processor_set->runq.count;

    (void)splsched();
    while ( (*threadp == (volatile thread_t)THREAD_NULL)    &&
                (*gcount == 0) && (*lcount == 0)                ) {

        /* check for ASTs while we wait */
        if (need_ast[mycpu] &~ (    AST_SCHEDULING | AST_BSD    )) {
            /* no ASTs for us */
            need_ast[mycpu] &= AST_NONE;
            (void)spllo();
        }
        else
            machine_idle();

        (void)splsched();
    }

    /*
     *  This is not a switch statement to avoid the
     *  bounds checking code in the common case.
     */
    pset = processor->processor_set;
    simple_lock(&pset->sched_lock);

    state = processor->state;
    if (state == PROCESSOR_DISPATCHING) {
        /*
         *  Common case -- cpu dispatched.
         */
        new_thread = *threadp;
        *threadp = (volatile thread_t) THREAD_NULL;
        processor->state = PROCESSOR_RUNNING;
        enqueue_tail(&pset->active_queue, (queue_entry_t)processor);

        if (    pset->runq.highq >= BASEPRI_RTQUEUES            &&
                new_thread->sched_pri >= BASEPRI_RTQUEUES       ) {
            register run_queue_t    runq = &pset->runq;
            register queue_t        q;

            q = runq->queues + runq->highq;
            /*
             *  A queued realtime thread with an earlier deadline
             *  takes the place of the dispatched one, which is
             *  put back through thread_setrun().
             */
            if (((thread_t)q->next)->realtime.deadline <
                                            processor->deadline) {
                thread_t    thread = new_thread;

                new_thread = (thread_t)q->next;
                ((queue_entry_t)new_thread)->next->prev = q;
                q->next = ((queue_entry_t)new_thread)->next;
                new_thread->runq = RUN_QUEUE_NULL;
                processor->deadline = new_thread->realtime.deadline;
                assert(new_thread->sched_mode & TH_MODE_PREEMPT);
                runq->count--; runq->urgency--;
                if (queue_empty(q)) {
                    if (runq->highq != IDLEPRI)
                        clrbit(MAXPRI - runq->highq, runq->bitmap);
                    runq->highq = MAXPRI - ffsbit(runq->bitmap);
                }
                dispatch_counts.missed_realtime++;
                simple_unlock(&pset->sched_lock);

                thread_lock(thread);
                thread_setrun(thread, SCHED_HEADQ);
                thread_unlock(thread);

                counter(c_idle_thread_handoff++);
                thread_run(processor->idle_thread,
                                    idle_thread_continue, new_thread);
                /*NOTREACHED*/
            }
            simple_unlock(&pset->sched_lock);

            counter(c_idle_thread_handoff++);
            thread_run(processor->idle_thread,
                                idle_thread_continue, new_thread);
            /*NOTREACHED*/
        }

        /* A higher-priority thread was queued meanwhile: run it and requeue the dispatched one. */
        if (    processor->runq.highq > new_thread->sched_pri       ||
                pset->runq.highq > new_thread->sched_pri                ) {
            thread_t    thread = new_thread;

            new_thread = choose_thread(pset, processor);
            dispatch_counts.missed_other++;
            simple_unlock(&pset->sched_lock);

            thread_lock(thread);
            thread_setrun(thread, SCHED_HEADQ);
            thread_unlock(thread);

            counter(c_idle_thread_handoff++);
            thread_run(processor->idle_thread,
                                idle_thread_continue, new_thread);
            /* NOTREACHED */
        }
        else {
            simple_unlock(&pset->sched_lock);

            counter(c_idle_thread_handoff++);
            thread_run(processor->idle_thread,
                                idle_thread_continue, new_thread);
            /* NOTREACHED */
        }
    }
    else
    if (state == PROCESSOR_IDLE) {
        /*
         *  Processor was not dispatched (Rare).
         *  Set it running again and force a
         *  reschedule.
         */
        no_dispatch_count++;
        pset->idle_count--;
        remqueue(&pset->idle_queue, (queue_entry_t)processor);
        processor->state = PROCESSOR_RUNNING;
        enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
        simple_unlock(&pset->sched_lock);

        counter(c_idle_thread_block++);
        thread_block(idle_thread_continue);
        /* NOTREACHED */
    }
    else
    if (state == PROCESSOR_SHUTDOWN) {
        /*
         *  Going off-line.  Force a
         *  reschedule.
         */
        if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
            *threadp = (volatile thread_t) THREAD_NULL;
            processor->deadline = UINT64_MAX;
            simple_unlock(&pset->sched_lock);

            thread_lock(new_thread);
            thread_setrun(new_thread, SCHED_HEADQ);
            thread_unlock(new_thread);
        }
        else
            simple_unlock(&pset->sched_lock);

        counter(c_idle_thread_block++);
        thread_block(idle_thread_continue);
        /* NOTREACHED */
    }

    /* Reached only when the state is none of the three handled above. */
    simple_unlock(&pset->sched_lock);

    panic("idle_thread: state %d\n", cpu_state(mycpu));
    /*NOTREACHED*/
}

/*
 *  Entry point of the idle thread: blocks at once with
 *  idle_thread_continue as the continuation.
 */
void
idle_thread(void)
{
    counter(c_idle_thread_block++);
    thread_block(idle_thread_continue);
    /*NOTREACHED*/
}

/* Deadline, in mach_absolute_time() units, of the next scheduler tick. */
static uint64_t         sched_tick_deadline;

void    sched_tick_thread(void);

/*
 *  Start the scheduler tick thread at MAXPRI_STANDARD.
 */
void
sched_tick_init(void)
{
    kernel_thread_with_priority(sched_tick_thread, MAXPRI_STANDARD);
}

/*
 *  sched_tick_thread
 *
 *  Perform periodic bookkeeping functions about ten
 *  times per second.
 *
 *  Each pass increments sched_tick, recomputes the load
 *  factors, scans the run queues, and then blocks until the
 *  next deadline with itself as the continuation.
 */
void
sched_tick_thread_continue(void)
{
    uint64_t            abstime;
#if SIMPLE_CLOCK
    int                 new_usec;
#endif  /* SIMPLE_CLOCK */

    abstime = mach_absolute_time();

    sched_tick++;       /* age usage one more time */
#if SIMPLE_CLOCK
    /*
     *  Compensate for clock drift.  sched_usec is an
     *  exponential average of the number of microseconds in
     *  a second.  It decays in the same fashion as cpu_usage.
     */
    new_usec = sched_usec_elapsed();
    sched_usec = (5*sched_usec + 3*new_usec)/8;
#endif  /* SIMPLE_CLOCK */

    /*
     *  Compute the scheduler load factors.
     */
    compute_mach_factor();

    /*
     *  Scan the run queues for timesharing threads which
     *  may need to have their priorities recalculated.
     */
    do_thread_scan();

    /* The next deadline is derived from the time sampled at the top of this pass. */
    clock_deadline_for_periodic_event(sched_tick_interval, abstime,
                                                    &sched_tick_deadline);

    assert_wait((event_t)sched_tick_thread_continue, THREAD_INTERRUPTIBLE);
    thread_set_timer_deadline(sched_tick_deadline);
    thread_block(sched_tick_thread_continue);
    /*NOTREACHED*/
}

/*
 *  Initial entry of the tick thread: seeds the deadline with
 *  the current time and enters the continuation loop.
 */
void
sched_tick_thread(void)
{
    sched_tick_deadline = mach_absolute_time();

    thread_block(sched_tick_thread_continue);
    /*NOTREACHED*/
}

/*
 *  do_thread_scan:
 *
 *  Scan the run queues for timesharing threads which need
 *  to be aged, possibly adjusting their priorities upwards.
 *
 *  Scanner runs in two passes.  Pass one squirrels likely
 *  thread away in an array  (takes out references for them).
 *  Pass two does the priority updates.  This is necessary because
 *  the run queue lock is required for the candidate scan, but
 *  cannot be held during updates.
 *
 *  Array length should be enough so that restart isn't necessary,
 *  but restart logic is included.
 *
 */

#define MAX_STUCK_THREADS   128

static thread_t stuck_threads[MAX_STUCK_THREADS];
static int      stuck_count = 0;

/*
 *  do_runq_scan is the guts of pass 1.  It scans a runq for
 *  stuck threads.  A boolean is returned indicating whether
 *  a retry is needed.
 *
 *  A retry is requested when the candidate array is full or
 *  when a candidate's lock could not be taken.
 */
static boolean_t
do_runq_scan(
    run_queue_t             runq)
{
    register queue_t        q;
    register thread_t       thread;
    register int            count;
    boolean_t               result = FALSE;

    if ((count = runq->count) > 0) {
        /* Walk down from the highest level until 'count' threads have been seen. */
        q = runq->queues + runq->highq;
        while (count > 0) {
            queue_iterate(q, thread, thread_t, links) {
                if (        thread->sched_stamp != sched_tick       &&
                        (thread->sched_mode & TH_MODE_TIMESHARE)    ) {
                    /*
                     *  Stuck, save its id for later.
                     */
                    if (stuck_count == MAX_STUCK_THREADS) {
                        /*
                         *  !@#$% No more room.
                         */
                        return (TRUE);
                    }

                    /* Take a reference under the thread lock; it is dropped in pass two. */
                    if (thread_lock_try(thread)) {
                        thread->ref_count++;
                        thread_unlock(thread);
                        stuck_threads[stuck_count++] = thread;
                    }
                    else
                        result = TRUE;
                }

                count--;
            }

            q--;
        }
    }

    return (result);
}

/* When FALSE, do_thread_scan() returns without doing anything. */
boolean_t   thread_scan_enabled = TRUE;

/*
 *  Driver for the two-pass scan: collects candidates from the
 *  default pset's run queue and from each processor's local run
 *  queue (plus stale idle threads), then updates their
 *  priorities.  Repeats after delay(1) while pass one asks for
 *  a restart.
 */
static void
do_thread_scan(void)
{
    register boolean_t          restart_needed = FALSE;
    register thread_t           thread;
    register processor_set_t    pset = &default_pset;
    register processor_t        processor;
    spl_t                       s;

    if (!thread_scan_enabled)
        return;

    do {
        s = splsched();
        simple_lock(&pset->sched_lock);
        restart_needed = do_runq_scan(&pset->runq);
        simple_unlock(&pset->sched_lock);

        if (!restart_needed) {
            simple_lock(&pset->sched_lock);
            processor = (processor_t)queue_first(&pset->processors);
            while (!queue_end(&pset->processors, (queue_entry_t)processor)) {
                /* Assignment is intentional: remember the result and stop on a restart request. */
                if (restart_needed = do_runq_scan(&processor->runq))
                    break;

                /* Idle threads are recorded without taking a reference. */
                thread = processor->idle_thread;
                if (thread->sched_stamp != sched_tick) {
                    if (stuck_count == MAX_STUCK_THREADS) {
                        restart_needed = TRUE;
                        break;
                    }

                    stuck_threads[stuck_count++] = thread;
                }

                processor = (processor_t)queue_next(&processor->processors);
            }
            simple_unlock(&pset->sched_lock);
        }
        splx(s);

        /*
         *  Ok, we now have a collection of candidates -- fix them.
         */
        while (stuck_count > 0) {
            boolean_t       idle_thread;

            thread = stuck_threads[--stuck_count];
            stuck_threads[stuck_count] = THREAD_NULL;

            s = splsched();
            thread_lock(thread);
            idle_thread = (thread->state & TH_IDLE) != 0;
            if (    !(thread->state & (TH_WAIT|TH_SUSP))    &&
                        thread->sched_stamp != sched_tick   )
                update_priority(thread);
            thread_unlock(thread);
            splx(s);

            /* Only non-idle candidates had a reference taken in pass one. */
            if (!idle_thread)
                thread_deallocate(thread);
        }

        if (restart_needed)
            delay(1);           /* XXX */

    } while (restart_needed);
}

/*
 *  Just in case someone doesn't use the macro
 */
#undef  thread_wakeup
void
thread_wakeup(
    event_t     x);

/*
 *  Function form of thread_wakeup: wakes waiters on event x
 *  with result THREAD_AWAKENED.
 */
void
thread_wakeup(
    event_t     x)
{
    thread_wakeup_with_result(x, THREAD_AWAKENED);
}

#if DEBUG
/*
 *  TRUE when the thread has TH_RUN set and TH_WAIT clear.
 */
static boolean_t
thread_runnable(
    thread_t    thread)
{
    return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN);
}
#endif  /* DEBUG */

#if MACH_KDB
/* NOTE(review): the header name is missing from the #include below (apparently lost before this text was captured) -- restore it before building. */
#include
#define printf      kdbprintf
extern int      db_indent;
void            db_sched(void);

/*
 *  Kernel debugger command: print scheduling statistics.
 */
void
db_sched(void)
{
    iprintf("Scheduling Statistics:\n");
    db_indent += 2;
    iprintf("Thread invocations: csw %d same %d\n",
        c_thread_invoke_csw, c_thread_invoke_same);
#if MACH_COUNTERS
    iprintf("Thread block: calls %d\n",
        c_thread_block_calls);
    iprintf("Idle thread:\n\thandoff %d block %d no_dispatch %d\n",
        c_idle_thread_handoff,
        c_idle_thread_block, no_dispatch_count);
    iprintf("Sched thread blocks: %d\n", c_sched_thread_block);
#endif  /* MACH_COUNTERS */
    db_indent -= 2;
}

/* NOTE(review): the header name is missing from the #include below as well -- restore it before building. */
#include
void            db_show_thread_log(void);

/*
 *  Kernel debugger command stub; intentionally empty.
 */
void
db_show_thread_log(void)
{
}
#endif  /* MACH_KDB */