/* * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ #ifdef MACH_BSD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../bsd/sys/sysent.h> extern struct proc *current_proc(void); kern_return_t thread_userstack( thread_t, int, thread_state_t, unsigned int, mach_vm_offset_t *, int * ); kern_return_t thread_entrypoint( thread_t, int, thread_state_t, unsigned int, mach_vm_offset_t * ); unsigned int get_msr_exportmask(void); unsigned int get_msr_nbits(void); unsigned int get_msr_rbits(void); kern_return_t thread_compose_cthread_desc(unsigned int addr, pcb_t pcb); void IOSleep(int); /* * thread_userstack: * * Return the user stack pointer from the machine * dependent thread state info. */ kern_return_t thread_userstack( __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, user_addr_t *user_stack, int *customstack ) { struct i386_saved_state *state; i386_thread_state_t *state25; vm_offset_t uesp; if (customstack) *customstack = 0; switch (flavor) { case i386_THREAD_STATE: /* FIXME */ state25 = (i386_thread_state_t *) tstate; if (state25->esp) *user_stack = state25->esp; else *user_stack = USRSTACK; if (customstack && state25->esp) *customstack = 1; else *customstack = 0; break; case i386_NEW_THREAD_STATE: if (count < i386_NEW_THREAD_STATE_COUNT) return (KERN_INVALID_ARGUMENT); else { state = (struct i386_saved_state *) tstate; uesp = state->uesp; } /* If a valid user stack is specified, use it. */ if (uesp) *user_stack = uesp; else *user_stack = USRSTACK; if (customstack && uesp) *customstack = 1; else *customstack = 0; break; default : return (KERN_INVALID_ARGUMENT); } return (KERN_SUCCESS); } kern_return_t thread_entrypoint( __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, mach_vm_offset_t *entry_point ) { struct i386_saved_state *state; i386_thread_state_t *state25; /* * Set a default. */ if (*entry_point == 0) *entry_point = VM_MIN_ADDRESS; switch (flavor) { case i386_THREAD_STATE: state25 = (i386_thread_state_t *) tstate; *entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS; break; case i386_NEW_THREAD_STATE: if (count < i386_THREAD_STATE_COUNT) return (KERN_INVALID_ARGUMENT); else { state = (struct i386_saved_state *) tstate; /* * If a valid entry point is specified, use it. */ *entry_point = state->eip ? state->eip: VM_MIN_ADDRESS; } break; } return (KERN_SUCCESS); } struct i386_saved_state * get_user_regs(thread_t th) { if (th->machine.pcb) return(USER_REGS(th)); else { printf("[get_user_regs: thread does not have pcb]"); return NULL; } } /* * Duplicate parent state in child * for U**X fork. */ kern_return_t machine_thread_dup( thread_t parent, thread_t child ) { struct i386_float_state floatregs; #ifdef XXX /* Save the FPU state */ if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) { fp_state_save(parent); } #endif if (child->machine.pcb == NULL || parent->machine.pcb == NULL) return (KERN_FAILURE); /* Copy over the i386_saved_state registers */ child->machine.pcb->iss = parent->machine.pcb->iss; /* Check to see if parent is using floating point * and if so, copy the registers to the child * FIXME - make sure this works. */ if (parent->machine.pcb->ims.ifps) { if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS) fpu_set_state(child, &floatregs); } /* FIXME - should a user specified LDT, TSS and V86 info * be duplicated as well?? - probably not. */ // duplicate any use LDT entry that was set I think this is appropriate. #ifdef MACH_BSD if (parent->machine.pcb->uldt_selector!= 0) { child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector; child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc; } #endif return (KERN_SUCCESS); } /* * FIXME - thread_set_child */ void thread_set_child(thread_t child, int pid); void thread_set_child(thread_t child, int pid) { child->machine.pcb->iss.eax = pid; child->machine.pcb->iss.edx = 1; child->machine.pcb->iss.efl &= ~EFL_CF; } void thread_set_parent(thread_t parent, int pid); void thread_set_parent(thread_t parent, int pid) { parent->machine.pcb->iss.eax = pid; parent->machine.pcb->iss.edx = 0; parent->machine.pcb->iss.efl &= ~EFL_CF; } /* * System Call handling code */ #define ERESTART -1 /* restart syscall */ #define EJUSTRETURN -2 /* don't modify regs, just return */ #define NO_FUNNEL 0 #define KERNEL_FUNNEL 1 extern funnel_t * kernel_flock; extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *); extern void * get_bsduthreadarg(thread_t); extern int * get_bsduthreadrval(thread_t th); extern int * get_bsduthreadlowpridelay(thread_t th); extern long fuword(vm_offset_t); extern void unix_syscall(struct i386_saved_state *); extern void unix_syscall_return(int); /* following implemented in bsd/dev/i386/unix_signal.c */ int __pthread_cset(struct sysent *); void __pthread_creset(struct sysent *); void unix_syscall_return(int error) { thread_t thread; volatile int *rval; struct i386_saved_state *regs; struct proc *p; unsigned short code; vm_offset_t params; struct sysent *callp; volatile int *lowpri_delay; thread = current_thread(); rval = get_bsduthreadrval(thread); lowpri_delay = get_bsduthreadlowpridelay(thread); p = current_proc(); regs = USER_REGS(thread); /* reconstruct code for tracing before blasting eax */ code = regs->eax; params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { code = fuword(params); } if (error == ERESTART) { regs->eip -= 7; } else if (error != EJUSTRETURN) { if (error) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = rval[0]; regs->edx = rval[1]; regs->efl &= ~EFL_CF; } } ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK)); __pthread_creset(callp); if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); if (*lowpri_delay) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call * collided with normal I/O operations... we'll * delay in order to mitigate the impact of this * task on the normal operation of the system */ IOSleep(*lowpri_delay); *lowpri_delay = 0; } KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, rval[0], rval[1], 0, 0); thread_exception_return(); /* NOTREACHED */ } void unix_syscall(struct i386_saved_state *regs) { thread_t thread; void *vt; unsigned short code; struct sysent *callp; int nargs; int error; int *rval; int funnel_type; vm_offset_t params; struct proc *p; volatile int *lowpri_delay; thread = current_thread(); p = current_proc(); rval = get_bsduthreadrval(thread); lowpri_delay = get_bsduthreadlowpridelay(thread); thread->task->syscalls_unix++; /* MP-safety ignored */ //printf("[scall : eax %x]", regs->eax); code = regs->eax; params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp == sysent) { code = fuword(params); params += sizeof (int); callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; } vt = get_bsduthreadarg(thread); if ((nargs = (callp->sy_narg * sizeof (int))) && (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) { regs->eax = error; regs->efl |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } rval[0] = 0; rval[1] = regs->edx; if ((error = __pthread_cset(callp))) { /* cancelled system call; let it returned with EINTR for handling */ regs->eax = error; regs->efl |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } funnel_type = (callp->sy_funnel & FUNNEL_MASK); if(funnel_type == KERNEL_FUNNEL) (void) thread_funnel_set(kernel_flock, TRUE); (void) set_bsduthreadargs(thread, regs, NULL); if (callp->sy_narg > 8) panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg); ktrsyscall(p, code, callp->sy_narg, vt, funnel_type); { int *ip = (int *)vt; KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, *ip, *(ip+1), *(ip+2), *(ip+3), 0); } error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]); #if 0 /* May be needed with vfork changes */ regs = USER_REGS(thread); #endif if (error == ERESTART) { regs->eip -= 7; } else if (error != EJUSTRETURN) { if (error) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = rval[0]; regs->edx = rval[1]; regs->efl &= ~EFL_CF; } } ktrsysret(p, code, error, rval[0], funnel_type); __pthread_creset(callp); if(funnel_type != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); if (*lowpri_delay) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call * collided with normal I/O operations... we'll * delay in order to mitigate the impact of this * task on the normal operation of the system */ IOSleep(*lowpri_delay); *lowpri_delay = 0; } KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, error, rval[0], rval[1], 0, 0); thread_exception_return(); /* NOTREACHED */ } void machdep_syscall( struct i386_saved_state *regs) { int trapno, nargs; machdep_call_t *entry; trapno = regs->eax; if (trapno < 0 || trapno >= machdep_call_count) { regs->eax = (unsigned int)kern_invalid(NULL); thread_exception_return(); /* NOTREACHED */ } entry = &machdep_call_table[trapno]; nargs = entry->nargs; if (nargs > 0) { int args[nargs]; if (copyin((user_addr_t) regs->uesp + sizeof (int), (char *) args, nargs * sizeof (int))) { regs->eax = KERN_INVALID_ADDRESS; thread_exception_return(); /* NOTREACHED */ } switch (nargs) { case 1: regs->eax = (*entry->routine.args_1)(args[0]); break; case 2: regs->eax = (*entry->routine.args_2)(args[0],args[1]); break; case 3: regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]); break; case 4: regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]); break; default: panic("machdep_syscall(): too many args"); } } else regs->eax = (*entry->routine.args_0)(); if (current_thread()->funnel_lock) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); thread_exception_return(); /* NOTREACHED */ } kern_return_t thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) { struct real_descriptor desc; mp_disable_preemption(); desc.limit_low = 1; desc.limit_high = 0; desc.base_low = addr & 0xffff; desc.base_med = (addr >> 16) & 0xff; desc.base_high = (addr >> 24) & 0xff; desc.access = ACC_P|ACC_PL_U|ACC_DATA_W; desc.granularity = SZ_32|SZ_G; pcb->cthread_desc = desc; *ldt_desc_p(USER_CTHREAD) = desc; mp_enable_preemption(); return(KERN_SUCCESS); } kern_return_t thread_set_cthread_self(uint32_t self) { current_thread()->machine.pcb->cthread_self = self; return (KERN_SUCCESS); } kern_return_t thread_get_cthread_self(void) { return ((kern_return_t)current_thread()->machine.pcb->cthread_self); } kern_return_t thread_fast_set_cthread_self(uint32_t self) { pcb_t pcb; pcb = (pcb_t)current_thread()->machine.pcb; thread_compose_cthread_desc(self, pcb); pcb->cthread_self = self; /* preserve old func too */ return (USER_CTHREAD); } /* * thread_set_user_ldt routine is the interface for the user level * settable ldt entry feature. allowing a user to create arbitrary * ldt entries seems to be too large of a security hole, so instead * this mechanism is in place to allow user level processes to have * an ldt entry that can be used in conjunction with the FS register. * * Swapping occurs inside the pcb.c file along with initialization * when a thread is created. The basic functioning theory is that the * pcb->uldt_selector variable will contain either 0 meaning the * process has not set up any entry, or the selector to be used in * the FS register. pcb->uldt_desc contains the actual descriptor the * user has set up stored in machine usable ldt format. * * Currently one entry is shared by all threads (USER_SETTABLE), but * this could be changed in the future by changing how this routine * allocates the selector. There seems to be no real reason at this * time to have this added feature, but in the future it might be * needed. * * address is the linear address of the start of the data area size * is the size in bytes of the area flags should always be set to 0 * for now. in the future it could be used to set R/W permisions or * other functions. Currently the segment is created as a data segment * up to 1 megabyte in size with full read/write permisions only. * * this call returns the segment selector or -1 if any error occurs */ kern_return_t thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags) { pcb_t pcb; struct fake_descriptor temp; int mycpu; if (flags != 0) return -1; // flags not supported if (size > 0xFFFFF) return -1; // size too big, 1 meg is the limit mp_disable_preemption(); mycpu = cpu_number(); // create a "fake" descriptor so we can use fix_desc() // to build a real one... // 32 bit default operation size // standard read/write perms for a data segment pcb = (pcb_t)current_thread()->machine.pcb; temp.offset = address; temp.lim_or_seg = size; temp.size_or_wdct = SZ_32; temp.access = ACC_P|ACC_PL_U|ACC_DATA_W; // turn this into a real descriptor fix_desc(&temp,1); // set up our data in the pcb pcb->uldt_desc = *(struct real_descriptor*)&temp; pcb->uldt_selector = USER_SETTABLE; // set the selector value // now set it up in the current table... *ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor*)&temp; mp_enable_preemption(); return USER_SETTABLE; } void mach25_syscall(struct i386_saved_state *regs) { printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n", regs->eip, regs->eax, -regs->eax); panic("FIXME!"); } #endif /* MACH_BSD */ /* This routine is called from assembly before each and every mach trap. */ extern unsigned int mach_call_start(unsigned int, unsigned int *); __private_extern__ unsigned int mach_call_start(unsigned int call_number, unsigned int *args) { int i, argc; unsigned int kdarg[3]; current_thread()->task->syscalls_mach++; /* MP-safety ignored */ /* Always prepare to trace mach system calls */ kdarg[0]=0; kdarg[1]=0; kdarg[2]=0; argc = mach_trap_table[call_number>>4].mach_trap_arg_count; if (argc > 3) argc = 3; for (i=0; i < argc; i++) kdarg[i] = (int)*(args + i); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number>>4)) | DBG_FUNC_START, kdarg[0], kdarg[1], kdarg[2], 0, 0); return call_number; /* pass this back thru */ } /* This routine is called from assembly after each mach system call */ extern unsigned int mach_call_end(unsigned int, unsigned int); __private_extern__ unsigned int mach_call_end(unsigned int call_number, unsigned int retval) { KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number>>4)) | DBG_FUNC_END, retval, 0, 0, 0, 0); return retval; /* pass this back thru */ } typedef kern_return_t (*mach_call_t)(void *); extern __attribute__((regparm(1))) kern_return_t mach_call_munger(unsigned int call_number, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5, unsigned int arg6, unsigned int arg7, unsigned int arg8, unsigned int arg9 ); struct mach_call_args { unsigned int arg1; unsigned int arg2; unsigned int arg3; unsigned int arg4; unsigned int arg5; unsigned int arg6; unsigned int arg7; unsigned int arg8; unsigned int arg9; }; __private_extern__ __attribute__((regparm(1))) kern_return_t mach_call_munger(unsigned int call_number, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5, unsigned int arg6, unsigned int arg7, unsigned int arg8, unsigned int arg9 ) { int argc; mach_call_t mach_call; kern_return_t retval; struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; current_thread()->task->syscalls_mach++; /* MP-safety ignored */ call_number >>= 4; argc = mach_trap_table[call_number].mach_trap_arg_count; switch (argc) { case 9: args.arg9 = arg9; case 8: args.arg8 = arg8; case 7: args.arg7 = arg7; case 6: args.arg6 = arg6; case 5: args.arg5 = arg5; case 4: args.arg4 = arg4; case 3: args.arg3 = arg3; case 2: args.arg2 = arg2; case 1: args.arg1 = arg1; } KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, args.arg1, args.arg2, args.arg3, 0, 0); mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; retval = mach_call(&args); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, retval, 0, 0, 0, 0); return retval; } /* * thread_setuserstack: * * Sets the user stack pointer into the machine * dependent thread state info. */ void thread_setuserstack( thread_t thread, mach_vm_address_t user_stack) { struct i386_saved_state *ss = get_user_regs(thread); ss->uesp = CAST_DOWN(unsigned int,user_stack); } /* * thread_adjuserstack: * * Returns the adjusted user stack pointer from the machine * dependent thread state info. Used for small (<2G) deltas. */ uint64_t thread_adjuserstack( thread_t thread, int adjust) { struct i386_saved_state *ss = get_user_regs(thread); ss->uesp += adjust; return CAST_USER_ADDR_T(ss->uesp); } /* * thread_setentrypoint: * * Sets the user PC into the machine * dependent thread state info. */ void thread_setentrypoint( thread_t thread, mach_vm_address_t entry) { struct i386_saved_state *ss = get_user_regs(thread); ss->eip = CAST_DOWN(unsigned int,entry); }