/* * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ /* */ /* * File: vm/vm_user.c * Author: Avadis Tevanian, Jr., Michael Wayne Young * * User-exported virtual memory functions. */ #include #include #include #include /* to get vm_address_t */ #include #include /* to get pointer_t */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include vm_size_t upl_offset_to_pagelist = 0; #if VM_CPM #include #endif /* VM_CPM */ ipc_port_t dynamic_pager_control_port=NULL; /* * vm_allocate allocates "zero fill" memory in the specfied * map. */ kern_return_t vm_allocate( register vm_map_t map, register vm_offset_t *addr, register vm_size_t size, int flags) { kern_return_t result; boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if (size == 0) { *addr = 0; return(KERN_SUCCESS); } if (anywhere) *addr = vm_map_min(map); else *addr = trunc_page_32(*addr); size = round_page_32(size); if (size == 0) { return(KERN_INVALID_ARGUMENT); } result = vm_map_enter( map, addr, size, (vm_offset_t)0, flags, VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); return(result); } /* * vm_deallocate deallocates the specified range of addresses in the * specified address map. */ kern_return_t vm_deallocate( register vm_map_t map, vm_offset_t start, vm_size_t size) { if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if (size == (vm_offset_t) 0) return(KERN_SUCCESS); return(vm_map_remove(map, trunc_page_32(start), round_page_32(start+size), VM_MAP_NO_FLAGS)); } /* * vm_inherit sets the inheritance of the specified range in the * specified map. */ kern_return_t vm_inherit( register vm_map_t map, vm_offset_t start, vm_size_t size, vm_inherit_t new_inheritance) { if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if (new_inheritance > VM_INHERIT_LAST_VALID) return(KERN_INVALID_ARGUMENT); return(vm_map_inherit(map, trunc_page_32(start), round_page_32(start+size), new_inheritance)); } /* * vm_protect sets the protection of the specified range in the * specified map. */ kern_return_t vm_protect( register vm_map_t map, vm_offset_t start, vm_size_t size, boolean_t set_maximum, vm_prot_t new_protection) { if ((map == VM_MAP_NULL) || (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) return(KERN_INVALID_ARGUMENT); return(vm_map_protect(map, trunc_page_32(start), round_page_32(start+size), new_protection, set_maximum)); } /* * Handle machine-specific attributes for a mapping, such * as cachability, migrability, etc. */ kern_return_t vm_machine_attribute( vm_map_t map, vm_address_t address, vm_size_t size, vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value) /* IN/OUT */ { if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); return vm_map_machine_attribute(map, address, size, attribute, value); } kern_return_t vm_read( vm_map_t map, vm_address_t address, vm_size_t size, pointer_t *data, mach_msg_type_number_t *data_size) { kern_return_t error; vm_map_copy_t ipc_address; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if ((error = vm_map_copyin(map, address, size, FALSE, /* src_destroy */ &ipc_address)) == KERN_SUCCESS) { *data = (pointer_t) ipc_address; *data_size = size; } return(error); } kern_return_t vm_read_list( vm_map_t map, vm_read_entry_t data_list, mach_msg_type_number_t count) { mach_msg_type_number_t i; kern_return_t error; vm_map_copy_t ipc_address; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); for(i=0; imap, &(data_list[i].address), (vm_map_copy_t) ipc_address); if(error != KERN_SUCCESS) { data_list[i].address = (vm_address_t)0; data_list[i].size = (vm_size_t)0; break; } } } return(error); } /* * This routine reads from the specified map and overwrites part of the current * activation's map. In making an assumption that the current thread is local, * it is no longer cluster-safe without a fully supportive local proxy thread/ * task (but we don't support cluster's anymore so this is moot). */ #define VM_OVERWRITE_SMALL 512 kern_return_t vm_read_overwrite( vm_map_t map, vm_address_t address, vm_size_t size, vm_address_t data, vm_size_t *data_size) { struct { long align; char buf[VM_OVERWRITE_SMALL]; } inbuf; vm_map_t oldmap; kern_return_t error = KERN_SUCCESS; vm_map_copy_t copy; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if (size <= VM_OVERWRITE_SMALL) { if(vm_map_read_user(map, (vm_offset_t)address, (vm_offset_t)&inbuf, size)) { error = KERN_INVALID_ADDRESS; } else { if(vm_map_write_user(current_map(), (vm_offset_t)&inbuf, (vm_offset_t)data, size)) error = KERN_INVALID_ADDRESS; } } else { if ((error = vm_map_copyin(map, address, size, FALSE, /* src_destroy */ ©)) == KERN_SUCCESS) { if ((error = vm_map_copy_overwrite( current_act()->map, data, copy, FALSE)) == KERN_SUCCESS) { } else { vm_map_copy_discard(copy); } } } *data_size = size; return(error); } /*ARGSUSED*/ kern_return_t vm_write( vm_map_t map, vm_address_t address, vm_offset_t data, mach_msg_type_number_t size) { if (map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; return vm_map_copy_overwrite(map, address, (vm_map_copy_t) data, FALSE /* interruptible XXX */); } kern_return_t vm_copy( vm_map_t map, vm_address_t source_address, vm_size_t size, vm_address_t dest_address) { vm_map_copy_t copy; kern_return_t kr; if (map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; kr = vm_map_copyin(map, source_address, size, FALSE, ©); if (kr != KERN_SUCCESS) return kr; kr = vm_map_copy_overwrite(map, dest_address, copy, FALSE /* interruptible XXX */); if (kr != KERN_SUCCESS) { vm_map_copy_discard(copy); return kr; } return KERN_SUCCESS; } /* * Routine: vm_map */ kern_return_t vm_map_64( vm_map_t target_map, vm_offset_t *address, vm_size_t initial_size, vm_offset_t mask, int flags, ipc_port_t port, vm_object_offset_t offset, boolean_t copy, vm_prot_t cur_protection, vm_prot_t max_protection, vm_inherit_t inheritance) { register vm_object_t object; vm_prot_t prot; vm_object_size_t size = (vm_object_size_t)initial_size; kern_return_t result; /* * Check arguments for validity */ if ((target_map == VM_MAP_NULL) || (cur_protection & ~VM_PROT_ALL) || (max_protection & ~VM_PROT_ALL) || (inheritance > VM_INHERIT_LAST_VALID) || size == 0) return(KERN_INVALID_ARGUMENT); /* * Find the vm object (if any) corresponding to this port. */ if (!IP_VALID(port)) { object = VM_OBJECT_NULL; offset = 0; copy = FALSE; } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { vm_named_entry_t named_entry; named_entry = (vm_named_entry_t)port->ip_kobject; /* a few checks to make sure user is obeying rules */ if(size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); size = named_entry->size - offset; } if((named_entry->protection & max_protection) != max_protection) return(KERN_INVALID_RIGHT); if((named_entry->protection & cur_protection) != cur_protection) return(KERN_INVALID_RIGHT); if(named_entry->size < (offset + size)) return(KERN_INVALID_ARGUMENT); /* the callers parameter offset is defined to be the */ /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; named_entry_lock(named_entry); if(named_entry->is_sub_map) { vm_map_entry_t map_entry; named_entry_unlock(named_entry); *address = trunc_page_32(*address); size = round_page_64(size); vm_object_reference(vm_submap_object); if ((result = vm_map_enter(target_map, address, size, mask, flags, vm_submap_object, 0, FALSE, cur_protection, max_protection, inheritance )) != KERN_SUCCESS) { vm_object_deallocate(vm_submap_object); } else { char alias; VM_GET_FLAGS_ALIAS(flags, alias); if ((alias == VM_MEMORY_SHARED_PMAP) && !copy) { vm_map_submap(target_map, *address, (*address) + size, named_entry->backing.map, (vm_offset_t)offset, TRUE); } else { vm_map_submap(target_map, *address, (*address) + size, named_entry->backing.map, (vm_offset_t)offset, FALSE); } if(copy) { if(vm_map_lookup_entry( target_map, *address, &map_entry)) { map_entry->needs_copy = TRUE; } } } return(result); } else if(named_entry->object) { /* This is the case where we are going to map */ /* an already mapped object. If the object is */ /* not ready it is internal. An external */ /* object cannot be mapped until it is ready */ /* we can therefore avoid the ready check */ /* in this case. */ named_entry_unlock(named_entry); vm_object_reference(named_entry->object); object = named_entry->object; } else { unsigned int access; vm_prot_t protections; unsigned int wimg_mode; boolean_t cache_attr; protections = named_entry->protection & VM_PROT_ALL; access = GET_MAP_MEM(named_entry->protection); object = vm_object_enter( named_entry->backing.pager, named_entry->size, named_entry->internal, FALSE, FALSE); if (object == VM_OBJECT_NULL) { named_entry_unlock(named_entry); return(KERN_INVALID_OBJECT); } vm_object_lock(object); /* create an extra ref for the named entry */ vm_object_reference_locked(object); named_entry->object = object; named_entry_unlock(named_entry); wimg_mode = object->wimg_bits; if(access == MAP_MEM_IO) { wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { wimg_mode = VM_WIMG_USE_DEFAULT; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { wimg_mode = VM_WIMG_WCOMB; } if ((wimg_mode == VM_WIMG_IO) || (wimg_mode == VM_WIMG_WCOMB)) cache_attr = TRUE; else cache_attr = FALSE; if (named_entry->backing.pager) { /* wait for object (if any) to be ready */ while (!object->pager_ready) { vm_object_wait(object, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); vm_object_lock(object); } } if(object->wimg_bits != wimg_mode) { vm_page_t p; vm_object_paging_wait(object, THREAD_UNINT); object->wimg_bits = wimg_mode; queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { pmap_page_protect( p->phys_page, VM_PROT_NONE); if(cache_attr) pmap_sync_caches_phys( p->phys_page); } } } object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); } } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { /* * JMM - This is temporary until we unify named entries * and raw memory objects. * * Detected fake ip_kotype for a memory object. In * this case, the port isn't really a port at all, but * instead is just a raw memory object. */ if ((object = vm_object_enter((memory_object_t)port, size, FALSE, FALSE, FALSE)) == VM_OBJECT_NULL) return(KERN_INVALID_OBJECT); /* wait for object (if any) to be ready */ if (object != VM_OBJECT_NULL) { if(object == kernel_object) { printf("Warning: Attempt to map kernel object" " by a non-private kernel entity\n"); return(KERN_INVALID_OBJECT); } vm_object_lock(object); while (!object->pager_ready) { vm_object_wait(object, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); vm_object_lock(object); } vm_object_unlock(object); } } else { return (KERN_INVALID_OBJECT); } *address = trunc_page_32(*address); size = round_page_64(size); /* * Perform the copy if requested */ if (copy) { vm_object_t new_object; vm_object_offset_t new_offset; result = vm_object_copy_strategically(object, offset, size, &new_object, &new_offset, ©); if (result == KERN_MEMORY_RESTART_COPY) { boolean_t success; boolean_t src_needs_copy; /* * XXX * We currently ignore src_needs_copy. * This really is the issue of how to make * MEMORY_OBJECT_COPY_SYMMETRIC safe for * non-kernel users to use. Solution forthcoming. * In the meantime, since we don't allow non-kernel * memory managers to specify symmetric copy, * we won't run into problems here. */ new_object = object; new_offset = offset; success = vm_object_copy_quickly(&new_object, new_offset, size, &src_needs_copy, ©); assert(success); result = KERN_SUCCESS; } /* * Throw away the reference to the * original object, as it won't be mapped. */ vm_object_deallocate(object); if (result != KERN_SUCCESS) return (result); object = new_object; offset = new_offset; } if ((result = vm_map_enter(target_map, address, size, mask, flags, object, offset, copy, cur_protection, max_protection, inheritance )) != KERN_SUCCESS) vm_object_deallocate(object); return(result); } /* temporary, until world build */ kern_return_t vm_map( vm_map_t target_map, vm_offset_t *address, vm_size_t size, vm_offset_t mask, int flags, ipc_port_t port, vm_offset_t offset, boolean_t copy, vm_prot_t cur_protection, vm_prot_t max_protection, vm_inherit_t inheritance) { return vm_map_64(target_map, address, size, mask, flags, port, (vm_object_offset_t)offset, copy, cur_protection, max_protection, inheritance); } /* * NOTE: this routine (and this file) will no longer require mach_host_server.h * when vm_wire is changed to use ledgers. */ #include /* * Specify that the range of the virtual address space * of the target task must not cause page faults for * the indicated accesses. * * [ To unwire the pages, specify VM_PROT_NONE. ] */ kern_return_t vm_wire( host_priv_t host_priv, register vm_map_t map, vm_offset_t start, vm_size_t size, vm_prot_t access) { kern_return_t rc; if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; assert(host_priv == &realhost); if (map == VM_MAP_NULL) return KERN_INVALID_TASK; if (access & ~VM_PROT_ALL) return KERN_INVALID_ARGUMENT; if (access != VM_PROT_NONE) { rc = vm_map_wire(map, trunc_page_32(start), round_page_32(start+size), access, TRUE); } else { rc = vm_map_unwire(map, trunc_page_32(start), round_page_32(start+size), TRUE); } return rc; } /* * vm_msync * * Synchronises the memory range specified with its backing store * image by either flushing or cleaning the contents to the appropriate * memory manager engaging in a memory object synchronize dialog with * the manager. The client doesn't return until the manager issues * m_o_s_completed message. MIG Magically converts user task parameter * to the task's address map. * * interpretation of sync_flags * VM_SYNC_INVALIDATE - discard pages, only return precious * pages to manager. * * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) * - discard pages, write dirty or precious * pages back to memory manager. * * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS * - write dirty or precious pages back to * the memory manager. * * NOTE * The memory object attributes have not yet been implemented, this * function will have to deal with the invalidate attribute * * RETURNS * KERN_INVALID_TASK Bad task parameter * KERN_INVALID_ARGUMENT both sync and async were specified. * KERN_SUCCESS The usual. */ kern_return_t vm_msync( vm_map_t map, vm_address_t address, vm_size_t size, vm_sync_t sync_flags) { msync_req_t msr; msync_req_t new_msr; queue_chain_t req_q; /* queue of requests for this msync */ vm_map_entry_t entry; vm_size_t amount_left; vm_object_offset_t offset; boolean_t do_sync_req; boolean_t modifiable; if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && (sync_flags & VM_SYNC_SYNCHRONOUS)) return(KERN_INVALID_ARGUMENT); /* * align address and size on page boundaries */ size = round_page_32(address + size) - trunc_page_32(address); address = trunc_page_32(address); if (map == VM_MAP_NULL) return(KERN_INVALID_TASK); if (size == 0) return(KERN_SUCCESS); queue_init(&req_q); amount_left = size; while (amount_left > 0) { vm_size_t flush_size; vm_object_t object; vm_map_lock(map); if (!vm_map_lookup_entry(map, address, &entry)) { vm_size_t skip; /* * hole in the address map. */ /* * Check for empty map. */ if (entry == vm_map_to_entry(map) && entry->vme_next == entry) { vm_map_unlock(map); break; } /* * Check that we don't wrap and that * we have at least one real map entry. */ if ((map->hdr.nentries == 0) || (entry->vme_next->vme_start < address)) { vm_map_unlock(map); break; } /* * Move up to the next entry if needed */ skip = (entry->vme_next->vme_start - address); if (skip >= amount_left) amount_left = 0; else amount_left -= skip; address = entry->vme_next->vme_start; vm_map_unlock(map); continue; } offset = address - entry->vme_start; /* * do we have more to flush than is contained in this * entry ? */ if (amount_left + entry->vme_start + offset > entry->vme_end) { flush_size = entry->vme_end - (entry->vme_start + offset); } else { flush_size = amount_left; } amount_left -= flush_size; address += flush_size; if (entry->is_sub_map == TRUE) { vm_map_t local_map; vm_offset_t local_offset; local_map = entry->object.sub_map; local_offset = entry->offset; vm_map_unlock(map); vm_msync( local_map, local_offset, flush_size, sync_flags); continue; } object = entry->object.vm_object; /* * We can't sync this object if the object has not been * created yet */ if (object == VM_OBJECT_NULL) { vm_map_unlock(map); continue; } offset += entry->offset; modifiable = (entry->protection & VM_PROT_WRITE) != VM_PROT_NONE; vm_object_lock(object); if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { boolean_t kill_pages = 0; if (sync_flags & VM_SYNC_KILLPAGES) { if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) kill_pages = 1; else kill_pages = -1; } if (kill_pages != -1) vm_object_deactivate_pages(object, offset, (vm_object_size_t)flush_size, kill_pages); vm_object_unlock(object); vm_map_unlock(map); continue; } /* * We can't sync this object if there isn't a pager. * Don't bother to sync internal objects, since there can't * be any "permanent" storage for these objects anyway. */ if ((object->pager == MEMORY_OBJECT_NULL) || (object->internal) || (object->private)) { vm_object_unlock(object); vm_map_unlock(map); continue; } /* * keep reference on the object until syncing is done */ assert(object->ref_count > 0); object->ref_count++; vm_object_res_reference(object); vm_object_unlock(object); vm_map_unlock(map); do_sync_req = vm_object_sync(object, offset, flush_size, sync_flags & VM_SYNC_INVALIDATE, (modifiable && (sync_flags & VM_SYNC_SYNCHRONOUS || sync_flags & VM_SYNC_ASYNCHRONOUS))); /* * only send a m_o_s if we returned pages or if the entry * is writable (ie dirty pages may have already been sent back) */ if (!do_sync_req && !modifiable) { vm_object_deallocate(object); continue; } msync_req_alloc(new_msr); vm_object_lock(object); offset += object->paging_offset; new_msr->offset = offset; new_msr->length = flush_size; new_msr->object = object; new_msr->flag = VM_MSYNC_SYNCHRONIZING; re_iterate: queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { /* * need to check for overlapping entry, if found, wait * on overlapping msr to be done, then reiterate */ msr_lock(msr); if (msr->flag == VM_MSYNC_SYNCHRONIZING && ((offset >= msr->offset && offset < (msr->offset + msr->length)) || (msr->offset >= offset && msr->offset < (offset + flush_size)))) { assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); msr_unlock(msr); vm_object_unlock(object); thread_block((void (*)(void))0); vm_object_lock(object); goto re_iterate; } msr_unlock(msr); }/* queue_iterate */ queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); vm_object_unlock(object); queue_enter(&req_q, new_msr, msync_req_t, req_q); (void) memory_object_synchronize( object->pager, offset, flush_size, sync_flags); }/* while */ /* * wait for memory_object_sychronize_completed messages from pager(s) */ while (!queue_empty(&req_q)) { msr = (msync_req_t)queue_first(&req_q); msr_lock(msr); while(msr->flag != VM_MSYNC_DONE) { assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); msr_unlock(msr); thread_block((void (*)(void))0); msr_lock(msr); }/* while */ queue_remove(&req_q, msr, msync_req_t, req_q); msr_unlock(msr); vm_object_deallocate(msr->object); msync_req_free(msr); }/* queue_iterate */ return(KERN_SUCCESS); }/* vm_msync */ /* * task_wire * * Set or clear the map's wiring_required flag. This flag, if set, * will cause all future virtual memory allocation to allocate * user wired memory. Unwiring pages wired down as a result of * this routine is done with the vm_wire interface. */ kern_return_t task_wire( vm_map_t map, boolean_t must_wire) { if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); if (must_wire) map->wiring_required = TRUE; else map->wiring_required = FALSE; return(KERN_SUCCESS); } /* * vm_behavior_set sets the paging behavior attribute for the * specified range in the specified map. This routine will fail * with KERN_INVALID_ADDRESS if any address in [start,start+size) * is not a valid allocated or reserved memory region. */ kern_return_t vm_behavior_set( vm_map_t map, vm_offset_t start, vm_size_t size, vm_behavior_t new_behavior) { if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); return(vm_map_behavior_set(map, trunc_page_32(start), round_page_32(start+size), new_behavior)); } #if VM_CPM /* * Control whether the kernel will permit use of * vm_allocate_cpm at all. */ unsigned int vm_allocate_cpm_enabled = 1; /* * Ordinarily, the right to allocate CPM is restricted * to privileged applications (those that can gain access * to the host port). Set this variable to zero if you * want to let any application allocate CPM. */ unsigned int vm_allocate_cpm_privileged = 0; /* * Allocate memory in the specified map, with the caveat that * the memory is physically contiguous. This call may fail * if the system can't find sufficient contiguous memory. * This call may cause or lead to heart-stopping amounts of * paging activity. * * Memory obtained from this call should be freed in the * normal way, viz., via vm_deallocate. */ kern_return_t vm_allocate_cpm( host_priv_t host_priv, register vm_map_t map, register vm_offset_t *addr, register vm_size_t size, int flags) { vm_object_t cpm_obj; pmap_t pmap; vm_page_t m, pages; kern_return_t kr; vm_offset_t va, start, end, offset; #if MACH_ASSERT extern vm_offset_t avail_start, avail_end; vm_offset_t prev_addr; #endif /* MACH_ASSERT */ boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; if (!vm_allocate_cpm_enabled) return KERN_FAILURE; if (vm_allocate_cpm_privileged && host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; if (map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; assert(host_priv == &realhost); if (size == 0) { *addr = 0; return KERN_SUCCESS; } if (anywhere) *addr = vm_map_min(map); else *addr = trunc_page_32(*addr); size = round_page_32(size); if ((kr = cpm_allocate(size, &pages, TRUE)) != KERN_SUCCESS) return kr; cpm_obj = vm_object_allocate(size); assert(cpm_obj != VM_OBJECT_NULL); assert(cpm_obj->internal); assert(cpm_obj->size == size); assert(cpm_obj->can_persist == FALSE); assert(cpm_obj->pager_created == FALSE); assert(cpm_obj->pageout == FALSE); assert(cpm_obj->shadow == VM_OBJECT_NULL); /* * Insert pages into object. */ vm_object_lock(cpm_obj); for (offset = 0; offset < size; offset += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); assert(!m->gobbled); assert(!m->wanted); assert(!m->pageout); assert(!m->tabled); assert(m->busy); assert(m->phys_page>=avail_start && m->phys_page<=avail_end); m->busy = FALSE; vm_page_insert(m, cpm_obj, offset); } assert(cpm_obj->resident_page_count == size / PAGE_SIZE); vm_object_unlock(cpm_obj); /* * Hang onto a reference on the object in case a * multi-threaded application for some reason decides * to deallocate the portion of the address space into * which we will insert this object. * * Unfortunately, we must insert the object now before * we can talk to the pmap module about which addresses * must be wired down. Hence, the race with a multi- * threaded app. */ vm_object_reference(cpm_obj); /* * Insert object into map. */ kr = vm_map_enter( map, addr, size, (vm_offset_t)0, flags, cpm_obj, (vm_object_offset_t)0, FALSE, VM_PROT_ALL, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { /* * A CPM object doesn't have can_persist set, * so all we have to do is deallocate it to * free up these pages. */ assert(cpm_obj->pager_created == FALSE); assert(cpm_obj->can_persist == FALSE); assert(cpm_obj->pageout == FALSE); assert(cpm_obj->shadow == VM_OBJECT_NULL); vm_object_deallocate(cpm_obj); /* kill acquired ref */ vm_object_deallocate(cpm_obj); /* kill creation ref */ } /* * Inform the physical mapping system that the * range of addresses may not fault, so that * page tables and such can be locked down as well. */ start = *addr; end = start + size; pmap = vm_map_pmap(map); pmap_pageable(pmap, start, end, FALSE); /* * Enter each page into the pmap, to avoid faults. * Note that this loop could be coded more efficiently, * if the need arose, rather than looking up each page * again. */ for (offset = 0, va = start; offset < size; va += PAGE_SIZE, offset += PAGE_SIZE) { vm_object_lock(cpm_obj); m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); vm_object_unlock(cpm_obj); assert(m != VM_PAGE_NULL); PMAP_ENTER(pmap, va, m, VM_PROT_ALL, ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK, TRUE); } #if MACH_ASSERT /* * Verify ordering in address space. */ for (offset = 0; offset < size; offset += PAGE_SIZE) { vm_object_lock(cpm_obj); m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); vm_object_unlock(cpm_obj); if (m == VM_PAGE_NULL) panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", cpm_obj, offset); assert(m->tabled); assert(!m->busy); assert(!m->wanted); assert(!m->fictitious); assert(!m->private); assert(!m->absent); assert(!m->error); assert(!m->cleaning); assert(!m->precious); assert(!m->clustered); if (offset != 0) { if (m->phys_page != prev_addr + 1) { printf("start 0x%x end 0x%x va 0x%x\n", start, end, va); printf("obj 0x%x off 0x%x\n", cpm_obj, offset); printf("m 0x%x prev_address 0x%x\n", m, prev_addr); panic("vm_allocate_cpm: pages not contig!"); } } prev_addr = m->phys_page; } #endif /* MACH_ASSERT */ vm_object_deallocate(cpm_obj); /* kill extra ref */ return kr; } #else /* VM_CPM */ /* * Interface is defined in all cases, but unless the kernel * is built explicitly for this option, the interface does * nothing. */ kern_return_t vm_allocate_cpm( host_priv_t host_priv, register vm_map_t map, register vm_offset_t *addr, register vm_size_t size, int flags) { return KERN_FAILURE; } /* */ kern_return_t mach_memory_object_memory_entry_64( host_t host, boolean_t internal, vm_object_offset_t size, vm_prot_t permission, memory_object_t pager, ipc_port_t *entry_handle) { unsigned int access; vm_named_entry_t user_object; ipc_port_t user_handle; ipc_port_t previous; kern_return_t kr; if (host == HOST_NULL) return(KERN_INVALID_HOST); user_object = (vm_named_entry_t) kalloc(sizeof (struct vm_named_entry)); if(user_object == NULL) return KERN_FAILURE; named_entry_lock_init(user_object); user_handle = ipc_port_alloc_kernel(); ip_lock(user_handle); /* make a sonce right */ user_handle->ip_sorights++; ip_reference(user_handle); user_handle->ip_destination = IP_NULL; user_handle->ip_receiver_name = MACH_PORT_NULL; user_handle->ip_receiver = ipc_space_kernel; /* make a send right */ user_handle->ip_mscount++; user_handle->ip_srights++; ip_reference(user_handle); ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ user_object->object = NULL; user_object->size = size; user_object->offset = 0; user_object->backing.pager = pager; user_object->protection = permission & VM_PROT_ALL; access = GET_MAP_MEM(permission); SET_MAP_MEM(access, user_object->protection); user_object->internal = internal; user_object->is_sub_map = FALSE; user_object->ref_count = 1; ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, IKOT_NAMED_ENTRY); *entry_handle = user_handle; return KERN_SUCCESS; } kern_return_t mach_memory_object_memory_entry( host_t host, boolean_t internal, vm_size_t size, vm_prot_t permission, memory_object_t pager, ipc_port_t *entry_handle) { return mach_memory_object_memory_entry_64( host, internal, (vm_object_offset_t)size, permission, pager, entry_handle); } /* */ kern_return_t mach_make_memory_entry_64( vm_map_t target_map, vm_object_size_t *size, vm_object_offset_t offset, vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_entry) { vm_map_version_t version; vm_named_entry_t user_object; ipc_port_t user_handle; ipc_port_t previous; kern_return_t kr; vm_map_t pmap_map; /* needed for call to vm_map_lookup_locked */ boolean_t wired; vm_object_offset_t obj_off; vm_prot_t prot; vm_object_offset_t lo_offset, hi_offset; vm_behavior_t behavior; vm_object_t object; vm_object_t shadow_object; /* needed for direct map entry manipulation */ vm_map_entry_t map_entry; vm_map_entry_t next_entry; vm_map_t local_map; vm_map_t original_map = target_map; vm_offset_t local_offset; vm_object_size_t mappable_size; vm_object_size_t total_size; unsigned int access; vm_prot_t protections; unsigned int wimg_mode; boolean_t cache_attr; protections = permission & VM_PROT_ALL; access = GET_MAP_MEM(permission); offset = trunc_page_64(offset); *size = round_page_64(*size); if((parent_entry != NULL) && (permission & MAP_MEM_ONLY)) { vm_named_entry_t parent_object; if(ip_kotype(parent_entry) != IKOT_NAMED_ENTRY) { return KERN_INVALID_ARGUMENT; } parent_object = (vm_named_entry_t)parent_entry->ip_kobject; object = parent_object->object; if(object != VM_OBJECT_NULL) wimg_mode = object->wimg_bits; if((access != GET_MAP_MEM(parent_object->protection)) && !(parent_object->protection & VM_PROT_WRITE)) { return KERN_INVALID_RIGHT; } if(access == MAP_MEM_IO) { SET_MAP_MEM(access, parent_object->protection); wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { SET_MAP_MEM(access, parent_object->protection); wimg_mode = VM_WIMG_DEFAULT; } else if (access == MAP_MEM_WTHRU) { SET_MAP_MEM(access, parent_object->protection); wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { SET_MAP_MEM(access, parent_object->protection); wimg_mode = VM_WIMG_WCOMB; } if(object && (access != MAP_MEM_NOOP) && (!(object->nophyscache))) { if(object->wimg_bits != wimg_mode) { vm_page_t p; if ((wimg_mode == VM_WIMG_IO) || (wimg_mode == VM_WIMG_WCOMB)) cache_attr = TRUE; else cache_attr = FALSE; vm_object_lock(object); while(object->paging_in_progress) { vm_object_unlock(object); vm_object_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, THREAD_UNINT); vm_object_lock(object); } object->wimg_bits = wimg_mode; queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { pmap_page_protect( p->phys_page, VM_PROT_NONE); if(cache_attr) pmap_sync_caches_phys( p->phys_page); } } vm_object_unlock(object); } } return KERN_SUCCESS; } if(permission & MAP_MEM_ONLY) { return KERN_INVALID_ARGUMENT; } user_object = (vm_named_entry_t) kalloc(sizeof (struct vm_named_entry)); if(user_object == NULL) return KERN_FAILURE; named_entry_lock_init(user_object); user_handle = ipc_port_alloc_kernel(); ip_lock(user_handle); /* make a sonce right */ user_handle->ip_sorights++; ip_reference(user_handle); user_handle->ip_destination = IP_NULL; user_handle->ip_receiver_name = MACH_PORT_NULL; user_handle->ip_receiver = ipc_space_kernel; /* make a send right */ user_handle->ip_mscount++; user_handle->ip_srights++; ip_reference(user_handle); ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ user_object->backing.pager = NULL; user_object->ref_count = 1; if(permission & MAP_MEM_NAMED_CREATE) { user_object->object = NULL; user_object->internal = TRUE; user_object->is_sub_map = FALSE; user_object->offset = 0; user_object->protection = protections; SET_MAP_MEM(access, user_object->protection); user_object->size = *size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. */ ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, IKOT_NAMED_ENTRY); *object_handle = user_handle; return KERN_SUCCESS; } if(parent_entry == NULL) { /* Create a named object based on address range within the task map */ /* Go find the object at given address */ vm_map_lock_read(target_map); /* get the object associated with the target address */ /* note we check the permission of the range against */ /* that requested by the caller */ kr = vm_map_lookup_locked(&target_map, offset, protections, &version, &object, &obj_off, &prot, &wired, &behavior, &lo_offset, &hi_offset, &pmap_map); if (kr != KERN_SUCCESS) { vm_map_unlock_read(target_map); goto make_mem_done; } if (((prot & protections) != protections) || (object == kernel_object)) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); if(object == kernel_object) { printf("Warning: Attempt to create a named" " entry from the kernel_object\n"); } goto make_mem_done; } /* We have an object, now check to see if this object */ /* is suitable. If not, create a shadow and share that */ redo_lookup: local_map = original_map; local_offset = offset; if(target_map != local_map) { vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); vm_map_lock_read(local_map); target_map = local_map; pmap_map = local_map; } while(TRUE) { if(!vm_map_lookup_entry(local_map, local_offset, &map_entry)) { kr = KERN_INVALID_ARGUMENT; vm_object_unlock(object); vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); goto make_mem_done; } if(!(map_entry->is_sub_map)) { if(map_entry->object.vm_object != object) { kr = KERN_INVALID_ARGUMENT; vm_object_unlock(object); vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); goto make_mem_done; } if(map_entry->wired_count) { /* JMM - The check below should be reworked instead. */ object->true_share = TRUE; } break; } else { vm_map_t tmap; tmap = local_map; local_map = map_entry->object.sub_map; vm_map_lock_read(local_map); vm_map_unlock_read(tmap); target_map = local_map; pmap_map = local_map; local_offset = local_offset - map_entry->vme_start; local_offset += map_entry->offset; } } if(((map_entry->max_protection) & protections) != protections) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); goto make_mem_done; } mappable_size = hi_offset - obj_off; total_size = map_entry->vme_end - map_entry->vme_start; if(*size > mappable_size) { /* try to extend mappable size if the entries */ /* following are from the same object and are */ /* compatible */ next_entry = map_entry->vme_next; /* lets see if the next map entry is still */ /* pointing at this object and is contiguous */ while(*size > mappable_size) { if((next_entry->object.vm_object == object) && (next_entry->vme_start == next_entry->vme_prev->vme_end) && (next_entry->offset == next_entry->vme_prev->offset + (next_entry->vme_prev->vme_end - next_entry->vme_prev->vme_start))) { if(((next_entry->max_protection) & protections) != protections) { break; } if (next_entry->needs_copy != map_entry->needs_copy) break; mappable_size += next_entry->vme_end - next_entry->vme_start; total_size += next_entry->vme_end - next_entry->vme_start; next_entry = next_entry->vme_next; } else { break; } } } if(object->internal) { /* vm_map_lookup_locked will create a shadow if */ /* needs_copy is set but does not check for the */ /* other two conditions shown. It is important to */ /* set up an object which will not be pulled from */ /* under us. */ if ((map_entry->needs_copy || object->shadowed || (object->size > total_size)) && !object->true_share) { if (vm_map_lock_read_to_write(target_map)) { vm_map_lock_read(target_map); goto redo_lookup; } /* * JMM - We need to avoid coming here when the object * is wired by anybody, not just the current map. Why * couldn't we use the standard vm_object_copy_quickly() * approach here? */ /* create a shadow object */ vm_object_shadow(&map_entry->object.vm_object, &map_entry->offset, total_size); shadow_object = map_entry->object.vm_object; vm_object_unlock(object); vm_object_pmap_protect( object, map_entry->offset, total_size, ((map_entry->is_shared || target_map->mapped) ? PMAP_NULL : target_map->pmap), map_entry->vme_start, map_entry->protection & ~VM_PROT_WRITE); total_size -= (map_entry->vme_end - map_entry->vme_start); next_entry = map_entry->vme_next; map_entry->needs_copy = FALSE; while (total_size) { if(next_entry->object.vm_object == object) { next_entry->object.vm_object = shadow_object; next_entry->offset = next_entry->vme_prev->offset + (next_entry->vme_prev->vme_end - next_entry->vme_prev->vme_start); next_entry->needs_copy = FALSE; } else { panic("mach_make_memory_entry_64:" " map entries out of sync\n"); } total_size -= next_entry->vme_end - next_entry->vme_start; next_entry = next_entry->vme_next; } object = shadow_object; vm_object_lock(object); obj_off = (local_offset - map_entry->vme_start) + map_entry->offset; vm_map_lock_write_to_read(target_map); } } /* note: in the future we can (if necessary) allow for */ /* memory object lists, this will better support */ /* fragmentation, but is it necessary? The user should */ /* be encouraged to create address space oriented */ /* shared objects from CLEAN memory regions which have */ /* a known and defined history. i.e. no inheritence */ /* share, make this call before making the region the */ /* target of ipc's, etc. The code above, protecting */ /* against delayed copy, etc. is mostly defensive. */ wimg_mode = object->wimg_bits; if(!(object->nophyscache)) { if(access == MAP_MEM_IO) { wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { wimg_mode = VM_WIMG_USE_DEFAULT; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { wimg_mode = VM_WIMG_WCOMB; } } object->true_share = TRUE; if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; /* we now point to this object, hold on to it */ vm_object_reference_locked(object); vm_map_unlock_read(target_map); if(pmap_map != target_map) vm_map_unlock_read(pmap_map); if(object->wimg_bits != wimg_mode) { vm_page_t p; vm_object_paging_wait(object, THREAD_UNINT); queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { pmap_page_protect( p->phys_page, VM_PROT_NONE); if(cache_attr) pmap_sync_caches_phys( p->phys_page); } } object->wimg_bits = wimg_mode; } user_object->object = object; user_object->internal = object->internal; user_object->is_sub_map = FALSE; user_object->offset = obj_off; user_object->protection = permission; /* the size of mapped entry that overlaps with our region */ /* which is targeted for share. */ /* (entry_end - entry_start) - */ /* offset of our beg addr within entry */ /* it corresponds to this: */ if(*size > mappable_size) *size = mappable_size; user_object->size = *size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. */ vm_object_unlock(object); ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, IKOT_NAMED_ENTRY); *object_handle = user_handle; return KERN_SUCCESS; } else { vm_named_entry_t parent_object; /* The new object will be base on an existing named object */ if(ip_kotype(parent_entry) != IKOT_NAMED_ENTRY) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } parent_object = (vm_named_entry_t)parent_entry->ip_kobject; if((offset + *size) > parent_object->size) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } user_object->object = parent_object->object; user_object->size = *size; user_object->offset = parent_object->offset + offset; user_object->protection = parent_object->protection; user_object->protection &= ~VM_PROT_ALL; user_object->protection = permission & VM_PROT_ALL; if(access != MAP_MEM_NOOP) { SET_MAP_MEM(access, user_object->protection); } if(parent_object->is_sub_map) { user_object->backing.map = parent_object->backing.map; vm_map_lock(user_object->backing.map); user_object->backing.map->ref_count++; vm_map_unlock(user_object->backing.map); } else { user_object->backing.pager = parent_object->backing.pager; } user_object->internal = parent_object->internal; user_object->is_sub_map = parent_object->is_sub_map; if(parent_object->object != NULL) { /* we now point to this object, hold on */ vm_object_reference(parent_object->object); vm_object_lock(parent_object->object); parent_object->object->true_share = TRUE; if (parent_object->object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) parent_object->object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(parent_object->object); } ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, IKOT_NAMED_ENTRY); *object_handle = user_handle; return KERN_SUCCESS; } make_mem_done: ipc_port_dealloc_kernel(user_handle); kfree((vm_offset_t)user_object, sizeof (struct vm_named_entry)); return kr; } kern_return_t mach_make_memory_entry( vm_map_t target_map, vm_size_t *size, vm_offset_t offset, vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_entry) { vm_object_offset_t size_64; kern_return_t kr; size_64 = (vm_object_offset_t)*size; kr = mach_make_memory_entry_64(target_map, &size_64, (vm_object_offset_t)offset, permission, object_handle, parent_entry); *size = (vm_size_t)size_64; return kr; } /* */ kern_return_t vm_region_object_create( vm_map_t target_map, vm_size_t size, ipc_port_t *object_handle) { vm_named_entry_t user_object; ipc_port_t user_handle; kern_return_t kr; ipc_port_t previous; vm_map_t new_map; user_object = (vm_named_entry_t) kalloc(sizeof (struct vm_named_entry)); if(user_object == NULL) { return KERN_FAILURE; } named_entry_lock_init(user_object); user_handle = ipc_port_alloc_kernel(); ip_lock(user_handle); /* make a sonce right */ user_handle->ip_sorights++; ip_reference(user_handle); user_handle->ip_destination = IP_NULL; user_handle->ip_receiver_name = MACH_PORT_NULL; user_handle->ip_receiver = ipc_space_kernel; /* make a send right */ user_handle->ip_mscount++; user_handle->ip_srights++; ip_reference(user_handle); ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ /* Create a named object based on a submap of specified size */ new_map = vm_map_create(0, 0, size, TRUE); user_object->backing.map = new_map; user_object->object = VM_OBJECT_NULL; user_object->internal = TRUE; user_object->is_sub_map = TRUE; user_object->offset = 0; user_object->protection = VM_PROT_ALL; user_object->size = size; user_object->ref_count = 1; ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, IKOT_NAMED_ENTRY); *object_handle = user_handle; return KERN_SUCCESS; } /* For a given range, check all map entries. If the entry coresponds to */ /* the old vm_region/map provided on the call, replace it with the */ /* corresponding range in the new vm_region/map */ kern_return_t vm_map_region_replace( vm_map_t target_map, ipc_port_t old_region, ipc_port_t new_region, vm_offset_t start, vm_offset_t end) { vm_named_entry_t old_object; vm_named_entry_t new_object; vm_map_t old_submap; vm_map_t new_submap; vm_offset_t addr; vm_map_entry_t entry; int nested_pmap = 0; vm_map_lock(target_map); old_object = (vm_named_entry_t)old_region->ip_kobject; new_object = (vm_named_entry_t)new_region->ip_kobject; if((!old_object->is_sub_map) || (!new_object->is_sub_map)) { vm_map_unlock(target_map); return KERN_INVALID_ARGUMENT; } old_submap = (vm_map_t)old_object->backing.map; new_submap = (vm_map_t)new_object->backing.map; vm_map_lock(old_submap); if((old_submap->min_offset != new_submap->min_offset) || (old_submap->max_offset != new_submap->max_offset)) { vm_map_unlock(old_submap); vm_map_unlock(target_map); return KERN_INVALID_ARGUMENT; } if(!vm_map_lookup_entry(target_map, start, &entry)) { /* if the src is not contained, the entry preceeds */ /* our range */ addr = entry->vme_start; if(entry == vm_map_to_entry(target_map)) { vm_map_unlock(old_submap); vm_map_unlock(target_map); return KERN_SUCCESS; } } if ((entry->use_pmap) && (new_submap->pmap == NULL)) { new_submap->pmap = pmap_create((vm_size_t) 0); if(new_submap->pmap == PMAP_NULL) { vm_map_unlock(old_submap); vm_map_unlock(target_map); return(KERN_NO_SPACE); } } addr = entry->vme_start; vm_map_reference(old_submap); while((entry != vm_map_to_entry(target_map)) && (entry->vme_start < end)) { if((entry->is_sub_map) && (entry->object.sub_map == old_submap)) { if(entry->use_pmap) { if((start & 0x0fffffff) || ((end - start) != 0x10000000)) { vm_map_unlock(old_submap); vm_map_deallocate(old_submap); vm_map_unlock(target_map); return KERN_INVALID_ARGUMENT; } nested_pmap = 1; } entry->object.sub_map = new_submap; vm_map_reference(new_submap); vm_map_deallocate(old_submap); } entry = entry->vme_next; addr = entry->vme_start; } if(nested_pmap) { #ifndef i386 pmap_unnest(target_map->pmap, (addr64_t)start); if(target_map->mapped) { vm_map_submap_pmap_clean(target_map, start, end, old_submap, 0); } pmap_nest(target_map->pmap, new_submap->pmap, (addr64_t)start, (addr64_t)start, (addr64_t)(end - start)); #endif /* i386 */ } else { vm_map_submap_pmap_clean(target_map, start, end, old_submap, 0); } vm_map_unlock(old_submap); vm_map_deallocate(old_submap); vm_map_unlock(target_map); return KERN_SUCCESS; } void mach_destroy_memory_entry( ipc_port_t port) { vm_named_entry_t named_entry; #if MACH_ASSERT assert(ip_kotype(port) == IKOT_NAMED_ENTRY); #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; mutex_lock(&(named_entry)->Lock); named_entry->ref_count-=1; if(named_entry->ref_count == 0) { if(named_entry->object) { /* release the memory object we've been pointing to */ vm_object_deallocate(named_entry->object); } if(named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); } kfree((vm_offset_t)port->ip_kobject, sizeof (struct vm_named_entry)); } else mutex_unlock(&(named_entry)->Lock); } kern_return_t vm_map_page_query( vm_map_t target_map, vm_offset_t offset, int *disposition, int *ref_count) { vm_map_entry_t map_entry; vm_object_t object; vm_page_t m; restart_page_query: *disposition = 0; *ref_count = 0; vm_map_lock(target_map); if(!vm_map_lookup_entry(target_map, offset, &map_entry)) { vm_map_unlock(target_map); return KERN_FAILURE; } offset -= map_entry->vme_start; /* adjust to offset within entry */ offset += map_entry->offset; /* adjust to target object offset */ if(map_entry->object.vm_object != VM_OBJECT_NULL) { if(!map_entry->is_sub_map) { object = map_entry->object.vm_object; } else { vm_map_unlock(target_map); target_map = map_entry->object.sub_map; goto restart_page_query; } } else { vm_map_unlock(target_map); return KERN_FAILURE; } vm_object_lock(object); vm_map_unlock(target_map); while(TRUE) { m = vm_page_lookup(object, offset); if (m != VM_PAGE_NULL) { *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; break; } else { if(object->shadow) { offset += object->shadow_offset; vm_object_unlock(object); object = object->shadow; vm_object_lock(object); continue; } vm_object_unlock(object); return KERN_FAILURE; } } /* The ref_count is not strictly accurate, it measures the number */ /* of entities holding a ref on the object, they may not be mapping */ /* the object or may not be mapping the section holding the */ /* target page but its still a ball park number and though an over- */ /* count, it picks up the copy-on-write cases */ /* We could also get a picture of page sharing from pmap_attributes */ /* but this would under count as only faulted-in mappings would */ /* show up. */ *ref_count = object->ref_count; if (m->fictitious) { *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; vm_object_unlock(object); return KERN_SUCCESS; } if (m->dirty) *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; else if(pmap_is_modified(m->phys_page)) *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; if (m->reference) *disposition |= VM_PAGE_QUERY_PAGE_REF; else if(pmap_is_referenced(m->phys_page)) *disposition |= VM_PAGE_QUERY_PAGE_REF; vm_object_unlock(object); return KERN_SUCCESS; } kern_return_t set_dp_control_port( host_priv_t host_priv, ipc_port_t control_port) { if (host_priv == HOST_PRIV_NULL) return (KERN_INVALID_HOST); if (IP_VALID(dynamic_pager_control_port)) ipc_port_release_send(dynamic_pager_control_port); dynamic_pager_control_port = control_port; return KERN_SUCCESS; } kern_return_t get_dp_control_port( host_priv_t host_priv, ipc_port_t *control_port) { if (host_priv == HOST_PRIV_NULL) return (KERN_INVALID_HOST); *control_port = ipc_port_copy_send(dynamic_pager_control_port); return KERN_SUCCESS; } /* Retrieve a upl for an object underlying an address range in a map */ kern_return_t vm_map_get_upl( vm_map_t map, vm_address_t offset, vm_size_t *upl_size, upl_t *upl, upl_page_info_array_t page_list, unsigned int *count, int *flags, int force_data_sync) { vm_map_entry_t entry; int caller_flags; int sync_cow_data = FALSE; vm_object_t local_object; vm_offset_t local_offset; vm_offset_t local_start; kern_return_t ret; caller_flags = *flags; if (!(caller_flags & UPL_COPYOUT_FROM)) { sync_cow_data = TRUE; } if(upl == NULL) return KERN_INVALID_ARGUMENT; REDISCOVER_ENTRY: vm_map_lock(map); if (vm_map_lookup_entry(map, offset, &entry)) { if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { if((*upl_size/page_size) > MAX_UPL_TRANSFER) { *upl_size = MAX_UPL_TRANSFER * page_size; } } if((entry->vme_end - offset) < *upl_size) { *upl_size = entry->vme_end - offset; } if (caller_flags & UPL_QUERY_OBJECT_TYPE) { if (entry->object.vm_object == VM_OBJECT_NULL) { *flags = 0; } else if (entry->object.vm_object->private) { *flags = UPL_DEV_MEMORY; if (entry->object.vm_object->phys_contiguous) { *flags |= UPL_PHYS_CONTIG; } } else { *flags = 0; } vm_map_unlock(map); return KERN_SUCCESS; } /* * Create an object if necessary. */ if (entry->object.vm_object == VM_OBJECT_NULL) { entry->object.vm_object = vm_object_allocate( (vm_size_t)(entry->vme_end - entry->vme_start)); entry->offset = 0; } if (!(caller_flags & UPL_COPYOUT_FROM)) { if (!(entry->protection & VM_PROT_WRITE)) { vm_map_unlock(map); return KERN_PROTECTION_FAILURE; } if (entry->needs_copy) { vm_map_t local_map; vm_object_t object; vm_object_offset_t offset_hi; vm_object_offset_t offset_lo; vm_object_offset_t new_offset; vm_prot_t prot; boolean_t wired; vm_behavior_t behavior; vm_map_version_t version; vm_map_t pmap_map; local_map = map; vm_map_lock_write_to_read(map); if(vm_map_lookup_locked(&local_map, offset, VM_PROT_WRITE, &version, &object, &new_offset, &prot, &wired, &behavior, &offset_lo, &offset_hi, &pmap_map)) { vm_map_unlock(local_map); return KERN_FAILURE; } if (pmap_map != map) { vm_map_unlock(pmap_map); } vm_object_unlock(object); vm_map_unlock(local_map); goto REDISCOVER_ENTRY; } } if (entry->is_sub_map) { vm_map_t submap; submap = entry->object.sub_map; local_start = entry->vme_start; local_offset = entry->offset; vm_map_reference(submap); vm_map_unlock(map); ret = (vm_map_get_upl(submap, local_offset + (offset - local_start), upl_size, upl, page_list, count, flags, force_data_sync)); vm_map_deallocate(submap); return ret; } if (sync_cow_data) { if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { int flags; local_object = entry->object.vm_object; local_start = entry->vme_start; local_offset = entry->offset; vm_object_reference(local_object); vm_map_unlock(map); if(local_object->copy == NULL) { flags = MEMORY_OBJECT_DATA_SYNC; } else { flags = MEMORY_OBJECT_COPY_SYNC; } if((local_object->paging_offset) && (local_object->pager == 0)) { /* * do a little clean-up for our unorthodox * entry into a pager call from a non-pager * context. Normally the pager code * assumes that an object it has been called * with has a backing pager and so does * not bother to check the pager field * before relying on the paging_offset */ vm_object_lock(local_object); if (local_object->pager == 0) { local_object->paging_offset = 0; } vm_object_unlock(local_object); } if (entry->object.vm_object->shadow && entry->object.vm_object->copy) { vm_object_lock_request( local_object->shadow, (vm_object_offset_t) ((offset - local_start) + local_offset) + local_object->shadow_offset + local_object->paging_offset, *upl_size, FALSE, MEMORY_OBJECT_DATA_SYNC, VM_PROT_NO_CHANGE); } sync_cow_data = FALSE; vm_object_deallocate(local_object); goto REDISCOVER_ENTRY; } } if (force_data_sync) { local_object = entry->object.vm_object; local_start = entry->vme_start; local_offset = entry->offset; vm_object_reference(local_object); vm_map_unlock(map); if((local_object->paging_offset) && (local_object->pager == 0)) { /* * do a little clean-up for our unorthodox * entry into a pager call from a non-pager * context. Normally the pager code * assumes that an object it has been called * with has a backing pager and so does * not bother to check the pager field * before relying on the paging_offset */ vm_object_lock(local_object); if (local_object->pager == 0) { local_object->paging_offset = 0; } vm_object_unlock(local_object); } vm_object_lock_request( local_object, (vm_object_offset_t) ((offset - local_start) + local_offset) + local_object->paging_offset, (vm_object_size_t)*upl_size, FALSE, MEMORY_OBJECT_DATA_SYNC, VM_PROT_NO_CHANGE); force_data_sync = FALSE; vm_object_deallocate(local_object); goto REDISCOVER_ENTRY; } if(!(entry->object.vm_object->private)) { if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); if(entry->object.vm_object->phys_contiguous) { *flags = UPL_PHYS_CONTIG; } else { *flags = 0; } } else { *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; } local_object = entry->object.vm_object; local_offset = entry->offset; local_start = entry->vme_start; vm_object_reference(local_object); vm_map_unlock(map); if(caller_flags & UPL_SET_IO_WIRE) { ret = (vm_object_iopl_request(local_object, (vm_object_offset_t) ((offset - local_start) + local_offset), *upl_size, upl, page_list, count, caller_flags)); } else { ret = (vm_object_upl_request(local_object, (vm_object_offset_t) ((offset - local_start) + local_offset), *upl_size, upl, page_list, count, caller_flags)); } vm_object_deallocate(local_object); return(ret); } vm_map_unlock(map); return(KERN_FAILURE); } /* ******* Temporary Internal calls to UPL for BSD ***** */ kern_return_t kernel_upl_map( vm_map_t map, upl_t upl, vm_offset_t *dst_addr) { return (vm_upl_map(map, upl, dst_addr)); } kern_return_t kernel_upl_unmap( vm_map_t map, upl_t upl) { return(vm_upl_unmap(map, upl)); } kern_return_t kernel_upl_commit( upl_t upl, upl_page_info_t *pl, mach_msg_type_number_t count) { kern_return_t kr; kr = upl_commit(upl, pl, count); upl_deallocate(upl); return kr; } kern_return_t kernel_upl_commit_range( upl_t upl, vm_offset_t offset, vm_size_t size, int flags, upl_page_info_array_t pl, mach_msg_type_number_t count) { boolean_t finished = FALSE; kern_return_t kr; if (flags & UPL_COMMIT_FREE_ON_EMPTY) flags |= UPL_COMMIT_NOTIFY_EMPTY; kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished); if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished) upl_deallocate(upl); return kr; } kern_return_t kernel_upl_abort_range( upl_t upl, vm_offset_t offset, vm_size_t size, int abort_flags) { kern_return_t kr; boolean_t finished = FALSE; if (abort_flags & UPL_COMMIT_FREE_ON_EMPTY) abort_flags |= UPL_COMMIT_NOTIFY_EMPTY; kr = upl_abort_range(upl, offset, size, abort_flags, &finished); if ((abort_flags & UPL_COMMIT_FREE_ON_EMPTY) && finished) upl_deallocate(upl); return kr; } kern_return_t kernel_upl_abort( upl_t upl, int abort_type) { kern_return_t kr; kr = upl_abort(upl, abort_type); upl_deallocate(upl); return kr; } kern_return_t vm_get_shared_region( task_t task, shared_region_mapping_t *shared_region) { *shared_region = (shared_region_mapping_t) task->system_shared_region; return KERN_SUCCESS; } kern_return_t vm_set_shared_region( task_t task, shared_region_mapping_t shared_region) { task->system_shared_region = (vm_offset_t) shared_region; return KERN_SUCCESS; } kern_return_t shared_region_mapping_info( shared_region_mapping_t shared_region, ipc_port_t *text_region, vm_size_t *text_size, ipc_port_t *data_region, vm_size_t *data_size, vm_offset_t *region_mappings, vm_offset_t *client_base, vm_offset_t *alt_base, vm_offset_t *alt_next, unsigned int *fs_base, unsigned int *system, int *flags, shared_region_mapping_t *next) { shared_region_mapping_lock(shared_region); *text_region = shared_region->text_region; *text_size = shared_region->text_size; *data_region = shared_region->data_region; *data_size = shared_region->data_size; *region_mappings = shared_region->region_mappings; *client_base = shared_region->client_base; *alt_base = shared_region->alternate_base; *alt_next = shared_region->alternate_next; *flags = shared_region->flags; *fs_base = shared_region->fs_base; *system = shared_region->system; *next = shared_region->next; shared_region_mapping_unlock(shared_region); } kern_return_t shared_region_object_chain_attach( shared_region_mapping_t target_region, shared_region_mapping_t object_chain_region) { shared_region_object_chain_t object_ele; if(target_region->object_chain) return KERN_FAILURE; object_ele = (shared_region_object_chain_t) kalloc(sizeof (struct shared_region_object_chain)); shared_region_mapping_lock(object_chain_region); target_region->object_chain = object_ele; object_ele->object_chain_region = object_chain_region; object_ele->next = object_chain_region->object_chain; object_ele->depth = object_chain_region->depth; object_chain_region->depth++; target_region->alternate_next = object_chain_region->alternate_next; shared_region_mapping_unlock(object_chain_region); return KERN_SUCCESS; } kern_return_t shared_region_mapping_create( ipc_port_t text_region, vm_size_t text_size, ipc_port_t data_region, vm_size_t data_size, vm_offset_t region_mappings, vm_offset_t client_base, shared_region_mapping_t *shared_region, vm_offset_t alt_base, vm_offset_t alt_next) { *shared_region = (shared_region_mapping_t) kalloc(sizeof (struct shared_region_mapping)); if(*shared_region == NULL) return KERN_FAILURE; shared_region_mapping_lock_init((*shared_region)); (*shared_region)->text_region = text_region; (*shared_region)->text_size = text_size; (*shared_region)->fs_base = ENV_DEFAULT_ROOT; (*shared_region)->system = ENV_DEFAULT_SYSTEM; (*shared_region)->data_region = data_region; (*shared_region)->data_size = data_size; (*shared_region)->region_mappings = region_mappings; (*shared_region)->client_base = client_base; (*shared_region)->ref_count = 1; (*shared_region)->next = NULL; (*shared_region)->object_chain = NULL; (*shared_region)->self = *shared_region; (*shared_region)->flags = 0; (*shared_region)->depth = 0; (*shared_region)->default_env_list = NULL; (*shared_region)->alternate_base = alt_base; (*shared_region)->alternate_next = alt_next; return KERN_SUCCESS; } kern_return_t shared_region_mapping_set_alt_next( shared_region_mapping_t shared_region, vm_offset_t alt_next) { shared_region->alternate_next = alt_next; return KERN_SUCCESS; } kern_return_t shared_region_mapping_ref( shared_region_mapping_t shared_region) { if(shared_region == NULL) return KERN_SUCCESS; hw_atomic_add(&shared_region->ref_count, 1); return KERN_SUCCESS; } kern_return_t shared_region_mapping_dealloc( shared_region_mapping_t shared_region) { struct shared_region_task_mappings sm_info; shared_region_mapping_t next = NULL; int ref_count; while (shared_region) { if ((ref_count = hw_atomic_sub(&shared_region->ref_count, 1)) == 0) { shared_region_mapping_lock(shared_region); sm_info.text_region = shared_region->text_region; sm_info.text_size = shared_region->text_size; sm_info.data_region = shared_region->data_region; sm_info.data_size = shared_region->data_size; sm_info.region_mappings = shared_region->region_mappings; sm_info.client_base = shared_region->client_base; sm_info.alternate_base = shared_region->alternate_base; sm_info.alternate_next = shared_region->alternate_next; sm_info.flags = shared_region->flags; sm_info.self = (vm_offset_t)shared_region; if(shared_region->region_mappings) { lsf_remove_regions_mappings(shared_region, &sm_info); } if(((vm_named_entry_t) (shared_region->text_region->ip_kobject)) ->backing.map->pmap) { pmap_remove(((vm_named_entry_t) (shared_region->text_region->ip_kobject)) ->backing.map->pmap, sm_info.client_base, sm_info.client_base + sm_info.text_size); } ipc_port_release_send(shared_region->text_region); if(shared_region->data_region) ipc_port_release_send(shared_region->data_region); if (shared_region->object_chain) { next = shared_region->object_chain->object_chain_region; kfree((vm_offset_t)shared_region->object_chain, sizeof (struct shared_region_object_chain)); } else { next = NULL; } shared_region_mapping_unlock(shared_region); kfree((vm_offset_t)shared_region, sizeof (struct shared_region_mapping)); shared_region = next; } else { /* Stale indicates that a system region is no */ /* longer in the default environment list. */ if((ref_count == 1) && (shared_region->flags & SHARED_REGION_SYSTEM) && (shared_region->flags & ~SHARED_REGION_STALE)) { remove_default_shared_region(shared_region); } break; } } return KERN_SUCCESS; } ppnum_t vm_map_get_phys_page( vm_map_t map, vm_offset_t offset) { vm_map_entry_t entry; int ops; int flags; ppnum_t phys_page = 0; vm_object_t object; vm_map_lock(map); while (vm_map_lookup_entry(map, offset, &entry)) { if (entry->object.vm_object == VM_OBJECT_NULL) { vm_map_unlock(map); return (vm_offset_t) 0; } if (entry->is_sub_map) { vm_map_t old_map; vm_map_lock(entry->object.sub_map); old_map = map; map = entry->object.sub_map; offset = entry->offset + (offset - entry->vme_start); vm_map_unlock(old_map); continue; } if (entry->object.vm_object->phys_contiguous) { /* These are not standard pageable memory mappings */ /* If they are not present in the object they will */ /* have to be picked up from the pager through the */ /* fault mechanism. */ if(entry->object.vm_object->shadow_offset == 0) { /* need to call vm_fault */ vm_map_unlock(map); vm_fault(map, offset, VM_PROT_NONE, FALSE, THREAD_UNINT, NULL, 0); vm_map_lock(map); continue; } offset = entry->offset + (offset - entry->vme_start); phys_page = (ppnum_t) ((entry->object.vm_object->shadow_offset + offset) >> 12); break; } offset = entry->offset + (offset - entry->vme_start); object = entry->object.vm_object; vm_object_lock(object); while (TRUE) { vm_page_t dst_page = vm_page_lookup(object,offset); if(dst_page == VM_PAGE_NULL) { if(object->shadow) { vm_object_t old_object; vm_object_lock(object->shadow); old_object = object; offset = offset + object->shadow_offset; object = object->shadow; vm_object_unlock(old_object); } else { vm_object_unlock(object); break; } } else { phys_page = (ppnum_t)(dst_page->phys_page); vm_object_unlock(object); break; } } break; } vm_map_unlock(map); return phys_page; } kern_return_t kernel_object_iopl_request( vm_named_entry_t named_entry, memory_object_offset_t offset, vm_size_t size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, int cntrl_flags) { vm_object_t object; kern_return_t ret; /* a few checks to make sure user is obeying rules */ if(size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); size = named_entry->size - offset; } if(cntrl_flags & UPL_COPYOUT_FROM) { if((named_entry->protection & VM_PROT_READ) != VM_PROT_READ) { return(KERN_INVALID_RIGHT); } } else { if((named_entry->protection & (VM_PROT_READ | VM_PROT_WRITE)) != (VM_PROT_READ | VM_PROT_WRITE)) { return(KERN_INVALID_RIGHT); } } if(named_entry->size < (offset + size)) return(KERN_INVALID_ARGUMENT); /* the callers parameter offset is defined to be the */ /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; if(named_entry->is_sub_map) return (KERN_INVALID_ARGUMENT); named_entry_lock(named_entry); if(named_entry->object) { /* This is the case where we are going to map */ /* an already mapped object. If the object is */ /* not ready it is internal. An external */ /* object cannot be mapped until it is ready */ /* we can therefore avoid the ready check */ /* in this case. */ vm_object_reference(named_entry->object); object = named_entry->object; named_entry_unlock(named_entry); } else { object = vm_object_enter(named_entry->backing.pager, named_entry->size, named_entry->internal, FALSE, FALSE); if (object == VM_OBJECT_NULL) { named_entry_unlock(named_entry); return(KERN_INVALID_OBJECT); } vm_object_lock(object); /* create an extra reference for the named entry */ vm_object_reference_locked(object); named_entry->object = object; named_entry_unlock(named_entry); /* wait for object (if any) to be ready */ while (!object->pager_ready) { vm_object_wait(object, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); vm_object_lock(object); } vm_object_unlock(object); } ret = vm_object_iopl_request(object, offset, size, upl_ptr, user_page_list, page_list_count, cntrl_flags); vm_object_deallocate(object); return ret; } #endif /* VM_CPM */