/* * Common functions related to playlist handling and the management of * the BootCache module. */ #include #include #include #include #include #include #include #include #include #include "BootCache.h" struct playlist_header { int ph_magic; #define PH_MAGIC_0 0xa1b2c3d4 /* old format with no blocksize */ #define PH_MAGIC_1 0xa1b2c3d5 /* blocksize, offsets in bytes */ #define PH_MAGIC 0xa1b2c3d6 /* added flags field */ int ph_entries; int ph_blocksize; }; /* * The blocksize is initialised from the first playlist read, the statistics * structure, or it can be pre-set by the caller. Once set, only playlists with * matching sizes can be read/written. */ int BC_blocksize = 0; #define BLOCK_ROUNDDOWN(x) (((x) / BC_blocksize) * BC_blocksize) #define BLOCK_ROUNDUP(x) ((((x) + (BC_blocksize - 1)) / BC_blocksize) * BC_blocksize) /* * optional for power-of-two blocksize roundup: * (((x) + (BC_blocksize - 1)) & (~(BC_blocksize - 1))) */ /* * Read the named playlist from disk into an allocated buffer. */ int BC_read_playlist(const char *pfname, struct BC_playlist_entry **ppc, int *pnentries) { struct BC_playlist_entry *pc; struct playlist_header ph; int error, fd; error = 0; fd = -1; pc = NULL; if (pfname == NULL) return(ENOENT); if ((fd = open(pfname, O_RDONLY)) == -1) { warn("could not open %s", pfname); error = errno; goto out; } if (read(fd, &ph, sizeof(ph)) != sizeof(ph)) { warnx("could not read header from %s", pfname); error = EINVAL; goto out; } if (ph.ph_magic != PH_MAGIC) { warnx("bad playlist magic"); error = EINVAL; goto out; } if (BC_blocksize == 0) { BC_blocksize = ph.ph_blocksize; } else if (BC_blocksize != ph.ph_blocksize) { warnx("bad playlist blocksize %d (should be %d)", ph.ph_blocksize, BC_blocksize); error = EINVAL; } if ((pc = malloc(sizeof(*pc) * ph.ph_entries)) == NULL) { warnx("could not allocate memory for playlist"); error = errno; goto out; } if (read(fd, pc, sizeof(*pc) * ph.ph_entries) != sizeof(*pc) * ph.ph_entries) { warnx("could not read playlist data"); error = EINVAL; goto out; } *ppc = pc; *pnentries = ph.ph_entries; out: if (fd != -1) close(fd); if (error && (pc != NULL)) free(pc); return(error); } /* * Write the playlist to the named file, securely. */ int BC_write_playlist(const char *pfname, const struct BC_playlist_entry *pc, int nentries) { struct playlist_header ph; char *tfname; int error, fd; error = 0; tfname = NULL; fd = -1; if (BC_blocksize == 0) { warnx("can't write playlist without setting block size"); error = EINVAL; goto out; } /* * Prepare the output file. * * Create a secure temporary file and write an invalid header. */ if (strlen(pfname) > (MAXPATHLEN - 8)) { warnx("playlist filename too long"); error = ENAMETOOLONG; goto out; } if ((tfname = malloc(strlen(pfname) + 7)) == NULL) { warn("could not allocate %d bytes for playlist filename", strlen(pfname)); error = errno; goto out; } sprintf(tfname, "%s.XXXXXX", pfname); if ((fd = mkstemp(tfname)) < 0) { warn("could not create temporary playlist file"); error = errno; goto out; } ph.ph_magic = 0; ph.ph_entries = 0; ph.ph_blocksize = 0; if (write(fd, &ph, sizeof(ph)) != sizeof(ph)) { warn("could not write initial header to temporary playlist file"); error = errno; goto out; } /* * Write the playlist entries. */ if (write(fd, pc, sizeof(*pc) * nentries) != (sizeof(*pc) * nentries)) { warn("could not write entry to temporary playlist file"); error = errno; goto out; } /* * Write an updated (valid) header to the playlist file. */ ph.ph_magic = PH_MAGIC; ph.ph_entries = nentries; ph.ph_blocksize = BC_blocksize; if (lseek(fd, 0, SEEK_SET) != 0) { warn("could not seek on temporary playlist file"); error = errno; goto out; } if (write(fd, &ph, sizeof(ph)) != sizeof(ph)) { warn("could not write header to temporary playlist file"); error = errno; goto out; } close(fd); fd = -1; /* * Rename the temporary playlist file over the original. */ if (rename((const char *)tfname, pfname) != 0) { warn("could not save playlist file %s->%s", tfname, pfname); error = errno; goto out; } /* free here to avoid exploitable race with unlink below */ free(tfname); tfname = NULL; out: if (tfname != NULL) { unlink(tfname); free(tfname); } if (fd != -1) close(fd); return(error); } /* * Merge two playlists 'a' and 'b' into 'a's buffer. * * Does not sort or coalesce the lists. Robust in the case * where any list pointer is NULL or length is zero. */ int BC_merge_playlists(struct BC_playlist_entry **ppa, int *pna, const struct BC_playlist_entry *pb, int nb) { struct BC_playlist_entry *pc; int nentries; pc = *ppa; nentries = *pna + nb; if ((pc = realloc(pc, sizeof(*pc) * nentries)) == NULL) return(ENOMEM); bcopy(pb, pc + *pna, nb * sizeof(*pc)); *ppa = pc; *pna = nentries; return(0); } /* * Sort a playlist. */ static int compare_playlist(const void *vfirst, const void *vsecond) { const struct BC_playlist_entry *first, *second; first = (const struct BC_playlist_entry *)vfirst; second = (const struct BC_playlist_entry *)vsecond; if (first->pce_offset == second->pce_offset) return(0); return((first->pce_offset < second->pce_offset) ? -1 : 1); } void BC_sort_playlist(struct BC_playlist_entry *pc, int nentries) { if ((pc != NULL) && (nentries > 0)) qsort((void *)pc, nentries, sizeof(*pc), compare_playlist); } /* * Coalesece a sorted playlist into the smallest set of contiguous * extents. Sets the new size of the playlist and realloc's the buffer. */ int BC_coalesce_playlist(struct BC_playlist_entry **ppc, int *pnentries) { struct BC_playlist_entry *pc, *dpc; int i, j, nentries, oentries; if ((*ppc == NULL) || (*pnentries <= 0)) return(0); /* * Scan the sorted list and emit coalesced playlist entries. */ pc = *ppc; nentries = *pnentries; oentries = 0; i = 0; dpc = pc; while (i < nentries) { /* entry is the first in a possible set */ /* scan following entries to see if they can be coalesced */ for (j = 1; (i + j) < nentries; j++) { /* entry is not inside or adjacent to preceeding */ if ((pc + j)->pce_offset > (pc->pce_offset + pc->pce_length)) break; /* adjust length if required */ pc->pce_length = MAX((pc->pce_offset + pc->pce_length), ((pc + j)->pce_offset + (pc + j)->pce_length)) - pc->pce_offset; if ((pc + j)->pce_flags & PCE_PREFETCH) pc->pce_flags |= PCE_PREFETCH; } /* save entry */ *(dpc++) = *pc; oentries++; i += j; pc += j; } /* * Shrink the alloction if possible. If realloc fails, handle it * gracefully. */ pc = *ppc; *ppc = realloc(pc, sizeof(*pc) * oentries); if (*ppc == NULL) *ppc = pc; *pnentries = oentries; return(0); } /* * Fetch cache statistics. */ int BC_fetch_statistics(struct BC_statistics **pss) { struct BC_command bc; static struct BC_statistics ss; int error; bc.bc_magic = BC_MAGIC; bc.bc_opcode = BC_OP_STATS; bc.bc_data = &ss; bc.bc_length = sizeof(ss); error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)); if (error != 0) { warn("could not fetch cache statistics"); return(ENOENT); } *pss = &ss; if (BC_blocksize == 0) BC_blocksize = ss.ss_blocksize; return(0); } /* * Convert a list of history entries into a smaller list of * playlist entries, rounding them to match the current blocksize. * * Detects the presence of a prefetch tag and marks playlist entries prior * to the tag as requiring prefetch. * * Returns the playlist in an allocated buffer. */ int BC_convert_history(const struct BC_history_entry *he, struct BC_playlist_entry **ppc, int *pnentries) { struct BC_playlist_entry *pc, *pcp; u_int64_t end; int nentries, ent, pent, have_prefetch; if (he == NULL) return(0); nentries = *pnentries; if ((pc = malloc(sizeof(*pc) * nentries)) == NULL) return(ENOMEM); /* scan history and convert */ have_prefetch = 0; pcp = pc; for (ent = 0; ent < nentries; ent++) { /* if we find a prefetch tag, mark all earlier entries */ if (he->he_flags == BC_HE_TAG) { have_prefetch = 1; for (pent = 0; pent < ent; pent++) (pc + pent)->pce_flags |= PCE_PREFETCH; he++; continue; } /* convert history entry across */ pcp->pce_offset = BLOCK_ROUNDDOWN(he->he_offset); end = he->he_offset + he->he_length; pcp->pce_length = BLOCK_ROUNDUP(end) - pcp->pce_offset; he++; pcp++; } if (have_prefetch != NULL) nentries--; *ppc = pc; *pnentries = nentries; return(0); } /* * Start the cache, feed it the playlist if provided. */ int BC_start(struct BC_playlist_entry *pc, int nentries) { struct BC_command bc; bc.bc_magic = BC_MAGIC; bc.bc_opcode = BC_OP_START; bc.bc_param = BC_blocksize; bc.bc_data = pc; bc.bc_length = nentries * sizeof(*pc); return(sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)) ? errno : 0); } int BC_stop(struct BC_history_entry **phe, int *pnentries) { struct BC_command bc; struct BC_history_entry *he; int error; size_t nsize; /* * Stop the cache and get the history buffer size. */ bc.bc_magic = BC_MAGIC; bc.bc_opcode = BC_OP_STOP; nsize = sizeof(bc); error = sysctlbyname(BC_SYSCTL, &bc, &nsize, &bc, nsize); if (error != 0) { /* if cache was not running, not really an error */ if (errno != ENXIO) warn("could not stop cache"); return(errno); } if (nsize != sizeof(bc)) { warnx("control structure wrong size, version mismatch?"); return(EINVAL); } /* * Fetch and clear the history buffer. */ bc.bc_opcode = BC_OP_HISTORY; he = NULL; if (bc.bc_length == 0) { bc.bc_data = NULL; } else { if ((he = malloc(bc.bc_length)) == NULL) { warnx("could not allocate history buffer memory"); return(ENOMEM); } bc.bc_data = he; } error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)); if (error != 0) { warn("could not fetch history"); if (he != NULL) free(he); return(errno); } *phe = bc.bc_data; *pnentries = bc.bc_length / sizeof(struct BC_history_entry); return(0); } int BC_print_statistics(char *fname, struct BC_statistics *ss) { FILE *fp; struct timeval tv; int msec; if (ss == NULL) return(0); errno = 0; if (fname != NULL) { fp = fopen(fname, "w"); } else { fp = stdout; } if (fp == NULL) return(errno); /* readahead */ fprintf(fp, "block size %u\n", ss->ss_blocksize); fprintf(fp, "initiated reads %u\n", ss->ss_initiated_reads); fprintf(fp, "blocks read %u\n", ss->ss_read_blocks); fprintf(fp, "read errors %u\n", ss->ss_read_errors); fprintf(fp, "blocks discarded by error %u\n", ss->ss_error_discards); if ((ss->ss_pfetch_stop.tv_sec > 0) || (ss->ss_pfetch_stop.tv_usec > 0)) { timersub(&ss->ss_pfetch_stop, &ss->ss_cache_start, &tv); msec = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); if (msec > 0) { fprintf(fp, "prefetch time %d.%03ds\n", msec / 1000, msec % 1000); } } if ((ss->ss_read_stop.tv_sec > 0) || (ss->ss_read_stop.tv_usec > 0)) { timersub(&ss->ss_read_stop, &ss->ss_cache_start, &tv); msec = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); if (msec > 0) { fprintf(fp, "readahead time %d.%03ds\n", msec / 1000, msec % 1000); fprintf(fp, "readahead rate %ukB/s, %utps\n", (u_int)(((unsigned long long)ss->ss_read_blocks * ss->ss_blocksize * 1000) / (msec * 1024)), (ss->ss_initiated_reads * 1000) / msec); } } /* inbound strategy */ fprintf(fp, "total strategy calls %u\n", ss->ss_strategy_calls); fprintf(fp, "non-read strategy calls %u\n", ss->ss_strategy_nonread); fprintf(fp, "bypassed strategy calls %u\n", ss->ss_strategy_bypassed); fprintf(fp, "bypasses while active %u\n", ss->ss_strategy_bypass_active); fprintf(fp, "filled strategy calls %u\n", ss->ss_strategy_calls - ss->ss_strategy_bypassed); if ((ss->ss_cache_stop.tv_sec > 0) || (ss->ss_cache_stop.tv_usec > 0)) { timersub(&ss->ss_cache_stop, &ss->ss_cache_start, &tv); msec = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); if (msec > 0) { fprintf(fp, "active time %d.%03ds\n", msec / 1000, msec % 1000); fprintf(fp, "read/write strategy rate %u/%utps\n", ((ss->ss_strategy_calls - ss->ss_strategy_nonread) * 1000) / msec, (ss->ss_strategy_nonread * 1000) / msec); } } if (ss->ss_strategy_blocked > 0) fprintf(fp, "callers blocked %u\n", ss->ss_strategy_blocked); if ((ss->ss_wait_time.tv_sec > 0) || (ss->ss_wait_time.tv_usec > 0)) { msec = (ss->ss_wait_time.tv_sec * 1000) + (ss->ss_wait_time.tv_usec / 1000); if (msec > 0) { fprintf(fp, "time blocked on extents %d.%03ds\n", msec / 1000, msec % 1000); } } /* extents */ fprintf(fp, "extents in cache %u\n", ss->ss_total_extents); fprintf(fp, "extent lookups %u\n", ss->ss_extent_lookups); fprintf(fp, "extent hits %u\n", ss->ss_extent_hits); if (ss->ss_extent_lookups > 0) fprintf(fp, "extent hit ratio %.2f%%\n", ((float)ss->ss_extent_hits * 100) / ss->ss_extent_lookups); fprintf(fp, "hits not fulfilled %u\n", ss->ss_hit_blkmissing); /* block/page activity */ fprintf(fp, "blocks requested %u\n", ss->ss_requested_blocks); fprintf(fp, "blocks hit %u\n", ss->ss_hit_blocks - ss->ss_write_discards); fprintf(fp, "blocks discarded by write %u\n", ss->ss_write_discards); if (ss->ss_requested_blocks > 0) fprintf(fp, "block hit ratio %.2f%%\n", ((float)(ss->ss_hit_blocks * 100) / ss->ss_requested_blocks)); fprintf(fp, "leftover blocks %u\n", ss->ss_spurious_blocks); fprintf(fp, "leftover pages %u\n", ss->ss_spurious_pages); if (ss->ss_read_blocks > 0) fprintf(fp, "block wastage %.2f%%\n", ((float)ss->ss_spurious_blocks * 100) / ss->ss_read_blocks); /* history */ fprintf(fp, "history clusters %u\n", ss->ss_history_clusters); if (fp == stdout) { fflush(fp); } else { fclose(fp); } return(0); } int BC_print_history(char *fname, struct BC_history_entry *he, int nentries) { FILE *fp; errno = 0; if (fname != NULL) { fp = fopen(fname, "w"); } else { fp = stdout; } if (fp == NULL) return(errno); while (nentries--) { fprintf(fp, "%-10llu %-5llu %s%s%s\n", he->he_offset, he->he_length, he->he_flags == BC_HE_HIT ? "hit" : "", he->he_flags == BC_HE_MISS ? "miss" : "", he->he_flags == BC_HE_TAG ? "tag" : ""); he++; } if (fp == stdout) { fflush(fp); } else { fclose(fp); } return(0); } int BC_tag_history(void) { struct BC_command bc; int error; bc.bc_magic = BC_MAGIC; bc.bc_opcode = BC_OP_TAG; bc.bc_data = NULL; bc.bc_length = 0; error = sysctlbyname(BC_SYSCTL, NULL, NULL, &bc, sizeof(bc)); if (error != 0) { warn("could not insert prefetch tag"); return(ENOENT); } return(0); }