/* * Permission to use, copy, modify, distribute, and sell this software * for any purpose and without fee, restriction or acknowledgement is * hereby granted. The author (James Knight of the Univ. of California, * Davis) places it in the public domain. * * This software is provided AS IS with no warranties of any kind. The * author shall have no liability with respect to the infringement of * copyrights, trade secrets or any patents by this software or any part * thereof. In no event will the author be liable for any lost revenue * or profits or other special, indirect and consequential damages. */ #include #include #include "seqio.h" /* * This is a simple version of a `wc' program for biological sequences. * It reads the input and counts the number of entries, number of sequences * and total length of the sequences. * * It reads the input a sequence at a time (making it simple to count * the number of sequences and their total length), and it determines * the number of entries by counting the number of times "seqfseqno" * (which gives the position of the sequence in the current entry) is 1. * * The program also reports the counts for each file of a database, using * bioseq_parse to retrieve the files. */ int main(int argc, char *argv[]) { int i, len, entries, seqs, total, bigentries, bigseqs, bigtotal; int multifile_mode, firsttimeflag, is_singleentry; char *s, *seq, *file, *filelist, *format; SEQFILE *sfp; if (argc == 1) { if ((sfp = seqfopen2("-")) != NULL) { entries = seqs = total = 0; while ((seq = seqfgetseq(sfp, &len, 0)) != NULL) { if (seqfseqno(sfp) == 1) entries++; seqs++; total += len; } seqfclose(sfp); printf(" %10d %10d %10d (stdin)\n", entries, seqs, total); } } else { entries = bigentries = bigseqs = bigtotal = 0; file = filelist = NULL; for (i=1; i < argc; i++) { /* * Figure out whether it's an existing file, a database specification * or neither. * * If it's a database specification and is a specification of single * entries of a database, only collect and print a total count of the * single entries. If the database specification consists of complete * files, then collect and print totals for each database file. */ is_singleentry = 0; if (seqfisafile(argv[i])) { file = argv[i]; format = NULL; multifile_mode = 0; } else if ((filelist = bioseq_parse(argv[i])) != NULL) { for (s=filelist; *s; s++) { if (*s == '\n') *s = '\0'; else if (*s == '@') is_singleentry = 1; } file = filelist; format = bioseq_info(argv[i], "Format"); multifile_mode = 1; } else { fprintf(stderr, "%s: Not a file or database.\n", argv[i]); continue; } entries = seqs = total = 0; firsttimeflag = 1; while (firsttimeflag || multifile_mode) { firsttimeflag = 0; /* * Read the file and count the entries and sequences. */ if ((sfp = seqfopen(file, "r", format)) != NULL) { if (!is_singleentry) entries = seqs = total = 0; while ((seq = seqfgetseq(sfp, &len, 0)) != NULL) { if (seqfseqno(sfp) == 1) entries++; seqs++; total += len; } seqfclose(sfp); if (!is_singleentry) printf(" %10d %10d %10d %s\n", entries, seqs, total, file); bigentries += entries; bigseqs += seqs; bigtotal += total; } /* * If reading a database, advance to the next file in the list. */ if (multifile_mode) { while (*file != '\0') file++; file++; if (*file == '\0') break; } } if (is_singleentry) printf(" %10d %10d %10d %s\n", entries, seqs, total, argv[i]); /* * If reading a database, free the space allocated by bioseq_parse * and bioseq_info. */ if (multifile_mode) { if (format != NULL) free(format); free(filelist); } } if (bigentries != entries) printf(" %10d %10d %10d total\n", bigentries, bigseqs, bigtotal); } return 0; }