/* * Permission to use, copy, modify, distribute, and sell this software * for any purpose and without fee, restriction or acknowledgement is * hereby granted. The author (James Knight of the Univ. of California, * Davis) places it in the public domain. * * This software is provided AS IS with no warranties of any kind. The * author shall have no liability with respect to the infringement of * copyrights, trade secrets or any patents by this software or any part * thereof. In no event will the author be liable for any lost revenue * or profits or other special, indirect and consequential damages. */ #include #include #include #include #include "seqio.h" /* * This program extracts one of the subfields of the EMBL feature table * entries (of a certain type). The way the program is configured right * now, the feature type and subfield type are fixed by constants, and * only subfields of the form * * /subfield="..." * * can be extracted. The program, however, does collapse subfields that * extend across multiple lines. Since this is just a demo program, I * didn't try to extend the user interface. */ #define FORMAT "EMBL" #define FEATURE "\nFT CDS" #define CONTLINE "\nFT " #define SUBFIELD "/note=\"" void extract_subfield(char *feature, SEQFILE *sfp, int num); int main(int argc, char *argv[]) { int i, len, count; char *entry, *s, *feature; SEQFILE *sfp; for (i=1; i < argc; i++) { if ((sfp = seqfopen2(argv[i])) == NULL) continue; /* * Make sure the file is in the right format. */ if (strcmp(seqfformat(sfp, 0), FORMAT) != 0) { fprintf(stderr, "%s: Not a %s-format file.\n", seqffilename(sfp, 0), FORMAT); continue; } /* * Read the entries. */ while ((entry = seqfgetentry(sfp, &len, 0)) != NULL) { count = 0; s = entry; while ((s = strstr(s, FEATURE)) != NULL) { /* * Find the end of the feature lines for that feature, and make * it NULL-terminated. */ feature = ++s; while (*s != '\n') s++; while (strncmp(s, CONTLINE, 6) == 0 && isspace(s[6])) { s++; while (*s != '\n') s++; } *s = '\0'; extract_subfield(feature, sfp, ++count); *s = '\n'; } } seqfclose(sfp); } return 0; } void extract_subfield(char *feature, SEQFILE *sfp, int num) { int contlinelen; char *s, *t, *id, *subfield, *fieldvalue; /* * Look for the subfield. */ if ((subfield = strstr(feature, SUBFIELD)) == NULL) return; /* * If the subfield is there, find the strings between * the quotes, squeezing out any line breaks. */ contlinelen = strlen(CONTLINE); fieldvalue = t = s = subfield + 7; while (*s && *s != '"') { if (*s == '\n') { s += contlinelen; /* Skip the CONTLINE and then the spaces */ while (*s != '\n' && isspace(*s)) s++; *t++ = ' '; } else { if (t != s) *t = *s; t++; s++; } } *t = '\0'; /* * Print 'em out. */ id = seqfmainid(sfp, 0); printf("In %s - %s\n", (id == NULL ? "Unknown" : id), fieldvalue); }