/* * FMTSEQ.C - A File Conversion Program (Version 1.2) * * Copyright (c) 1996 by James Knight at Univ. of California, Davis * * Permission to use, copy, modify, distribute and sell this software * and its documentation is hereby granted, subject to the following * restrictions and understandings: * * 1) Any copy of this software or any copy of software derived * from it must include this copyright notice in full. * * 2) All materials or software developed as a consequence of the * use of this software or software derived from it must duly * acknowledge such use, in accordance with the usual standards * of acknowledging credit in academic research. * * 3) The software may be used freely by anyone for any purpose, * commercial or non-commercial. That includes, but is not * limited to, its incorporation into software sold for a profit * or the development of commercial software derived from it. * * 4) This software is provided AS IS with no warranties of any * kind. The author shall have no liability with respect to the * infringement of copyrights, trade secrets or any patents by * this software or any part thereof. In no event will the * author be liable for any lost revenue or profits or other * special, indirect and consequential damages. */ #include #include #include #include #include #include #include #include "seqio.h" #ifdef WIN32 char dirch = '\\'; #else char dirch = '/'; #endif #if defined(__sun) && !defined(FILENAME_MAX) #include #define FILENAME_MAX MAXPATHLEN #endif /* * The main program variables and data structures. 
*/ int case_mode, list_mode, pipe_mode, raw_mode, degap_mode; int bigalign_mode, reverse_mode, verbose_mode, item_mode; int help_mode, long_mode, split_mode, num_items, *item_list; char format[32], informat[32], idprefix[8], splitext[32], macro[256]; char input[FILENAME_MAX], output[FILENAME_MAX], gapin, gapout; SEQFILE *sfpout; #define UPPERCASE 1 #define LOWERCASE 2 #define NOCASE 3 #define ALL 1 #define ASK 2 #define ITEMLIST 3 #define MAXLINE 128 int pretty_flag, pretty_count; FILE *fpout; struct prettystruct { int colspace, gapcount, interleave, interline, skipempty; int tab, width, nameleft, nameright, nametop; int numleft, numright, numtop, numbottom; char match; } pretty; char dna_complement[MAXLINE], rna_complement[MAXLINE]; /* * Data structures specifying the options. */ #define NOPROC 0 #define CASETEST 1 #define FORMAT 2 #define GAPCHAR 3 #define HELP 4 #define IDPREFIX 5 #define INFORMAT 6 #define ITEM 7 #define LIST 8 #define OUTPUT 9 #define SPLIT 10 #define MACRO 11 typedef enum { OPT_SIMPLE, OPT_INT, OPT_CHAR, OPT_STRING, OPT_HELP, OPT_END } option_type; typedef struct { char *option_string; int minprefix; option_type type; void *variable; int set_value; /* In OPT_STRING, this is 1 if two-char option allowed In OPT_INT & OPT_CHAR, this is value if none given */ int unset_value; /* In OPT_STRING, this is a maximum length value */ int more_proc_flag; } options_struct; options_struct options[] = { /* * The main options (except "-quit" and "-run" which are interactive-only). 
*/ { "all", 2, OPT_SIMPLE, &item_mode, ALL, ASK, NOPROC }, { "ask", 2, OPT_SIMPLE, &item_mode, ASK, ALL, NOPROC }, { "bigalign", 1, OPT_SIMPLE, &bigalign_mode, 1, 0, NOPROC }, { "caselower", 1, OPT_SIMPLE, &case_mode, LOWERCASE, NOCASE, CASETEST }, { "CASEUPPER", 2, OPT_SIMPLE, &case_mode, UPPERCASE, NOCASE, CASETEST }, { "degap", 1, OPT_SIMPLE, °ap_mode, 1, 0, NOPROC }, { "gapchar", 5, OPT_CHAR, &gapin, '-', '\0', GAPCHAR }, { "gapin", 4, OPT_CHAR, &gapin, '-', '\0', NOPROC }, { "gapout", 4, OPT_CHAR, &gapout, '-', '\0', NOPROC }, { "format", 1, OPT_STRING, format, 1, 32, FORMAT }, { "help", 1, OPT_HELP, &help_mode, 1, 0, NOPROC }, { "idprefix", 2, OPT_STRING, idprefix, 0, 8, IDPREFIX }, { "informat", 3, OPT_STRING, informat, 0, 32, INFORMAT }, { "item", 2, OPT_STRING, NULL, 1, 1024, ITEM }, { "list", 2, OPT_SIMPLE, &list_mode, 1, 0, LIST }, { "long", 2, OPT_SIMPLE, &long_mode, 1, 0, NOPROC }, { "mode", 2, OPT_STRING, ¯o, 0, 256, MACRO }, { "output", 1, OPT_STRING, output, 1, FILENAME_MAX, OUTPUT }, { "pipe", 1, OPT_SIMPLE, &pipe_mode, 1, 0, NOPROC }, { "raw", 2, OPT_SIMPLE, &raw_mode, 1, 0, NOPROC }, { "reverse", 2, OPT_SIMPLE, &reverse_mode, 1, 0, NOPROC }, { "split", 2, OPT_STRING, splitext, 0, 32, SPLIT }, { "verbose", 1, OPT_SIMPLE, &verbose_mode, 1, 0, NOPROC }, /* * The pretty print options. 
*/ { "colspace", 2, OPT_INT, &pretty.colspace, 10, 0, NOPROC }, { "gapcount", 5, OPT_SIMPLE, &pretty.gapcount, 1, 0, NOPROC }, { "interleave", 7, OPT_SIMPLE, &pretty.interleave, 1, 0, NOPROC }, { "interline", 7, OPT_INT, &pretty.interline, 1, 0, NOPROC }, { "match", 2, OPT_CHAR, &pretty.match, '.', '\0', NOPROC }, { "nameleft", 5, OPT_INT, &pretty.nameleft, 8, 0, NOPROC }, { "nameright", 5, OPT_INT, &pretty.nameright, 8, 0, NOPROC }, { "nametop", 5, OPT_SIMPLE, &pretty.nametop, 1, 0, NOPROC }, { "numleft", 4, OPT_SIMPLE, &pretty.numleft, 1, 0, NOPROC }, { "numright", 4, OPT_SIMPLE, &pretty.numright, 1, 0, NOPROC }, { "numtop", 4, OPT_SIMPLE, &pretty.numtop, 1, 0, NOPROC }, { "numbottom", 4, OPT_SIMPLE, &pretty.numbottom, 1, 0, NOPROC }, { "skipempty", 2, OPT_SIMPLE, &pretty.skipempty, 1, 0, NOPROC }, { "tab", 1, OPT_INT, &pretty.tab, 0, 0, NOPROC }, { "width", 1, OPT_INT, &pretty.width, 50, 50, NOPROC }, { NULL, 0, OPT_END, NULL, 0, 0, NOPROC} }; #define pretty_format 22 #define fmt_split 11 #define num_formats 22 char *formats[num_formats] = { "Raw", "Plain", "EMBL", "Swiss-Prot", "GenBank", "PIR", "ASN.1", "FASTA", "FASTA-old", "FASTA-output", "BLAST-output", "NBRF", "NBRF-old", "IG", "IG-old", "GCG", "MSF", "PHYLIP", "PHYLIP-Int", "PHYLIP-Seq", "Clustalw", "Pretty" }; char *printfmts[num_formats] = { " 1. Raw", " 2. Plain", " 3. EMBL", " 4. Swiss-Prot (sprot)", " 5. GenBank (gb)", " 6. PIR (codata)", " 7. ASN.1 (asn)", " 8. FASTA (Pearson)", " 9. FASTA-old", "10. FASTA-output (fout)", "11. BLAST-output (bout)", "12. NBRF", "13. NBRF-old", "14. IG/Stanford (ig)", "15. IG-old", "16. GCG", "17. MSF (gcg-msf)", "18. PHYLIP", "19. PHYLIP-Int (phylipi)", "20. PHYLIP-Seq (phylips)", "21. Clustalw (clustal)", "22. Pretty" }; void print_options(int flag) { int i, j; char line[MAXLINE]; printf("Program Options (text in [...] 
is optional):\n"); printf(" -al[l] select all sequences\n"); printf(" -as[k] ask whether to select each sequence\n"); printf(" -b[igalign] convert FASTA program output to big alignment\n"); printf(" -c[aselower] convert to lowercase\n"); printf(" -C[ASEUPPER] convert to uppercase\n"); printf(" -d[egap] remove gaps from sequences\n"); printf(" -gapch[ar=-] set the gap symbol for both input and output\n"); printf(" -gapi[n=-] set the gap symbol for the input\n"); printf(" -gapo[ut=-] set the gap symbol for the output\n"); printf(" -id[prefix]=gb set identifier prefix for input\n"); printf(" -i[tem=]2,3,4 select sequences by position in input\n"); printf(" -li[st] only list sequence information\n"); printf(" -lo[ng] long form conversion (input header included as comment)\n"); printf(" -mo[de]=pretty1 run program in specified mode (listed in BIOSEQ entry)\n"); printf(" -p[ipe] read from standard input\n"); printf(" -no... unset any program option (eg `-noitem')\n"); printf(" -o[utput=]out.seq specify an output file\n"); printf(" -ra[w] leave gaps in sequences\n"); printf(" -re[verse] reverse-complement each sequence\n"); printf(" -sp[lit]=ext split output to separate files\n"); printf(" -v[erbose] output progress messages\n"); if (flag) { printf("More? "); fgets(line,MAXLINE-1,stdin); if (line[0] == 'q' || line[0] == 'Q') return; } printf(" -f[ormat=]name set output format by name\n"); printf(" -f[ormat=]# set output by number\n"); printf(" -inf[ormat]=name set input format by name\n"); printf(" -inf[ormat]=# set input format by number\n"); for (i=0,j=fmt_split; i < fmt_split; i++,j++) printf(" %-26s %-26s\n", printfmts[i], (j < num_formats ? printfmts[j] : "")); if (flag) { printf("More? 
"); fgets(line,MAXLINE-1,stdin); if (line[0] == 'q' || line[0] == 'Q') return; } printf("\nPretty-print Options:\n"); printf(" -interle[ave] output interleaved sequences\n"); printf(" -w[idth=#] sequence line width\n"); printf(" -t[ab=#] indent sequence\n"); printf(" -co[lspace=#] add space columms in sequence lines\n"); printf(" -gapco[unt] count gap chars in sequence numbers\n"); printf(" -namel[eft=#] print name to left of sequences\n"); printf(" -namer[ight=#] print name to right of sequences\n"); printf(" -namet[op] print names at top of output\n"); printf(" -numl[eft] print position numbers to left of sequences\n"); printf(" -numr[ight] print position numbers to right of sequences\n"); printf(" -numt[op] print position numbers above sequences\n"); printf(" -numb[ottom] print position numbers below sequences\n"); printf(" -ma[tch=.] replace matches to first sequence\n"); printf(" -interli[ne=#] add blank lines between sequence blocks\n"); printf(" -sk[ipempty] don't output lines with only gap characters\n"); } /* * Prototypes for the functions in this file. */ int parse_option(char *option, int flag); int query_for_filename(void); int query_for_seq(SEQFILE *sfp, int seqcount, int seqlen, SEQINFO *info); void print_oneline(FILE *fp, char *buf, int buflen, int seqcount, int seqlen, SEQINFO *info, int mode); void init_compl_tables(void); int is_idprefix(char *s); int pretty_store(char *seq, int seqlen, SEQINFO *info); void pretty_finish(void); void fastaout_queryinfo(SEQINFO *info); void fastaout_store(SEQFILE *sfp, char *seq, int seqlen, SEQINFO *info); void blastout_store(SEQFILE *sfp, char *seq, int seqlen, SEQINFO *info); void fastaout_finish(void); void *my_malloc(int size); void *my_realloc(void *ptr, int size); static int mycasecmp(char *s, char *t); static int myncasecmp(char *s, char *t, int n); static char *mystrdup(char *s); static char *mystrdup2(char *s, char *t); static int myatoi(char *s); static int isa_dir(char *directory); /* * * The Functions. 
* */ void prog_exit(int status) { if (pretty_flag && pretty_count) { pretty_flag = 0; pretty_finish(); } if (sfpout != NULL) seqfclose(sfpout); if (fpout != NULL && fpout != stdout) fclose(fpout); exit(status); } int main(int argc, char *argv[]) { int i, status, argcount, interactive, pipe_arg, stdin_flag, tempcount; int in_filetype, out_filetype, seqlen, seqcount, *ivar, fastaflag; int multifile_mode, firsttimeflag, changeflag, entrylen, unknown_count; char ch, *s, *t, *seq, *cvar, *svar, *table, *in_format, *out_format; char *s2, *t2, *mf_format, *mf_dbname, *mf_idpref, *current_file; char *entry, *files, *filename, *mf_alpha, real_output[FILENAME_MAX+1]; SEQINFO *info; SEQFILE *sfp; init_compl_tables(); /* * Initialize all of the option variables. */ for (i=0; options[i].type != OPT_END; i++) { switch (options[i].type) { case OPT_SIMPLE: case OPT_INT: case OPT_HELP: ivar = (int *) options[i].variable; *ivar = options[i].unset_value; break; case OPT_CHAR: cvar = (char *) options[i].variable; *cvar = options[i].unset_value; break; case OPT_STRING: svar = (char *) options[i].variable; if (svar != NULL) svar[0] = '\0'; break; case OPT_END: break; } } item_mode = ALL; gapin = gapout = '-'; bigalign_mode = 1; pretty.colspace = 10; pretty.gapcount = 1; pretty.interleave = 1; pretty.interline = 1; pretty.width = 50; verbose_mode = 2; /* * Initialize the main variables use to read and write the files. */ pretty_count = pretty_flag = 0; multifile_mode = 0; in_format = out_format = NULL; in_filetype = out_filetype = T_INVFORMAT; mf_format = mf_dbname = mf_alpha = mf_idpref = NULL; files = current_file = NULL; real_output[0] = '\0'; /* * Parse the options and determine whether it's interactive or not. 
*/ interactive = 1; pipe_arg = 0; for (i=1; i < argc; i++) { if (argv[i][0] != '-') interactive = 0; else { status = parse_option(argv[i], 0); if (status == -1) prog_exit(1); else if (status == 0) { fprintf(stderr, "Error: Unknown option `%s'.", argv[i]); prog_exit(1); } if (pipe_arg == 0 && pipe_mode) pipe_arg = i; } } if (help_mode) exit(0); if (pipe_mode) interactive = 0; if (verbose_mode == 2) verbose_mode = (interactive ? 1 : 0); /* * The main loop. */ argcount = 1; while (1) { /* * Read the file/database. */ stdin_flag = 0; if (interactive) { if (query_for_filename() == 0) break; } else { while (argcount < argc && argv[argcount][0] == '-' && argcount != pipe_arg) argcount++; if (argcount == argc) break; if (argcount == pipe_arg) stdin_flag = 1; else strcpy(input, argv[argcount]); argcount++; } /* * Check for incompatibilities. */ if (pipe_mode && item_mode == ASK) { /* * Must be in non-interactive mode here. */ fprintf(stderr, "Error: Option conflict between `-pipe' and `-ask'.\n"); prog_exit(1); } if (!stdin_flag && input[0] == '\0') { /* * Must be in interactive mode here. */ printf("Error: No input file specified.\n"); continue; } if (!list_mode && format[0] == '\0') { if (interactive) { printf("Error: No output format specified.\n"); continue; } else { fprintf(stderr, "Error: No output format specified.\n"); prog_exit(1); } } if (splitext[0] && output[0] != '\0' && !isa_dir(output)) { if (interactive) { printf("Error: Split mode specified, but %s is not a directory.\n", output); continue; } else { fprintf(stderr, "Error: Split mode specified, but %s is not a directory.\n", output); prog_exit(1); } } /* * Open the output if necessary. 
*/ if (splitext[0] == '\0' && sfpout == NULL && fpout == NULL) { if (verbose_mode && output[0] != '\0') fprintf(stderr, "Opening output file %s...\n", output); if (out_format != NULL) free(out_format); if (pretty_flag || list_mode) { if (output[0] == '\0') fpout = stdout; else if ((fpout = fopen(output, "w")) == NULL) { if (interactive) { printf("%s: %s\n", output, sys_errlist[errno]); continue; } else { fprintf(stderr, "%s: %s\n", output, sys_errlist[errno]); prog_exit(1); } } out_filetype = T_LIMITED; out_format = mystrdup((list_mode ? "List" : "Pretty")); } else { s = (output[0] == '\0' ? "-" : output); if ((sfpout = seqfopen(s, "w", format)) == NULL) { if (interactive) continue; else prog_exit(1); } seqfsetpretty(sfpout, 0); out_format = seqfformat(sfpout, 1); out_filetype = seqffmttype(out_format); } } /* * Open the input file or database. */ if (verbose_mode && !stdin_flag && input[0] != '\0') fprintf(stderr, "Opening input %s...\n", input); sfp = NULL; multifile_mode = 0; if (stdin_flag || input[0] == '\0' || seqfisafile(input)) { sfp = seqfopen((stdin_flag || !input[0] ? "-" : input), "r", (!informat[0] ? NULL : informat)); if (sfp == NULL) continue; } else if ((files = bioseq_parse(input)) != NULL) { multifile_mode = 1; mf_format = bioseq_info(input, "Format"); mf_dbname = bioseq_info(input, "Name"); mf_alpha = bioseq_info(input, "Alphabet"); mf_idpref = bioseq_info(input, "IdPrefix"); for (s=files; *s; s++) if (*s == '\n') *s = '\0'; current_file = files; sfp = seqfopen(current_file, "r", (!informat[0] ? 
mf_format : informat)); if (sfp == NULL) { free(files); files = NULL; if (mf_format != NULL) { free(mf_format); mf_format = NULL; } if (mf_dbname != NULL) { free(mf_dbname); mf_dbname = NULL; } if (mf_alpha != NULL) { free(mf_alpha); mf_alpha = NULL; } if (mf_idpref != NULL) { free(mf_idpref); mf_idpref = NULL; } multifile_mode = 0; continue; } if (mf_dbname != NULL) seqfsetdbname(sfp, mf_dbname); if (mf_alpha != NULL) seqfsetalpha(sfp, mf_alpha); if (mf_idpref != NULL) seqfsetidpref(sfp, mf_idpref); } else { if (interactive) { printf("%s: No such file or database.\n", input); continue; } else { fprintf(stderr, "%s: No such file or database.\n", input); continue; } } /* * Compute information about the new input file. */ if (idprefix[0] != '\0' && mf_idpref == NULL) seqfsetidpref(sfp, idprefix); if (in_format != NULL) free(in_format); in_format = seqfformat(sfp, 1); in_filetype = seqffmttype(in_format); fastaflag = 0; if (in_filetype == T_OUTPUT && strcmp(in_format, "FASTA-output") == 0) fastaflag = 1; /* * The sub-loop ranging through the files of a multifile conversion. */ firsttimeflag = 1; while (firsttimeflag || multifile_mode) { firsttimeflag = 0; /* * Figure out what the split mode is. */ split_mode = 0; if (splitext[0]) { split_mode = 1; if (!list_mode && in_filetype != T_OUTPUT && toupper(format[0]) == 'G' && toupper(format[1]) == 'C' && toupper(format[2]) == 'G') split_mode = 2; } unknown_count = 0; /* * Open the output when splitting it per file. */ if (split_mode == 1) { if (verbose_mode && (sfpout != NULL || fpout != NULL)) fprintf(stderr, "Closing output file %s...\n", output); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } if (fpout != NULL) { if (fpout != stdout) fclose(fpout); fpout = NULL; } /* * Construct the output filename. 
*/ if (output[0] == '\0') s = real_output; else { strcpy(real_output, output); for (s=real_output; *s; s++) ; if (*(s-1) != dirch) *s++ = dirch; } filename = seqffilename(sfp, 0); for (s2=filename; *s2; s2++) ; t2 = s2; if (s2 - filename > 5 && isalpha(s2[-1]) && isalpha(s2[-2]) && (s2[-3] == '.' || (isalpha(s2[-3]) && s2[-4] == '.'))) t2 = (s2[-3] == '.' ? s2 - 3 : s2 - 4); for (s2--; s2 >= filename && *s2 != dirch; s2--) ; s2++; while (s2 < t2) *s++ = *s2++; *s++ = '.'; strcpy(s, splitext); while (*s) s++; for (tempcount=1; seqfisafile(real_output); tempcount++) sprintf(s, ".%d", tempcount); if (verbose_mode) fprintf(stderr, "Opening output file %s...\n", real_output); if (out_format != NULL) free(out_format); if (pretty_flag || list_mode) { if ((fpout = fopen(real_output, "w")) == NULL) { if (interactive) printf("%s: %s\n", real_output, sys_errlist[errno]); else fprintf(stderr, "%s: %s\n", real_output, sys_errlist[errno]); break; } out_filetype = T_LIMITED; out_format = mystrdup((list_mode ? "List" : "Pretty")); } else { if ((sfpout = seqfopen(real_output, "w", format)) == NULL) break; seqfsetpretty(sfpout, 0); out_format = seqfformat(sfpout, 1); out_filetype = seqffmttype(out_format); } } /* * Output the header for the sequence listing. */ if (list_mode) { if (!split_mode && output[0] == '\0') fputc('\n', fpout); if (stdin_flag || input[0] == '\0') fprintf(fpout, "The Sequences:\n"); else fprintf(fpout, "The Sequences of %s (%s):\n", input, in_format); } /* * Do the reading and converting. */ seqcount = 0; while (seqfread(sfp, 0) == 0) { /* * Read the next sequence. */ if (in_filetype == T_OUTPUT && bigalign_mode) { if (seqcount == 0) fastaout_queryinfo(seqfinfo(sfp, 1)); seqfread(sfp, 0); } if (in_filetype == T_DATABANK && !raw_mode) seq = seqfsequence(sfp, &seqlen, 0); else seq = seqfrawseq(sfp, &seqlen, 0); if (seq == NULL || seqlen == 0) continue; info = (!long_mode ? 
seqfinfo(sfp, 0) : seqfallinfo(sfp, 0)); if (info == NULL) continue; seqcount++; /* * Depending on item_mode, possibly continue to the next sequence. */ if (item_mode == ASK) { status = query_for_seq(sfp, seqcount, seqlen, info); if (status == -1) break; else if (status == 0) continue; } else if (item_mode == ITEMLIST) { for (i=0; i < num_items; i++) if (item_list[i] == seqcount) break; if (i == num_items) continue; } if (verbose_mode && !stdin_flag && input[0] != '\0' && item_mode != ASK && (split_mode || output[0])) print_oneline(stderr, NULL, 78, seqcount, seqlen, info, 1); /* * If list_mode is set, just list the sequence information. */ if (list_mode) { print_oneline(fpout, NULL, 78, seqcount, seqlen, info, 0); continue; } /* * Do the FASTA/BLAST program output processing. */ if (in_filetype == T_OUTPUT && bigalign_mode) { if (fastaflag) fastaout_store(sfp, seq, seqlen, info); else blastout_store(sfp, seq, seqlen, info); continue; } /* * If the split mode is per sequence (during GCG output), open * the file for the current sequence. */ if (split_mode == 2) { if (verbose_mode && (sfpout != NULL || fpout != NULL)) fprintf(stderr, "Closing output file %s...\n", output); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } if (fpout != NULL) { if (fpout != stdout) fclose(fpout); fpout = NULL; } /* * Construct the output filename for the entry. 
*/ if (output[0] == '\0') s = real_output; else { strcpy(real_output, output); for (s=real_output; *s; s++) ; if (*(s-1) != dirch) *s++ = dirch; } if (info->idlist) { for (s2=info->idlist; *s2 && *s2 != ':' && *s2 != '|'; s2++) ; if (*s2 == ':') s2++; else s2 = info->idlist; while (*s2 && *s2 != '|') *s++ = *s2++; *s = '\0'; } else { sprintf(s, "unknwn%d", ++unknown_count); while (seqfisafile(real_output)) sprintf(s, "unknwn%d", ++unknown_count); while (*s) s++; } *s++ = '.'; strcpy(s, splitext); while (*s) s++; for (tempcount=1; seqfisafile(real_output); tempcount++) sprintf(s, ".%d", tempcount); if (verbose_mode) fprintf(stderr, "Opening output file %s...\n", real_output); if (out_format != NULL) free(out_format); if ((sfpout = seqfopen(real_output, "w", format)) == NULL) break; seqfsetpretty(sfpout, 0); out_format = seqfformat(sfpout, 1); out_filetype = seqffmttype(out_format); } /* * If no alterations are needed to the sequence, then check * to see if a more complete version of the entry, using * seqfputs, seqfgcgify or seqfungcgify, can be output. 
*/ changeflag = (case_mode != NOCASE || reverse_mode || (in_filetype != T_DATABANK && out_filetype == T_DATABANK && !raw_mode) || (gapin != '\0' && (degap_mode || gapin != gapout))); if (!changeflag && !list_mode && !pretty_flag && (in_filetype == T_DATABANK || in_filetype == T_GENERAL) && (out_filetype == T_DATABANK || out_filetype == T_GENERAL)) { if ((strcmp(in_format, out_format) == 0 && strcmp(in_format, "ASN.1") != 0) || (strncmp(in_format, "GCG", 3) == 0 && strcmp(out_format, "GCG") == 0)) { entry = seqfentry(sfp, &entrylen, 0); status = seqfputs(sfpout, entry, entrylen); } else if (strncmp(out_format, "GCG", 3) == 0 && (out_format[3] == '\0' || strcmp(out_format + 4, in_format) == 0) && seqfcangcgify(in_format)) { entry = seqfentry(sfp, &entrylen, 0); status = seqfgcgify(sfpout, entry, entrylen); } else if (strncmp(in_format, "GCG-", 4) == 0 && strcmp(in_format + 4, out_format) == 0 && seqfcangcgify(out_format)) { entry = seqfentry(sfp, &entrylen, 0); status = seqfungcgify(sfpout, entry, entrylen); } else status = seqfwrite(sfpout, seq, seqlen, info); } else { /* * Make any alterations to the sequence, as specified by case_mode, * reverse_mode, degap_char, raw and the input/output filetypes. 
*/ if (case_mode == LOWERCASE) { for (s=seq; *s; s++) if (isupper(*s)) *s = tolower(*s); } if (case_mode == UPPERCASE) { for (s=seq; *s; s++) if (islower(*s)) *s = toupper(*s); } if (reverse_mode) { if (info->alphabet == DNA || info->alphabet == RNA || info->alphabet == UNKNOWN) { table = dna_complement; if (info->alphabet == RNA) table = rna_complement; for (s=seq,t=seq+seqlen-1; s < t; s++,t--) { ch = *s; *s = table[(int) *t]; *t = table[(int) ch]; } } else { for (s=seq,t=seq+seqlen-1; s < t; s++,t--) { ch = *s; *s = *t; *t = ch; } } } if (in_filetype != T_DATABANK && out_filetype == T_DATABANK && !raw_mode) { for (s=seq,t=seq; *s; s++) { if (isalpha(*s)) { if (t < s) *t = *s; t++; } } *t = '\0'; seqlen = t - seq; } if (gapin != '\0' && (degap_mode || gapin != gapout)) { if (degap_mode || gapout == '\0') { for (s=seq,t=seq; *s; s++) { if (*s != gapin) { if (t < s) *t = *s; t++; } } *t = '\0'; seqlen = t - seq; } else { for (s=seq; *s; s++) if (*s == gapin) *s = gapout; } } if (pretty_flag) status = pretty_store(seq, seqlen, info); else status = seqfwrite(sfpout, seq, seqlen, info); } if (split_mode == 2) { if (verbose_mode) fprintf(stderr, "Closing output file %s...\n", real_output); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } } else if (status == -1) break; } if (!list_mode) { if (in_filetype == T_OUTPUT && bigalign_mode) fastaout_finish(); if (pretty_flag && pretty_count && interactive && output[0] == '\0') pretty_finish(); } else fputc('\n', fpout); if (verbose_mode && !stdin_flag && input[0] != '\0') fprintf(stderr, "Closing input %s...\n", input); seqfclose(sfp); if (split_mode == 1) { if (verbose_mode) fprintf(stderr, "Closing output file %s...\n", real_output); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } if (fpout != NULL) { if (fpout != stdout) fclose(fpout); fpout = NULL; } } if (multifile_mode) { while (*current_file) current_file++; current_file++; if (*current_file == '\0') break; if (verbose_mode) fprintf(stderr, "Opening 
input %s...\n", current_file); sfp = seqfopen(current_file, "r", (!informat[0] ? mf_format : informat)); if (sfp == NULL) break; /* * Compute information about the new input file. */ if (idprefix[0] != '\0') seqfsetidpref(sfp, idprefix); if (in_format != NULL) free(in_format); in_format = seqfformat(sfp, 1); in_filetype = seqffmttype(in_format); fastaflag = 0; if (in_filetype == T_OUTPUT && strcmp(in_format, "FASTA-output") == 0) fastaflag = 1; } } if (interactive && output[0] == '\0' && sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } if (multifile_mode) { free(files); files = NULL; if (mf_format != NULL) { free(mf_format); mf_format = NULL; } if (mf_dbname != NULL) { free(mf_dbname); mf_dbname = NULL; } if (mf_alpha != NULL) { free(mf_alpha); mf_alpha = NULL; } if (mf_idpref != NULL) { free(mf_idpref); mf_idpref = NULL; } multifile_mode = 0; } } if (pretty_flag && pretty_count) pretty_finish(); if (sfpout != NULL) seqfclose(sfpout); if (fpout != NULL && fpout != stdout) fclose(fpout); return 0; } int parse_option(char *option, int flag) { static int macro_depth = 0; int i, state, len, no_flag, ambiguous, value; int count, shortmatch, new_prettyflag, *ivar; char ch, *s, *t, *s2, *t2, *cvar, *macrostr, tempstr[1024]; s = option + 1; no_flag = 0; if (toupper(*s) == 'N' && toupper(s[1]) == 'O') { no_flag = 1; s += 2; } ambiguous = 0; shortmatch = -1; len = 0; for (i=0; options[i].type != OPT_END; i++) { t2 = options[i].option_string; for (s2=s,len=0; *s2 && *s2 != '=' && *t2; s2++,t2++,len++) if (toupper(*s2) != toupper(*t2)) break; if ((!*s2 || *s2 == '=')) { if (len >= options[i].minprefix) break; else ambiguous = 1; } else if (options[i].type == OPT_STRING && options[i].set_value && toupper(*s) == toupper(options[i].option_string[0])) { shortmatch = i; } } if (options[i].type == OPT_END) { if (shortmatch != -1) i = shortmatch; else { if (ambiguous) { fprintf(stderr, "Error: Ambiguous option `%s'.\n", option); return -1; } else return 0; } } switch 
(options[i].type) { case OPT_SIMPLE: if (s[len] == '=') { fprintf(stderr, "Error: Value specified for simple option `%s'.\n", option); return -1; } if (flag) { if (options[i].more_proc_flag == CASETEST) { if (no_flag) printf("Unsetting -%s\n", (case_mode == LOWERCASE ? "caselower" : "CASEUPPER")); } else printf("%s -%s\n", (!no_flag ? "Setting" : "Unsetting"), options[i].option_string); } ivar = (int *) options[i].variable; *ivar = (!no_flag ? options[i].set_value : options[i].unset_value); break; case OPT_INT: ivar = (int *) options[i].variable; if (no_flag) { if (flag) printf("Unsetting -%s\n", options[i].option_string); *ivar = options[i].unset_value; } else if (s[len] != '=') { if (flag) printf("Setting -%s to default value %d\n", options[i].option_string, options[i].set_value); *ivar = options[i].set_value; } else { if (sscanf(s+len+1, "%d", &value) != 1) { fprintf(stderr, "Error: Invalid option value in `%s'\n", option); return -1; } if (flag) printf("Setting -%s to %d\n", options[i].option_string, value); *ivar = value; } break; case OPT_CHAR: cvar = (char *) options[i].variable; if (no_flag) { if (flag) printf("Unsetting -%s\n", options[i].option_string); *cvar = options[i].unset_value; } else if (s[len] != '=') { if (flag) printf("Setting -%s to default value `%c'\n", options[i].option_string, options[i].set_value); *cvar = options[i].set_value; } else { if (s[len+1] == '\0') { if (flag) printf("Setting -%s to ` '\n", options[i].option_string); *cvar = ' '; } else { if (!isprint(s[len+1])) { fprintf(stderr, "Error: Invalid option value in `%s'\n", option); return -1; } if (flag) printf("Setting -%s to `%c'\n", options[i].option_string, s[len+1]); *cvar = s[len+1]; } } break; case OPT_STRING: if (no_flag) { if (flag) printf("Unsetting -%s\n", options[i].option_string); *((char *) options[i].variable) = '\0'; } else { if (len > 0 && (s[len] != '=' || s[len+1] == '\0')) { fprintf(stderr, "Error: Invalid option value in `%s'\n", option); return -1; } t = (len 
> 0 ? s + len + 1 : s + 1); if (strlen(t) >= options[i].unset_value) { fprintf(stderr, "Error: `%s': Option value is too long.\n", option); return -1; } strcpy(tempstr, t); if (flag) printf("Setting -%s to `%s'\n", options[i].option_string, tempstr); } break; case OPT_HELP: print_options(0); help_mode = 1; break; case OPT_END: break; } /* * Check to see if more processing must be done. */ state = options[i].more_proc_flag; switch (state) { case NOPROC: break; case CASETEST: if (!no_flag) { case_mode = (s[0] == 'c' ? LOWERCASE : UPPERCASE); if (flag) printf("Setting -%s\n", (case_mode == LOWERCASE ? "caselower" : "CASEUPPER")); } break; case GAPCHAR: gapout = gapin; break; case FORMAT: case INFORMAT: if (!no_flag) { if (isdigit(tempstr[0])) { if (sscanf(tempstr, "%d", &value) != 1 || value < 1 || value > num_formats) { fprintf(stderr, "Error: `%s' is not a valid format number.\n", tempstr); return -1; } strcpy(tempstr, formats[value-1]); } if (mycasecmp(tempstr, "Pretty") == 0) { if (state == INFORMAT) { fprintf(stderr, "Error: Pretty format is write-only.\n"); s[0] = '\0'; return -1; } new_prettyflag = 1; strcpy(format, tempstr); } else { if (!seqfisaformat(tempstr)) { fprintf(stderr, "Error: `%s' is not a valid file format.\n", tempstr); return -1; } new_prettyflag = 0; strcpy((state == FORMAT ? format : informat), tempstr); } } else new_prettyflag = pretty_flag; if (state == FORMAT && !list_mode && (sfpout != NULL || fpout != NULL)) { if (pretty_flag && pretty_count) pretty_finish(); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } else { if (fpout != stdout) fclose(fpout); fpout = NULL; } } if (state == FORMAT) pretty_flag = new_prettyflag; break; case IDPREFIX: if (no_flag) break; for (s=tempstr,len=0; len < 6 && isalpha(*s); s++,len++) ; if (*s || len < 2 || len > 4) { fprintf(stderr, "Error: Invalid identifier prefix `%s'.\n", idprefix); return -1; } for (s=tempstr,t=idprefix; *s; s++,t++) *t = (isupper(*s) ? 
tolower(*s) : *s); *t = '\0'; break; case ITEM: if (no_flag) item_mode = ALL; else if (isdigit(tempstr[0])) { for (s=tempstr,len=0,count=1; *s; s++,len++) { if (*s == ',') count++; else if (!isdigit(*s)) break; } if (*s || tempstr[len-1] == ',') { fprintf(stderr, "Error: Invalid list of items `%s'.\n", tempstr); return -1; } if (item_list != NULL) free(item_list); num_items = count; item_list = (int *) my_malloc(num_items * sizeof(int)); item_list[0] = 0; for (s=tempstr,count=0; *s; s++) { if (*s == ',') item_list[++count] = 0; else { item_list[count] *= 10; item_list[count] += *s - '0'; } } item_mode = ITEMLIST; } else { fprintf(stderr, "Error: Invalid ITEM option value `%s'.\n", tempstr); return -1; } break; case LIST: case OUTPUT: if (state == OUTPUT && !no_flag) strcpy(output, tempstr); if (pretty_flag && pretty_count) pretty_finish(); if (sfpout != NULL) { seqfclose(sfpout); sfpout = NULL; } if (fpout != NULL) { if (fpout != stdout) fclose(fpout); fpout = NULL; } break; case SPLIT: if (!no_flag) strcpy(splitext, tempstr); break; case MACRO: if (no_flag || macro_depth) break; if ((macrostr = bioseq_info("fmtseq", tempstr)) == NULL) { fprintf(stderr, "Error: Unknown `mode' option value `%s'.\n", tempstr); return -1; } macro_depth++; for (s=macrostr; *s; ) { while (*s && isspace(*s)) s++; for (t=s; *s && !isspace(*s); s++) ; if (t < s) { ch = *s; *s = '\0'; if (parse_option(t, flag) == -1) { macro_depth--; free(macrostr); return -1; } *s = ch; } } macro_depth--; free(macrostr); break; } return 1; } int query_for_filename(void) { int i, pos, status, valid_flag; char ch, *s, *t, line[256]; while (1) { /* * Print the current values. */ printf("\n Input: %s (format: %s)\n", (input[0] ? input : "*none*"), (informat[0] ? informat : "*auto*")); printf(" Output: %s%s (format: %s)\n", (output[0] ? output : "*stdout*"), (output[0] && (sfpout != NULL || fpout != NULL) ? " *open*" : ""), (format[0] ? 
format : "*none*")); printf(" Deflts:"); if (verbose_mode) printf(" -verbose"); if (gapin) printf(" -gapin=%c", gapin); if (gapout) printf(" -gapout=%c", gapout); if (bigalign_mode) printf(" -bigalign"); putchar('\n'); printf("Options:"); pos = 8; if (item_mode == ALL) { printf(" -all"); pos += 6; } if (item_mode == ASK) { printf(" -ask"); pos += 6; } if (item_mode == ITEMLIST) { printf(" -item=%d", item_list[0]); pos += 10; for (i=1; i < num_items; i++) { printf(",%d", item_list[i]); pos += 3; } } if (idprefix[0]) { if (pos + 16 > 78) { printf("\n "); pos = 8; } printf(" -idprefix=%s", idprefix); pos += 12 + strlen(idprefix); } if (list_mode) { if (pos + 7 > 78) { printf("\n "); pos = 8; } printf(" -list"); pos += 7; } if (long_mode) { if (pos + 7 > 78) { printf("\n "); pos = 8; } printf(" -long"); pos += 7; } if (degap_mode) { if (pos + 8 > 78) { printf("\n "); pos = 8; } printf(" -degap"); pos += 8; } if (raw_mode) { if (pos + 6 > 78) { printf("\n "); pos = 8; } printf(" -raw"); pos += 6; } if (case_mode == LOWERCASE) { if (pos + 12 > 78) { printf("\n "); pos = 8; } printf(" -caselower"); pos += 12; } if (case_mode == UPPERCASE) { if (pos + 12 > 78) { printf("\n "); pos = 8; } printf(" -CASEUPPER"); pos += 12; } if (reverse_mode) { if (pos + 10 > 78) { printf("\n "); pos = 8; } printf(" -reverse"); pos += 10; } putchar('\n'); if (pretty_flag) { printf(" Pretty:"); pos = 8; if (pretty.interleave) { printf(" -interleave"); pos += 13; } if (pretty.width) { printf(" -width=%d", pretty.width); pos += 11; } if (pretty.tab) { printf(" -tab=%d", pretty.tab); pos += 9; } if (pretty.colspace) { printf(" -colspace=%d", pretty.colspace); pos += 14; } if (pretty.gapcount) { printf(" -gapcount"); pos += 11; } if (pretty.nameleft) { if (pos + 14 > 78) { printf("\n "); pos = 8; } printf(" -nameleft=%d", pretty.nameleft); pos += 14; } if (pretty.nameright) { if (pos + 15 > 78) { printf("\n "); pos = 8; } printf(" -nameright=%d", pretty.nameleft); pos += 15; } if (pretty.nametop) 
{ if (pos + 10 > 78) { printf("\n "); pos = 8; } printf(" -nametop"); pos += 10; } if (pretty.numleft) { if (pos + 10 > 78) { printf("\n "); pos = 8; } printf(" -numleft"); pos += 10; } if (pretty.numright) { if (pos + 11 > 78) { printf("\n "); pos = 8; } printf(" -numright"); pos += 11; } if (pretty.numtop) { if (pos + 9 > 78) { printf("\n "); pos = 8; } printf(" -numtop"); pos += 9; } if (pretty.numbottom) { if (pos + 12 > 78) { printf("\n "); pos = 8; } printf(" -numbottom"); pos += 12; } if (pretty.match) { if (pos + 10 > 78) { printf("\n "); pos = 8; } printf(" -match=%c", pretty.match); pos += 10; } if (pretty.interline) { if (pos + 15 > 78) { printf("\n "); pos = 8; } printf(" -interline=%d", pretty.interline); pos += 15; } putchar('\n'); } /* * Warn for incompatibilities. */ if (input[0] == '\0') { if (!list_mode && format[0] == '\0') printf("\nWarning: An input file and output format must be " "specified.\n"); else printf("\nWarning: An input file must be specified.\n"); } else if (!list_mode && format[0] == '\0') printf("\nWarning: An output format must be specified.\n"); /* * Ask for the next option or "-run", "-q" or "-quit". */ putchar('\n'); printf("Commands (-option - set option, -no... - unset option, ? -help " "- list options,\n"); printf(" -r -run - execute, -q -quit - exit program, other " "- set input file)\n"); line[0] = '\0'; s = line; while (*s == '\0') { printf("Enter: "); fgets(line,MAXLINE-1,stdin); for (s=line; *s && isspace(*s); s++) ; } printf("----------------------\n\n"); valid_flag = 1; while (*s) { for (t=s; *s && !isspace(*s); s++) ; ch = *s; *s = '\0'; if (*t != '-' && *t != '?') { strcpy(input, t); printf("Setting input to `%s'\n", input); } else if (strcmp(t, "-r") == 0 || strcmp(t, "-run") == 0) { if (valid_flag) { printf("Running...\n"); return 1; } else { printf("Running...aborted, previous error.\n"); break; } } else if (strcmp(t, "-q") == 0 || strcmp(t, "-quit") == 0) return 0; else if (*t == '?' 
|| strcmp(t, "-help") == 0) print_options(1); else { status = parse_option(t, 1); if (status <= 0) { valid_flag = 0; if (status == 0) printf("Error: `%s' is not a valid option.\n", t); } if (pipe_mode) { printf("Error: Standard input cannot be redirected in " "interactive mode.\n"); pipe_mode = 0; valid_flag = 0; } } *s = ch; while (*s && isspace(*s)) s++; } } return 0; } int query_for_seq(SEQFILE *sfp, int seqcount, int seqlen, SEQINFO *info) { static char defch = 'y'; int init_print, count; char *s, *entry, *eptr, line[MAXLINE]; putchar('\n'); if (seqcount == 1) { printf("Commands (y - select the sequence, n - skip the sequence,\n"); printf(" q - quit, p - print the entry, ? - list the " "commands)\n"); putchar('\n'); } print_oneline(stdout, NULL, 78, seqcount, seqlen, info, 1); init_print = 1; eptr = entry = NULL; while (1) { printf("Select? [%c] ", defch); fgets(line,MAXLINE-1,stdin); if (line[0] == '\0') line[0] = defch; switch (line[0]) { case 'y': case 'Y': defch = 'y'; return 1; case 'n': case 'N': defch = 'n'; return 0; case 'p': case 'P': if (init_print) { entry = seqfentry(sfp, NULL, 0); putchar('\n'); eptr = entry; init_print = 0; } for (s=eptr,count=0; *s; s++) if (*s == '\n' && ++count == 15) break; if (*s) s++; fwrite(eptr, 1, s - eptr, stdout); if (!*s) fputs("\n", stdout); eptr = (*s ? s : entry); defch = 'p'; break; case 'q': case 'Q': defch = 'y'; return -1; case '?': printf(" y - Select this sequence\n"); printf(" n - Skip this sequence\n"); printf(" p - Print the sequence's entry\n"); printf(" q - Quit reading this file\n"); printf(" ? - Display this message\n"); break; } } } void print_oneline(FILE *fp, char *buf, int buflen, int seqcount, int seqlen, SEQINFO *info, int mode) { int len; char *temp, tempbuffer[MAXLINE]; if (buf != NULL) temp = buf; else temp = tempbuffer; len = 0; if (mode != 2) { sprintf(temp, "%3d. 
", seqcount); len = strlen(temp); } seqfoneline(info, temp, buflen - len, 0); if (buf == NULL) { fputs(temp, fp); fputc('\n', fp); } } int psize = 0; char **seqs, **ids; int *seqlens, *seqpos; int pretty_oneline(char *name, int pos, int realpos, char *seq, int seqlen, char *prevseq, int prevlen, int num_digits); void pretty_topnum(int pos, int maxlen, int num_digits); int pretty_store(char *seq, int seqlen, SEQINFO *info) { char buffer[MAXLINE]; if (pretty_count == psize) { if (psize == 0) { psize = 32; seqs = (char **) my_malloc(psize * sizeof(char *)); ids = (char **) my_malloc(psize * sizeof(char *)); seqlens = (int *) my_malloc(psize * sizeof(int)); seqpos = (int *) my_malloc(psize * sizeof(int)); } else { psize += psize; seqs = (char **) my_realloc(seqs, psize * sizeof(char *)); ids = (char **) my_realloc(ids, psize * sizeof(char *)); seqlens = (int *) my_realloc(seqlens, psize * sizeof(int)); seqpos = (int *) my_realloc(seqpos, psize * sizeof(int)); } } seqs[pretty_count] = mystrdup2(seq, seq+seqlen); seqlens[pretty_count] = seqlen; seqpos[pretty_count] = (info->fragstart ? info->fragstart - 1 : 0); print_oneline(NULL, buffer, 74, pretty_count, seqlen, info, 2); ids[pretty_count] = mystrdup(buffer); pretty_count++; return 0; } void pretty_finish(void) { int i, j, k, maxlen, maxpos, num_digits; if (pretty_count == 0) return; fputc('\n', fpout); if (pretty.nametop) { for (i=0; i < pretty_count; i++) fprintf(fpout, "%3d. %s\n", i+1, ids[i]); fputc('\n', fpout); } maxlen = maxpos = 0; for (i=0; i < pretty_count; i++) { if (seqlens[i] > maxlen) maxlen = seqlens[i]; if (seqpos[i] + seqlens[i] > maxpos) maxpos = seqpos[i] + seqlens[i]; } for (num_digits=0,i=maxpos; i; i/=10,num_digits++) ; if (!pretty.interleave) { for (i=0; i < pretty_count; i++) { k = seqpos[i]; for (j=0; j < seqlens[i]; j+=pretty.width) k = pretty_oneline(ids[i], k, j, seqs[i] + j, seqlens[i], (i == 0 ? 
NULL : seqs[0] + j), seqlens[0], num_digits); if (pretty.interline) for (j=0; j < pretty.interline; j++) fputc('\n', fpout); } } else { for (j=0; j < maxlen; j+=pretty.width) { if (pretty.numtop) pretty_topnum(j, maxlen, num_digits); for (i=0; i < pretty_count; i++) seqpos[i] = pretty_oneline(ids[i], seqpos[i], j, seqs[i] + j, seqlens[i], (i == 0 ? NULL : seqs[0] + j), seqlens[0], num_digits); if (pretty.numbottom) pretty_topnum(j, maxlen, num_digits); if (pretty.interline) for (i=0; i < pretty.interline; i++) fputc('\n', fpout); } } pretty_count = 0; } int pretty_oneline(char *name, int pos, int realpos, char *seq, int seqlen, char *prevseq, int prevlen, int num_digits) { static int numval = 0; static char numfmt[10]; int i, j, skipflag; char *s, *t; if (pretty.skipempty) { skipflag = 1; for (i=0,s=seq; realpos + i < seqlen && i < pretty.width; i++,s++) { if (*s != gapout && *s != '#') { skipflag = 0; break; } } if (skipflag) return realpos; } if (num_digits != numval) { sprintf(numfmt, " %%%dd ", num_digits); numval = num_digits; } if (pretty.nameleft) { s = name; if (is_idprefix(s)) while (*s++ != ':') ; for (i=0,t=s; i < pretty.nameleft && *s && !isspace(*s) && *s != '|'; i++) s++; fwrite(t, 1, i, fpout); while (i++ < pretty.nameleft) fputc(' ', fpout); fputc(' ', fpout); } if (pretty.numleft) fprintf(fpout, numfmt, pos + 1); if (pretty.tab) for (i=0; i < pretty.tab; i++) fputc(' ', fpout); fputc(' ', fpout); for (i=0,j=0,s=seq; i < pretty.width; i++,j++,s++) { if (pretty.colspace && j == pretty.colspace) { fputc(' ', fpout); j = 0; } if (realpos + i >= seqlen) { fputc(' ', fpout); continue; } if (pretty.gapcount || *s != gapout) pos++; if (!pretty.match || *s == gapout || prevseq == NULL || realpos + i >= prevlen || toupper(*s) != toupper(prevseq[i])) fputc(*s, fpout); else fputc(pretty.match, fpout); } fputc(' ', fpout); if (pretty.numright) fprintf(fpout, numfmt, pos); if (pretty.nameright) { s = name; if (is_idprefix(s)) while (*s++ != ':') ; for (i=0,t=s; 
i < pretty.nameright && *s && !isspace(*s) && *s != '|'; i++) s++; while (i++ < pretty.nameright) fputc(' ', fpout); fwrite(t, 1, i, fpout); } fputc('\n', fpout); return pos; } void pretty_topnum(int pos, int maxlen, int num_digits) { int i, j, k, col, oldcol, len; char buf[16]; oldcol = 0; col = (pretty.nameleft ? pretty.nameleft+1 : 0) + (pretty.numleft ? num_digits+2 : 0) + pretty.tab + 1; pos++; col++; for (j=1,k=1; j <= pretty.width && pos <= maxlen; j++,k++,pos++,col++) { if (pos % 10 == 0) { sprintf(buf, "%d", pos); len = strlen(buf); if (col - oldcol > len) { for (i=oldcol; i < col - len; i++) fputc(' ', fpout); fputs(buf, fpout); oldcol = col; } } if (pretty.colspace && k == pretty.colspace) { col++; k = 0; } } fputc('\n', fpout); } SEQINFO *query_info; typedef struct gap_node { int pos, len; struct gap_node *next; } GAP_NODE, *GAP; typedef struct align_node { int start, end, offset; char *seq; SEQINFO *info; GAP gaps; struct align_node *next; } ALIGN_NODE, *ALIGN; ALIGN alignlist = NULL; ALIGN aligntail = NULL; int align_error = 0; typedef struct frag_node { int start, end, seqcount, seqsize; char *seq; struct frag_node *next; } FRAG_NODE, *FRAGMENT; FRAGMENT fraglist = NULL; FRAGMENT current; int frag_init, temp_flag, tempcount, frag_mode; char tempstr[16]; void new_build(int flag) { frag_init = 1; temp_flag = flag; tempcount = 0; } void add_to_build(int pos, char ch) { FRAGMENT node, back, newfrag; if (temp_flag) { tempstr[tempcount++] = ch; return; } if (frag_init) { back = NULL; for (node=fraglist; node != NULL && node->end < pos; node=node->next) back = node; if (node == NULL || node->start > pos) { if (back != NULL && back->end + 1 == pos) { current = back; frag_mode = 2; } else { newfrag = (FRAGMENT) my_malloc(sizeof(FRAG_NODE)); newfrag->start = pos; newfrag->end = pos; newfrag->seqcount = 1; newfrag->seqsize = 256; newfrag->seq = (char *) my_malloc(256); *newfrag->seq = ch; if (back == NULL) fraglist = newfrag; else back->next = newfrag; 
newfrag->next = node; current = newfrag; frag_mode = 2; return; } } else { current = node; frag_mode = 1; } frag_init = 0; } if (frag_mode == 1) { if (pos == current->end + 1) frag_mode = 2; else return; } if (frag_mode == 2) { if (current->next && pos == current->next->start) { current = current->next; frag_mode = 1; } else { if (current->seqcount == current->seqsize) { current->seqsize += current->seqsize; current->seq = (char *) my_realloc(current->seq, current->seqsize); } current->seq[current->seqcount++] = ch; current->end++; } } } void fix_build(int pos) { int i; temp_flag = 0; for (i=0; i < tempcount; i++) add_to_build(pos+i, tempstr[i]); } void fastaout_queryinfo(SEQINFO *info) { char *s, *t; query_info = info; if (info->comment && strncmp(info->comment, "From ", 5) == 0) { for (t=info->comment,s=t+5; *s && !isspace(*s); t++,s++) *t = *s; for (s++; *s && *s != '\n'; s++) ; for (s++; *s && *s != '>'; s++) ; memcpy(t, " query sequence:\n ", 18); t += 18; while (*s && *s != '\n') *t++ = *s++; *t++ = *s++; *t = '\0'; } } void fastaout_store(SEQFILE *sfp, char *seq, int seqlen, SEQINFO *info) { int i, pos, pos2, num, diff, len, offset, ingap, maxlen, initpos; int al_start, al_stop, al_disp, al_start2, al_disp2, matchoffset; int matchpos, posflag, alstart_flag, seq1only, seq2only, seq1len; int line1end, line2end, line3end, line4end, markx2_flag; char *s, *s2, *entry, *end, *line1, *line2, *line3, *line4; ALIGN newalign; GAP gaplist, gaptail, newgap, node, next, back; if (align_error) return; entry = seqfentry(sfp, &len, 0); end = entry + len; if (entry[0] == '>' && entry[1] == '>') { /* * The format of the entry is MARKX10. 
*/ for (s=entry; s < end && (*s != '\n' || s[1] != '>'); s++) ; al_start = al_stop = al_disp = 0; for (s++; s < end && (*s != '\n' || s[1] == ';'); s++) { if (*s == '\n') { if (strncmp(s+1, "; al_", 5) == 0) { if (strncmp(s+6, "start:", 6) == 0) { for (s+=12; s < end && isspace(*s); s++) ; for ( ;s < end && isdigit(*s); s++) { al_start *= 10; al_start += *s - '0'; } s--; } else if (strncmp(s+6, "stop:", 5) == 0) { for (s+=11; s < end && isspace(*s); s++) ; for ( ;s < end && isdigit(*s); s++) { al_stop *= 10; al_stop += *s - '0'; } s--; } else if (strncmp(s+6, "display_start:", 14) == 0) { for (s+=20; s < end && isspace(*s); s++) ; for ( ;s < end && isdigit(*s); s++) { al_disp *= 10; al_disp += *s - '0'; } s--; } } } } if (s == end || s[1] == '>' || al_start == 0 || al_stop == 0 || al_disp == 0) { fprintf(stderr, "%s, entry %d: Parse error in FASTA-output.\n", info->filename, info->entryno); align_error = 1; return; } pos = al_disp; if (raw_mode) al_start = al_disp; new_build(0); gaplist = gaptail = newgap = NULL; ingap = 0; for ( ; s < end && (*s != '\n' || s[1] != '>'); ) { for (s++; s < end && *s != '\n'; s++) { if (*s != '-' && *s != ' ') { add_to_build(pos, *s); pos++; ingap = 0; } else if (ingap) gaptail->len++; else { newgap = (GAP) my_malloc(sizeof(GAP_NODE)); newgap->pos = pos; newgap->len = 1; newgap->next = NULL; if (gaplist == NULL) gaplist = gaptail = newgap; else { gaptail->next = newgap; gaptail = newgap; } ingap = 1; } } } if (raw_mode) { al_stop = pos - 1; seq1len = al_stop - al_start + 1; for (newgap=gaplist; newgap != NULL; newgap=newgap->next) seq1len += newgap->len; if (seq1len < seqlen) { newgap = (GAP) my_malloc(sizeof(GAP_NODE)); newgap->pos = pos; newgap->len = seqlen - seq1len; newgap->next = NULL; if (gaplist == NULL) gaplist = gaptail = newgap; else { gaptail->next = newgap; gaptail = newgap; } } } if (s == end) { fprintf(stderr, "%s, entry %d: Premature end-of-entry found.\n", info->filename, info->entryno); for (node=gaplist; node != 
NULL; node=next) { next = node; free(next); } align_error = 1; return; } offset = 0; if (!raw_mode) { al_start2 = al_disp2 = 0; for (s++; s < end && (*s != '\n' || s[1] == ';'); s++) { if (*s == '\n') { if (strncmp(s+1, "; al_", 5) == 0) { if (strncmp(s+6, "start:", 6) == 0) { for (s+=12; s < end && isspace(*s); s++) ; for ( ; s < end && isdigit(*s); s++) { al_start2 *= 10; al_start2 += *s - '0'; } s--; } else if (strncmp(s+6, "display_start:", 14) == 0) { for (s+=20; s < end && isspace(*s); s++) ; for ( ; s < end && isdigit(*s); s++) { al_disp2 *= 10; al_disp2 += *s - '0'; } s--; } } } } pos = al_disp2; for (s=seq; *s; s++) { if (*s != '-') { if (pos == al_start2) { offset = s - seq; break; } pos++; } } if (!*s) { fprintf(stderr, "%s, entry %d: Parse error while reading " "FASTA-output entry.\n", info->filename, info->entryno); align_error = 1; return; } } } else { /* * The format of the entry is MARKX0 - MARKX3. */ for (s=entry; s < end; s++) if (*s == '\n' && (s[1] == '>' || s[1] == '\n')) break; if (s == end) { fprintf(stderr, "%s, entry %d: Premature end of entry.\n", info->filename, info->entryno); align_error = 1; return; } if (s[1] == '>') { fprintf(stderr, "%s, entry %d: Cannot align FASTA-output using MARKX=3 " "format.\n", info->filename, info->entryno); align_error = 1; return; } s++; initpos = pos = 1; pos2 = 0; offset = -1; matchpos = -1; matchoffset = -1; al_start = al_stop = 0; posflag = alstart_flag = 0; gaplist = gaptail = NULL; ingap = 0; markx2_flag = 0; new_build(1); while (1) { while (s < end && *s != '\n') s++; /* Skip blank line */ s++; if (s >= end || (*s == '-' && s[1] == '-' && s[2] == '-')) break; if (*s == '\n' && s + 1 == end) /* This handles the FASTA output bug */ break; /* where an extra blank line is added */ /* to the end of MARKX=2 entries. 
*/ seq1only = seq2only = 0; line1 = line2 = line3 = line4 = NULL; if (s < end && *s == ' ') { line1 = s; while (s < end && *s != '\n') s++; /* Skip seq 1 positions line */ s++; } else seq2only = 1; if (!seq2only) { line2 = s; while (s < end && *s != '\n') s++; /* Skip sequence 1 line */ s++; if (s < end && *s == '\n') seq1only = 1; else if (*s == ' ') { line3 = s; while (s < end && *s != '\n') s++; /* Skip line of matches */ s++; } else markx2_flag = 1; } if (!seq1only) { line4 = s; while (s < end && *s != '\n') s++; /* Skip sequence 2 line */ s++; if (*s == ' ') { while (s < end && *s != '\n') s++; /* Skip seq 2 positions line */ s++; } } maxlen = 0; if (line1) { for (s2=line1,len=0; *s2!= '\n'; s2++,len++) ; if (len > maxlen) maxlen = len; } if (line2) { for (s2=line2,len=0; *s2 != '\n'; s2++,len++) ; if (len > maxlen) maxlen = len; } if (line3) { for (s2=line3,len=0; *s2!= '\n'; s2++,len++) ; if (len > maxlen) maxlen = len; } if (line4) { for (s2=line4,len=0; *s2!= '\n'; s2++,len++) ; if (len > maxlen) maxlen = len; } line1end = (line1 ? 0 : 1); line2end = (line2 ? 0 : 1); line3end = (line3 ? 0 : 1); line4end = (line4 ? 
0 : 1); for (i=7; i < maxlen; i++) { if (!line1end && line1[i] == '\n') line1end = 1; if (!line2end && line2[i] == '\n') line2end = 1; if (!line3end && line3[i] == '\n') line3end = 1; if (!line4end && line4[i] == '\n') line4end = 1; if (!posflag && !line1end && isdigit(line1[i]) && isspace(line1[i+1])) { for (s2=line1+i; s2 > line1 && isdigit(*(s2-1)); s2--) ; num = *s2 - '0'; for (s2++; isdigit(*s2); s2++) { num *= 10; num += *s2 - '0'; } diff = num - pos; initpos = diff + 1; fix_build(initpos); if (diff) { for (node=gaplist; node != NULL; node=node->next) node->pos += diff; pos = num; } if (alstart_flag) { al_start += diff; al_stop += diff; } if (matchpos != -1) matchpos += diff; posflag = 1; } if (seq2only || !line2end) { if (!line2end && line2[i] != '-' && line2[i] != ' ') ingap = 0; else if (ingap) gaptail->len++; else { newgap = (GAP) my_malloc(sizeof(GAP_NODE)); newgap->pos = pos; newgap->len = 1; newgap->next = NULL; if (gaplist == NULL) gaplist = gaptail = newgap; else { gaptail->next = newgap; gaptail = newgap; } ingap = 1; } } if (!raw_mode) { if (markx2_flag) { if (!line4end && line4[i] == '.') { if (!alstart_flag) { al_start = al_stop = pos; offset = pos2; alstart_flag = 1; } else al_stop = pos; } } else if (!alstart_flag) { if (!line3end && line3[i] != ' ') { al_start = al_stop = (matchpos == -1 ? pos : matchpos); offset = (matchpos == -1 ? 
pos2 : matchoffset); alstart_flag = 1; } if (!line2end || !line4end) { if (!(!line2end && !line4end) || (line2[i] != line4[i] && line2[i] != '-' && line4[i] != '-')) matchpos = -1; else if (matchpos == -1) { matchpos = pos; matchoffset = pos2; } } } else { if (al_start == pos || (!line3end && line3[i] != ' ')) al_stop = matchpos = pos; else if (matchpos != -1 && (!line2end || !line4end)) { if (!line2end && !line4end && (line2[i] == line4[i] || line2[i] == '-' || line4[i] == '-')) al_stop = matchpos = pos; else matchpos = -1; } } } if (!line2end && line2[i] != '-' && line2[i] != ' ') { add_to_build(pos, line2[i]); pos++; } pos2++; } } if (raw_mode || (markx2_flag && !alstart_flag)) { al_start = initpos; al_stop = pos - 1; offset = 0; alstart_flag = 1; } else if (!alstart_flag && matchpos != -1) { alstart_flag = 1; al_start = matchpos; offset = matchoffset; al_stop = pos - 1; } if (!alstart_flag || al_start == 0 || al_stop == 0 || offset == -1) { fprintf(stderr, "%s, entry %d: Parse error while reading " "FASTA-output entry.\n", info->filename, info->entryno); align_error = 1; return; } } while (gaplist != NULL && gaplist->pos < al_start + (raw_mode ? 0 : 1)) { next = gaplist->next; free(gaplist); gaplist = next; } for (node=gaplist,back=NULL; node != NULL; back=node,node=node->next) if (node->pos > al_stop + (raw_mode ? 
/*
 * Positions on the other ("Minus") strand are stored mirrored as
 * 2 * MAXSTRAND - position, which keeps them apart from plus-strand
 * positions in the fragment list.
 */
#define MAXSTRAND 50000000

/*
 * blastout_store
 *
 * Parses the current BLAST-output entry, adds the query residues shown on
 * its "Query:" lines to the fragment list and appends an ALIGN node
 * holding a copy of `seq', the query range covered and the gaps found in
 * the query lines.  The strand ("Plus"/"Minus" at the start of an entry)
 * is remembered between calls and reset at entry number 1.
 *
 * On a parse error a message is printed and align_error is set.  Nothing
 * is done once align_error is set.
 */
void blastout_store(SEQFILE *sfp, char *seq, int seqlen, SEQINFO *info)
{
  static int otherstrand = 0;
  int pos, al_start, ingap, len;
  char *s, *t1, *entry, *end, *line1, *line2;
  ALIGN newalign;
  GAP gaplist, gaptail, newgap;

  if (align_error)
    return;

  entry = seqfentry(sfp, &len, 0);
  end = entry + len;

  if (seqfentryno(sfp) == 1)
    otherstrand = 0;

  /* An entry beginning with "Minus"/"Plus" switches the strand. */
  for (s=entry; s < end && isspace(*s); s++)
    ;
  if (strncmp(s, "Minus", 5) == 0)
    otherstrand = 1;
  else if (strncmp(s, "Plus", 4) == 0)
    otherstrand = 0;

  /* Find the first "Query:" line; line2 is the line two lines below it. */
  for (s=entry; s < end; s++)
    if (myncasecmp(s, "\nQuery:", 7) == 0)
      break;
  for (line1=++s; s < end && *s != '\n'; s++) ;
  for (s++; s < end && *s != '\n'; s++) ;
  for (line2=++s; s < end && *s != '\n'; s++) ;
  if (s >= end) {
    fprintf(stderr, "%s, entry %d: Parse error in BLAST-output.\n",
            info->filename, info->entryno);
    align_error = 1;
    return;
  }

  /* The number following "Query:" is the starting query position. */
  al_start = myatoi(line1 + 6);
  if (al_start == 0) {
    fprintf(stderr, "%s, entry %d: Parse error in BLAST-output.\n",
            info->filename, info->entryno);
    align_error = 1;
    return;
  }

  new_build(0);
  gaplist = gaptail = NULL;
  ingap = 0;
  pos = (!otherstrand ? al_start : 2 * MAXSTRAND - al_start);
  while (s < end) {
    /* Skip the blanks and the position number in front of the residues. */
    for (t1=line1+6; *t1 != '\n' && (isspace(*t1) || isdigit(*t1)); t1++)
      ;
    /* Record residues and gap runs up to the next whitespace. */
    for ( ; !isspace(*t1); t1++) {
      if (*t1 != '-') {
        add_to_build(pos, *t1);
        pos++;
        ingap = 0;
      }
      else if (ingap)
        gaptail->len++;
      else {
        newgap = (GAP) my_malloc(sizeof(GAP_NODE));
        newgap->pos = pos;
        newgap->len = 1;
        newgap->next = NULL;
        if (gaplist == NULL)
          gaplist = gaptail = newgap;
        else {
          gaptail->next = newgap;
          gaptail = newgap;
        }
        ingap = 1;
      }
    }

    /* Advance four lines; line1 becomes the second of them. */
    for (s++; s < end && *s != '\n'; s++) ;
    for (line1=++s; s < end && *s != '\n'; s++) ;
    for (s++; s < end && *s != '\n'; s++) ;
    for (line2=++s; s < end && *s != '\n'; s++) ;
  }

  newalign = (ALIGN) my_malloc(sizeof(ALIGN_NODE));
  newalign->start = (!otherstrand ? al_start : 2 * MAXSTRAND - al_start);
  newalign->end = pos - 1;
  newalign->gaps = gaplist;
  newalign->seq = mystrdup2(seq, seq + seqlen);
  newalign->info = seqfinfo(sfp, 1);
  newalign->offset = 0;
  newalign->next = NULL;
  if (alignlist == NULL)
    alignlist = aligntail = newalign;
  else {
    aligntail->next = newalign;
    aligntail = newalign;
  }
}
*/ glist = NULL; startpos = endpos = -1; for (node=alignlist; node != NULL; node=node->next) { if (startpos == -1 || node->start < startpos) startpos = node->start; if (endpos == -1 || node->end > endpos) endpos = node->end; gback = NULL; for (gnode=node->gaps,gnode2=glist; gnode != NULL; gnode=gnode->next) { while (gnode2 != NULL && gnode2->pos < gnode->pos) { gback = gnode2; gnode2 = gnode2->next; } if (gnode2 != NULL && gnode2->pos == gnode->pos) { if (gnode->len > gnode2->len) gnode2->len = gnode->len; } else { newgap = (GAP) my_malloc(sizeof(GAP_NODE)); newgap->pos = gnode->pos; newgap->len = gnode->len; newgap->next = gnode2; if (gback == NULL) glist = newgap; else gback->next = newgap; gnode2 = newgap; } } } /* * Construct the query sequence. */ seqlen = 0; for (gnode=glist; gnode != NULL; gnode=gnode->next) seqlen += gnode->len; for (fnode=fraglist; fnode != NULL; fnode=fnode->next) { seqlen += fnode->seqcount; if (fnode->next != NULL && fnode->next->start > fnode->end + 1) { seqlen += 3; if (fnode->end < MAXSTRAND && fnode->next->start > MAXSTRAND) seqlen += 3; } } s = seq = (char *) my_malloc(seqlen+1); for (fnode=fraglist,gnode=glist; fnode != NULL; fnode=fnode->next) { if (fnode->end < startpos || fnode->start > endpos + 1) continue; for (pos=fnode->start,t=fnode->seq; pos <= fnode->end; pos++,t++) { if (pos < startpos || pos > endpos + 1) continue; if (gnode != NULL && gnode->pos == pos) { for (j=0; j < gnode->len; j++) *s++ = '-'; gnode = gnode->next; } if (pos <= endpos) *s++ = *t; } if (gnode != NULL && gnode->pos == pos && (fnode->next == NULL || fnode->next->start > pos)) { for (j=0; j < gnode->len; j++) *s++ = '-'; gnode = gnode->next; } if (fnode->next != NULL && fnode->next->start > pos && fnode->next->start <= endpos) { *s++ = '#'; *s++ = '#'; *s++ = '#'; if (fnode->end < MAXSTRAND && fnode->next->start > MAXSTRAND) { *s++ = '#'; *s++ = '#'; *s++ = '#'; } } } *s = '\0'; /* * Make any alterations to the sequence, as specified by case_mode, * 
reverse_mode, degap_char, raw and the input/output filetypes. */ if (case_mode == LOWERCASE) { for (t=seq; *t; t++) if (isupper(*t)) *t = tolower(*t); } if (case_mode == UPPERCASE) { for (t=seq; *t; t++) if (islower(*t)) *t = toupper(*t); } if (reverse_mode) { if (query_info->alphabet == DNA || query_info->alphabet == RNA || query_info->alphabet == UNKNOWN) { table = dna_complement; if (query_info->alphabet == RNA) table = rna_complement; for (s2=seq,t2=s-1; s2 < t2; s2++,t2--) { ch = *s2; *s2 = table[(int) *t2]; *t2 = table[(int) ch]; } } else { for (s2=seq,t2=s-1; s2 < t2; s2++,t2--) { ch = *s2; *s2 = *t2; *t2 = ch; } } } if (gapin != '\0' && (degap_mode || gapin != gapout)) { if (degap_mode || gapout == '\0') { for (s2=seq,t2=seq; *s2; s2++) { if (*s2 != gapin) { if (t2 < s2) *t2 = *s2; t2++; } } *t2 = '\0'; s = t2; } else { for (s2=seq; *s2; s2++) if (*s2 == gapin) *s2 = gapout; } } if (pretty_flag) pretty_store(seq, s - seq, query_info); else seqfwrite(sfpout, seq, s - seq, query_info); /* * Construct the rest of the sequences. */ for (node=alignlist; node != NULL; node=node->next) { s = seq; gnode2 = node->gaps; t = node->seq + node->offset; for (fnode=fraglist,gnode=glist; fnode != NULL; fnode=fnode->next) { if (fnode->end < startpos || fnode->start > endpos + 1) continue; for (pos=fnode->start; pos <= fnode->end; pos++) { if (pos < startpos || pos > endpos + 1) continue; if (pos < node->start || pos > node->end + 1) { if (gnode != NULL && gnode->pos == pos) { for (j=0; j < gnode->len; j++) *s++ = '-'; gnode = gnode->next; } *s++ = '-'; } else { if (gnode != NULL && gnode->pos == pos) { len = 0; if (gnode2 != NULL && gnode2->pos == pos) { len = gnode2->len; gnode2 = gnode2->next; } if (pos > node->start) for (j=0; j < len; j++) *s++ = (*t ? *t++ : '-'); for (j=0; j < gnode->len - len; j++) *s++ = '-'; if (pos == node->start) for (j=0; j < len; j++) *s++ = (*t ? 
*t++ : '-'); gnode = gnode->next; } if (pos <= endpos) { if (pos == node->end + 1) *s++ = '-'; else *s++ = (*t ? *t++ : '-'); } } } if (gnode != NULL && gnode->pos == pos && (fnode->next == NULL || fnode->next->start > pos)) { len = 0; if (gnode2 != NULL && gnode2->pos == pos) { len = gnode2->len; gnode2 = gnode2->next; } for (j=0; j < len; j++) *s++ = (*t ? *t++ : '-'); for (j=0; j < gnode->len - len; j++) *s++ = '-'; gnode = gnode->next; } if (fnode->next != NULL && fnode->next->start > pos && fnode->next->start <= endpos) { *s++ = '#'; *s++ = '#'; *s++ = '#'; if (fnode->end < MAXSTRAND && fnode->next->start > MAXSTRAND) { *s++ = '#'; *s++ = '#'; *s++ = '#'; } } } *s = '\0'; /* * Make any alterations to the sequence, as specified by case_mode, * reverse_mode, degap_char, raw and the input/output filetypes. */ if (case_mode == LOWERCASE) { for (t=seq; *t; t++) if (isupper(*t)) tolower(*t); } if (case_mode == UPPERCASE) { for (t=seq; *t; t++) if (isupper(*t)) tolower(*t); } if (reverse_mode) { if (query_info->alphabet == DNA || query_info->alphabet == RNA || query_info->alphabet == UNKNOWN) { table = dna_complement; if (query_info->alphabet == RNA) table = rna_complement; for (s2=seq,t2=s-1; s2 < t2; s2++,t2--) { ch = *s2; *s2 = table[(int) *t2]; *t2 = table[(int) ch]; } } else { for (s2=seq,t2=s-1; s2 < t2; s2++,t2--) { ch = *s2; *s2 = *t2; *t2 = ch; } } } if (gapin != '\0' && (degap_mode || gapin != gapout)) { if (degap_mode || gapout == '\0') { for (s2=seq,t2=seq; *s2; s2++) { if (*s2 != gapin) { if (t2 < s2) *t2 = *s2; t2++; } } *t2 = '\0'; s = t2; } else { for (s2=seq; *s2; s2++) if (*s2 == gapin) *s2 = gapout; } } if (pretty_flag) pretty_store(seq, s - seq, node->info); else seqfwrite(sfpout, seq, s - seq, node->info); } for (gnode=glist; gnode != NULL; gnode=gnext) { gnext = gnode->next; free(gnode); } } /* * Free everything. 
*/ for (node=alignlist; node != NULL; node=next) { next = node->next; if (node->seq != NULL) free(node->seq); if (node->info != NULL) free(node->info); for (gnode=node->gaps; gnode != NULL; gnode=gnext) { gnext = gnode->next; free(gnode); } free(node); } for (fnode=fraglist; fnode != NULL; fnode=fnext) { fnext = fnode->next; if (fnode->seq) free(fnode->seq); free(fnode); } free(query_info); query_info = NULL; fraglist = NULL; alignlist = aligntail = NULL; align_error = 0; } void init_compl_tables(void) { int i; for (i=0; i < MAXLINE; i++) dna_complement[i] = dna_complement[i] = i; dna_complement['a'] = 't'; dna_complement['A'] = 'T'; dna_complement['c'] = 'g'; dna_complement['C'] = 'G'; dna_complement['g'] = 'c'; dna_complement['G'] = 'C'; dna_complement['t'] = 'a'; dna_complement['T'] = 'A'; dna_complement['u'] = 'a'; dna_complement['U'] = 'A'; dna_complement['r'] = 'y'; dna_complement['R'] = 'Y'; dna_complement['y'] = 'r'; dna_complement['Y'] = 'R'; dna_complement['w'] = 'w'; dna_complement['W'] = 'W'; dna_complement['s'] = 's'; dna_complement['S'] = 'S'; dna_complement['m'] = 'k'; dna_complement['M'] = 'K'; dna_complement['k'] = 'm'; dna_complement['K'] = 'M'; dna_complement['h'] = 'd'; dna_complement['H'] = 'D'; dna_complement['b'] = 'v'; dna_complement['B'] = 'V'; dna_complement['v'] = 'b'; dna_complement['V'] = 'B'; dna_complement['d'] = 'h'; dna_complement['D'] = 'H'; dna_complement['n'] = 'n'; dna_complement['N'] = 'N'; rna_complement['a'] = 'u'; rna_complement['A'] = 'U'; rna_complement['c'] = 'g'; rna_complement['C'] = 'G'; rna_complement['g'] = 'c'; rna_complement['G'] = 'C'; rna_complement['t'] = 'a'; rna_complement['T'] = 'A'; rna_complement['u'] = 'a'; rna_complement['U'] = 'A'; rna_complement['r'] = 'y'; rna_complement['R'] = 'Y'; rna_complement['y'] = 'r'; rna_complement['Y'] = 'R'; rna_complement['w'] = 'w'; rna_complement['W'] = 'W'; rna_complement['s'] = 's'; rna_complement['S'] = 'S'; rna_complement['m'] = 'k'; rna_complement['M'] = 'K'; 
rna_complement['k'] = 'm'; rna_complement['K'] = 'M'; rna_complement['h'] = 'd'; rna_complement['H'] = 'D'; rna_complement['b'] = 'v'; rna_complement['B'] = 'V'; rna_complement['v'] = 'b'; rna_complement['V'] = 'B'; rna_complement['d'] = 'h'; rna_complement['D'] = 'H'; rna_complement['n'] = 'n'; rna_complement['N'] = 'N'; } int is_idprefix(char *s) { return (isalnum(s[0]) && isalnum(s[1]) && (s[2] == ':' || (isalnum(s[2]) && (s[3] == ':' || (isalnum(s[3]) && s[4] == ':'))))); } void *my_malloc(int size) { void *buf; if ((buf = malloc(size)) == NULL) { fprintf(stderr, "Memory Error: Ran out of memory.\n"); prog_exit(1); } return buf; } void *my_realloc(void *ptr, int size) { void *buf; if ((buf = realloc(ptr, size)) == NULL) { fprintf(stderr, "Memory Error: Ran out of memory.\n"); prog_exit(1); } return buf; } static int mycasecmp(char *s, char *t) { int diff; for ( ; !(diff = toupper(*s) - toupper(*t)) && *s; s++,t++) ; return diff; } static int myncasecmp(char *s, char *t, int n) { int diff, i; diff = 0; for (i=0; i < n && !(diff = toupper(*s) - toupper(*t)) && *s; s++,t++,i++) ; return diff; } static char *mystrdup(char *s) { char *temp; temp = (char *) my_malloc(strlen(s)+1); return (temp == NULL ? NULL : strcpy(temp, s)); } static char *mystrdup2(char *s, char *t) { char *temp; if ((temp = (char *) my_malloc(t - s + 1)) == NULL) return NULL; memcpy(temp, s, t - s); temp[t - s] = '\0'; return temp; } static int myatoi(char *s) { int num; while (isspace(*s)) s++; for (num=0; isdigit(*s); s++) { num *= 10; num += *s - '0'; } return num; } static int isa_dir(char *directory) { struct stat sbuf; return (stat(directory, &sbuf) >= 0 && (sbuf.st_mode & S_IFMT) == S_IFDIR); }