seqaln.h

Go to the documentation of this file.
00001 /* declarations common to all dynamic programming molecular biology modules */
00002 #ifndef SEQALN_H
00003 #define SEQALN_H
00004 
00005 #ifndef MAXLINE
00006 #define MAXLINE 512       /* default, used only if MAXLINE is not already defined; NOTE(review): presumably a line-buffer size -- not used in this header, confirm */
00007 #endif
00008 
00009 #ifndef MAXPATH
00010 #define MAXPATH 2000      /* default, used only if MAXPATH is not already defined; NOTE(review): presumably a path-buffer size -- not used in this header, confirm */
00011 #endif
00012 
00013 #ifndef MAXSEQNAME
00014 #define MAXSEQNAME 1024   /* default size of the name[] arrays in SEQALN_SEQUENCE and SEQALN_PROFILE */
00015 #endif
00016 
00017 #ifndef stdout
00018 #include <stdio.h>  // there are pure C programs not C++
00019 #endif
00020 
00021 static char *SEQALN_VERSION="1.20"; /* Version string. `static' in a header: every including file gets its own copy. NOTE(review): non-const pointer to a string literal -- must not be written through; consider const */
00022 
00023 /* Type of file that input is obtained from (stored in SEQALN_SEQUENCE.ftype) */
00024 typedef enum { SEQ, MAT, PRO, DIS, DB, PIPE, SOCKET } SEQALN_FTYPE;
00025 
00026 /* Type (file format) of the database, when a sequence file used for
00027  * input is a database (stored in SEQALN_SEQUENCE.dbtype) */
00028 typedef enum { UNKNOWN, GENBANK, NEWAT, FASTA, PIR, SWISSPROT }
00029    SEQALN_DBTYPE;
00030 
00031 /* The io struct holds the three streams used for input and output */
00032 struct SEQALN_IO {
00033    FILE *STDIN;          /* Stream that terminal input is read from */
00034    FILE *STDOUT;         /* Stream that stdout output is written to */
00035    FILE *STDERR;         /* Stream that stderr output is written to */
00036 };
00037 
00038 /* SEQALN_TRACE is one node of a traceback list: nodes are chained through `next' and each carries an (i,j) pair */
00039 typedef struct SEQALN_LSTRUCT { struct SEQALN_LSTRUCT *next; int i,j; }
00040    SEQALN_TRACE;
00041 
00042 /* A score together with one (i,j) location; used when one score occurs at multiple locations */
00043 typedef struct { int score, i, j; } SEQALN_SIJ;
00044 
00045 /* One sequence, together with bookkeeping about the input it is read from.
00046    Sequences begin in position 1 in seq[] and code[];
00047         position 0 corresponds to row or column zero in the scoring matrix,
00048         which holds initial gap scores.
00049    To print a sequence, use for example `printf("%s\n",&seq->seq[1]);'
00050 */
00051 struct SEQALN_SEQUENCE {
00052    int  len;             /* Sequence length       */
00053    char name[MAXSEQNAME];/* Sequence name         */
00054    int reverse;          /* The sequence has been reversed   */
00055    int complement;       /* The sequence has been complemented  */
00056    int revcomp;   /* This is the reverse complement of the real seq   */
00057    int xlated;    /* Whether nucleotide->protein translation was done */
00058    char *seq;     /* Letters of the sequence; buffer of size len+2    */
00059    char *code;    /* Letter numbers [0..26] of the sequence           */
00060    int  bstart, bend; /* Start, stop locations of band sequence (for band)*/
00061    int  fstart, fend; /* Start, stop locations of ORF      */
00062    double *freq;  /* Frequency distribution of letters (for p-value)  */
00063    char *fname;   /* Name of the file the sequence(s) are obtained from */
00064    FILE *fptr;    /* Pointer to current location in database          */
00065    long fposn;    /* Offset from start of file for next sequence      */
00066    SEQALN_FTYPE ftype; /* Type of file from which input is obtained */
00067    SEQALN_DBTYPE dbtype; /* Type of database, if one is used for input*/
00068    int  maxlen;   /* Length allocated for this buffer (max seq len)   */
00069    int  resized;  /* Denotes whether maxlen changed for last seq read */
00070  };
00071 
00072 /* SEQALN_PROROW is one row of a profile: an array of 26 ints */
00073 typedef int SEQALN_PROROW[26];
00074 
00075 /*
00076    A profile is an array of 26 columns and `len' rows.
00077 */
00078 struct SEQALN_PROFILE {
00079    int len;               /* Number of rows in the profile */
00080    char name[MAXSEQNAME]; /* Profile name */
00081    SEQALN_PROROW *profile;/* The rows; each row is a SEQALN_PROROW (26 ints) */
00082    int alpha,beta;        /* NOTE(review): presumably gap score parameters, as in SEQALN_CONSTANTS -- confirm */
00083    char *fname;           /* Name of file containing profile */
00084    FILE *fptr;            /* Offset into profile file [not used] */
00085  };
00086 
00087 enum PTYPE { GLOBAL, FIT, OVER, LOCAL };  /* Program type (SEQALN_CONSTANTS.ptype) */
00088 enum STYPE { MATRIX, NONMATRIX, PROFILE };  /* Score function type (SEQALN_CONSTANTS.stype) */
00089 enum MTYPE { DISTANCE, SIMILARITY }; /* SEQALN_CONSTANTS.mtype; NOTE(review): presumably whether scores are distances or similarities -- confirm */
00090           /* Full / self-repeat / tandem-repeat / band matrix (SEQALN_CONSTANTS.btype) */
00091 enum BTYPE { FULL, SELF, TANDEM, BAND };
00092 enum MEMUSE { LINEAR, SQUARE };/* How much memory we use (SEQALN_CONSTANTS.memuse) */
00093 enum STATS {NONE, PVALUE };     /* Type of statistics to compute (SEQALN_CONSTANTS.stats) */
00094 
00095 /*
00096    These are all the scoring constants used by the software;
00097         once established for comparing two sequences, they are expected to
00098         remain unchanged.
00099 */
00100 struct SEQALN_CONSTANTS {
00101    int alpha, beta;      /* Gap score parameters, positive */
00102    int csub;     /* Limit for printing conservative substitutions */
00103    int revcomp;   /* 0: seq1 is normal; 1: use seq1's reverse compl. */
00104    int cutoff;    /* Minimum score when reporting multiple scores   */
00105    int naligns;   /* No. of multiple alignments requested             */
00106    int rptalign;  /* Whether to report alignment                      */
00107    int rptmat;     /* Whether to report scoring matrix dump            */
00108    int rptscore;  /* Whether to report score                          */
00109    int rptptable; /* Table of score/obs. prob/est. prob for p-value   */
00110    int rpttrace;  /* Whether to report traceback list                 */
00111    int rptboth;   /* Whether to perform both upper & lower tracebacks */
00112    int verbose;   /* Whether to print in verbose mode                 */
00113    int width;           /* No. of chars in a line of seq. alignment         */
00114         /* NOTE: declared without a prototype; a problem for a C++ compiler when used for pdelta(int, char, void*) */
00115    int (*deltafn)(); /* used for delta(a,b) scoring of 2 chars  */
00116    int traceupper;   /* Preference to take in tracebacks */
00117                      /* 0 ==> lower envelope; non-0 ==> upper envelope*/
00118    int envelope;  /* =1 if tracing upper & lower alignment envelopes  */
00119    int flip;            /* To flip (toggle) traceupper                      */
00120    int showmmatch; /* Print '*' (mismatch) in align., not '|' (match) */
00121    /* Profile specific */
00122    int palpha, pbeta;    /* Gap scores for indels in a profile */
00123    /* Overlap specific */
00124    int overout;     /* Types of overlap alignments to output: */
00125                   /*  0:highest score; 1:last row; 2:last col. 3:both */
00126    /* P-value specific */
00127    int nsims;      /* # of simulations to perform  */
00128    int ndeclumps;        /* # of declumps for each simulation  */
00129    int allscores;  /* 1: use all simulation scores; 0: use middle 80% */
00130    double gamma;   /* Value of gamma           */
00131    double p;       /* Value of p               */
00132    char *randfile; /* File with GFSR random number generator state */
00133    /* Non-matrix specific */
00134    int match, mismatch;  /* Match/mismatch non-matrix score parameters */
00135    /* Matrix specific */
00136    int xmatrix[32][32]; /* Scoring matrix; declared 32x32, use 26x26 */
00137    int offset;                /* Offset for elements of matrix */
00138    /* Tandem repeat specific */
00139    int window;           /* Pattern stride for wraparound local similarity */
00140    /* Band specific */
00141    int center, Delta1, Delta2;  /* center & Delta on each side for 2nd seq.  */
00142    /* Miscellaneous parameters */
00143    int Debug;            /* Debug flag, for debugging output                 */
00144         enum PTYPE ptype;     /* Program type: GLOBAL, FIT, OVER or LOCAL */
00145         enum STYPE stype;     /* Score function type: MATRIX, NONMATRIX or PROFILE */
00146         enum MTYPE mtype;     /* DISTANCE or SIMILARITY */
00147         enum BTYPE btype;     /* Full / self-repeat / tandem-repeat / band matrix */
00148         enum MEMUSE memuse;   /* How much memory we use: LINEAR or SQUARE */
00149         enum STATS stats;     /* Type of statistics to compute: NONE or PVALUE */
00150 };
00151 
00152 /*
00153    This structure holds intermediate and final results of scoring.
00154 */
00155 struct SEQALN_RESULTS {
00156    int **S;              /* Scoring matrix size=colxrow                     */
00157         int S_size;  // maximum allocated size of S 2d-array
00158         // (an earlier, now disabled, S_size was described as max(seq1len)*max(seq2len) so far)
00159    int *E;               /* Indels from one direction, only for declumping   */
00160    int *F;               /* Indels from other direction                      */
00161    int *best;  /* Best score limit for any row or column           */
00162    int bestS;   /* Maximum score                                    */
00163    int bestSi, bestSj;   /* Coordinates in S of the maximum score (bestS) */
00164    int naligns;  /* No. of multiple alignments left, when requested  */
00165    int jstart;  /* Starting location in scoring matrix, for band    */
00166    int jmax;   /* Ending location in scoring matrix, for band      */
00167    int maxentry;  /* High score for each match                        */
00168    SEQALN_TRACE *trace;  /* Current traceback list                   */
00169    char **aligned; /* For reporting multiple alignments -- need 1 bit */
00170    char *linbits; /* For reporting multiple alignments with wrapping  */
00171    SEQALN_SIJ *listsij; /* For holding (i,j) pairs for multiple alignments  */
00172    int listcount;        /* Count of multiple alignments              */
00173    struct SEQALN_PVALUE *pvalue; /* Holds p-value related information */
00174    /* Information for alignment outputs. */
00175    int start1,start2; /* Starting positions of alignment in seq1, seq2 */
00176    char *align0;  /* Alignment string for match/mismatch of seqs      */
00177    char *align1;  /* Alignment string for 1st sequence                */
00178    char *align2;  /* Alignment string for 2nd sequence                */
00179    int align_len; /* Length of each of the above 3 alignment strings  */
00180         int nogap_len;  /* NOTE(review): presumably the alignment length not counting gaps -- confirm */
00181         int max_align_len; /* NOTE(review): presumably the allocated capacity of align0/align1/align2 -- confirm */
00182         int collen;  /* current length of column */
00183         int rowlen;  /* intended to save repeated allocation and deallocation */
00184         /* the next four are for looking at the distribution of the match */
00185         int match_freq1[26]; // residue frequency in the matched region
00186         int match_freq2[26]; // [Residue_char - 'A'] is the index number
00187         int iden_count;  // count of identical residues
00188         int siml_count;  // count of similar residues
00189         int end1, end2;    /* End position of the alignment in each sequence */
00190 };
00191 
00192 /*
00193    This structure holds intermediate and final results unique to p-value
00194    software. SEQALN_RESULTS refers to it only through its `pvalue' pointer, so
00195    the results structure doesn't have to hold them if p-value software is not used.
00196 */
00197 struct SEQALN_PVALUE {
00198    double p;             /* value for p in lambda = gamma * m * n * p^t      */
00199    double gamma;         /* value for gamma  in lambda = gamma * m * n * p^t */
00200    double pvalue;        /* value for p-value                                */
00201    double lambda;        /* lambda = gamma * seq1len * seq2len * p^score     */
00202    double coeff;         /* if -0.99 < coeff < 0.99, unreliable parameters   */
00203    int nsims;            /* number of simulations requested                  */
00204    int ndeclumps;        /* number of declumps for each simulation           */
00205    int sims2go;          /* simulations left to perform                      */
00206    int declumps2go;      /* declumps left to perform for this simulation     */
00207    int seq1len;          /* length to use for `m', the size of sequence 1    */
00208    double seq1dist[26];  /* letter frequency distribution of sequence 1      */
00209    int seq2len;          /* length to use for `n', the size of sequence 2    */
00210    double seq2dist[26];  /* letter frequency distribution of sequence 2      */
00211    int *hist;            /* histogram of scores, for linear regression       */
00212    int nhist;            /* number of elements in histogram                  */
00213    int npvals;           /* number of p-values computed w/ declumping so far */
00214 };
00215 
00216 /*
00217    INLINE expands to `inline' where the compiler supports it, and to
00218    nothing otherwise.
00219 
00220    The Makefile-supplied HAS_INLINE overrides the compiler's default:
00221    HAS_INLINE==1 selects `inline', any other value selects nothing.
00222    Without HAS_INLINE, `inline' is selected for C++ and for C99 or later.
00223    __STDC__==1 alone is not a usable test: strict C89 compilers define it
00224    too but have no `inline' keyword.  Choosing exactly one definition also
00225    avoids redefining INLINE with a different expansion.
00226 */
00227 #if defined(HAS_INLINE)
00228 #if HAS_INLINE==1
00229 #define INLINE inline
00230 #else
00231 #define INLINE
00232 #endif
00233 #elif defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
00234 #define INLINE inline
00235 #else
00236 /* For C89 and non-ANSI cc compilers, don't expect `inline' support */
00237 #define INLINE
00238 #endif
00239 
00240 #endif

Generated on Wed Aug 10 11:56:53 2011 for Softwares from Orpara by  doxygen 1.5.6