#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "psublast.h"
#include "sim4.h"
#include "sim4b1.h"
#include "Xtend1.h"
#include "align.h"
#include "splice.h"
Classes | |
| struct | hash_node |
Defines | |
| #define | EXTEND_FW (command.acc_flag?Xextend_fw:extend_fw) |
| #define | EXTEND_BW (command.acc_flag?Xextend_bw:extend_bw) |
| #define | SLIDE_INTRON(x) (((x)==TRUE)?sync_slide_intron:slide_intron) |
| #define | HASH_SIZE 32767 |
| #define | GEN_LOG4_ENTRIES 45 |
| #define | CDNA_LOG4_ENTRIES 25 |
Functions | |
| static void | merge (Exon **, Exon **) |
| static bool | get_sync_flag (Exon *, Exon *, int) |
| static void | slide_intron (int w, Exon **, const char *, const char *) |
| static void | sync_slide_intron (int w, Exon **, const char *, const char *) |
| static void | wobble (Exon **, Exon **, const char *, const char *, const char *seq1) |
| static Exon * | bmatch (const char *, const char *, int, int, int, int) |
| static Exon * | fmatch (const char *, const char *, int, int, int, int) |
| static void | compact_list (Exon **Lblock, Exon **Rblock) |
| static int | resolve_overlap (Exon *, Exon *, const char *) |
| static int | greedy (const char *, const char *, int, int, int, int, Exon **, Exon **) |
| static int | extend_bw (const char *, const char *, int, int, int, int, int *, int *) |
| static int | extend_fw (const char *, const char *, int, int, int, int, int *, int *) |
| static void | pluri_align (int *, int *, Exon *, struct edit_script_list **) |
| static void | get_stats (Exon *, sim4_stats_t *) |
| static int | get_edist (int, int, int, int, const char *, const char *) |
| static int | get_msp_threshold (int len1, int len2) |
| static int | find_log_entry (long *log4s, int n, int len, int offset) |
| static Exon * | new_exon (int, int, int, int, int, int, int, Exon *) |
| static void | add_word (int, int) |
| static void | extend_hit (int, int, const char *const, const char *const, int, int, int) |
| static void | sort_msps (void) |
| static void | heapify (int, int) |
| static int | smaller (int, int) |
| static void | search (const char *, const char *, int, int, int) |
| static int | link_msps (Msp_ptr *msp, int, int) |
| static void | msp2exons (Msp_ptr *, int, const char *, const char *) |
| static void | free_msps (Msp_ptr **, int *) |
| static void | exon_cores (const char *s1, const char *s2, int len1, int len2, int offset1, int offset2, int flag, int in_W, int in_K, int type) |
| static void | relink (Msp_ptr *, int, int, int, int, int, const char *, const char *) |
| static Exon * | find_previous (Exon *, Exon *) |
| static int | dispatch_find_ends (int, int, int *, int *, edit_script_list *, int, int, int) |
| static int | find_ends (edit_script_list *, int) |
| static void | script_flip_list (edit_script_list **) |
| static bool | get_match_quality (Exon *, Exon *, sim4_stats_t *, int) |
| static void | check_consistency_intron_ori (Exon *, int, const char *) |
| edit_script_list * | SIM4 (const kzseq &so1, const kzseq &so2, const sim4Args &cmd, int *dist_ptr, Exon **Exons, sim4_stats_t *st) |
| void | bld_table (const char *s, int len, int in_W, int type) |
| void | complement_exons (Exon **left, int M, int N) |
| void | print_exons (const Exon *left) |
| void | print_pipmaker_exons (Exon *exons, edit_script_list *aligns, const char *gene, int from, int to, int M, int N, const char *seq1, const char *seq2, int match_ori) |
| void | flip_list (Exon **left, Exon **right) |
| void | link_to_data_list (Pointer data, ValNodePtr *head, ValNodePtr *prev) |
| void | ValNodeFreeData (ValNodePtr data_list) |
| int | good_ratio (int length) |
| void | free_align (edit_script_list *aligns) |
| void | free_list (Exon *left) |
| void | free_table (void) |
Variables | |
| static const char | rcsid [] = "$Id: sim4b1.cpp,v 1.1.1.1 2007-07-13 03:44:06 kzhou Exp $" |
| const char * | seq1 |
| const char * | seq2 |
| int | M |
| int | N |
| int | encoding [NACHARS] |
| coords | last_GT |
| coords | last_CT |
| coords | last_AG |
| coords | last_AC |
| comparisonType | file_type |
| sim4Args | command |
| static int | numMSPs |
| static int | K |
| static int | W |
| static int | X |
| static int | G_score |
| static int | C_score |
| static int * | diag_lev |
| static Msp_ptr | msp_list |
| static Msp_ptr * | msp |
| static Exon_ptr | exon_list |
| static struct hash_node * | phashtab [HASH_SIZE+1] |
| static struct hash_node ** | hashtab |
| static int | mask |
| static int * | next_pos |
| static int * | pnext_pos |
| static long | genomic_log4s [] |
| static long | cDNA_log4s [] |
| #define CDNA_LOG4_ENTRIES 25 |
Referenced by get_msp_threshold().
| #define EXTEND_BW (command.acc_flag?Xextend_bw:extend_bw) |
Referenced by SIM4().
| #define EXTEND_FW (command.acc_flag?Xextend_fw:extend_fw) |
Referenced by SIM4().
| #define GEN_LOG4_ENTRIES 45 |
Referenced by get_msp_threshold().
| #define HASH_SIZE 32767 |
Referenced by add_word(), bld_table(), and exon_cores().
| #define SLIDE_INTRON | ( | x | ) | (((x)==TRUE)?sync_slide_intron:slide_intron) |
Referenced by SIM4().
| static void add_word | ( | int | ecode, | |
| int | pos | |||
| ) | [static] |
References ckalloc(), hash_node::ecode, HASH_SIZE, hash_node::link, next_pos, and hash_node::pos.
Referenced by bld_table().
| void bld_table | ( | const char * | s, | |
| int | len, | |||
| int | in_W, | |||
| int | type | |||
| ) |
References add_word(), ckalloc(), hash_node::ecode, encoding, HASH_SIZE, INIT, L, mask, NACHARS, next_pos, PERM, and pnext_pos.
Referenced by exon_cores(), and main().
| static Exon * bmatch | ( | const char * | s1, | |
| const char * | s2, | |||
| int | len1, | |||
| int | len2, | |||
| int | offset1, | |||
| int | offset2 | |||
| ) | [static] |
| static void check_consistency_intron_ori | ( | Exon * | exons, | |
| int | match_ori, | |||
| const char * | gene | |||
| ) | [static] |
References BWD, fatal(), FWD, exon::next_exon, exon::ori, and exon::to1.
Referenced by print_pipmaker_exons().
| static void compact_list | ( | Exon ** | Lblock, | |
| Exon ** | Rblock | |||
| ) | [static] |
References exon::edist, exon::from2, exon::length, MAX_INTERNAL_GAP, exon::next_exon, P, exon::to1, exon::to2, and W.
Referenced by SIM4().
| void complement_exons | ( | Exon ** | left, | |
| int | M, | |||
| int | N | |||
| ) |
References fatal(), flip_list(), exon::from1, exon::from2, exon::next_exon, exon::ori, exon::to1, and exon::to2.
Referenced by main().
| static int dispatch_find_ends | ( | int | from, | |
| int | to, | |||
| int * | From, | |||
| int * | To, | |||
| edit_script_list * | aligns, | |||
| int | M, | |||
| int | N, | |||
| int | match_ori | |||
| ) | [static] |
References BWD, EST_GEN, file_type, find_ends(), FREE_BOTH_ENDS, FREE_END, FREE_START, FWD, edit_script_list::len1, edit_script_list::len2, edit_script_list::next_script, edit_script_list::offset1, edit_script_list::offset2, OK, and script_flip_list().
Referenced by print_pipmaker_exons().
| static void exon_cores | ( | const char * | s1, | |
| const char * | s2, | |||
| int | len1, | |||
| int | len2, | |||
| int | offset1, | |||
| int | offset2, | |||
| int | flag, | |||
| int | in_W, | |||
| int | in_K, | |||
| int | type | |||
| ) | [static] |
References bld_table(), ckalloc(), DEFAULT_C, diag_lev, EST_GEN, fatal(), file_type, exon::flag, exon::from1, exon::from2, GEN_EST, get_msp_threshold(), HASH_SIZE, K, exon::length, hash_node::link, link_msps(), min, msp2exons(), exon::next_exon, msp::next_msp, next_pos, numMSPs, msp::pos1, msp::pos2, msp::prev, search, sort_msps(), TEMP, exon::to1, exon::to2, upper(), W, and sim4Args::weight.
Referenced by SIM4().
| static int extend_bw | ( | const char * | s1, | |
| const char * | s2, | |||
| int | m, | |||
| int | n, | |||
| int | offset1, | |||
| int | offset2, | |||
| int * | line1, | |||
| int * | line2 | |||
| ) | [static] |
References ckalloc(), good_ratio(), and upper().
| static int extend_fw | ( | const char * | s1, | |
| const char * | s2, | |||
| int | m, | |||
| int | n, | |||
| int | offset1, | |||
| int | offset2, | |||
| int * | line1, | |||
| int * | line2 | |||
| ) | [static] |
References ckalloc(), good_ratio(), and upper().
| static void extend_hit | ( | int | pos1, | |
| int | pos2, | |||
| const char * const | s1, | |||
| const char * const | s2, | |||
| int | len1, | |||
| int | len2, | |||
| int | in_W | |||
| ) | [static] |
| static int find_ends | ( | edit_script_list * | head, | |
| int | j0 | |||
| ) | [static] |
| static int find_log_entry | ( | long * | log4s, | |
| int | n, | |||
| int | len, | |||
| int | offset | |||
| ) | [static] |
| static Exon * fmatch | ( | const char * | s1, | |
| const char * | s2, | |||
| int | len1, | |||
| int | len2, | |||
| int | offset1, | |||
| int | offset2 | |||
| ) | [static] |
| void free_align | ( | edit_script_list * | aligns | ) |
References Free_script(), edit_script_list::next_script, and edit_script_list::script.
| void free_list | ( | Exon * | left | ) |
| void free_table | ( | void | ) |
| static int get_edist | ( | int | f1, | |
| int | f2, | |||
| int | t1, | |||
| int | t2, | |||
| const char * | seq1, | |||
| const char * | seq2 | |||
| ) | [static] |
Referenced by msp2exons().
| static bool get_match_quality | ( | Exon * | lblock, | |
| Exon * | rblock, | |||
| sim4_stats_t * | st, | |||
| int | N | |||
| ) | [static] |
References exon::edist, FALSE, exon::from2, sim4_stats::icoverage, max, exon::next_exon, exon::to1, exon::to2, and TRUE.
Referenced by SIM4().
| static int get_msp_threshold | ( | int | len1, | |
| int | len2 | |||
| ) | [static] |
References CDNA_LOG4_ENTRIES, cDNA_log4s, find_log_entry(), GEN_LOG4_ENTRIES, and genomic_log4s.
Referenced by exon_cores().
| static void get_stats | ( | Exon * | lblock, | |
| sim4_stats_t * | st | |||
| ) | [static] |
References sim4_stats::fcoverage, exon::from2, sim4_stats::icoverage, sim4_stats::internal, exon::length, sim4_stats::marginals, sim4_stats::mult, N, exon::next_exon, exon::to1, exon::to2, and while().
Referenced by SIM4().
| static bool get_sync_flag | ( | Exon * | , | |
| Exon * | , | |||
| int | ||||
| ) | [static] |
References FALSE, exon::from2, exon::next_exon, exon::to1, exon::to2, and TRUE.
Referenced by SIM4().
| int good_ratio | ( | int | length | ) |
References sim4Args::cutoff, P, and W.
Referenced by extend_bw(), extend_fw(), Xextend_bw(), and Xextend_fw().
| static int greedy | ( | const char * | s1, | |
| const char * | s2, | |||
| int | m, | |||
| int | n, | |||
| int | offset1, | |||
| int | offset2, | |||
| Exon ** | lblock, | |||
| Exon ** | rblock | |||
| ) | [static] |
References ckalloc(), DELETE, INSERT, link_to_data_list(), max, new_exon(), P, SUBSTITUTE, ValNodeFreeData(), and W.
Referenced by SIM4().
| static void heapify | ( | int | i, | |
| int | last | |||
| ) | [static] |
| static int link_msps | ( | Msp_ptr * | msp, | |
| int | numMSPs, | |||
| int | H | |||
| ) | [static] |
References best, sim4Args::DRANGE, L, msp::len, MIN_INTRON, MININT, msp::pos1, msp::pos2, msp::prev, msp::score, and msp::Score.
Referenced by exon_cores(), and relink().
| void link_to_data_list | ( | Pointer | data, | |
| ValNodePtr * | head, | |||
| ValNodePtr * | prev | |||
| ) |
References ckalloc(), ValNode::data, and ValNode::next.
Referenced by greedy(), Xextend_bw(), and Xextend_fw().
| static void merge | ( | Exon ** | , | |
| Exon ** | ||||
| ) | [static] |
References exon::edist, exon::flag, exon::from1, exon::from2, exon::length, max, min, exon::next_exon, P, exon::to1, exon::to2, and W.
Referenced by SIM4().
| static void msp2exons | ( | Msp_ptr * | msp, | |
| int | last_msp, | |||
| const char * | s1, | |||
| const char * | s2 | |||
| ) | [static] |
References exon::edist, exon::from1, exon::from2, get_edist(), L, msp::len, MATCH, max, MAX_INTERNAL_GAP, min, MISMATCH, new_exon(), P, msp::pos1, msp::pos2, msp::prev, msp::score, exon::to1, and exon::to2.
Referenced by exon_cores(), and relink().
| static Exon * new_exon | ( | int | f1, | |
| int | f2, | |||
| int | t1, | |||
| int | t2, | |||
| int | len, | |||
| int | edist, | |||
| int | flag, | |||
| Exon * | next | |||
| ) | [static] |
References ckalloc(), exon::edist, exon::flag, exon::from1, exon::from2, exon::length, exon::next_exon, exon::to1, and exon::to2.
Referenced by bmatch(), fmatch(), greedy(), msp2exons(), and SIM4().
| static void pluri_align | ( | int * | dist_ptr, | |
| int * | num_matches, | |||
| Exon * | lblock, | |||
| struct edit_script_list ** | Aligns | |||
| ) | [static] |
References align_get_dist(), align_path(), ckalloc(), Condense_both_Ends(), DELETE, exon::from1, exon::from2, INSERT, M, N, edit_script::next, exon::next_exon, edit_script_list::next_script, edit_script::num, edit_script_list::offset1, edit_script::op_type, P, edit_script_list::script, seq1, seq2, SUBSTITUTE, and exon::to1.
Referenced by SIM4().
| void print_exons | ( | const Exon * | left | ) |
References EST_GEN, fatal(), file_type, exon::from1, exon::from2, exon::match, exon::next_exon, exon::ori, exon::to1, and exon::to2.
Referenced by main().
| void print_pipmaker_exons | ( | Exon * | exons, | |
| edit_script_list * | aligns, | |||
| const char * | gene, | |||
| int | from, | |||
| int | to, | |||
| int | M, | |||
| int | N, | |||
| const char * | seq1, | |||
| const char * | seq2, | |||
| int | match_ori | |||
| ) |
References BWD, check_consistency_intron_ori(), dispatch_find_ends(), fatal(), flip_list(), FREE_BOTH_ENDS, FREE_END, FREE_START, exon::from1, FWD, exon::next_exon, OK, exon::to1, and while().
Referenced by main().
| static void relink | ( | Msp_ptr * | in_msp, | |
| int | in_numMSPs, | |||
| int | H, | |||
| int | offset1, | |||
| int | offset2, | |||
| int | flag, | |||
| const char * | s1, | |||
| const char * | s2 | |||
| ) | [static] |
References exon::flag, exon::from1, exon::from2, exon::length, link_msps(), msp2exons(), exon::next_exon, exon::to1, and exon::to2.
Referenced by SIM4().
| static void script_flip_list | ( | edit_script_list ** | left | ) | [static] |
| static void search | ( | const char * | s1, | |
| const char * | s2, | |||
| int | len1, | |||
| int | len2, | |||
| int | in_W | |||
| ) | [static] |
| edit_script_list* SIM4 | ( | const kzseq & | so1, | |
| const kzseq & | so2, | |||
| const sim4Args & | cmd, | |||
| int * | dist_ptr, | |||
| Exon ** | Exons, | |||
| sim4_stats_t * | st | |||
| ) |
References sim4Args::acc_flag, sim4Args::ali_flag, bmatch(), sim4Args::C, C_score, compact_list(), END_SIG, exon_cores(), EXTEND_BW, EXTEND_FW, FALSE, find_previous(), flip_list(), fmatch(), free_align(), free_list(), free_msps(), exon::from1, exon::from2, G_score, get_match_quality(), get_stats(), get_sync_flag(), greedy(), I, sim4Args::K, exon::length, M, max, MAX_GRINIT, merge(), min, N, new_exon(), exon::next_exon, sim4_stats::nmatches, numMSPs, exon::ori, P, PERM, pluri_align(), sim4Args::poly_flag, kzseq::polyATsize(), coordinates::pos1, coordinates::pos2, relink(), resolve_overlap(), seq1, seq2, kzseq::seqchar(), kzseq::seqlen(), sim4Args::set_C, sim4Args::set_K, SLIDE_INTRON, START_SIG, TEMP, exon::to1, exon::to2, sim4Args::W, W, sim4Args::X, and X.
Referenced by main().
| static void slide_intron | ( | int | w, | |
| Exon ** | lblock, | |||
| const char * | seq1, | |||
| const char * | seq2 | |||
| ) | [static] |
References BOTH, exon::from1, exon::from2, exon::length, min, exon::next_exon, exon::ori, spliced::score, splice(), exon::to1, exon::to2, type, wobble(), spliced::xe, spliced::xs, spliced::ye, and spliced::ys.
| static int smaller | ( | int | i, | |
| int | j | |||
| ) | [static] |
| static void sort_msps | ( | void | ) | [static] |
| static void sync_slide_intron | ( | int | w, | |
| Exon ** | lblock, | |||
| const char * | seq1, | |||
| const char * | seq2 | |||
| ) | [static] |
References BOTH, BWD, fatal(), exon::from1, exon::from2, FWD, exon::length, min, new_splice(), exon::next_exon, exon::ori, spliced::score, splice(), exon::to1, exon::to2, wobble(), spliced::xe, spliced::xs, spliced::ye, and spliced::ys.
| void ValNodeFreeData | ( | ValNodePtr | data_list | ) |
References ValNode::data, and ValNode::next.
Referenced by greedy(), Xextend_bw(), and Xextend_fw().
| static void wobble | ( | Exon ** | t0, | |
| Exon ** | t1, | |||
| const char * | donor, | |||
| const char * | acceptor, | |||
| const char * | seq1 | |||
| ) | [static] |
Referenced by slide_intron(), and sync_slide_intron().
long cDNA_log4s[] [static] |
Initial value:
{1, 1, 2, 4, 7, 11, 19, 32, 52, 86,
141, 231, 380, 624, 1024, 1680, 2756, 4522, 7419, 12173,
19972, 32768, 53761, 88204, 144715
}
Referenced by get_msp_threshold().
int* diag_lev [static] |
Referenced by exon_cores(), and extend_hit().
| int encoding[NACHARS] |
Referenced by bld_table(), and splice().
| comparisonType file_type |
Referenced by dispatch_find_ends(), exon_cores(), main(), operator<<(), and print_exons().
long genomic_log4s[] [static] |
Initial value:
{1, 2, 3, 5, 9, 15, 26, 42, 70, 114,
188, 309, 507, 832, 1365, 1365, 2240, 2240, 3675, 6029,
9892, 16231, 26629, 43690, 71681,
117606, 192953, 316573, 519392, 852152,
1398101, 2293823, 3763409, 6174516, 10130347,
16620564, 27268873, 44739242, 73402365, 120429110,
197584514, 324171126, 531858072, 872603963, 1431655765
}
Referenced by get_msp_threshold().
int K [static] |
Referenced by exon_cores(), and extend_hit().
Referenced by Xextend_bw().
Referenced by Xextend_bw().
Referenced by Xextend_fw().
Referenced by Xextend_fw().
| int M |
Referenced by align_path(), pluri_align(), print_align(), and SIM4().
int mask [static] |
Referenced by bld_table().
| int N |
Referenced by ChisquareAndRatio(), get_stats(), main(), pluri_align(), print_align(), and SIM4().
int* next_pos [static] |
Referenced by add_word(), bld_table(), and exon_cores().
int numMSPs [static] |
Referenced by exon_cores(), extend_hit(), SIM4(), and sort_msps().
int * pnext_pos [static] |
Referenced by bld_table(), and free_table().
const char rcsid[] = "$Id: sim4b1.cpp,v 1.1.1.1 2007-07-13 03:44:06 kzhou Exp $" [static] |
| const char* seq1 |
Referenced by main(), mgsaln(), pluri_align(), print_align_blk(), print_align_lat(), rsnake(), SIM4(), snake(), and storeBadInTable().
| const char * seq2 |
Referenced by main(), mgsaln(), pluri_align(), print_align_blk(), print_align_lat(), rsnake(), SIM4(), and snake().
int W [static] |
Referenced by compact_list(), exon_cores(), good_ratio(), greedy(), merge(), Linkbox::repairEnds(), and SIM4().
int X [static] |
Referenced by extend_hit(), SIM4(), splice_acceptor(), splice_acceptor_uni(), splice_donor(), and splice_donor_uni().
1.5.6