ARB
ali_main.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ali_main.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ali_global.hxx"
12 
13 
14 #define HELIX_PAIRS "helix_pairs"
15 #define HELIX_LINE "helix_line"
16 #define ALI_CONSENSUS "ALI_CON"
17 #define ALI_ERROR "ALI_ERR"
18 #define ALI_INTERVALS "ALI_INT"
19 
21 
22 
23 void message(char *errortext);
24 
25 
26 static const char *ali_version =
27  "\nALIGNER V2.0 (Boris Reichel 5/95)\n";
28 
29 static const char *ali_man_line[] = {
30  "Parameter:",
31  "-s<species> aligne the sequence of <species>",
32  "-f<species_1>,...,<species_n>[;<extension_1>,...,<extension_k>] use specified family and family extension",
33  "-P<pt_server> use the PT_server <pt_server>",
34  "[-D<db_server>] use the DB_server <db_server>",
35  "[-nx] not exclusive mode (profile)",
36  "[-ms] mark species (profile)",
37  "[-mf] mark used family (profile)",
38  "[-mfe] mark used family extension (profile)",
39  "[-mgf] multi gap factor (0.1) (profile)",
40  "[-if] insert factor (2.0) (profile)",
41  "[-mif] multi insert factor (0.5) (profile)",
42  "[-m] mark all (-m == -ms -mf -mfe) (profile)",
43  "[-d<filename>] use <filename> for default values",
44  "[-msubX1,X2,...,X25] use X1,...,X25 for the substitute matrix: (profile)",
45  " a c g u -",
46  " a X1 X2 X3 X4 X5",
47  " c X6 X7 X8 X9 X10",
48  " g X11 X12 X13 X14 X15",
49  " u X16 X17 X18 X19 X20",
50  " - X21 X22 X23 X24 X25",
51  "[-mbindX1,X2,...,X25] use X1,...,X25 for the binding matrix: (profile)",
52  " a c g u -",
53  " a X1 X2 X3 X4 X5",
54  " c X6 X7 X8 X9 X10",
55  " g X11 X12 X13 X14 X15",
56  " u X16 X17 X18 X19 X20",
57  " - X21 X22 X23 X24 X25",
58  "[-maxf] maximal number of family members (10) (profile)",
59  "[-minf] minimal number of family members (5) (profile)",
60  "[-minw] minimal weight for family members (0.7) (profile)",
61  "[-maxew] maximal weight for family extension members (0.2) (profile)",
62  "unused [-cl] cost threshold (low) (0.25) ALI_ERR = ','",
63  "unused [-cm] cost threshold (middle) (0.5) ALI_ERR = '-'",
64  "unused [-ch] cost threshold (high) (0.8) ALI_ERR = '='",
65  "[-mm] maximal number of maps for solution (prealigner) (1000)",
66  "[-mma] maximal number of maps for aligner (prealigner) (2)",
67  "[-csub] cost threshold for substitution (prealigner) (0.5)",
68  "[-chel] cost threshold for helix binding (prealigner) (2.0)",
69  "[-ec] error count (prealigner) (2)",
70  "[-ib] interval border (prealigner) (5)",
71  "[-ic] interval center (prealigner) (5)",
72  NULp
73 };
74 
75 
76 static void print_man() {
77  // Print a short parameter description
78  int i;
79 
80  for (i = 0; ali_man_line[i]; i++)
81  fprintf(stderr, "%s\n", ali_man_line[i]);
82 }
83 
84 
85 void ali_fatal_error(const char *message, const char *func) {
86  fprintf(stderr, "FATAL ERROR %s: %s\n", func, message);
87  exit(-1);
88 }
89 
90 void ali_error(const char *message, const char *func) {
91  fprintf(stderr, "ERROR %s: %s\n", func, message);
92  exit(-1);
93 }
94 
95 
96 
97 static int get_species(char *species_string, unsigned int species_number, char *buffer) {
98  // Get one species of a list
99  while (species_number > 0 && *species_string != '\0') {
100  while (*species_string != '\0' && *species_string != ',')
101  species_string++;
102  if (*species_string != '\0')
103  species_string++;
104  species_number--;
105  }
106 
107  if (*species_string != '\0') {
108  while (*species_string != '\0' && *species_string != ',')
109  *buffer++ = *species_string++;
110  *buffer = '\0';
111  }
112  else {
113  return 0;
114  }
115 
116  return 1;
117 }
118 
119 
120 static int check_base_invariance(char *seq1, char *seq2) {
121  while (*seq1 != '\0' && !ali_is_base(*seq1))
122  seq1++;
123  while (*seq2 != '\0' && !ali_is_base(*seq2))
124  seq2++;
125  while (*seq1 != '\0' && *seq2 != '\0') {
126  if (*seq1 != *seq2)
127  return 0;
128  seq1++;
129  seq2++;
130  while (*seq1 != '\0' && !ali_is_base(*seq1))
131  seq1++;
132  while (*seq2 != '\0' && !ali_is_base(*seq2))
133  seq2++;
134  }
135 
136  if (*seq1 == *seq2)
137  return 1;
138 
139  return 0;
140 }
141 
142 
143 static int convert_for_back_write(char *seq_new, char *seq_orig) {
144  // Convert the working sequenz into the original bases
145 
146  while (*seq_new != '\0' && (ali_is_dot(*seq_new) || ali_is_gap(*seq_new)))
147  seq_new++;
148  while (*seq_orig != '\0' && (ali_is_dot(*seq_orig) || ali_is_gap(*seq_orig)))
149  seq_orig++;
150 
151  while (*seq_new != '\0' && *seq_orig != '\0') {
152  if (*seq_new != *seq_orig) {
153  switch (*seq_new) {
154  case 'a':
155  switch (*seq_orig) {
156  case 'A':
157  *seq_new = 'A';
158  break;
159  default:
160  ali_error("Unexpected character in original sequence");
161  }
162  break;
163  case 'c':
164  switch (*seq_orig) {
165  case 'C':
166  *seq_new = 'C';
167  break;
168  default:
169  ali_error("Unexpected character in original sequence");
170  }
171  break;
172  case 'g':
173  switch (*seq_orig) {
174  case 'G':
175  *seq_new = 'G';
176  break;
177  default:
178  ali_error("Unexpected character in original sequence");
179  }
180  break;
181  case 'u':
182  switch (*seq_orig) {
183  case 'U':
184  *seq_new = 'U';
185  break;
186  case 't':
187  *seq_new = 't';
188  break;
189  case 'T':
190  *seq_new = 'T';
191  break;
192  }
193  break;
194  case 'n':
195  *seq_new = *seq_orig;
196  break;
197  default:
198  ali_fatal_error("Unexpected character in generated sequence");
199  }
200  }
201  seq_new++;
202  seq_orig++;
203  while (*seq_new != '\0' && (ali_is_dot(*seq_new) || ali_is_gap(*seq_new)))
204  seq_new++;
205  while (*seq_orig != '\0' && (ali_is_dot(*seq_orig) || ali_is_gap(*seq_orig)))
206  seq_orig++;
207  }
208 
209  if (*seq_new == *seq_orig)
210  return 1;
211 
212  return 0;
213 }
214 
215 
216 
217 int ARB_main(int argc, char *argv[]) {
218  int i;
219  char message_buffer[200];
220  char species_name[100], species_number;
221  ALI_PREALIGNER *align_prealigner;
222  ali_prealigner_approx_element *approx_elem;
223  ALI_SEQUENCE *sequence;
224 
225 
226  ali_message(ali_version);
227 
228  aligs.init(&argc, (const char**)argv);
229 
230  if (!aligs.species_name || argc > 1) {
231  printf("Unknowen : ");
232  for (i = 1; i < argc; i++)
233  printf("%s ", argv[i]);
234  printf("\n\n");
235  print_man();
236  exit (-1);
237  }
238 
239  // Main loop
240  species_number = 0;
241  while (get_species(aligs.species_name, species_number, species_name)) {
242  species_number++;
243 
244  sprintf(message_buffer,
245  "\nStarting alignment of sequence: %s", species_name);
246  ali_message(message_buffer);
247 
248  // Get all information of the sequence
249  aligs.arbdb.begin_transaction();
250  ALI_SEQUENCE *align_sequence;
251  align_sequence = aligs.arbdb.get_sequence(species_name,
252  aligs.mark_species_flag);
253  if (!align_sequence) {
254  ali_error("Can't read sequence from database");
255  ali_message("Aborting alignment of sequence");
256  }
257  else {
258  char *align_string;
259  char *align_string_original;
260 
261  align_string = align_sequence->string();
262  align_string_original = aligs.arbdb.get_sequence_string(species_name,
263  aligs.mark_species_flag);
264  aligs.arbdb.commit_transaction();
265 
266  if (!align_sequence)
267  ali_warning("Can't read sequence from database");
268  else {
269  // make profile for sequence
270  ALI_PROFILE *align_profile;
271  align_profile = new ALI_PROFILE(align_sequence, &aligs.prof_context);
272 
273  // write information about the profile to the database
274  aligs.arbdb.begin_transaction();
275  char *String = align_profile->cheapest_sequence();
276  aligs.arbdb.put_SAI("ALI_CON", String);
277  freeset(String, align_profile->borders_sequence());
278  free(String);
279  aligs.arbdb.commit_transaction();
280 
281  // make prealignment
282  align_prealigner = new ALI_PREALIGNER(&aligs.preali_context,
283  align_profile,
284  0, align_profile->sequence_length() - 1,
285  0, align_profile->length() - 1);
286  ALI_SEQUENCE *align_pre_sequence_i, *align_pre_sequence;
287  ALI_SUB_SOLUTION *align_pre_solution;
289  align_pre_sequence_i = align_prealigner->sequence();
290  align_pre_sequence = align_prealigner->sequence_without_inserts();
291  align_pre_solution = align_prealigner->solution();
292  align_pre_approx = align_prealigner->approximation();
293  delete align_prealigner;
294 
295  align_pre_solution->print();
296 
297  // write result of alignment into database
298  aligs.arbdb.begin_transaction();
299  String = align_pre_sequence_i->string();
300  aligs.arbdb.put_SAI("ALI_PRE_I", String);
301  freeset(String, align_pre_sequence->string());
302  aligs.arbdb.put_SAI("ALI_PRE", String);
303  free(String);
304  aligs.arbdb.commit_transaction();
305 
306 
307  sprintf(message_buffer, "%d solutions generated (taking the first)",
308  align_pre_approx->cardinality());
309  ali_message(message_buffer);
310 
311  if (align_pre_approx->is_empty())
312  ali_fatal_error("List of approximations is empty");
313 
314  // Write result back to the database
315  approx_elem = align_pre_approx->first();
316 
317  sequence = approx_elem->map->sequence(align_profile->sequence());
318  String = sequence->string();
319 
320  if (!check_base_invariance(String, align_string))
321  ali_error("Bases changed in output sequence");
322 
323  if (!convert_for_back_write(String, align_string_original))
324  ali_fatal_error("Can't convert correctly");
325 
326  aligs.arbdb.begin_transaction();
327  aligs.arbdb.put_sequence_string(species_name, String);
328  aligs.arbdb.put_SAI("ALI_INSERTS", approx_elem->ins_marker);
329  aligs.arbdb.commit_transaction();
330  delete sequence;
331 
332  // Delete all Objects
333  free(align_string);
334  free(align_string_original);
335  delete align_pre_solution;
336  delete align_pre_approx;
337  delete align_profile;
338  delete align_pre_sequence;
339  delete align_pre_sequence_i;
340  }
341  delete align_sequence;
342  }
343  }
344 
345  ali_message("Aligner terminated\n");
346  return 0;
347 }
int ali_is_base(char c)
Definition: ali_misc.hxx:66
int ARB_main(int argc, char *argv[])
Definition: ali_main.cxx:217
char * get_sequence_string(char *name, int and_mark=0)
Definition: ali_arbdb.cxx:48
static const char * ali_man_line[]
Definition: ali_main.cxx:29
ALI_SEQUENCE * sequence()
static ALI_GLOBAL aligs
Definition: ali_main.cxx:20
int put_SAI(const char *name, char *sequence)
Definition: ali_arbdb.cxx:150
unsigned long length()
static const char * ali_version
Definition: ali_main.cxx:26
int cardinality()
Definition: ali_tlist.hxx:162
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
ALI_PREALIGNER_CONTEXT preali_context
Definition: ali_global.hxx:44
const char * String
Definition: gb_aci_impl.h:59
static int get_species(char *species_string, unsigned int species_number, char *buffer)
Definition: ali_main.cxx:97
char * cheapest_sequence()
int ali_is_gap(char c)
Definition: ali_misc.hxx:112
ALI_NORM_SEQUENCE * sequence()
void init(int *argc, const char *argv[])
Definition: ali_global.cxx:62
int mark_species_flag
Definition: ali_global.hxx:34
void message(char *errortext)
ALI_SEQUENCE * get_sequence(char *name, int and_mark=0)
Definition: ali_arbdb.cxx:69
void ali_error(const char *message, const char *func)
Definition: ali_main.cxx:90
int is_empty()
Definition: ali_tlist.hxx:165
void begin_transaction()
Definition: ali_arbdb.hxx:35
ALI_TLIST< ali_prealigner_approx_element * > * approximation()
char * string()
char * species_name
Definition: ali_global.hxx:24
ALI_ARBDB arbdb
Definition: ali_global.hxx:30
static void print_man()
Definition: ali_main.cxx:76
void ali_fatal_error(const char *message, const char *func)
Definition: ali_main.cxx:85
static int convert_for_back_write(char *seq_new, char *seq_orig)
Definition: ali_main.cxx:143
void ali_warning(const char *message)
Definition: ali_misc.hxx:47
unsigned long sequence_length()
int ali_is_dot(char c)
Definition: ali_misc.hxx:96
void ali_message(const char *message)
Definition: ali_misc.hxx:46
ALI_SUB_SOLUTION * solution()
static int check_base_invariance(char *seq1, char *seq2)
Definition: ali_main.cxx:120
ALI_SEQUENCE * sequence_without_inserts()
#define NULp
Definition: cxxforward.h:97
void commit_transaction()
Definition: ali_arbdb.hxx:38
char * borders_sequence()
int put_sequence_string(char *name, char *sequence)
Definition: ali_arbdb.cxx:114
ALI_SEQUENCE * sequence(ALI_NORM_SEQUENCE *ref_seq)
ALI_PROFILE_CONTEXT prof_context
Definition: ali_global.hxx:43