ARB
NT_sort.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : NT_sort.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "NT_local.h"
12 
13 #include <item_sel_list.h>
14 #include <aw_awar.hxx>
15 #include <arb_progress.h>
16 #include <aw_msg.hxx>
17 #include <aw_root.hxx>
18 #include <TreeNode.h>
19 #include <arb_sort.h>
20 #include <arb_global_defs.h>
21 
22 #define FIELD_FILTER_RESORT (1<<GB_STRING)|(1<<GB_INT)|(1<<GB_FLOAT) // field types supported by cmpByKey()
23 
24 #define CUSTOM_CRITERIA 3
25 
27  char *key;
28  bool reverse;
29  bool is_valid;
30 
31  void check_valid() {
32  is_valid = key && strcmp(key, NO_FIELD_SELECTED) != 0;
33  }
34 
35  customCriterion() : key(NULp), reverse(false) { check_valid(); }
36  customCriterion(const char *key_, bool reverse_) : key(ARB_strdup(key_)), reverse(reverse_) { check_valid(); }
37  customCriterion(const customCriterion& other) : key(nulldup(other.key)), reverse(other.reverse) { check_valid(); }
39  ~customCriterion() { free(key); }
40 };
41 
42 static int cmpByKey(GBDATA *gbd1, GBDATA *gbd2, const customCriterion& by) {
43  int cmp = 0;
44  if (by.is_valid) {
45  GBDATA *gb_field1 = GB_entry(gbd1, by.key);
46  GBDATA *gb_field2 = GB_entry(gbd2, by.key);
47 
48  if (gb_field1) {
49  if (gb_field2) {
50  switch (GB_read_type(gb_field1)) {
51  case GB_STRING: {
52  const char *s1 = GB_read_char_pntr(gb_field1);
53  const char *s2 = GB_read_char_pntr(gb_field2);
54 
55  cmp = strcmp(s1, s2);
56  break;
57  }
58  case GB_FLOAT: {
59  float f1 = GB_read_float(gb_field1);
60  float f2 = GB_read_float(gb_field2);
61 
62  cmp = f1<f2 ? -1 : (f1>f2 ? 1 : 0);
63  break;
64  }
65  case GB_INT: {
66  int i1 = GB_read_int(gb_field1);
67  int i2 = GB_read_int(gb_field2);
68 
69  cmp = i1-i2;
70  break;
71  }
72  default:
73  cmp = 0; // other field type -> no idea how to compare
74  break;
75  }
76 
77  if (by.reverse) cmp = -cmp;
78  }
79  else cmp = -1; // existing < missing!
80  }
81  else cmp = gb_field2 ? 1 : 0;
82  }
83  return cmp;
84 }
85 
88 
90  if (tree) {
91  if (tree->is_leaf()) {
92  if (tree->gb_node) {
93  gb_resort_data_list[gb_resort_data_count++] = tree->gb_node;
94  }
95  }
96  else {
97  NT_resort_data_base_by_tree(tree->get_leftson(), gb_species_data);
98  NT_resort_data_base_by_tree(tree->get_rightson(), gb_species_data);
99  }
100  }
101 }
102 
103 static bool customOrderIsStrict = true;
104 static bool customDefinesOrder = false;
105 
106 static int resort_data_by_customOrder(const void *v1, const void *v2, void *cd_sortBy) {
107  GBDATA *gbd1 = (GBDATA*)v1;
108  GBDATA *gbd2 = (GBDATA*)v2;
109 
110  const customCriterion *sortBy = (const customCriterion *)cd_sortBy;
111 
112  int cmp = 0;
113  for (int c = 0; !cmp && c<CUSTOM_CRITERIA; ++c) {
114  cmp = cmpByKey(gbd1, gbd2, sortBy[c]);
115  }
116 
117  if (!cmp) customOrderIsStrict = false;
118  else customDefinesOrder = true;
119 
120  return cmp;
121 }
122 
124  nt_assert(contradicted(tree, sortBy));
125 
127  if (!error) {
128  GBDATA *gb_sd = GBT_get_species_data(gb_main);
129  if (!gb_sd) error = GB_await_error();
130  else {
131  if (tree) {
133  ARB_calloc(gb_resort_data_list, GB_nsons(gb_sd) + 256);
134  NT_resort_data_base_by_tree(tree, gb_sd);
135  }
136  else {
137  gb_resort_data_list = GBT_gen_species_array(gb_main, &gb_resort_data_count);
138 
140  nt_assert(implicated(gb_resort_data_count>0, gb_resort_data_list)); // accept NULp array (if no species exists). GB_sort can handle that and GB_resort_data_base should as well
141 
142  GB_sort((void **)gb_resort_data_list, 0, gb_resort_data_count, resort_data_by_customOrder, (void*)sortBy);
143  }
144 
145  error = GB_resort_data_base(gb_main, gb_resort_data_list, gb_resort_data_count);
146 
147  free(gb_resort_data_list);
148  }
149  }
150  return GB_end_transaction(gb_main, error);
151 }
152 
154  customOrderIsStrict = true;
155  customDefinesOrder = false;
156 
157  GB_ERROR error_or_warning = resort_data_base(gb_main, NULp, sortBy);
158  // Note: if a real error occurs, transaction was aborted inside resort_data_base.
159 
160  if (!error_or_warning) {
161  if (!customDefinesOrder) error_or_warning = "Warning: No order is defined by the specified fields";
162  else if (!customOrderIsStrict) error_or_warning = "Note: The specified fields do not define a strict order";
163  }
164  return error_or_warning;
165 }
166 
168  arb_progress progress("Sorting data");
169  GB_ERROR error = NULp;
171 
172  if (!tree) error = "Please select/build a tree first";
173  if (!error) error = resort_data_base(GLOBAL.gb_main, tree, NULp);
174  if (error) aw_message(error);
175 }
176 
177 #define AWAR_TREE_SORT1 "db_sort/sort_1"
178 #define AWAR_TREE_SORT2 "db_sort/sort_2"
179 #define AWAR_TREE_SORT3 "db_sort/sort_3"
180 
181 #define AWAR_TREE_REV1 "db_sort/rev1"
182 #define AWAR_TREE_REV2 "db_sort/rev2"
183 #define AWAR_TREE_REV3 "db_sort/rev3"
184 
186  arb_progress progress("Sorting data");
187 
188  AW_root *aw_root = aw->get_root();
189 
191  sortBy[0] = customCriterion(aw_root->awar(AWAR_TREE_SORT1)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV1)->read_int());
192  sortBy[1] = customCriterion(aw_root->awar(AWAR_TREE_SORT2)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV2)->read_int());
193  sortBy[2] = customCriterion(aw_root->awar(AWAR_TREE_SORT3)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV3)->read_int());
194 
195  GB_ERROR error_or_warning = strict_resort_data_base(GLOBAL.gb_main, sortBy);
196  if (error_or_warning) aw_message(error_or_warning);
197 }
198 
200  awr->awar_string(AWAR_TREE_SORT1, "name", aw_def);
201  awr->awar_string(AWAR_TREE_SORT2, "full_name", aw_def);
203 
204  awr->awar_int(AWAR_TREE_REV1, 0, aw_def);
205  awr->awar_int(AWAR_TREE_REV2, 0, aw_def);
206  awr->awar_int(AWAR_TREE_REV3, 0, aw_def);
207 }
208 
210  AW_window_simple *aws = new AW_window_simple;
211  aws->init(awr, "SORT_DB_ENTRIES", "SORT DATABASE");
212  aws->load_xfig("nt_sort.fig");
213 
214  aws->at("close");
215  aws->callback(AW_POPDOWN);
216  aws->create_button("CLOSE", "CLOSE", "C");
217 
218  aws->callback(makeHelpCallback("sp_sort_fld.hlp"));
219  aws->at("help");
220  aws->create_button("HELP", "HELP", "H");
221 
225 
226  aws->at("rev1"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV1);
227  aws->at("rev2"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV2);
228  aws->at("rev3"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV3);
229 
230  aws->at("go");
231  aws->callback(NT_resort_data_by_user_criteria);
232  aws->create_button("GO", "GO", "G");
233 
234  return aws;
235 }
236 
237 
238 // --------------------------------------------------------------------------------
239 
240 #ifdef UNIT_TESTS
241 #ifndef TEST_UNIT_H
242 #include <test_unit.h>
243 #endif
244 
245 #define TEST_EXPECT_DATABASE_ORDER(expected_order) do{ \
246  GB_transaction ta1(gb_main); \
247  char *got_order = GBT_store_marked_species(gb_main, false); \
248  TEST_EXPECT_EQUAL(got_order, expected_order); \
249  free(got_order); \
250  }while(0)
251 
252 #define TEST_EXPECT_ANNOTATD_ORDER(expected_order,expected_values,key) do{ \
253  GB_transaction ta2(gb_main); \
254  TEST_EXPECT_DATABASE_ORDER(expected_order); \
255  char *aci = GBS_global_string_copy("split(\";\")|findspec(readdb(%s))|merge(\";\")", key); \
256  char *gen_values = GB_command_interpreter(expected_order, aci, gb_main); \
257  if (!gen_values) TEST_EXPECT_NO_ERROR(GB_await_error()); \
258  TEST_EXPECT_EQUAL(gen_values, expected_values); \
259  free(gen_values); \
260  free(aci); \
261  }while(0)
262 
263 void TEST_resort_database() {
264  GB_shell shell;
265  GBDATA *gb_main = GB_open("TEST_fields_ascii.arb", "r"); // ../UNIT_TESTER/run/TEST_fields_ascii.arb
266 
267  GBT_mark_all(gb_main, 1); // needed to test result
268 
269  const char *const BYNAME_ORDER = "AcsCell2;AgrSp148;BurCalid;ClaMich8;CytHutc2;DstHafn2;ErwOleae;GlmApico;HrpAura2;IlyPoly2;MhyPalud;MtpKand2;OcgTerie;PesPropi;PslBats2;PurGergo;SrrAquat;StxAceti;TreIsopt;VibJapon;XanCucur;YerKrist";
270  const char *const PHYLO_ORDER = "ErwOleae;PurGergo;YerKrist;SrrAquat;GlmApico;VibJapon;MhyPalud;BurCalid;StxAceti;XanCucur;AgrSp148;PslBats2;MtpKand2;OcgTerie;PesPropi;DstHafn2;ClaMich8;AcsCell2;HrpAura2;CytHutc2;TreIsopt;IlyPoly2";
271  const char *const INITIAL_ORDER = "ErwOleae;PurGergo;YerKrist;SrrAquat;GlmApico;VibJapon;MhyPalud;BurCalid;StxAceti;XanCucur;AgrSp148;PslBats2;TreIsopt;IlyPoly2;CytHutc2;ClaMich8;AcsCell2;HrpAura2;PesPropi;DstHafn2;OcgTerie;MtpKand2";
272 
273  // test initial order:
274  TEST_EXPECT_DATABASE_ORDER(INITIAL_ORDER);
275 
276  // tests for NT_resort_data_by_phylogeny():
277  {
278  TreeNode *tree;
279  {
280  GB_transaction ta(gb_main);
281  tree = GBT_read_tree(gb_main, "tree_LTPs132_SSU", new SimpleRoot);
282  TEST_REJECT_NULL(tree);
283 
284  TEST_EXPECT_NO_ERROR(GBT_link_tree(tree, gb_main, false, NULp, NULp)); // link the tree
285  }
286  TEST_EXPECT_NO_ERROR(resort_data_base(gb_main, tree, NULp));
287 
288  // test resulting order:
289  TEST_EXPECT_DATABASE_ORDER(PHYLO_ORDER);
290 
291  destroy(tree);
292  }
293 
294  // tests for NT_resort_data_by_user_criteria():
296  csc[0] = customCriterion("name", 0);
297 
298  // test resorting by dbfield (string 'name'):
300  TEST_EXPECT_DATABASE_ORDER(BYNAME_ORDER);
301 
302  // test resorting by dbfield (int) + use reverse order:
303  csc[0] = customCriterion("align_bp_score_slv", 1);
304  TEST_EXPECT_ERROR_CONTAINS(strict_resort_data_base(gb_main, csc), "specified fields do not define a strict order");
306 #if defined(LINUX)
307  // the order produced by this sort is unstable and implementation dependant (e.g. it differs between LINUX and OSX).
308  // Its enough to test the result for one OS. The fact that non-strictness is detected is more important and tested above.
309  TEST_EXPECT_ANNOTATD_ORDER("AcsCell2;MtpKand2;ClaMich8;AgrSp148;BurCalid;PslBats2;ErwOleae;GlmApico;XanCucur;YerKrist;IlyPoly2;PurGergo;StxAceti;PesPropi;VibJapon;SrrAquat;DstHafn2;TreIsopt;MhyPalud;HrpAura2;OcgTerie;CytHutc2",
310  "124;124;122;121;121;121;120;119;119;118;117;117;117;116;116;115;114;113;112;111;105;101",
311  "align_bp_score_slv");
312 #endif
313  csc[1] = customCriterion("align_quality_slv", 0); // add a 2nd criterion (making sort stable)
315  // --- diff to above------: XXXXXXXXXXXXXXXXX-------------------XXXXXXXXXXXXXXXXX----------------------------------------------XXXXXXXXXXXXXXXXX---------------------------------------------------------------------------------
316  TEST_EXPECT_ANNOTATD_ORDER("MtpKand2;AcsCell2;ClaMich8;AgrSp148;PslBats2;BurCalid;ErwOleae;GlmApico;XanCucur;YerKrist;IlyPoly2;StxAceti;PurGergo;PesPropi;VibJapon;SrrAquat;DstHafn2;TreIsopt;MhyPalud;HrpAura2;OcgTerie;CytHutc2",
317  //XXXX-------XXXXX----------------XXXXX--------------------------- marks those values which make the sort stable!
318  "91;96;99;95;96;97;97;92;99;99;87;93;99;86;99;99;91;72;92;83;82;88",
319  "align_quality_slv");
320  csc[1] = customCriterion(); // "remove" criterion
321 
322  // test resorting by dbfield (float)
323  csc[0] = customCriterion("homop_slv", 0);
324  TEST_EXPECT_ERROR_CONTAINS(strict_resort_data_base(gb_main, csc), "specified fields do not define a strict order");
326 #if defined(LINUX)
327  // (see comment about 'non-strictness' above)
328  TEST_EXPECT_ANNOTATD_ORDER("DstHafn2;XanCucur;AgrSp148;GlmApico;IlyPoly2;PesPropi;ClaMich8;StxAceti;OcgTerie;MhyPalud;CytHutc2;AcsCell2;BurCalid;PslBats2;ErwOleae;YerKrist;PurGergo;SrrAquat;TreIsopt;VibJapon;HrpAura2;MtpKand2",
329  "0.12;0.13;0.2;0.2;0.2;0.2;0.27;0.27;0.27;0.28;0.29;0.33;0.33;0.35;0.4;0.41;0.41;0.41;0.41;0.47;0.68;1.6",
330  "homop_slv");
331 #endif
332 
333  csc[0] = customCriterion("align_ident_slv", 0);
335  TEST_EXPECT_ANNOTATD_ORDER("OcgTerie;HrpAura2;TreIsopt;IlyPoly2;PesPropi;MtpKand2;DstHafn2;CytHutc2;StxAceti;MhyPalud;BurCalid;GlmApico;AgrSp148;AcsCell2;PslBats2;ErwOleae;ClaMich8;PurGergo;SrrAquat;VibJapon;YerKrist;XanCucur",
336  "69.761269;73.481384;74.731186;76.5748;77.514793;79.776756;79.802635;82.919708;87.339195;87.470451;89.498642;89.625168;89.759888;90.149460;92.114960;94.846054;95.121948;97.172417;97.569443;99.170128;99.247093;100", // that is strict
337  "align_ident_slv");
338 
339  GB_close(gb_main);
340 }
341 
342 #endif // UNIT_TESTS
343 
344 // --------------------------------------------------------------------------------
345 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
const char * GB_ERROR
Definition: arb_core.h:25
customCriterion(const customCriterion &other)
Definition: NT_sort.cxx:37
#define AWAR_TREE_SORT1
Definition: NT_sort.cxx:177
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
static void NT_resort_data_base_by_tree(TreeNode *tree, GBDATA *gb_species_data)
Definition: NT_sort.cxx:89
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:729
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
#define FIELD_FILTER_RESORT
Definition: NT_sort.cxx:22
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:720
GBDATA ** GBT_gen_species_array(GBDATA *gb_main, long *speciesCountPtr)
Definition: aditem.cxx:485
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
#define AWAR_TREE_REV2
Definition: NT_sort.cxx:182
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void NT_create_resort_awars(AW_root *awr, AW_default aw_def)
Definition: NT_sort.cxx:199
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:837
long read_int() const
Definition: AW_awar.cxx:184
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void check_valid()
Definition: NT_sort.cxx:31
#define AWAR_TREE_REV1
Definition: NT_sort.cxx:181
GB_ERROR GBT_link_tree(TreeNode *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates)
Definition: adtree.cxx:953
#define NO_FIELD_SELECTED
customCriterion(const char *key_, bool reverse_)
Definition: NT_sort.cxx:36
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
static bool customOrderIsStrict
Definition: NT_sort.cxx:103
TreeNode * NT_get_tree_root_of_canvas(TREE_canvas *ntw)
Definition: NT_extern.cxx:858
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
Definition: arbdb.h:67
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
static GBDATA ** gb_resort_data_list
Definition: NT_sort.cxx:86
static void NT_resort_data_by_user_criteria(AW_window *aw)
Definition: NT_sort.cxx:185
GBDATA * gb_species_data
Definition: adname.cxx:33
void NT_resort_data_by_phylogeny(AW_window *, TREE_canvas *ntw)
Definition: NT_sort.cxx:167
#define false
Definition: ureadseq.h:13
static bool customDefinesOrder
Definition: NT_sort.cxx:104
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1325
static void error(const char *msg)
Definition: mkptypes.cxx:96
float GB_read_float(GBDATA *gbd)
Definition: arbdb.cxx:744
static long gb_resort_data_count
Definition: NT_sort.cxx:87
#define cmp(h1, h2)
Definition: admap.cxx:50
static SearchTree * tree[SEARCH_PATTERNS]
Definition: ED4_search.cxx:629
int GB_nsons(GBDATA *gbd)
Definition: arbdb.cxx:2636
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
#define AWAR_TREE_SORT3
Definition: NT_sort.cxx:179
#define nt_assert(cond)
Definition: NT_local.h:27
#define AWAR_TREE_REV3
Definition: NT_sort.cxx:183
bool is_leaf() const
Definition: TreeNode.h:211
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
static int resort_data_by_customOrder(const void *v1, const void *v2, void *cd_sortBy)
Definition: NT_sort.cxx:106
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
#define CUSTOM_CRITERIA
Definition: NT_sort.cxx:24
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
static int cmpByKey(GBDATA *gbd1, GBDATA *gbd2, const customCriterion &by)
Definition: NT_sort.cxx:42
AW_root * get_root()
Definition: aw_window.hxx:359
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: NT_local.h:37
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
Definition: test_unit.h:1114
static GB_ERROR strict_resort_data_base(GBDATA *gb_main, const customCriterion *sortBy)
Definition: NT_sort.cxx:153
GB_transaction ta(gb_var)
void destroy(TreeNode *that)
Definition: TreeNode.h:600
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_node
Definition: TreeNode.h:173
GBDATA * gb_main
Definition: adname.cxx:32
GB_ERROR GB_resort_data_base(GBDATA *gb_main, GBDATA **new_order_list, long listsize)
Definition: arbdb.cxx:2655
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
DECLARE_ASSIGNMENT_OPERATOR(customCriterion)
#define AWAR_TREE_SORT2
Definition: NT_sort.cxx:178
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
static GB_ERROR resort_data_base(GBDATA *gb_main, TreeNode *tree, const customCriterion *sortBy)
Definition: NT_sort.cxx:123
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
NT_global GLOBAL
Definition: NT_main.cxx:46
Definition: arbdb.h:66
AW_window * NT_create_resort_window(AW_root *awr)
Definition: NT_sort.cxx:209
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105