ARB
NT_sort.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : NT_sort.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "NT_local.h"
12 
13 #include <item_sel_list.h>
14 #include <aw_awar.hxx>
15 #include <arb_progress.h>
16 #include <aw_msg.hxx>
17 #include <aw_root.hxx>
18 #include <TreeNode.h>
19 #include <TreeDisplay.hxx>
20 #include <arb_sort.h>
21 #include <arb_global_defs.h>
22 
23 #define FIELD_FILTER_RESORT (1<<GB_STRING)|(1<<GB_INT)|(1<<GB_FLOAT) // field types supported by cmpByKey()
24 
25 #define CUSTOM_CRITERIA 3
26 
28  char *key;
29  bool reverse;
30  bool is_valid;
31 
32  void check_valid() {
33  is_valid = key && strcmp(key, NO_FIELD_SELECTED) != 0;
34  }
35 
36  customCriterion() : key(NULp), reverse(false) { check_valid(); }
37  customCriterion(const char *key_, bool reverse_) : key(ARB_strdup(key_)), reverse(reverse_) { check_valid(); }
38  customCriterion(const customCriterion& other) : key(nulldup(other.key)), reverse(other.reverse) { check_valid(); }
40  ~customCriterion() { free(key); }
41 };
42 
43 static int cmpByKey(GBDATA *gbd1, GBDATA *gbd2, const customCriterion& by) {
44  int cmp = 0;
45  if (by.is_valid) {
46  GBDATA *gb_field1 = GB_entry(gbd1, by.key);
47  GBDATA *gb_field2 = GB_entry(gbd2, by.key);
48 
49  if (gb_field1) {
50  if (gb_field2) {
51  switch (GB_read_type(gb_field1)) {
52  case GB_STRING: {
53  const char *s1 = GB_read_char_pntr(gb_field1);
54  const char *s2 = GB_read_char_pntr(gb_field2);
55 
56  cmp = strcmp(s1, s2);
57  break;
58  }
59  case GB_FLOAT: {
60  float f1 = GB_read_float(gb_field1);
61  float f2 = GB_read_float(gb_field2);
62 
63  cmp = f1<f2 ? -1 : (f1>f2 ? 1 : 0);
64  break;
65  }
66  case GB_INT: {
67  int i1 = GB_read_int(gb_field1);
68  int i2 = GB_read_int(gb_field2);
69 
70  cmp = i1-i2;
71  break;
72  }
73  default:
74  cmp = 0; // other field type -> no idea how to compare
75  break;
76  }
77 
78  if (by.reverse) cmp = -cmp;
79  }
80  else cmp = -1; // existing < missing!
81  }
82  else cmp = gb_field2 ? 1 : 0;
83  }
84  return cmp;
85 }
86 
89 
91  if (tree) {
92  if (tree->is_leaf()) {
93  if (tree->gb_node) {
94  gb_resort_data_list[gb_resort_data_count++] = tree->gb_node;
95  }
96  }
97  else {
98  NT_resort_data_base_by_tree(tree->get_leftson(), gb_species_data);
99  NT_resort_data_base_by_tree(tree->get_rightson(), gb_species_data);
100  }
101  }
102 }
103 
104 static bool customOrderIsStrict = true;
105 static bool customDefinesOrder = false;
106 
107 static int resort_data_by_customOrder(const void *v1, const void *v2, void *cd_sortBy) {
108  GBDATA *gbd1 = (GBDATA*)v1;
109  GBDATA *gbd2 = (GBDATA*)v2;
110 
111  const customCriterion *sortBy = (const customCriterion *)cd_sortBy;
112 
113  int cmp = 0;
114  for (int c = 0; !cmp && c<CUSTOM_CRITERIA; ++c) {
115  cmp = cmpByKey(gbd1, gbd2, sortBy[c]);
116  }
117 
118  if (!cmp) customOrderIsStrict = false;
119  else customDefinesOrder = true;
120 
121  return cmp;
122 }
123 
125  nt_assert(contradicted(tree, sortBy));
126 
128  if (!error) {
129  GBDATA *gb_sd = GBT_get_species_data(gb_main);
130  if (!gb_sd) error = GB_await_error();
131  else {
132  if (tree) {
134  ARB_calloc(gb_resort_data_list, GB_nsons(gb_sd) + 256);
135  NT_resort_data_base_by_tree(tree, gb_sd);
136  }
137  else {
138  gb_resort_data_list = GBT_gen_species_array(gb_main, &gb_resort_data_count);
139 
141  nt_assert(implicated(gb_resort_data_count>0, gb_resort_data_list)); // accept NULp array (if no species exists). GB_sort can handle that and GB_resort_data_base should as well
142 
143  GB_sort((void **)gb_resort_data_list, 0, gb_resort_data_count, resort_data_by_customOrder, (void*)sortBy);
144  }
145 
146  error = GB_resort_data_base(gb_main, gb_resort_data_list, gb_resort_data_count);
147 
148  free(gb_resort_data_list);
149  }
150  }
151  return GB_end_transaction(gb_main, error);
152 }
153 
155  customOrderIsStrict = true;
156  customDefinesOrder = false;
157 
158  GB_ERROR error_or_warning = resort_data_base(gb_main, NULp, sortBy);
159  // Note: if a real error occurs, transaction was aborted inside resort_data_base.
160 
161  if (!error_or_warning) {
162  if (!customDefinesOrder) error_or_warning = "Warning: No order is defined by the specified fields";
163  else if (!customOrderIsStrict) error_or_warning = "Note: The specified fields do not define a strict order";
164  }
165  return error_or_warning;
166 }
167 
169  arb_progress progress("Sorting data");
170  GB_ERROR error = NULp;
171  TreeNode *tree = ntw->get_tree_root_node();
172 
173  if (!tree) error = "Please select/build a tree first";
174  if (!error) error = resort_data_base(GLOBAL.gb_main, tree, NULp);
175  if (error) aw_message(error);
176 }
177 
178 #define AWAR_TREE_SORT1 "db_sort/sort_1"
179 #define AWAR_TREE_SORT2 "db_sort/sort_2"
180 #define AWAR_TREE_SORT3 "db_sort/sort_3"
181 
182 #define AWAR_TREE_REV1 "db_sort/rev1"
183 #define AWAR_TREE_REV2 "db_sort/rev2"
184 #define AWAR_TREE_REV3 "db_sort/rev3"
185 
187  arb_progress progress("Sorting data");
188 
189  AW_root *aw_root = aw->get_root();
190 
192  sortBy[0] = customCriterion(aw_root->awar(AWAR_TREE_SORT1)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV1)->read_int());
193  sortBy[1] = customCriterion(aw_root->awar(AWAR_TREE_SORT2)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV2)->read_int());
194  sortBy[2] = customCriterion(aw_root->awar(AWAR_TREE_SORT3)->read_char_pntr(), aw_root->awar(AWAR_TREE_REV3)->read_int());
195 
196  GB_ERROR error_or_warning = strict_resort_data_base(GLOBAL.gb_main, sortBy);
197  if (error_or_warning) aw_message(error_or_warning);
198 }
199 
201  awr->awar_string(AWAR_TREE_SORT1, "name", aw_def);
202  awr->awar_string(AWAR_TREE_SORT2, "full_name", aw_def);
204 
205  awr->awar_int(AWAR_TREE_REV1, 0, aw_def);
206  awr->awar_int(AWAR_TREE_REV2, 0, aw_def);
207  awr->awar_int(AWAR_TREE_REV3, 0, aw_def);
208 }
209 
211  AW_window_simple *aws = new AW_window_simple;
212  aws->init(awr, "SORT_DB_ENTRIES", "SORT DATABASE");
213  aws->load_xfig("nt_sort.fig");
214 
215  aws->at("close");
216  aws->callback(AW_POPDOWN);
217  aws->create_button("CLOSE", "CLOSE", "C");
218 
219  aws->callback(makeHelpCallback("sp_sort_fld.hlp"));
220  aws->at("help");
221  aws->create_button("HELP", "HELP", "H");
222 
226 
227  aws->at("rev1"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV1);
228  aws->at("rev2"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV2);
229  aws->at("rev3"); aws->label("Reverse"); aws->create_toggle(AWAR_TREE_REV3);
230 
231  aws->at("go");
232  aws->callback(NT_resort_data_by_user_criteria);
233  aws->create_button("GO", "GO", "G");
234 
235  return aws;
236 }
237 
238 
239 // --------------------------------------------------------------------------------
240 
241 #ifdef UNIT_TESTS
242 #ifndef TEST_UNIT_H
243 #include <test_unit.h>
244 #endif
245 
246 #define TEST_EXPECT_DATABASE_ORDER(expected_order) do{ \
247  GB_transaction ta1(gb_main); \
248  char *got_order = GBT_store_marked_species(gb_main, false); \
249  TEST_EXPECT_EQUAL(got_order, expected_order); \
250  free(got_order); \
251  }while(0)
252 
253 #define TEST_EXPECT_ANNOTATD_ORDER(expected_order,expected_values,key) do{ \
254  GB_transaction ta2(gb_main); \
255  TEST_EXPECT_DATABASE_ORDER(expected_order); \
256  char *aci = GBS_global_string_copy("split(\";\")|findspec(readdb(%s))|merge(\";\")", key); \
257  char *gen_values = GB_command_interpreter(expected_order, aci, gb_main); \
258  if (!gen_values) TEST_EXPECT_NO_ERROR(GB_await_error()); \
259  TEST_EXPECT_EQUAL(gen_values, expected_values); \
260  free(gen_values); \
261  free(aci); \
262  }while(0)
263 
264 void TEST_resort_database() {
265  GB_shell shell;
266  GBDATA *gb_main = GB_open("TEST_fields_ascii.arb", "r"); // ../UNIT_TESTER/run/TEST_fields_ascii.arb
267 
268  GBT_mark_all(gb_main, 1); // needed to test result
269 
270  const char *const BYNAME_ORDER = "AcsCell2;AgrSp148;BurCalid;ClaMich8;CytHutc2;DstHafn2;ErwOleae;GlmApico;HrpAura2;IlyPoly2;MhyPalud;MtpKand2;OcgTerie;PesPropi;PslBats2;PurGergo;SrrAquat;StxAceti;TreIsopt;VibJapon;XanCucur;YerKrist";
271  const char *const PHYLO_ORDER = "ErwOleae;PurGergo;YerKrist;SrrAquat;GlmApico;VibJapon;MhyPalud;BurCalid;StxAceti;XanCucur;AgrSp148;PslBats2;MtpKand2;OcgTerie;PesPropi;DstHafn2;ClaMich8;AcsCell2;HrpAura2;CytHutc2;TreIsopt;IlyPoly2";
272  const char *const INITIAL_ORDER = "ErwOleae;PurGergo;YerKrist;SrrAquat;GlmApico;VibJapon;MhyPalud;BurCalid;StxAceti;XanCucur;AgrSp148;PslBats2;TreIsopt;IlyPoly2;CytHutc2;ClaMich8;AcsCell2;HrpAura2;PesPropi;DstHafn2;OcgTerie;MtpKand2";
273 
274  // test initial order:
275  TEST_EXPECT_DATABASE_ORDER(INITIAL_ORDER);
276 
277  // tests for NT_resort_data_by_phylogeny():
278  {
279  TreeNode *tree;
280  {
281  GB_transaction ta(gb_main);
282  tree = GBT_read_tree(gb_main, "tree_LTPs132_SSU", new SimpleRoot);
283  TEST_REJECT_NULL(tree);
284 
285  TEST_EXPECT_NO_ERROR(GBT_link_tree(tree, gb_main, false, NULp, NULp)); // link the tree
286  }
287  TEST_EXPECT_NO_ERROR(resort_data_base(gb_main, tree, NULp));
288 
289  // test resulting order:
290  TEST_EXPECT_DATABASE_ORDER(PHYLO_ORDER);
291 
292  destroy(tree);
293  }
294 
295  // tests for NT_resort_data_by_user_criteria():
297  csc[0] = customCriterion("name", 0);
298 
299  // test resorting by dbfield (string 'name'):
301  TEST_EXPECT_DATABASE_ORDER(BYNAME_ORDER);
302 
303  // test resorting by dbfield (int) + use reverse order:
304  csc[0] = customCriterion("align_bp_score_slv", 1);
305  TEST_EXPECT_ERROR_CONTAINS(strict_resort_data_base(gb_main, csc), "specified fields do not define a strict order");
307 #if defined(LINUX)
308  // the order produced by this sort is unstable and implementation dependant (e.g. it differs between LINUX and OSX).
309  // Its enough to test the result for one OS. The fact that non-strictness is detected is more important and tested above.
310  TEST_EXPECT_ANNOTATD_ORDER("AcsCell2;MtpKand2;ClaMich8;AgrSp148;BurCalid;PslBats2;ErwOleae;GlmApico;XanCucur;YerKrist;IlyPoly2;PurGergo;StxAceti;PesPropi;VibJapon;SrrAquat;DstHafn2;TreIsopt;MhyPalud;HrpAura2;OcgTerie;CytHutc2",
311  "124;124;122;121;121;121;120;119;119;118;117;117;117;116;116;115;114;113;112;111;105;101",
312  "align_bp_score_slv");
313 #endif
314  csc[1] = customCriterion("align_quality_slv", 0); // add a 2nd criterion (making sort stable)
316  // --- diff to above------: XXXXXXXXXXXXXXXXX-------------------XXXXXXXXXXXXXXXXX----------------------------------------------XXXXXXXXXXXXXXXXX---------------------------------------------------------------------------------
317  TEST_EXPECT_ANNOTATD_ORDER("MtpKand2;AcsCell2;ClaMich8;AgrSp148;PslBats2;BurCalid;ErwOleae;GlmApico;XanCucur;YerKrist;IlyPoly2;StxAceti;PurGergo;PesPropi;VibJapon;SrrAquat;DstHafn2;TreIsopt;MhyPalud;HrpAura2;OcgTerie;CytHutc2",
318  //XXXX-------XXXXX----------------XXXXX--------------------------- marks those values which make the sort stable!
319  "91;96;99;95;96;97;97;92;99;99;87;93;99;86;99;99;91;72;92;83;82;88",
320  "align_quality_slv");
321  csc[1] = customCriterion(); // "remove" criterion
322 
323  // test resorting by dbfield (float)
324  csc[0] = customCriterion("homop_slv", 0);
325  TEST_EXPECT_ERROR_CONTAINS(strict_resort_data_base(gb_main, csc), "specified fields do not define a strict order");
327 #if defined(LINUX)
328  // (see comment about 'non-strictness' above)
329  TEST_EXPECT_ANNOTATD_ORDER("DstHafn2;XanCucur;AgrSp148;GlmApico;IlyPoly2;PesPropi;ClaMich8;StxAceti;OcgTerie;MhyPalud;CytHutc2;AcsCell2;BurCalid;PslBats2;ErwOleae;YerKrist;PurGergo;SrrAquat;TreIsopt;VibJapon;HrpAura2;MtpKand2",
330  "0.12;0.13;0.2;0.2;0.2;0.2;0.27;0.27;0.27;0.28;0.29;0.33;0.33;0.35;0.4;0.41;0.41;0.41;0.41;0.47;0.68;1.6",
331  "homop_slv");
332 #endif
333 
334  csc[0] = customCriterion("align_ident_slv", 0);
336  TEST_EXPECT_ANNOTATD_ORDER("OcgTerie;HrpAura2;TreIsopt;IlyPoly2;PesPropi;MtpKand2;DstHafn2;CytHutc2;StxAceti;MhyPalud;BurCalid;GlmApico;AgrSp148;AcsCell2;PslBats2;ErwOleae;ClaMich8;PurGergo;SrrAquat;VibJapon;YerKrist;XanCucur",
337  "69.761269;73.481384;74.731186;76.5748;77.514793;79.776756;79.802635;82.919708;87.339195;87.470451;89.498642;89.625168;89.759888;90.149460;92.114960;94.846054;95.121948;97.172417;97.569443;99.170128;99.247093;100", // that is strict
338  "align_ident_slv");
339 
340  GB_close(gb_main);
341 }
342 
343 #endif // UNIT_TESTS
344 
345 // --------------------------------------------------------------------------------
346 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
const char * GB_ERROR
Definition: arb_core.h:25
customCriterion(const customCriterion &other)
Definition: NT_sort.cxx:38
#define AWAR_TREE_SORT1
Definition: NT_sort.cxx:178
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
static void NT_resort_data_base_by_tree(TreeNode *tree, GBDATA *gb_species_data)
Definition: NT_sort.cxx:90
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:729
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
#define FIELD_FILTER_RESORT
Definition: NT_sort.cxx:23
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:707
GBDATA ** GBT_gen_species_array(GBDATA *gb_main, long *speciesCountPtr)
Definition: aditem.cxx:485
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
#define AWAR_TREE_REV2
Definition: NT_sort.cxx:183
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void NT_create_resort_awars(AW_root *awr, AW_default aw_def)
Definition: NT_sort.cxx:200
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:837
long read_int() const
Definition: AW_awar.cxx:184
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void check_valid()
Definition: NT_sort.cxx:32
#define AWAR_TREE_REV1
Definition: NT_sort.cxx:182
GB_ERROR GBT_link_tree(TreeNode *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates)
Definition: adtree.cxx:953
#define NO_FIELD_SELECTED
customCriterion(const char *key_, bool reverse_)
Definition: NT_sort.cxx:37
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
static bool customOrderIsStrict
Definition: NT_sort.cxx:104
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
Definition: arbdb.h:67
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
static GBDATA ** gb_resort_data_list
Definition: NT_sort.cxx:87
static void NT_resort_data_by_user_criteria(AW_window *aw)
Definition: NT_sort.cxx:186
GBDATA * gb_species_data
Definition: adname.cxx:33
void NT_resort_data_by_phylogeny(AW_window *, TREE_canvas *ntw)
Definition: NT_sort.cxx:168
#define false
Definition: ureadseq.h:13
TreeNode * get_tree_root_node() const
static bool customDefinesOrder
Definition: NT_sort.cxx:105
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1325
static void error(const char *msg)
Definition: mkptypes.cxx:96
float GB_read_float(GBDATA *gbd)
Definition: arbdb.cxx:744
static long gb_resort_data_count
Definition: NT_sort.cxx:88
#define cmp(h1, h2)
Definition: admap.cxx:50
static SearchTree * tree[SEARCH_PATTERNS]
Definition: ED4_search.cxx:629
int GB_nsons(GBDATA *gbd)
Definition: arbdb.cxx:2636
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:606
#define AWAR_TREE_SORT3
Definition: NT_sort.cxx:180
#define nt_assert(cond)
Definition: NT_local.h:27
#define AWAR_TREE_REV3
Definition: NT_sort.cxx:184
bool is_leaf() const
Definition: TreeNode.h:211
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:632
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
static int resort_data_by_customOrder(const void *v1, const void *v2, void *cd_sortBy)
Definition: NT_sort.cxx:107
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:140
#define CUSTOM_CRITERIA
Definition: NT_sort.cxx:25
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
static int cmpByKey(GBDATA *gbd1, GBDATA *gbd2, const customCriterion &by)
Definition: NT_sort.cxx:43
AW_root * get_root()
Definition: aw_window.hxx:362
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: NT_local.h:37
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
Definition: test_unit.h:1114
static GB_ERROR strict_resort_data_base(GBDATA *gb_main, const customCriterion *sortBy)
Definition: NT_sort.cxx:154
GB_transaction ta(gb_var)
void destroy(TreeNode *that)
Definition: TreeNode.h:600
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_node
Definition: TreeNode.h:173
GBDATA * gb_main
Definition: adname.cxx:32
GB_ERROR GB_resort_data_base(GBDATA *gb_main, GBDATA **new_order_list, long listsize)
Definition: arbdb.cxx:2655
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:622
DECLARE_ASSIGNMENT_OPERATOR(customCriterion)
#define AWAR_TREE_SORT2
Definition: NT_sort.cxx:179
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
static GB_ERROR resort_data_base(GBDATA *gb_main, TreeNode *tree, const customCriterion *sortBy)
Definition: NT_sort.cxx:124
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
NT_global GLOBAL
Definition: NT_main.cxx:47
Definition: arbdb.h:66
AW_window * NT_create_resort_window(AW_root *awr)
Definition: NT_sort.cxx:210
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105