31 class GroupSearchTree;
49 mutable Lazy<int,-1> marked;
50 mutable Lazy<int,-1> zombies;
58 void update_info(UpdateWhat what)
const;
59 void calc_average_ingroup_distance(
int group_size)
const;
60 double weighted_branchlength_sum(
int group_size)
const;
75 if (size.
needs_eval()) update_info(UPDATE_SIZE);
83 if (marked.
needs_eval()) update_info(UPDATE_MARKED);
87 if (zombies.
needs_eval()) update_info(UPDATE_MARKED);
92 if (aid.
needs_eval()) calc_average_ingroup_distance(get_leaf_count());
99 inline TreeNode *GroupSearchRoot::makeNode() const {
    // Node factory of the group-search tree root: allocates a fresh
    // GroupSearchTree that is handed a pointer to this root and returns it
    // through the TreeNode* interface.
    // The method is const, so constness of 'this' is cast away before the
    // pointer is passed to the node constructor.
    GroupSearchRoot *owner = const_cast<GroupSearchRoot*>(this);
    return new GroupSearchTree(owner);
}
100 inline void GroupSearchRoot::destroyNode(TreeNode *node) const {
    // Releases a node: the generic TreeNode pointer is converted to
    // GroupSearchTree* via DOWNCAST and the resulting object is deleted.
    GroupSearchTree *doomed = DOWNCAST(GroupSearchTree*, node);
    delete doomed;
}
102 void GroupSearchTree::update_info(UpdateWhat what)
const {
105 if (what == UPDATE_MARKED) {
122 marked = get_leftson()->get_marked_count() + get_rightson()->get_marked_count();
123 zombies = get_leftson()->get_zombie_count() + get_rightson()->get_zombie_count();
126 size = get_leftson()->get_leaf_count() + get_rightson()->get_leaf_count();
141 GroupSearchRootPtr troot;
145 gs_assert(!tree_is_loaded() && !failed_to_load());
146 troot =
new GroupSearchRoot;
153 gs_assert(rootNode == troot->get_root_node());
165 if (gb_nnodes) inner_nodes =
GB_read_int(gb_nnodes);
169 const char *
get_name()
const {
// Returns the stored 'name' as a C string (result of name.c_str()).
// NOTE(review): the pointer presumably stays valid only as long as 'name' is not modified - confirm.
return name.c_str(); }
178 return load_error.c_str();
181 if (!tree_is_loaded()) load_tree();
182 return failed_to_load() ?
NULp : &*troot;
194 int FoundGroup::get_name_length()
const {
205 GBDATA *gb_tree = get_tree_data();
227 if (gb_gname) error =
GB_delete(gb_gname);
231 if (gb_grouped) error =
GB_delete(gb_grouped);
235 bool keep_node =
false;
243 error =
GB_delete(gb_group.pointer_ref());
257 error =
"FATAL: unnamed group detected";
263 if (!error && new_name[0]) {
275 if (!gb_group)
return false;
285 if (!gb_grouped && folded) {
293 #if defined(ASSERTION_USED)
306 ARB_ERROR FoundGroup::set_folded(
bool folded) {
309 ARB_ERROR FoundGroup::set_overlap_folded(
bool folded) {
318 bool was_folded = is_folded();
319 bool knows_overlap = knows_details();
320 bool overlap_was_folded = knows_overlap && overlap_is_folded();
321 bool want_folded = was_folded;
324 case GFM_TOGGLE: want_folded = !(was_folded || overlap_was_folded);
break;
327 default: error =
"invalid collapse mode";
gs_assert(0);
break;
330 if (!error && want_folded != was_folded) {
331 error = set_folded(want_folded);
333 if (!error && want_folded != overlap_was_folded && knows_overlap && gb_overlap_group) {
334 error = set_overlap_folded(want_folded);
340 void ColumnWidths::track(
int wName,
int wReason,
int nesting,
int size,
int marked,
int clusID,
double aid,
bool keeled) {
341 seen_keeled = seen_keeled || keeled;
348 max_nesting =
std::max(max_nesting, nesting);
349 max_size =
std::max(max_size, size);
350 max_marked =
std::max(max_marked, marked);
351 max_marked_pc =
std::max(max_marked_pc, percent(marked, size));
352 max_cluster_id =
std::max(max_cluster_id, clusID);
353 max_aid =
std::max(max_aid,
int(aid));
357 widths.
track(get_name_length(),
358 get_hit_reason().
length(),
371 typedef map<GBDATA*,GBDATA*> Cache;
378 cache[gb_child_group] = gb_parent_group;
381 Cache::const_iterator found = cache.find(gb_child_group);
382 return found == cache.end() ?
NULp : found->second;
387 for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
388 GBDATA *gb_remaining_father = lookupParent(*del);
389 if (gb_remaining_father) {
390 while (gb_remaining_father) {
391 if (deleted_groups.find(gb_remaining_father) == deleted_groups.end()) {
394 gb_remaining_father = lookupParent(gb_remaining_father);
401 for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
406 for (Cache::iterator c = cache.begin(); c != cache.end(); ++c) {
407 GBDATA *gb_child = c->first;
408 GBDATA *gb_parent = c->second;
409 if (deleted_groups.find(gb_parent) != deleted_groups.end()) {
410 defineParentOf(gb_child, translate.
lookupParent(gb_parent));
419 #define TRIGGER_UPDATE_GROUP_RESULTS "/tmp/trigger/group_result_update"
424 typedef set<GroupSearch*> GroupSearchSet;
426 GroupSearchSet searches;
439 void remove_callbacks(
GBDATA *gb_main);
441 void trigger_group_search_update() {
// Calls GB_touch() on the database entry 'gb_trigger'; a value returned by GB_touch is not inspected here.
// NOTE(review): presumably this fires the callback that refreshes group-search results - confirm against add_callbacks().
GB_touch(gb_trigger); }
445 cbs_installed(
false),
463 add_callbacks(gb_main);
470 GBDATA *gb_main = gs->get_gb_main();
471 remove_callbacks(gb_main);
474 bool empty()
const {
// True when the 'searches' container holds no entries.
return searches.empty(); }
477 deleted_groups.clear();
478 modified_groups.clear();
481 return !(deleted_groups.empty() && modified_groups.empty());
485 if (has_notifications()) {
487 for (GroupSearchSet::iterator gs = searches.begin(); gs != searches.end(); ++gs) {
491 clear_notifications();
499 if (!mark_as_deleted) {
500 if (!
GB_entry(gb_node,
"group_name")) {
501 mark_as_deleted =
true;
505 if (mark_as_deleted) {
534 else cbs_installed =
true;
537 void GroupSearchCommon::remove_callbacks(
GBDATA *gb_main) {
545 else cbs_installed =
false;
556 redisplay_cb(redisplay_results_cb),
565 if (common->
empty()) {
576 bool search_all = trees_to_search.empty();
577 for (
int t = 0; tree_names[t]; ++t) {
578 if (search_all || trees_to_search.find(tree_names[t]) != trees_to_search.end()) {
579 searched_tree.push_back(
SearchedTree(tree_names[t], gb_main));
604 TreeNode *keeld = node->keelTarget();
605 return keeld ?
DOWNCAST(GroupSearchTree*, keeld) : &*node;
629 if (!clade->is_leaf() && clade->is_normal_group()) {
635 if (node->is_keeled_group()) {
676 #if defined(ASSERTION_USED)
689 if (node->
gb_node == gb_group) {
691 return pnode->
gb_node == gb_parent_group;
700 return ppnode->
gb_node == gb_parent_group;
703 #if defined(ASSERTION_USED)
709 return gb_parent_group ==
NULp;
713 double GroupSearchTree::weighted_branchlength_sum(
int group_size)
const {
714 int leafs = get_leaf_count();
715 double sum =
father ? get_branchlength() * leafs * (group_size-leafs) : 0.0;
718 sum += get_leftson()->weighted_branchlength_sum(group_size);
719 sum += get_rightson()->weighted_branchlength_sum(group_size);
725 void GroupSearchTree::calc_average_ingroup_distance(
int group_size)
const {
726 long pairs =
long(group_size)*(group_size-1)/2;
729 double wbranchsum = weighted_branchlength_sum(group_size);
730 aid = wbranchsum / pairs;
740 typedef set< RefPtr<GBDATA> > ExistingHits;
742 ExistingHits existing_hits;
746 existing_hits.insert(prev->get_pointer());
750 bool match_unlisted = mode&
GSM_ADD;
761 SearchedTreeContainer searched_tree;
768 long overall_iter_count = 0;
769 for (SearchedTreeIter st = searched_tree.begin(); st != searched_tree.end(); ++st) {
770 overall_iter_count += st->get_edge_iteration_count();
774 arb_progress progress(
"Searching groups", overall_iter_count);
776 bool load_failures =
false;
777 for (SearchedTreeIter st = searched_tree.begin(); !error && st != searched_tree.end(); ++st) {
778 GroupSearchRoot *troot = st->get_tree_root();
784 progress.
inc_by(st->get_edge_iteration_count());
785 load_failures =
true;
788 CandidateList candidate;
800 gb_parent_group =
NULp;
812 gb_parent_group = gb_group;
820 gb_parent_group = gb_group;
823 ExistingHits::iterator prev_hit = existing_hits.find(gb_group);
825 bool was_listed = prev_hit != existing_hits.end();
826 bool test_match = !was_listed == match_unlisted;
857 while (e != start && !error);
861 bool was_listed = !match_unlisted;
862 for (CandidateList::iterator cand = candidate.begin(); !error && cand != candidate.end(); ++cand) {
863 target_group.aimTo(*cand);
866 if (query_expr->
matches(target_group, hit_reason)) {
873 ExistingHits::iterator prev_hit = existing_hits.find(cand->get_group().get_pointer());
874 gs_assert(prev_hit != existing_hits.end());
875 existing_hits.erase(prev_hit);
879 target_group.unAim();
880 st->flush_loaded_tree();
886 SearchedTreeContainer reduced;
887 for (
unsigned t = 0; t<searched_tree.size(); ++t) {
888 if (!searched_tree[t].failed_to_load()) {
889 reduced.push_back(searched_tree[t]);
892 int failed_trees = searched_tree.size()-reduced.size();
894 swap(reduced, searched_tree);
897 if (!match_unlisted && !error) {
901 if (existing_hits.find(prev->get_pointer()) != existing_hits.end()) {
913 error = clusterDuplicates();
921 sortedByOrder =
false;
927 inline bool contains(const WordSet& ws, const string& w) {
    // Membership test: reports whether the word 'w' can be found in the
    // word set 'ws'.
    const WordSet::const_iterator hit = ws.find(w);
    return hit != ws.end();
}
931 char *namedup = strdup(name);
937 for (
int i = 0; w[i]; ++i) {
938 if (!
contains(ignored_words, w[i])) words.insert(w[i]);
942 for (string::iterator c = s.begin(); c != s.end(); ++c) {
954 tree(g.get_tree_data())
966 return words.
isNull() ? 1 : words->size();
978 string wordSeparators;
992 min_words(min_words_),
993 wordSeparators(wordSeparators_)
998 for (WordSet::const_iterator wi = ignored_words_.begin(); wi != ignored_words_.end(); ++wi) {
1001 ignored_words.insert(word);
1018 if (max_possible_word_matches<min_words)
return false;
1025 return gi1.
name.compare(gi2.
name) == 0;
1036 int matched_words = 0;
1037 for (WordSet::const_iterator wi = gi1.
words->begin(); wi != gi1.
words->end(); ++wi) {
1041 return matched_words>=min_words ? matched_words :
false;
1055 GroupClusterSet members;
1058 mutable vector<uint8_t> lookup;
1060 inline bool valid(int i) const {
    // Range check for a group index: accepted are the values 0 .. num_groups-1.
    if (i < 0) return false;
    return i < num_groups;
}
1061 inline bool have_lookup()
const {
// True when the 'lookup' vector is non-empty.
return !lookup.empty(); }
1065 : num_groups(num_of_groups)
1070 members(other.members),
1071 num_groups(other.num_groups)
1076 if (!have_lookup()) {
1077 lookup.resize(num_groups,
int(
false));
1078 for (GroupClusterCIter ci =
begin(); ci !=
end(); ++ci) {
1087 if (have_lookup()) {
1088 for (GroupClusterCIter ci =
begin(); ci !=
end(); ++ci) lookup[*ci] =
false;
1096 if (have_lookup()) lookup[i] =
true;
1101 if (have_lookup()) lookup[i] =
false;
1110 bool empty()
const {
// True when 'members' holds no entries.
return members.empty(); }
1111 size_t size()
const {
// Number of entries currently stored in 'members'.
return members.size(); }
1113 GroupClusterCIter
begin()
const {
// Read-only iterator to the first entry of 'members'.
return members.begin(); }
1114 GroupClusterCIter
end()
const {
// Read-only past-the-end iterator of 'members'.
return members.end(); }
1126 listDups(listDups_),
1154 did_match = data1 == data2;
1158 did_match = data1 != data2;
1182 int *firstIndexOfRow;
1183 void init_firstIndexOfRow() {
1184 firstIndexOfRow[0] = 0;
1185 for (
int y = 1;
y<size; ++
y) {
1186 firstIndexOfRow[
y] = firstIndexOfRow[
y-1]+(
y-1);
1193 lin_size(size*(size-1)/2),
1194 firstIndexOfRow(new
int[size])
1197 init_firstIndexOfRow();
1200 delete [] firstIndexOfRow;
1205 if (x>y)
swap(x, y);
1211 return firstIndexOfRow[
y]+x;
1214 #if defined(UNIT_TESTS)
1215 void to_xy(
int lin,
int& x,
int&
y)
const {
1216 for (y = 1; y<size && lin>=
y; ++
y) lin -= y;
1227 vector<uint8_t> name_matches;
1228 vector<bool> tree_matches;
1230 vector<uint8_t> words;
1235 int pairIdx(
int i,
int j)
const {
// Translates the index pair (i, j) into the single index reported by symmap.linear_index(i, j).
// The result is used elsewhere in this class to subscript name_matches and tree_matches.
return symmap.
linear_index(i, j); }
1236 void calc_matches(
GBDATA *gb_main);
1238 int fits_into_cluster(
int idx,
const GroupCluster& cluster,
bool strong_fit)
const {
1240 bool enough_words = min_words<2 || words[idx] >= min_words;
1246 bool fitsAll =
true;
1247 bool weakFitAny =
true;
1249 for (GroupClusterCIter ci = cluster.
begin(); fitsAll && ci != cluster.
end(); ++ci) {
1250 const int pi = pairIdx(idx, *ci);
1251 bool fitWeak = name_matches[pi] >= min_words;
1253 fitsAll = fitWeak && tree_matches[pi];
1254 weakFitAny = weakFitAny || fitWeak;
1257 if (fitsAll) fitting = idx;
1258 else if (weakFitAny && !strong_fit) fitting = -idx;
1263 int find_next_group_fitting_into(
const GroupCluster& cluster,
int behind_idx,
bool strong_fit)
const {
1274 const int gcount = groups->
size();
1277 for (
int idx = behind_idx+1; idx<gcount && !fitting; ++idx) {
1278 fitting = fits_into_cluster(idx, cluster, strong_fit);
1287 int find_next_candidate_group_fitting_into(
const GroupCluster& cluster,
const vector<int>& candidates,
int& cand_idx,
bool strong_fit)
const {
1296 const int cand_size = candidates.size();
1299 for (
int cidx = cand_idx+1; cidx<cand_size; ++cidx) {
1300 int idx = candidates[cidx];
1302 fitting = fits_into_cluster(idx, cluster, strong_fit);
1320 criteria(criteria_),
1321 symmap(groups->size()),
1323 delivered(groups->size())
1325 calc_matches(gb_main);
1335 int this_id = next_id++;
1336 for (GroupClusterCIter ci = ofCluster.
begin(); ci != ofCluster.
end(); ++ci) {
1363 const int maxidx = groups->
size();
1365 for (
int idx = 0; idx<maxidx; ++idx) {
1366 int thisWords = words[idx];
1368 if (thisWords>maxWords && (ignore_delivered ? !
already_delivered(idx) :
true)) {
1369 maxWords = thisWords;
1378 void Clusterer::calc_matches(
GBDATA *gb_main) {
1379 const int gcount = groups->
size();
1381 const long way_to_go =
long(gcount) + lin_range;
1385 name_matches.reserve(lin_range);
1386 tree_matches.reserve(lin_range);
1389 info.reserve(gcount);
1400 info.push_back(
GroupInfo(*g, prep_wordwise, sens, wordSeparators, ignoredWords));
1401 if (prep_wordwise) {
1409 for (
int i1 = 0; i1<gcount && !progress.aborted(); ++i1) {
1410 for (
int i2 = i1+1; i2<gcount && !progress.aborted(); ++i2) {
1424 int gcount = groups->
size();
1429 cluster.
insert(start_idx);
1434 int pcount = start_idx;
1435 int curr_idx = start_idx;
1436 while (!progress_build.
aborted()) {
1437 const int addable = find_next_group_fitting_into(cluster, curr_idx,
false);
1438 if (!addable)
break;
1446 weakCand.
insert(-addable);
1447 curr_idx = -addable;
1451 progress_build.
inc_by(curr_idx-pcount);
1460 if (!weakCand.
empty()) {
1464 for (GroupClusterCIter w = weakCand.
begin(); w != weakCand.
end(); ++w) {
1465 int nameFitsAll =
true;
1466 for (GroupClusterCIter ci = cluster.
begin(); nameFitsAll && ci != cluster.
end(); ++ci) {
1467 int pi = pairIdx(*w, *ci);
1468 nameFitsAll = name_matches[pi];
1470 if (nameFitsAll) toAdd.
insert(*w);
1473 for (GroupClusterCIter a = toAdd.
begin(); a != toAdd.
end(); ++a) cluster.
insert(*a);
1480 progress_build.
done();
1486 return elems*elems/2-elems;
1493 vector<int> candidates;
1497 const int addable = find_next_group_fitting_into(curr, idx,
true);
1498 if (!addable)
break;
1500 candidates.push_back(addable);
1505 if ((candidates.size()+curr.
size()) > best.
size()) {
1506 stack<int> previous;
1509 const int del_size = delivered.
size();
1510 const unsigned long permutation_count =
permutations(candidates.size());
1512 while (!progress_cluster.
aborted()) {
1513 int addable = find_next_candidate_group_fitting_into(curr, candidates, curr_idx,
true);
1517 previous.push(curr_idx);
1523 const unsigned long permutations_left =
permutations(candidates.size()-best.
size());
1524 const double done_percent = (permutation_count-permutations_left) /
double(permutation_count);
1525 const double overall_done_percent = done_low + (done_high-done_low)*done_percent;
1529 if (previous.empty())
break;
1531 const int last_cidx = previous.top();
1532 const int last_add = candidates[last_cidx];
1534 curr.
erase(last_add);
1536 curr_idx = last_cidx;
1538 const int rest_cand = candidates.size() - (curr_idx+1);
1539 const size_t poss_size = rest_cand + curr.
size();
1540 if (poss_size<best.
size())
break;
1556 const int gcount = groups->
size();
1562 const int extension_count = 1+(wanted_words-1-old_min_words);
1563 const double done_per_extension = 1.0/extension_count;
1565 int first_index = 0;
1567 for (
int start_idx = first_cluster_found_from_index; start_idx<max_start_idx && !progress_cluster.
aborted(); ++start_idx) {
1572 extendClusterToBiggest(curr, start_idx, best, progress_cluster, 0.0, done_per_extension);
1573 if (!first_index && !best.
empty()) {
1574 first_cluster_found_from_index = first_index = start_idx;
1579 if (wanted_words>old_min_words && !best.
empty() && !progress_cluster.
aborted()) {
1583 for (
int fewer_words = wanted_words-1; fewer_words>=old_min_words && !progress_cluster.
aborted(); --fewer_words, ++ext_done) {
1589 const double done_start = ext_done*done_per_extension;
1590 extendClusterToBiggest(curr, 0, best, progress_cluster, done_start, done_start+done_per_extension);
1602 int gcount = groups->
size();
1612 arb_progress progress_cluster(
"[pass 2/2: fast duplicate search]",
long(max_i));
1613 for (
int i = 0; i<max_i && !progress_cluster.
aborted(); ++i) {
1617 if (!curr.
empty()) {
1628 long groups_with_min_words = 0;
1629 for (
int gidx = 0; gidx<gcount; ++gidx) {
1630 if (words[gidx]>=min_words) ++groups_with_min_words;
1633 arb_progress progress_cluster(
"[pass 2/2: thorough duplicate search]", groups_with_min_words);
1635 int first_cluster_found_from_index = 0;
1636 while (max_words >= min_words && !progress_cluster.
aborted()) {
1642 first_cluster_found_from_index = 0;
1649 progress_cluster.
done();
1653 GB_ERROR GroupSearch::clusterDuplicates() {
1655 bool enough_hits = found->
size()>=2;
1658 arb_progress progress(
"Restricting to duplicate groups", 2L);
1659 Clusterer clusterer(gb_main, found, dups);
1661 if (clusterer.max_cluster_start_index()<0) {
1662 enough_hits =
false;
1667 clusterer.find_and_deliverTo(*found);
1672 clusterer.deliverRest(*nonDupGroups);
1673 found = nonDupGroups;
1677 if (!error) error = progress.error_if_aborted();
1680 if (!enough_hits && !error) {
1689 if (!sortedByOrder) sort_results();
1706 bool erased = first_removed != found.end();
1708 found.erase(first_removed, found.end());
1709 invalidate_widths();
1714 return modified != found.end();
1722 bool last_was_modifier =
false;
1723 bool reversed =
false;
1725 SortCriteria::const_iterator crit = by.begin();
1726 while ((!cmp || last_was_modifier) && crit != by.end()) {
1752 return reversed ? cmp>0 : cmp<0;
1763 FoundGroupContainer::iterator del = found.begin();
1766 invalidate_widths();
1775 g->track_max_widths(w);
1792 for (SortCriteria::const_iterator sc = sorted_by->begin(); sc != sorted_by->end(); ++sc) {
1844 if (show_tree_name) {
1861 bool changed =
false;
1865 if (erased || changed) {
1882 if (!order.empty() && order.front() == gsc) {
1886 sortedByOrder =
false;
1893 SortCriteria::iterator dup = find(order.begin(), order.end(), gsc);
1894 if (dup != order.end()) {
1895 SortCriteria::iterator pre = dup;
1896 do --pre;
while (pre != order.end() && *pre ==
GSC_REVERSE);
1898 if (pre == order.end()) pre = order.begin();
1902 order.erase(pre,dup);
1906 order.push_front(gsc);
1907 sortedByOrder =
false;
1912 void GroupSearch::sort_results() {
1913 if (!order.empty()) {
1916 sortedByOrder =
true;
1926 dups =
new DupCriteria(listDups,
DupNameCriterion(ntype, sens, min_words, ignored_words, wordSeparators), ttype, min_cluster_size);
1931 string2WordSet(ignored_words, ignoredWordsSet, wordSeparators, none);
1932 setDupCriteria(listDups, ntype, sens, min_words, ignoredWordsSet, wordSeparators, ttype, min_cluster_size);
1941 if (idx<found->size())
return (*found)[idx].delete_from_DB();
1942 return "index out-of-bounds";
1951 error =
group->delete_from_DB();
1953 error = ta.
close(error);
1976 if (legal_hit_index())
return &queried[hit_idx];
1989 error =
"no duplicate";
1996 if (&*g == group)
return dupidx;
1997 if (g->get_cluster_id() == cluster) dupidx++;
2001 error =
"unknown error";
2033 const int dupidx = callEnv.
get_dupidx(error);
2132 if (!input_name || !input_name[0]) {
2133 error =
"Error: empty input groupname";
2137 bool know_hit = hit_idx>=0 && unsigned(hit_idx)<queried.
size();
2155 if (idx<found->size()) {
2156 return (*found)[idx].rename_by_ACI(acisrt, *found, idx);
2158 return "index out-of-bounds";
2163 if (has_results()) {
2170 error =
group->rename_by_ACI(acisrt, *found, idx);
2172 error = ta.
close(error);
2178 if (idx<found->size()) {
2179 return (*found)[idx].change_folding(mode);
2181 return "index out-of-bounds";
2186 return common->get_parent_cache().lookupParent(gb_group);
2192 gb_group = get_parent_group(gb_group);
2193 if (gb_group) ++nesting;
2208 GBDATA *gb_group = g->get_pointer();
2209 targetGroups.insert(gb_group);
2215 while (!testParentsOf.empty()) {
2217 for (GBDATAset::iterator t = testParentsOf.begin(); t != testParentsOf.end(); ++t) {
2218 GBDATA *gb_parent_group = get_parent_group(*t);
2219 if (gb_parent_group && targetGroups.find(gb_parent_group) == targetGroups.end()) {
2220 addedParents.insert(gb_parent_group);
2221 targetGroups.insert(gb_parent_group);
2224 testParentsOf = addedParents;
2229 for (GBDATAset::iterator n = targetGroups.begin(); n != targetGroups.end() && !
error; ++n) {
2234 SearchedTreeContainer searched_tree;
2237 for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !
error; ++t) {
2238 GBDATA *gb_tree_data = t->get_tree_data();
2242 if (targetGroups.find(gb_node) == targetGroups.end()) {
2250 return ta.
close(error);
2254 SearchedTreeContainer searched_tree;
2258 for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !
error; ++t) {
2261 if (t->get_tree_data() == g->get_tree_data()) {
2262 groupsFoundInTree.insert(g->get_pointer());
2266 if (!groupsFoundInTree.empty()) {
2268 GroupSearchRoot *troot = t->get_tree_root();
2276 if (groupsFoundInTree.find(node->
gb_node) != groupsFoundInTree.end()) {
2282 while (sub != stop) {
2285 if (leaf->
name) speciesInGroup.insert(leaf->
name);
2290 if (species.empty()) {
2292 species = speciesInGroup;
2298 speciesInGroup.begin(), speciesInGroup.end(),
2299 species.begin(), species.end(),
2301 inserter(combined, combined.begin())
2304 if (combined.empty()) {
2305 error =
"No species is member of ALL groups";
2311 speciesInGroup.begin(), speciesInGroup.end(),
2312 species.begin(), species.end(),
2314 inserter(combined, combined.begin())
2324 while (e != start && !error);
2331 if (!targetSpecies.empty()) {
2338 if (targetSpecies.find(name) != targetSpecies.end()) {
2350 size_t targetted = targetSpecies.size();
2351 if (found<targetted) {
2352 size_t zombies = targetted-found;
2360 if (idx<found->size()) {
2365 error = collectSpecies(groups,
UNITE, targetSpecies);
2366 if (!error)
set_marks_of(targetSpecies, gb_main, mode);
2373 if (has_results()) {
2375 error = collectSpecies(*found, cmode, targetSpecies);
2376 if (!error)
set_marks_of(targetSpecies, gb_main, mode);
2440 double pc = 100.0*marked/size;
2453 group_search(group_search_)
2466 bool directParentOnly;
2468 mutable GBDATA *gb_parent;
2469 mutable int distance;
2485 group_search(group_search_),
2502 if (!gb_parent)
return strdup(
"");
2511 return "parent-name";
2536 if (query_expr.isNull()) {
2567 if (query_expr.isNull()) {
2575 query_expr.setNull();
2586 enum GroupListType {
2593 GLT_NAME_AND_PARENT,
2600 ParentCache& pcache = GroupSearch::get_common()->get_parent_cache();
2606 entries.put(strdup(g->get_name()));
2625 case GLT_NAME_FOLD: {
2626 const char *
format = g->is_folded() ?
"[%s]" :
"%s";
2630 case GLT_NAME_AND_PARENT: {
2636 entries.put(strdup(g->get_name()));
2640 case GLT_KNAME_NEST: {
2641 int kstate = g->get_keeled();
2642 const char *kprefix = kstate ? (kstate == 1 ?
"!" :
"?") :
"";
2650 const char *found_entries = &*found_entriesP;
2651 return that(found_entries).is_equal_to(expected_entries);
2661 ARB_ERROR error = gs.collectSpecies(groups, cmode, species);
2666 for (SpeciesNames::const_iterator n = species.begin(); n != species.end(); ++n) {
2667 entries.
put(n->c_str());
2672 const char *contained_species = &*contained_speciesP;
2675 return all().ofgroup(fulfilled);
2684 return groupListingIs(results, type, expected_entries);
2690 const int MAX_ORDER = 20;
2691 char found_order[MAX_ORDER];
2695 for (SortCriteria::const_iterator i = order.begin(); i != order.end(); ++i) {
2712 found_order[off++] = c;
2715 found_order[off] = 0;
2716 return that(found_order).is_equal_to(expected_order);
2725 fulfilled.
add(hasOrder(gs, expected_order));
2726 fulfilled.
add(resultListingIs(gs, GLT_NAME_TREE, expected_entries));
2728 return all().ofgroup(fulfilled);
2731 static int refreshes_traced = 0;
2732 static void trace_refresh_cb() { ++refreshes_traced; } // callback used by the test: each invocation increments the counter 'refreshes_traced'
2734 void TEST_group_search() {
2738 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
2739 refreshes_traced = 0;
2745 allGroups.perform_search(
GSM_FIND);
2749 "last/tree_test*another group/tree_test*outer/tree_test*inner/tree_test*test/tree_test*outer/tree_test*test/tree_test*xx/tree_test*"
2750 "outer/tree_tree2*g2/tree_tree2*xx/tree_tree2*test/tree_tree2*outer/tree_tree2*inner/tree_tree2*test/tree_tree2*"
2751 "zombsub/tree_zomb*zomb/tree_zomb*ZOMB/tree_zomb*dup/tree_zomb*inner outer group/tree_zomb*inner group/tree_zomb*outer group/tree_zomb*g4/tree_zomb*g3/tree_zomb*g2/tree_zomb*xx/tree_zomb*yy/tree_zomb*eee/tree_zomb"
2755 allGroups.addSortCriterion(
GSC_NAME);
2759 "another group/tree_test*dup/tree_zomb*eee/tree_zomb*"
2760 "g2/tree_tree2*g2/tree_zomb*"
2761 "g3/tree_zomb*g4/tree_zomb*"
2762 "inner/tree_test*inner/tree_tree2*"
2763 "inner group/tree_zomb*inner outer group/tree_zomb*last/tree_test*"
2764 "outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*"
2765 "outer group/tree_zomb*"
2766 "test/tree_test*test/tree_test*test/tree_tree2*test/tree_tree2*"
2767 "xx/tree_test*xx/tree_tree2*xx/tree_zomb*"
2768 "yy/tree_zomb*zomb/tree_zomb*zombsub/tree_zomb"
2773 tree2.insert(
"tree_tree2");
2774 allGroups.setSearchRange(tree2);
2775 allGroups.perform_search(
GSM_FIND);
2778 TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE,
"g2/tree_tree2*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*test/tree_tree2*test/tree_tree2*xx/tree_tree2"));
2787 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"another group*outer*outer*outer*outer*inner outer group*inner group*outer group"));
2791 some.forgetQExpressions();
2794 TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE,
"outer/tree_test*inner/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
2799 const char *BY_NAME_FWD =
"inner/tree_test*inner/tree_tree2*outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2";
2800 const char *BY_NAME_REV =
"outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_test*inner/tree_tree2";
2812 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_TREENAME,
"T!N",
"outer/tree_test*outer/tree_test*inner/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
2813 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_REVERSE,
"!T!N",
"inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*inner/tree_test*outer/tree_test*outer/tree_test"));
2816 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_TREEORDER,
"O!T!N",
"inner/tree_test*outer/tree_test*outer/tree_test*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2"));
2817 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_REVERSE,
"!O!T!N",
"outer/tree_tree2*outer/tree_tree2*inner/tree_tree2*outer/tree_test*outer/tree_test*inner/tree_test"));
2819 some.forgetSortCriteria();
2825 TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE,
"outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2"));
2828 some.forgetQExpressions();
2831 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*outer*outer*xx*xx*xx"));
2833 some.forgetQExpressions();
2836 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*outer*outer*xx*xx*xx*another group*inner*inner*inner outer group*inner group*outer group"));
2841 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*xx*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group"));
2843 const char *FIRST_XX_REMOVED =
"outer*outer*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group";
2847 some.remove_hit(-10);
TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
2848 some.remove_hit(100);
TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
2852 some.forgetQExpressions();
2855 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*another group*outer*outer*inner outer group*inner group*outer group"));
2858 some.forgetQExpressions();
2861 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"another group*inner outer group*inner group*outer group"));
2867 const char *TOP_GROUPS =
"last*another group*outer*test*outer*outer*zombsub*dup*inner outer group";
2874 keyed.forgetQExpressions();
2877 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*outer<test>*outer<xx>*outer<g2>*outer<test>*outer<inner>*outer<test>"));
2880 keyed.forgetQExpressions();
2883 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*outer<test>*outer<xx>*outer<g2>*g2<xx>*outer<test>*test<outer>*outer<inner>*outer<test>"));
2886 keyed.forgetQExpressions();
2890 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"last*test*zombsub*zombsub<zomb>*zombsub<ZOMB>*dup"));
2893 keyed.forgetQExpressions();
2898 keyed.forgetQExpressions();
2903 keyed.forgetQExpressions();
2908 keyed.forgetQExpressions();
2913 keyed.forgetQExpressions();
2916 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*g2<xx>*g2<yy>*yy<eee>"));
2918 keyed.forgetQExpressions();
2922 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"g2<xx>*test<outer>*outer<inner>*outer group<g4>*outer group<g3>*outer group<g2>*g2<xx>*g2<yy>"));
2924 keyed.forgetQExpressions();
2927 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"g2<xx>*test<outer>*outer group<g4>*outer group<g3>*outer group<g2>"));
2930 const char *EXPANDED_GROUPS =
"last*outer*outer<inner>*outer*outer*zombsub";
2931 keyed.forgetQExpressions();
2934 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));
2936 keyed.forgetQExpressions();
2939 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));
2942 keyed.forgetQExpressions();
2945 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_SIZE,
"another group(29)*outer(15)*outer(47)*zombsub(14)*inner outer group(19)*outer group(15)"));
2951 keyed.forgetQExpressions();
2954 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME,
"another group*outer*inner outer group*outer group"));
2956 const char *COMPLETELY_MARKED_GROUPS =
"test*xx*xx*g4*xx*eee";
2957 keyed.forgetQExpressions();
2960 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
2961 keyed.forgetQExpressions();
2964 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
2965 keyed.forgetQExpressions();
2969 resultListingIs(keyed, GLT_NAME,
""));
2972 keyed.forgetQExpressions();
2979 keyed.forgetQExpressions();
2985 keyed.forgetQExpressions();
2988 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID,
"outer(1.0996)*outer(1.1605)"));
2990 keyed.forgetQExpressions();
2993 keyed.addSortCriterion(
GSC_AID);
2995 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID,
"xx(0.0786)*xx(0.0786)*g3(0.0665)*dup(0.0399)*inner group(0.0259)"));
3005 refreshes_traced = 0;
3013 const char *ACI_add_tag =
"\"[TAG] \";dd";
3015 const char *BEFORE_RENAME =
"outer*inner*test*outer*test*outer*test*outer*inner*test*eee";
3016 const char *OUTER_PREFIXED =
"[TAG] outer*inner*test*outer*test*outer*test*outer*inner*test*eee";
3025 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"[X][TAG] outer*[X]inner*[X]test*[X]outer*[X]test*[X]outer*[X]test*[X]outer*[X]inner*[X]test*[X]eee"));
3034 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_1/11*inner_2/11*test_3/11*outer_4/11*test_5/11*outer_6/11*test_7/11*outer_8/11*inner_9/11*test_10/11*eee_11/11"));
3036 TEST_EXPECT_NO_ERROR(misc.rename_found_groups(
"command(\"/_.*$//\")|dd;\"_\";markedInGroup;\"/\";groupSize"));
3037 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_6/11*inner_4/5*test_7/7*outer_7/15*test_0/4*outer_20/47*test_6/12*outer_6/11*inner_4/5*test_2/6*eee_3/3"));
3040 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_L0=0.695293*inner_L1=0.269289*test_L0=0.160956*outer_L0=1.099650*test_L1=0.591923*outer_L0=1.160535*test_L1=0.726679*outer_L2=0.704352*inner_L3=0.265516*test_L1=0.303089*eee_L4=0.229693"));
3047 refreshes_traced = 0;
3056 TEST_EXPECTATION( resultListingIs(misc, GLT_NAME_FOLD,
"outer*inner*[test]*outer*[test]*outer*[test]*[outer]*[inner]*[test]*[eee]"));
3061 refreshes_traced = 0;
3074 TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*[test]*outer*[test]*[xx]*outer*[g2]*[xx]*[test]*[outer]*[inner]*[test]*zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3080 TEST_EXPECT_NO_ERROR( few.fold_found_groups(
GFM_EXPANDREC));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*[test]*outer*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*inner*[test]*" "zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3081 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_EXPANDREC));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*test*outer*test*[xx]*" "outer*[g2]*[xx]*test*outer*inner*test*" "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*eee"));
3082 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_COLLAPSE));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*" "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*" "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));
3083 TEST_EXPECT_NO_ERROR( few.fold_found_groups(
GFM_EXPANDREC_COLLREST));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*outer*inner*[test]*[outer]*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*inner*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3084 TEST_EXPECT_NO_ERROR(
none.fold_found_groups(
GFM_EXPANDREC_COLLREST));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*" "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3085 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_EXPANDPARENTS));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*outer*[inner]*[test]*outer*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*[inner]*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));
3088 refreshes_traced = 0;
3101 const char *INNER_SPECIES =
"McpCapri,McpMyco2,McpMycoi,McpSpeci,SpiMelli";
3105 TEST_EXPECTATION(speciesInGroupsAre(group2,
UNITE,
"AnaAbact,BacMegat,BacPaste,CloTyro2,CloTyro4,CloTyrob,StaAureu,StaEpide"));
3110 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT,
"outer*outer<inner>*test*outer*outer<test>*outer*outer<test>*test<outer>*outer<inner>*outer<test>*yy<eee>"));
3121 refreshes_traced = 0;
3123 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT,
"outer*outer<inner>*test*test*outer*outer<outer>*outer<inner>*outer<test>*yy<eee>"));
3126 misc.forgetQExpressions();
3129 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_test*outer/tree_tree2*outer/tree_tree2"));
3132 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_test*outer/tree_tree2"));
3138 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_tree2"));
3144 refreshes_traced = 0;
3149 TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE,
"test/tree_test*test/tree_test*test/tree_tree2"));
3159 TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE,
"test/tree_test*test/tree_test"));
3166 void TEST_keeled_group_search() {
3170 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
3171 refreshes_traced = 0;
3177 GroupSearch keeledGroups(gb_main, traceRefresh_cb);
3178 GroupSearch normalGroups(gb_main, traceRefresh_cb);
3188 allGroups.perform_search(
GSM_FIND);
3189 keeledGroups.perform_search(
GSM_FIND);
3190 normalGroups.perform_search(
GSM_FIND);
3198 keeledGroups.get_results().size()+normalGroups.get_results().size());
3204 "outer/tree_tree2*g2/tree_tree2*"
3205 "outer/tree_removal*g2 [was: test]/tree_removal*"
3206 "lower/tree_groups*low2/tree_groups*twoleafs/tree_groups*low1/tree_groups*upper/tree_groups*"
3207 "twoleafs/tree_keeled*low2/tree_keeled*lower/tree_keeled*upper/tree_keeled*low1/tree_keeled*"
3208 "low2/tree_keeled_2*twoleafs/tree_keeled_2*lower/tree_keeled_2*upper/tree_keeled_2*low1/tree_keeled_2*allButOne/tree_keeled_2"
3212 "!twoleafs(L0)*!low2(L1)*?lower(L2)*"
3213 "!low2(L0)*?lower(L1)*!allButOne(L2)"
3218 keeledTrees.insert(
"tree_keeled");
3219 keeledTrees.insert(
"tree_keeled_2");
3221 allGroups.setSearchRange(keeledTrees);
3222 allGroups.perform_search(
GSM_FIND);
3226 "twoleafs*twoleafs<low2>*low2<lower>*lower<upper>*"
3240 "twoleafs*low2*lower*upper*low1*"
3241 "low2*twoleafs*lower*upper*low1*allButOne"
3248 "[twoleafs]*low2*[lower]*[upper]*low1*"
3249 "low2*twoleafs*lower*upper*low1*allButOne"
3256 "[twoleafs]*low2*lower*upper*low1*"
3257 "low2*twoleafs*lower*upper*low1*[allButOne]"
3264 "twoleafs*low2*lower*upper*low1*"
3265 "low2*twoleafs*lower*upper*low1*allButOne"
3270 "twoleafs(1.4310)*low2(1.4436)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"
3273 "low2(1.4436)*twoleafs(0.0087)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"
3277 keeledTrees.insert(
"tree_groups");
3278 allGroups.setSearchRange(keeledTrees);
3279 allGroups.perform_search(
GSM_FIND);
3283 "lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*"
3286 "!twoleafs(L0)*!low2(L1)*?lower(L2)*upper(L3)*"
3292 "?lower(L1)*upper(L2)*low1(L1)*!allButOne(L2)"
3297 "lower(10)*low2(3)*twoleafs(2)*low1(7)*upper(5)*"
3302 "lower(5)*upper(5)*"
3315 "?lower(L2)*?lower(L1)*!twoleafs(L0)*!low2(L1)*!low2(L0)*!allButOne(L2)*lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*upper(L3)*low1(L2)*twoleafs(L0)*upper(L2)*low1(L1)"
3332 mm.to_xy(lin, rx, ry);
3333 if (x>y)
swap(x, y);
3338 return all().ofgroup(fulfilled);
3341 void TEST_SymmetricMatrixMapper() {
3364 void TEST_group_duplicate_detection() {
3368 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
3371 refreshes_traced = 0;
3381 "1/outer/tree_test*"
3382 "1/outer/tree_test*"
3385 "3/outer/tree_tree2*"
3386 "3/outer/tree_tree2*"
3387 "4/test/tree_tree2*"
3406 "3/test/tree_tree2*"
3407 "3/test/tree_tree2*"
3408 "2/inner/tree_test*"
3409 "2/inner/tree_tree2*"
3410 "1/outer/tree_test*"
3411 "1/outer/tree_test*"
3412 "1/outer/tree_tree2*"
3413 "1/outer/tree_tree2"
3419 "0/another group/tree_test*"
3426 "0/inner group/tree_zomb*"
3427 "0/inner outer group/tree_zomb*"
3428 "0/outer group/tree_zomb*"
3431 "0/zombsub/tree_zomb"
3442 "1/outer/tree_test*"
3443 "1/outer/tree_test*"
3444 "1/outer/tree_tree2*"
3445 "1/outer/tree_tree2*"
3446 "2/inner/tree_test*"
3447 "2/inner/tree_tree2*"
3450 "3/test/tree_tree2*"
3451 "3/test/tree_tree2*"
3470 const char *word_sep =
" ";
3475 "1/another group/tree_test*"
3476 "1/inner group/tree_zomb*"
3477 "1/inner outer group/tree_zomb*"
3478 "1/outer group/tree_zomb*"
3480 "2/outer/tree_test*"
3481 "2/outer/tree_test*"
3482 "2/outer/tree_tree2*"
3483 "2/outer/tree_tree2*"
3487 "3/test/tree_tree2*"
3488 "3/test/tree_tree2*"
3494 "5/inner/tree_test*"
3495 "5/inner/tree_tree2*"
3504 "1/inner group/tree_zomb*"
3505 "1/inner outer group/tree_zomb"
3518 "1/inner outer group/tree_zomb*"
3519 "1/outer group/tree_zomb"
3528 "1/inner outer group/tree_zomb*"
3529 "1/inner,group/tree_zomb"
3533 ignore_group.insert(
"Group");
3538 "1/outer/tree_test*"
3539 "1/outer/tree_test*"
3540 "1/outer/tree_tree2*"
3541 "1/outer/tree_tree2*"
3542 "1/inner outer group/tree_zomb*"
3543 "1/outer group/tree_zomb*"
3547 "2/test/tree_tree2*"
3548 "2/test/tree_tree2*"
3550 "3/inner/tree_test*"
3551 "3/inner/tree_tree2*"
3552 "3/inner,group/tree_zomb*"
3572 "1/outer/tree_test*"
3573 "1/outer/tree_test*"
3574 "1/outer/tree_tree2*"
3575 "1/outer/tree_tree2*"
3579 "2/test/tree_tree2*"
3580 "2/test/tree_tree2*"
3586 "4/inner/tree_test*"
3587 "4/inner/tree_tree2*"
3610 "1/group inner outer/tree_test*"
3611 "1/group outer/tree_test*"
3612 "1/outer group/tree_tree2*"
3613 "1/inner outer group/tree_zomb*"
3614 "1/outer group/tree_zomb"
3622 static double bruteForce_calc_average_ingroup_distance(GroupSearchTree *node) {
3623 unsigned leafs = node->get_leaf_count();
3625 if (leafs == 1)
return 0.0;
3630 if (start == last) {
3632 start = start.next();
3636 double dist_sum = 0.0;
3638 for (
ARB_edge e1 = start; e1 != last; e1 = e1.
next()) {
3639 if (e1.is_edge_to_leaf()) {
3641 if (e2.is_edge_to_leaf()) {
3642 dist_sum += e1.dest()->intree_distance_to(e2.dest());
3649 #if defined(ASSERTION_USED)
3650 const unsigned calc_pairs = (leafs*(leafs-1))/2;
3654 return dist_sum/pairs;
3657 #define TEST_EXPECT_PROPER_AID(node) do{ \
3658 const double EPSILON = 0.000001; \
3659 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(node), \
3660 (node)->get_average_ingroup_distance(), \
3664 void TEST_ingroup_distance() {
3672 GroupSearchRoot *troot = stree.get_tree_root();
3676 GroupSearchTree *
rootNode = troot->get_root_node();
3677 GroupSearchTree *leftSon = rootNode->get_leftson();
3678 GroupSearchTree *grandSon = leftSon->get_rightson();
3680 GroupSearchTree *someLeaf = grandSon->get_leftson();
3681 while (!someLeaf->is_leaf()) {
3682 GroupSearchTree *L = someLeaf->get_leftson();
3683 GroupSearchTree *R = someLeaf->get_rightson();
3685 someLeaf = L->get_leaf_count() > R->get_leaf_count() ? L : R;
3690 GroupSearchTree *minSubtree = someLeaf->get_father();
3695 const double EPSILON = 0.000001;
3697 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(minSubtree), minSubtree->leftlen + minSubtree->rightlen, EPSILON);
3698 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(grandSon), 0.534927, EPSILON);
3699 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(leftSon), 0.976091, EPSILON);
3700 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(rootNode), 1.108438, EPSILON);
3704 TEST_EXPECT_PROPER_AID(someLeaf);
3705 TEST_EXPECT_PROPER_AID(minSubtree);
3706 TEST_EXPECT_PROPER_AID(grandSon);
3707 TEST_EXPECT_PROPER_AID(leftSon);
3708 TEST_EXPECT_PROPER_AID(rootNode);
3712 TEST_EXPECT_PROPER_AID(
DOWNCAST(GroupSearchTree*, e.
dest()));
3718 #endif // UNIT_TESTS
const char * get_tree_name() const
void compute_tree() OVERRIDE
bool big_enough(const GroupCluster &cluster) const
static GB_ERROR grl_hitcount(GBL_command_arguments *args)
GBDATA * GB_open(const char *path, const char *opent)
const std::string & get_hit_reason() const
const char * get_group_display(const FoundGroup &g, bool show_tree_name) const
GroupSearchRoot * get_tree_root()
void inc_to_avoid_overflow(PINT x)
compare_by_criteria(const SortCriteria &by_)
void put(const char *elem)
void forgetSortCriteria()
GroupClusterCIter begin() const
std::set< std::string > SpeciesNames
bool group_is_folded(GBDATA *gb_group)
int get_marked_pc() const
GBDATA * get_parent_group(GBDATA *gb_group) const
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
unsigned get_leaf_count() const FINAL_OVERRIDE
#define TRIGGER_UPDATE_GROUP_RESULTS
~GroupSearchRoot() FINAL_OVERRIDE
#define TEST_EXPECT_SIMILAR(expr, want, epsilon)
long GB_read_int(GBDATA *gbd)
GBDATA * GB_child(GBDATA *father)
GB_ERROR GB_add_hierarchy_callback(GBDATA *gb_main, const char *db_path, GB_CB_TYPE type, const DatabaseCallback &dbcb)
#define implicated(hypothesis, conclusion)
return string(buffer, length)
bool overlap_is_folded() const
GB_ERROR delete_group(size_t idx)
const WordSet & get_ignored_words() const
NestingLevelKey(const GroupSearch &group_search_)
static void collect_searched_trees(GBDATA *gb_main, const TreeNameSet &trees_to_search, SearchedTreeContainer &searched_tree)
bool has_group_info() const
void addSortCriterion(GroupSortCriterion gsc)
GB_ERROR GB_add_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
#define DOWNCAST_REFERENCE(totype, expr)
GroupSearchCommon * common
static void result_update_cb(GBDATA *, GroupSearchCommon *common)
void string_to_lower(string &s)
GB_ERROR delete_from_DB()
#define DEFINE_TREE_RELATIVES_ACCESSORS(TreeType)
void setDupCriteria(bool listDups, DupNameCriterionType ntype, GB_CASE sens, DupTreeCriterionType ttype, int min_cluster_size)
int get_tree_order() const
const FoundGroup * get_hit_group() const
GB_ERROR delete_found_groups()
match_expectation doesnt_report_error(const char *error)
const FoundGroup & get_group() const
GBDATA * GB_nextEntry(GBDATA *entry)
#define DEFINE_TREE_ROOT_ACCESSORS(RootType, TreeType)
const char * get_load_error() const
unsigned get_marked_count() const
bool contains_changed(GroupSearchCommon *common) const
void find_and_deliverTo(QueriedGroups &toResult)
void buildInferableClusterStartingWith(int start_idx, GroupCluster &cluster)
ARB_edge_type get_type() const
const char * get_name() const OVERRIDE
ARB_ERROR set_marks_in_found_groups(GroupMarkMode mode, CollectMode cmode)
void allow_lookup() const
double get_average_ingroup_distance() const
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
const char * get_name() const OVERRIDE
void inform_group(const GroupSearch &group_search, const string &hitReason)
bool isCorrectParent(TreeNode *node, GBDATA *gb_group, GBDATA *gb_parent_group)
static GB_ERROR grl_nesting(GBL_command_arguments *args)
int calc_nesting_level(GBDATA *gb_group) const
bool tree_matches(const GBDATA *data1, const GBDATA *data2) const
double get_average_ingroup_distance() const
const char * GBS_global_string(const char *templat,...)
const char * get_name() const OVERRIDE
int get_leaf_count() const
SmartPtr< GroupSearchRoot > GroupSearchRootPtr
void cat(const char *from)
bool isNull() const
test if SmartPtr is NULp
GroupRename_callenv(const QueriedGroups &queried_, int hit_idx_, const GBL_env &env_)
bool has_been_deleted(GBDATA *gb_node)
was_modified(GroupSearchCommon *common_)
void refresh_results_after_DBchanges()
const TreeNode * find_parent_with_groupInfo(bool skipKeeledBrothers=false) const
ARB_edge rootEdge(TreeRoot *root)
void findBestClusterBasedOnWords(int wanted_words, GroupCluster &best, arb_progress &progress_cluster, int &first_cluster_found_from_index)
bool already_delivered(int idx) const
GroupCluster(const GroupCluster &other)
static void set_marks_of(const SpeciesNames &targetSpecies, GBDATA *gb_main, GroupMarkMode mode)
#define ARRAY_ELEMS(array)
int name_matches(const GroupInfo &gi1, const GroupInfo &gi2) const
void setNull()
set SmartPtr to NULp
int max_cluster_start_index() const
GBDATA * GB_get_father(GBDATA *gbd)
const char * get_name() const OVERRIDE
GBDATA * get_gb_main() const
const GBL_call_env & get_callEnv() const
std::set< std::string > WordSet
int linear_index(int x, int y) const
GroupSearchTree(GroupSearchRoot *root)
#define DOWNCAST(totype, expr)
ARB_ERROR fold_found_groups(GroupFoldingMode mode)
DupNameCriterionType get_name_type() const
GB_ERROR check_no_parameter(GBL_command_arguments *args)
GB_ERROR GB_delete(GBDATA *&source)
int GB_string_comparator(const void *v0, const void *v1, void *)
static HelixNrInfo * start
ARB_edge parentEdge(TreeNode *son)
unsigned long permutations(int elems)
GroupInfo(const FoundGroup &g, bool prep_wordwise, GB_CASE sens, const char *wordSeparators, const WordSet &ignored_words)
GroupClusterCIter end() const
GroupCluster(int num_of_groups)
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
int get_keeledStateInfo() const
ARB_ERROR rename_by_ACI(const char *acisrt, const QueriedGroups &results, int hit_idx)
#define TEST_EXPECT(cond)
static void set_species_data(GBDATA *gb_species_data_)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
DupCriteria(bool listDups_, const DupNameCriterion &nameCrit_, DupTreeCriterionType ttype_, int minSize_)
std::set< GBDATA * > GBDATAset
const GroupSearchTree * get_clade() const
bool erase_deleted(GroupSearchCommon *common)
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
const char * get_word_separators() const
const char * get_name() const OVERRIDE
bool isSet() const
test if SmartPtr is not NULp
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
GB_CASE get_sensitivity() const
bool knows_details() const
list< Candidate > CandidateList
void addQueryExpression(CriterionOperator op, CriterionType type, CriterionMatch mtype, const char *expression)
vector< GroupInfo > GroupInfoVec
char * GBS_trim(const char *str)
GB_ERROR GBT_write_group_name(GBDATA *gb_group_name, const char *new_group_name, bool pedantic)
void deliverRest(QueriedGroups &toResult)
#define COMMAND_DROPS_INPUT_STREAMS(args)
const char * GBS_readable_size(unsigned long long size, const char *unit_suffix)
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
const QueriedGroups & queried
static void error(const char *msg)
std::set< std::string > TreeNameSet
unsigned get_group_size() const
GBDATA * GB_get_root(GBDATA *gbd)
GroupSearch(GBDATA *gb_main_, const GroupSearchCallback &redisplay_results_cb)
static void string2WordSet(const char *name, WordSet &words, const char *wordSeparators, const WordSet &ignored_words)
bool contains(int i) const
bool tree_is_loaded() const
bool operator()(const FoundGroup &g)
ARB_ERROR rename_group(size_t idx, const char *acisrt)
expectation_group & add(const expectation &e)
static GB_ERROR grl_dupidx(GBL_command_arguments *args)
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
has_been_deleted(GroupSearchCommon *common_)
size_t get_word_count() const
ASSERTING_CONSTEXPR_INLINE int info2bio(int infopos)
bool is_keeled_group() const
void track_max_widths(ColumnWidths &widths) const
const char * get_name() const
DupNameCriterion(DupNameCriterionType exact, GB_CASE sens_)
bool has_been_modified(GBDATA *gb_node)
set< int > GroupClusterSet
void notify_modified(GBDATA *gb_node)
bool iterate() const OVERRIDE
FoundGroupCIter end() const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
void deliverCluster(const GroupCluster &ofCluster, QueriedGroups &toResult)
GroupSearchTree * get_clade()
ARB_ERROR set_marks_in_group(size_t idx, GroupMarkMode mode)
bool wordwise_name_matching() const
GroupClusterSet::const_iterator GroupClusterCIter
GBQUARK GB_get_quark(GBDATA *gbd)
void fix_deleted_groups(const GBDATAset &deleted_groups)
int GB_read_flag(GBDATA *gbd)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
void forgetQExpressions()
void clear_notifications()
AP_tree_nlen * rootNode()
bool contains(const WordSet &ws, const string &w)
void remove_hit(size_t idx)
void track(int wName, int wReason, int nesting, int size, int marked, int clusID, double aid, bool keeled)
Candidate(GBDATA *gb_group_, GroupSearchTree *node_)
void remove(GroupSearch *gs)
ARB_ERROR group_set_folded(GBDATA *gb_group, bool folded)
static int max2width(const int &i)
char * GS_calc_resulting_groupname(GBDATA *gb_main, const QueriedGroups &queried, int hit_idx, const char *input_name, const char *acisrt, ARB_ERROR &error)
static GBL_command_definition groupRename_command_table[]
void sort_by(const SortCriteria &by)
Candidate(const FoundGroup &group_, GroupSearchTree *node_)
void append(QueryExpr *&tail)
void add_informed_group(const FoundGroup &group)
FoundGroupContainer::const_iterator FoundGroupCIter
GB_ERROR inc_and_error_if_aborted()
static GB_ERROR grl_aid(GBL_command_arguments *args)
#define TEST_EXPECTATION(EXPCTN)
SearchedTreeContainer::iterator SearchedTreeIter
const FoundGroup & get_group() const
int get_edge_iteration_count() const
char * GBT_join_strings(const CharPtrArray &strings, char separator)
const GroupRename_callenv & custom_gr_env(GBL_command_arguments *args)
static GB_ERROR grl_groupsize(GBL_command_arguments *args)
const char * get_name() const OVERRIDE
bool is_edge_to_leaf() const
int get_cluster_id() const
void set_cluster_id(int id)
GB_ERROR GB_remove_hierarchy_callback(GBDATA *gb_main, const char *db_path, GB_CB_TYPE type, const DatabaseCallback &dbcb)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
DupTreeCriterionType get_tree_type() const
GB_ERROR close(GB_ERROR error)
void GB_write_flag(GBDATA *gbd, long flag)
int min_cluster_size() const
void refresh_all_results()
#define FORMAT_2_OUT(args, fmt, value)
bool operator()(const FoundGroup &g)
GBDATA * get_tree_data() const
bool is_inferable() const
#define TEST_EXPECTATION__BROKEN(WANTED, GOT)
FoundGroupContainer::iterator FoundGroupIter
static void tree_node_deleted_cb(GBDATA *gb_node, GroupSearchCommon *common, GB_CB_TYPE cbtype)
void GB_touch(GBDATA *gbd)
GBQUARK GB_find_existing_quark(GBDATA *gbd, const char *key)
Clusterer(GBDATA *gb_main, SmartPtr< QueriedGroups > groups_, SmartPtr< DupCriteria > criteria_)
void nprintf(size_t maxlen, const char *templat,...) __ATTR__FORMAT_MEMBER(2)
int GB_read_byte(GBDATA *gbd)
bool matches(const QueryTarget &target, std::string &hit_reason) const
void forget_lookup() const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
static GB_ERROR grl_markedingroup(GBL_command_arguments *args)
GBDATA * lookupParent(GBDATA *gb_child_group) const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
GBDATA * get_ACI_item() const
char * GB_read_string(GBDATA *gbd)
GB_ERROR GB_write_byte(GBDATA *gbd, int i)
int name_matches_wordwise(const GroupInfo &gi1, const GroupInfo &gi2) const
bool want_unique_groups() const
void GB_remove_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
~ParentGroupNameQueryKey() OVERRIDE
FoundGroupCIter begin() const
GBDATA * GBT_first_species(GBDATA *gb_main)
void GBT_get_tree_names(ConstStrArray &names, GBDATA *gb_main, bool sorted)
void GBT_message(GBDATA *gb_main, const char *msg)
std::list< GroupSortCriterion > SortCriteria
unsigned get_marked_count() const
const char * get_name() const
#define TEST_EXPECT_NO_ERROR(call)
const char * get_group_name() const
GBDATA * get_pointer() const
int get_keeledStateInfo() const
DECLARE_ASSIGNMENT_OPERATOR(GroupCluster)
const ColumnWidths & get_column_widths() const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
const char * get_name() const OVERRIDE
DupNameCriterion(DupNameCriterionType wordwise, GB_CASE sens_, int min_words_, const WordSet &ignored_words_, const char *wordSeparators_)
void sort(CharPtrArray_compare_fun compare, void *client_data)
ARB_ERROR fold_group(size_t idx, GroupFoldingMode mode)
bool is_inner_edge() const
GBDATA * GBT_next_species(GBDATA *gb_species)
static const GBL_command_lookup_table & get_GroupRename_customized_ACI_commands()
void add(GroupSearch *gs)
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
vector< SearchedTree > SearchedTreeContainer
void add_candidate(const GroupSearch &group_search, Candidate &cand, const std::string &hit_reason)
const char * get_data() const
int get_min_wanted_words() const
RefPtr< GBDATA > gb_overlap_group
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
GBDATA * GB_nextChild(GBDATA *child)
void notify_deleted(GBDATA *gb_node)
const char * get_name() const OVERRIDE
ParentCache & get_parent_cache()
GBDATA * GBT_find_tree(GBDATA *gb_main, const char *tree_name)
GB_transaction ta(gb_var)
int calc_max_used_words(bool ignore_delivered)
const QueriedGroups & get_results()
void reset() const OVERRIDE
SymmetricMatrixMapper(int elements)
static void group_name_changed_cb(GBDATA *gb_group_name, GroupSearchCommon *common)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
TargetGroup(GBDATA *gb_main_, const char *treename_)
ParentGroupNameQueryKey(const GroupSearch &group_search_, CriterionType ctype)
bool operator()(const FoundGroup &g1, const FoundGroup &g2) const
void defineParentOf(GBDATA *gb_child_group, GBDATA *gb_parent_group)
ARB_ERROR rename_found_groups(const char *acisrt)
GroupSearchCommon * common
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
void aimTo(const Candidate &c)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
ARB_ERROR change_folding(GroupFoldingMode mode)
void perform_search(GroupSearchMode mode)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
static int info[maxsites+1]
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, SplitMode mode)
static GB_ERROR grl_hitidx(GBL_command_arguments *args)
const GroupSearchTree * get_clade() const
void set_min_wanted_words(int words)
unsigned get_zombie_count() const
SearchedTree(const char *name_, GBDATA *gb_main)
const char * get_name() const OVERRIDE
GroupMarkedKey(bool percent_)
#define TEST_EXPECT_EQUAL(expr, want)
const GBL_command_lookup_table & ACI_get_standard_commands()
SmartPtr< WordSet > words
bool failed_to_load() const
bool is_normal_group() const
GBDATA * GB_entry(GBDATA *father, const char *key)
bool legal_hit_index() const
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
TreeNode * source() const
unsigned get_zombie_count() const
int get_dupidx(GB_ERROR &error) const
static int iteration_count(int leafs_in_tree)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_write_int const char s