31 class GroupSearchTree;
49 mutable Lazy<int,-1> marked;
50 mutable Lazy<int,-1> zombies;
58 void update_info(UpdateWhat what)
const;
59 void calc_average_ingroup_distance(
int group_size)
const;
60 double weighted_branchlength_sum(
int group_size)
const;
75 if (size.
needs_eval()) update_info(UPDATE_SIZE);
83 if (marked.
needs_eval()) update_info(UPDATE_MARKED);
87 if (zombies.
needs_eval()) update_info(UPDATE_MARKED);
92 if (aid.
needs_eval()) calc_average_ingroup_distance(get_leaf_count());
99 inline TreeNode *GroupSearchRoot::makeNode()
const {
return new GroupSearchTree(const_cast<GroupSearchRoot*>(
this)); }
100 inline void GroupSearchRoot::destroyNode(
TreeNode *
node)
const {
delete DOWNCAST(GroupSearchTree*,node); }
102 void GroupSearchTree::update_info(UpdateWhat what)
const {
105 if (what == UPDATE_MARKED) {
122 marked = get_leftson()->get_marked_count() + get_rightson()->get_marked_count();
123 zombies = get_leftson()->get_zombie_count() + get_rightson()->get_zombie_count();
126 size = get_leftson()->get_leaf_count() + get_rightson()->get_leaf_count();
139 GroupSearchRootPtr troot;
143 gs_assert(!tree_is_loaded() && !failed_to_load());
144 troot =
new GroupSearchRoot;
151 gs_assert(rootNode == troot->get_root_node());
163 if (gb_nnodes) inner_nodes =
GB_read_int(gb_nnodes);
167 const char *
get_name()
const {
return name.c_str(); }
176 return load_error.c_str();
179 if (!tree_is_loaded()) load_tree();
180 return failed_to_load() ?
NULp : &*troot;
192 int FoundGroup::get_name_length()
const {
203 GBDATA *gb_tree = get_tree_data();
225 if (gb_gname) error =
GB_delete(gb_gname);
229 if (gb_grouped) error =
GB_delete(gb_grouped);
233 bool keep_node =
false;
241 error =
GB_delete(gb_group.pointer_ref());
255 error =
"FATAL: unnamed group detected";
261 if (!error && new_name[0]) {
273 if (!gb_group)
return false;
283 if (!gb_grouped && folded) {
291 #if defined(ASSERTION_USED)
304 ARB_ERROR FoundGroup::set_folded(
bool folded) {
307 ARB_ERROR FoundGroup::set_overlap_folded(
bool folded) {
316 bool was_folded = is_folded();
317 bool knows_overlap = knows_details();
318 bool overlap_was_folded = knows_overlap && overlap_is_folded();
319 bool want_folded = was_folded;
322 case GFM_TOGGLE: want_folded = !(was_folded || overlap_was_folded);
break;
325 default: error =
"invalid collapse mode";
gs_assert(0);
break;
328 if (!error && want_folded != was_folded) {
329 error = set_folded(want_folded);
331 if (!error && want_folded != overlap_was_folded && knows_overlap && gb_overlap_group) {
332 error = set_overlap_folded(want_folded);
338 void ColumnWidths::track(
int wName,
int wReason,
int nesting,
int size,
int marked,
int clusID,
double aid,
bool keeled) {
339 seen_keeled = seen_keeled || keeled;
346 max_nesting =
std::max(max_nesting, nesting);
347 max_size =
std::max(max_size, size);
348 max_marked =
std::max(max_marked, marked);
349 max_marked_pc =
std::max(max_marked_pc, percent(marked, size));
350 max_cluster_id =
std::max(max_cluster_id, clusID);
351 max_aid =
std::max(max_aid,
int(aid));
355 widths.
track(get_name_length(),
356 get_hit_reason().
length(),
369 typedef map<GBDATA*,GBDATA*> Cache;
376 cache[gb_child_group] = gb_parent_group;
379 Cache::const_iterator found = cache.find(gb_child_group);
380 return found == cache.end() ?
NULp : found->second;
385 for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
386 GBDATA *gb_remaining_father = lookupParent(*del);
387 if (gb_remaining_father) {
388 while (gb_remaining_father) {
389 if (deleted_groups.find(gb_remaining_father) == deleted_groups.end()) {
392 gb_remaining_father = lookupParent(gb_remaining_father);
399 for (GBDATAset::const_iterator del = deleted_groups.begin(); del != deleted_groups.end(); ++del) {
404 for (Cache::iterator c = cache.begin(); c != cache.end(); ++c) {
405 GBDATA *gb_child = c->first;
406 GBDATA *gb_parent = c->second;
407 if (deleted_groups.find(gb_parent) != deleted_groups.end()) {
408 defineParentOf(gb_child, translate.
lookupParent(gb_parent));
417 #define TRIGGER_UPDATE_GROUP_RESULTS "/tmp/trigger/group_result_update"
422 typedef set<GroupSearch*> GroupSearchSet;
424 GroupSearchSet searches;
437 void remove_callbacks(
GBDATA *gb_main);
439 void trigger_group_search_update() {
GB_touch(gb_trigger); }
443 cbs_installed(
false),
461 add_callbacks(gb_main);
468 GBDATA *gb_main = gs->get_gb_main();
469 remove_callbacks(gb_main);
472 bool empty()
const {
return searches.empty(); }
475 deleted_groups.clear();
476 modified_groups.clear();
479 return !(deleted_groups.empty() && modified_groups.empty());
483 if (has_notifications()) {
485 for (GroupSearchSet::iterator gs = searches.begin(); gs != searches.end(); ++gs) {
489 clear_notifications();
497 if (!mark_as_deleted) {
498 if (!
GB_entry(gb_node,
"group_name")) {
499 mark_as_deleted =
true;
503 if (mark_as_deleted) {
532 else cbs_installed =
true;
535 void GroupSearchCommon::remove_callbacks(
GBDATA *gb_main) {
543 else cbs_installed =
false;
554 redisplay_cb(redisplay_results_cb),
563 if (common->
empty()) {
574 bool search_all = trees_to_search.empty();
575 for (
int t = 0; tree_names[t]; ++t) {
576 if (search_all || trees_to_search.find(tree_names[t]) != trees_to_search.end()) {
577 searched_tree.push_back(
SearchedTree(tree_names[t], gb_main));
602 TreeNode *keeld = node->keelTarget();
603 return keeld ?
DOWNCAST(GroupSearchTree*, keeld) : &*node;
627 if (!clade->is_leaf() && clade->is_normal_group()) {
633 if (node->is_keeled_group()) {
674 #if defined(ASSERTION_USED)
687 if (node->
gb_node == gb_group) {
689 return pnode->
gb_node == gb_parent_group;
698 return ppnode->
gb_node == gb_parent_group;
701 #if defined(ASSERTION_USED)
707 return gb_parent_group ==
NULp;
711 double GroupSearchTree::weighted_branchlength_sum(
int group_size)
const {
712 int leafs = get_leaf_count();
713 double sum =
father ? get_branchlength() * leafs * (group_size-leafs) : 0.0;
716 sum += get_leftson()->weighted_branchlength_sum(group_size);
717 sum += get_rightson()->weighted_branchlength_sum(group_size);
723 void GroupSearchTree::calc_average_ingroup_distance(
int group_size)
const {
724 long pairs =
long(group_size)*(group_size-1)/2;
727 double wbranchsum = weighted_branchlength_sum(group_size);
728 aid = wbranchsum / pairs;
738 typedef set< RefPtr<GBDATA> > ExistingHits;
740 ExistingHits existing_hits;
744 existing_hits.insert(prev->get_pointer());
748 bool match_unlisted = mode&
GSM_ADD;
759 SearchedTreeContainer searched_tree;
766 long overall_iter_count = 0;
767 for (SearchedTreeIter st = searched_tree.begin(); st != searched_tree.end(); ++st) {
768 overall_iter_count += st->get_edge_iteration_count();
772 arb_progress progress(
"Searching groups", overall_iter_count);
774 bool load_failures =
false;
775 for (SearchedTreeIter st = searched_tree.begin(); !error && st != searched_tree.end(); ++st) {
776 GroupSearchRoot *troot = st->get_tree_root();
782 progress.
inc_by(st->get_edge_iteration_count());
783 load_failures =
true;
786 CandidateList candidate;
798 gb_parent_group =
NULp;
810 gb_parent_group = gb_group;
818 gb_parent_group = gb_group;
821 ExistingHits::iterator prev_hit = existing_hits.find(gb_group);
823 bool was_listed = prev_hit != existing_hits.end();
824 bool test_match = !was_listed == match_unlisted;
855 while (e != start && !error);
859 bool was_listed = !match_unlisted;
860 for (CandidateList::iterator cand = candidate.begin(); !error && cand != candidate.end(); ++cand) {
861 target_group.aimTo(*cand);
864 if (query_expr->
matches(target_group, hit_reason)) {
871 ExistingHits::iterator prev_hit = existing_hits.find(cand->get_group().get_pointer());
872 gs_assert(prev_hit != existing_hits.end());
873 existing_hits.erase(prev_hit);
877 target_group.unAim();
878 st->flush_loaded_tree();
884 SearchedTreeContainer reduced;
885 for (
unsigned t = 0; t<searched_tree.size(); ++t) {
886 if (!searched_tree[t].failed_to_load()) {
887 reduced.push_back(searched_tree[t]);
890 int failed_trees = searched_tree.size()-reduced.size();
892 swap(reduced, searched_tree);
895 if (!match_unlisted && !error) {
899 if (existing_hits.find(prev->get_pointer()) != existing_hits.end()) {
911 error = clusterDuplicates();
919 sortedByOrder =
false;
925 inline bool contains(
const WordSet& ws,
const string& w) {
return ws.find(w) != ws.end(); }
929 char *namedup = strdup(name);
935 for (
int i = 0; w[i]; ++i) {
936 if (!
contains(ignored_words, w[i])) words.insert(w[i]);
940 for (string::iterator c = s.begin(); c != s.end(); ++c) {
952 tree(g.get_tree_data())
964 return words.
isNull() ? 1 : words->size();
976 string wordSeparators;
990 min_words(min_words_),
991 wordSeparators(wordSeparators_)
996 for (WordSet::const_iterator wi = ignored_words_.begin(); wi != ignored_words_.end(); ++wi) {
999 ignored_words.insert(word);
1016 if (max_possible_word_matches<min_words)
return false;
1023 return gi1.
name.compare(gi2.
name) == 0;
1034 int matched_words = 0;
1035 for (WordSet::const_iterator wi = gi1.
words->begin(); wi != gi1.
words->end(); ++wi) {
1039 return matched_words>=min_words ? matched_words :
false;
1053 GroupClusterSet members;
1056 mutable vector<uint8_t> lookup;
1058 inline bool valid(
int i)
const {
return i >= 0 && i<num_groups; }
1059 inline bool have_lookup()
const {
return !lookup.empty(); }
1063 : num_groups(num_of_groups)
1068 members(other.members),
1069 num_groups(other.num_groups)
1074 if (!have_lookup()) {
1075 lookup.resize(num_groups,
int(
false));
1076 for (GroupClusterCIter ci =
begin(); ci !=
end(); ++ci) {
1085 if (have_lookup()) {
1086 for (GroupClusterCIter ci =
begin(); ci !=
end(); ++ci) lookup[*ci] =
false;
1094 if (have_lookup()) lookup[i] =
true;
1099 if (have_lookup()) lookup[i] =
false;
1108 bool empty()
const {
return members.empty(); }
1109 size_t size()
const {
return members.size(); }
1111 GroupClusterCIter
begin()
const {
return members.begin(); }
1112 GroupClusterCIter
end()
const {
return members.end(); }
1124 listDups(listDups_),
1152 did_match = data1 == data2;
1156 did_match = data1 != data2;
1180 int *firstIndexOfRow;
1181 void init_firstIndexOfRow() {
1182 firstIndexOfRow[0] = 0;
1183 for (
int y = 1;
y<size; ++
y) {
1184 firstIndexOfRow[
y] = firstIndexOfRow[
y-1]+(
y-1);
1191 lin_size(size*(size-1)/2),
1192 firstIndexOfRow(new
int[size])
1195 init_firstIndexOfRow();
1198 delete [] firstIndexOfRow;
1203 if (x>y)
swap(x, y);
1209 return firstIndexOfRow[
y]+x;
1212 #if defined(UNIT_TESTS)
1213 void to_xy(
int lin,
int& x,
int&
y)
const {
1214 for (y = 1; y<size && lin>=
y; ++
y) lin -= y;
1225 vector<uint8_t> name_matches;
1226 vector<bool> tree_matches;
1228 vector<uint8_t> words;
1233 int pairIdx(
int i,
int j)
const {
return symmap.
linear_index(i, j); }
1234 void calc_matches(
GBDATA *gb_main);
1236 int fits_into_cluster(
int idx,
const GroupCluster& cluster,
bool strong_fit)
const {
1238 bool enough_words = min_words<2 || words[idx] >= min_words;
1244 bool fitsAll =
true;
1245 bool weakFitAny =
true;
1247 for (GroupClusterCIter ci = cluster.
begin(); fitsAll && ci != cluster.
end(); ++ci) {
1248 const int pi = pairIdx(idx, *ci);
1249 bool fitWeak = name_matches[pi] >= min_words;
1251 fitsAll = fitWeak && tree_matches[pi];
1252 weakFitAny = weakFitAny || fitWeak;
1255 if (fitsAll) fitting = idx;
1256 else if (weakFitAny && !strong_fit) fitting = -idx;
1261 int find_next_group_fitting_into(
const GroupCluster& cluster,
int behind_idx,
bool strong_fit)
const {
1272 const int gcount = groups->
size();
1275 for (
int idx = behind_idx+1; idx<gcount && !fitting; ++idx) {
1276 fitting = fits_into_cluster(idx, cluster, strong_fit);
1285 int find_next_candidate_group_fitting_into(
const GroupCluster& cluster,
const vector<int>& candidates,
int& cand_idx,
bool strong_fit)
const {
1294 const int cand_size = candidates.size();
1297 for (
int cidx = cand_idx+1; cidx<cand_size; ++cidx) {
1298 int idx = candidates[cidx];
1300 fitting = fits_into_cluster(idx, cluster, strong_fit);
1318 criteria(criteria_),
1319 symmap(groups->size()),
1321 delivered(groups->size())
1323 calc_matches(gb_main);
1333 int this_id = next_id++;
1334 for (GroupClusterCIter ci = ofCluster.
begin(); ci != ofCluster.
end(); ++ci) {
1361 const int maxidx = groups->
size();
1363 for (
int idx = 0; idx<maxidx; ++idx) {
1364 int thisWords = words[idx];
1366 if (thisWords>maxWords && (ignore_delivered ? !
already_delivered(idx) :
true)) {
1367 maxWords = thisWords;
1376 void Clusterer::calc_matches(
GBDATA *gb_main) {
1377 const int gcount = groups->
size();
1379 const long way_to_go =
long(gcount) + lin_range;
1383 name_matches.reserve(lin_range);
1384 tree_matches.reserve(lin_range);
1387 info.reserve(gcount);
1398 info.push_back(
GroupInfo(*g, prep_wordwise, sens, wordSeparators, ignoredWords));
1399 if (prep_wordwise) {
1407 for (
int i1 = 0; i1<gcount && !progress.aborted(); ++i1) {
1408 for (
int i2 = i1+1; i2<gcount && !progress.aborted(); ++i2) {
1422 int gcount = groups->
size();
1427 cluster.
insert(start_idx);
1432 int pcount = start_idx;
1433 int curr_idx = start_idx;
1434 while (!progress_build.
aborted()) {
1435 const int addable = find_next_group_fitting_into(cluster, curr_idx,
false);
1436 if (!addable)
break;
1444 weakCand.
insert(-addable);
1445 curr_idx = -addable;
1449 progress_build.
inc_by(curr_idx-pcount);
1458 if (!weakCand.
empty()) {
1462 for (GroupClusterCIter w = weakCand.
begin(); w != weakCand.
end(); ++w) {
1463 int nameFitsAll =
true;
1464 for (GroupClusterCIter ci = cluster.
begin(); nameFitsAll && ci != cluster.
end(); ++ci) {
1465 int pi = pairIdx(*w, *ci);
1466 nameFitsAll = name_matches[pi];
1468 if (nameFitsAll) toAdd.
insert(*w);
1471 for (GroupClusterCIter a = toAdd.
begin(); a != toAdd.
end(); ++a) cluster.
insert(*a);
1478 progress_build.
done();
1484 return elems*elems/2-elems;
1491 vector<int> candidates;
1495 const int addable = find_next_group_fitting_into(curr, idx,
true);
1496 if (!addable)
break;
1498 candidates.push_back(addable);
1503 if ((candidates.size()+curr.
size()) > best.
size()) {
1504 stack<int> previous;
1507 const int del_size = delivered.
size();
1508 const unsigned long permutation_count =
permutations(candidates.size());
1510 while (!progress_cluster.
aborted()) {
1511 int addable = find_next_candidate_group_fitting_into(curr, candidates, curr_idx,
true);
1515 previous.push(curr_idx);
1521 const unsigned long permutations_left =
permutations(candidates.size()-best.
size());
1522 const double done_percent = (permutation_count-permutations_left) /
double(permutation_count);
1523 const double overall_done_percent = done_low + (done_high-done_low)*done_percent;
1527 if (previous.empty())
break;
1529 const int last_cidx = previous.top();
1530 const int last_add = candidates[last_cidx];
1532 curr.
erase(last_add);
1534 curr_idx = last_cidx;
1536 const int rest_cand = candidates.size() - (curr_idx+1);
1537 const size_t poss_size = rest_cand + curr.
size();
1538 if (poss_size<best.
size())
break;
1554 const int gcount = groups->
size();
1560 const int extension_count = 1+(wanted_words-1-old_min_words);
1561 const double done_per_extension = 1.0/extension_count;
1563 int first_index = 0;
1565 for (
int start_idx = first_cluster_found_from_index; start_idx<max_start_idx && !progress_cluster.
aborted(); ++start_idx) {
1570 extendClusterToBiggest(curr, start_idx, best, progress_cluster, 0.0, done_per_extension);
1571 if (!first_index && !best.
empty()) {
1572 first_cluster_found_from_index = first_index = start_idx;
1577 if (wanted_words>old_min_words && !best.
empty() && !progress_cluster.
aborted()) {
1581 for (
int fewer_words = wanted_words-1; fewer_words>=old_min_words && !progress_cluster.
aborted(); --fewer_words, ++ext_done) {
1587 const double done_start = ext_done*done_per_extension;
1588 extendClusterToBiggest(curr, 0, best, progress_cluster, done_start, done_start+done_per_extension);
1600 int gcount = groups->
size();
1610 arb_progress progress_cluster(
"[pass 2/2: fast duplicate search]",
long(max_i));
1611 for (
int i = 0; i<max_i && !progress_cluster.
aborted(); ++i) {
1615 if (!curr.
empty()) {
1626 long groups_with_min_words = 0;
1627 for (
int gidx = 0; gidx<gcount; ++gidx) {
1628 if (words[gidx]>=min_words) ++groups_with_min_words;
1631 arb_progress progress_cluster(
"[pass 2/2: thorough duplicate search]", groups_with_min_words);
1633 int first_cluster_found_from_index = 0;
1634 while (max_words >= min_words && !progress_cluster.
aborted()) {
1640 first_cluster_found_from_index = 0;
1647 progress_cluster.
done();
1651 GB_ERROR GroupSearch::clusterDuplicates() {
1653 bool enough_hits = found->
size()>=2;
1656 arb_progress progress(
"Restricting to duplicate groups", 2L);
1657 Clusterer clusterer(gb_main, found, dups);
1659 if (clusterer.max_cluster_start_index()<0) {
1660 enough_hits =
false;
1665 clusterer.find_and_deliverTo(*found);
1670 clusterer.deliverRest(*nonDupGroups);
1671 found = nonDupGroups;
1675 if (!error) error = progress.error_if_aborted();
1678 if (!enough_hits && !error) {
1687 if (!sortedByOrder) sort_results();
1704 bool erased = first_removed != found.end();
1706 found.erase(first_removed, found.end());
1707 invalidate_widths();
1712 return modified != found.end();
1720 bool last_was_modifier =
false;
1721 bool reversed =
false;
1723 SortCriteria::const_iterator crit = by.begin();
1724 while ((!cmp || last_was_modifier) && crit != by.end()) {
1750 return reversed ? cmp>0 : cmp<0;
1761 FoundGroupContainer::iterator del = found.begin();
1764 invalidate_widths();
1773 g->track_max_widths(w);
1790 for (SortCriteria::const_iterator sc = sorted_by->begin(); sc != sorted_by->end(); ++sc) {
1842 if (show_tree_name) {
1859 bool changed =
false;
1863 if (erased || changed) {
1880 if (!order.empty() && order.front() == gsc) {
1884 sortedByOrder =
false;
1891 SortCriteria::iterator dup = find(order.begin(), order.end(), gsc);
1892 if (dup != order.end()) {
1893 SortCriteria::iterator pre = dup;
1894 do --pre;
while (pre != order.end() && *pre ==
GSC_REVERSE);
1896 if (pre == order.end()) pre = order.begin();
1900 order.erase(pre,dup);
1904 order.push_front(gsc);
1905 sortedByOrder =
false;
1910 void GroupSearch::sort_results() {
1911 if (!order.empty()) {
1914 sortedByOrder =
true;
1924 dups =
new DupCriteria(listDups,
DupNameCriterion(ntype, sens, min_words, ignored_words, wordSeparators), ttype, min_cluster_size);
1929 string2WordSet(ignored_words, ignoredWordsSet, wordSeparators, none);
1930 setDupCriteria(listDups, ntype, sens, min_words, ignoredWordsSet, wordSeparators, ttype, min_cluster_size);
1939 if (idx<found->size())
return (*found)[idx].delete_from_DB();
1940 return "index out-of-bounds";
1949 error =
group->delete_from_DB();
1951 error = ta.
close(error);
1974 if (legal_hit_index())
return &queried[hit_idx];
2085 if (!input_name || !input_name[0]) {
2086 error =
"Error: empty input groupname";
2090 bool know_hit = hit_idx>=0 && unsigned(hit_idx)<queried.
size();
2108 if (idx<found->size()) {
2109 return (*found)[idx].rename_by_ACI(acisrt, *found, idx);
2111 return "index out-of-bounds";
2116 if (has_results()) {
2123 error =
group->rename_by_ACI(acisrt, *found, idx);
2125 error = ta.
close(error);
2131 if (idx<found->size()) {
2132 return (*found)[idx].change_folding(mode);
2134 return "index out-of-bounds";
2139 return common->get_parent_cache().lookupParent(gb_group);
2145 gb_group = get_parent_group(gb_group);
2146 if (gb_group) ++nesting;
2161 GBDATA *gb_group = g->get_pointer();
2162 targetGroups.insert(gb_group);
2168 while (!testParentsOf.empty()) {
2170 for (GBDATAset::iterator t = testParentsOf.begin(); t != testParentsOf.end(); ++t) {
2171 GBDATA *gb_parent_group = get_parent_group(*t);
2172 if (gb_parent_group && targetGroups.find(gb_parent_group) == targetGroups.end()) {
2173 addedParents.insert(gb_parent_group);
2174 targetGroups.insert(gb_parent_group);
2177 testParentsOf = addedParents;
2182 for (GBDATAset::iterator n = targetGroups.begin(); n != targetGroups.end() && !
error; ++n) {
2187 SearchedTreeContainer searched_tree;
2190 for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !
error; ++t) {
2191 GBDATA *gb_tree_data = t->get_tree_data();
2195 if (targetGroups.find(gb_node) == targetGroups.end()) {
2203 return ta.
close(error);
2207 SearchedTreeContainer searched_tree;
2211 for (SearchedTreeIter t = searched_tree.begin(); t != searched_tree.end() && !
error; ++t) {
2214 if (t->get_tree_data() == g->get_tree_data()) {
2215 groupsFoundInTree.insert(g->get_pointer());
2219 if (!groupsFoundInTree.empty()) {
2221 GroupSearchRoot *troot = t->get_tree_root();
2229 if (groupsFoundInTree.find(node->
gb_node) != groupsFoundInTree.end()) {
2235 while (sub != stop) {
2238 if (leaf->
name) speciesInGroup.insert(leaf->
name);
2243 if (species.empty()) {
2245 species = speciesInGroup;
2251 speciesInGroup.begin(), speciesInGroup.end(),
2252 species.begin(), species.end(),
2254 inserter(combined, combined.begin())
2257 if (combined.empty()) {
2258 error =
"No species is member of ALL groups";
2264 speciesInGroup.begin(), speciesInGroup.end(),
2265 species.begin(), species.end(),
2267 inserter(combined, combined.begin())
2277 while (e != start && !error);
2284 if (!targetSpecies.empty()) {
2291 if (targetSpecies.find(name) != targetSpecies.end()) {
2303 size_t targetted = targetSpecies.size();
2304 if (found<targetted) {
2305 size_t zombies = targetted-found;
2313 if (idx<found->size()) {
2318 error = collectSpecies(groups,
UNITE, targetSpecies);
2319 if (!error)
set_marks_of(targetSpecies, gb_main, mode);
2326 if (has_results()) {
2328 error = collectSpecies(*found, cmode, targetSpecies);
2329 if (!error)
set_marks_of(targetSpecies, gb_main, mode);
2393 double pc = 100.0*marked/size;
2406 group_search(group_search_)
2419 bool directParentOnly;
2421 mutable GBDATA *gb_parent;
2422 mutable int distance;
2438 group_search(group_search_),
2455 if (!gb_parent)
return strdup(
"");
2464 return "parent-name";
2489 if (query_expr.isNull()) {
2520 if (query_expr.isNull()) {
2528 query_expr.setNull();
2539 enum GroupListType {
2546 GLT_NAME_AND_PARENT,
2553 ParentCache& pcache = GroupSearch::get_common()->get_parent_cache();
2559 entries.put(strdup(g->get_name()));
2578 case GLT_NAME_FOLD: {
2579 const char *
format = g->is_folded() ?
"[%s]" :
"%s";
2583 case GLT_NAME_AND_PARENT: {
2589 entries.put(strdup(g->get_name()));
2593 case GLT_KNAME_NEST: {
2594 int kstate = g->get_keeled();
2595 const char *kprefix = kstate ? (kstate == 1 ?
"!" :
"?") :
"";
2603 const char *found_entries = &*found_entriesP;
2604 return that(found_entries).is_equal_to(expected_entries);
2614 ARB_ERROR error = gs.collectSpecies(groups, cmode, species);
2619 for (SpeciesNames::const_iterator n = species.begin(); n != species.end(); ++n) {
2620 entries.
put(n->c_str());
2625 const char *contained_species = &*contained_speciesP;
2628 return all().ofgroup(fulfilled);
2637 return groupListingIs(results, type, expected_entries);
2643 const int MAX_ORDER = 20;
2644 char found_order[MAX_ORDER];
2648 for (SortCriteria::const_iterator i = order.begin(); i != order.end(); ++i) {
2665 found_order[off++] = c;
2668 found_order[off] = 0;
2669 return that(found_order).is_equal_to(expected_order);
2678 fulfilled.
add(hasOrder(gs, expected_order));
2679 fulfilled.
add(resultListingIs(gs, GLT_NAME_TREE, expected_entries));
2681 return all().ofgroup(fulfilled);
// Counts how often trace_refresh_cb() has been invoked.
static int refreshes_traced = 0;

// Callback that only records its own invocation by bumping 'refreshes_traced'.
static void trace_refresh_cb() {
    refreshes_traced += 1;
}
2687 void TEST_group_search() {
2691 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
2692 refreshes_traced = 0;
2698 allGroups.perform_search(
GSM_FIND);
2702 "last/tree_test*another group/tree_test*outer/tree_test*inner/tree_test*test/tree_test*outer/tree_test*test/tree_test*xx/tree_test*"
2703 "outer/tree_tree2*g2/tree_tree2*xx/tree_tree2*test/tree_tree2*outer/tree_tree2*inner/tree_tree2*test/tree_tree2*"
2704 "zombsub/tree_zomb*zomb/tree_zomb*ZOMB/tree_zomb*dup/tree_zomb*inner outer group/tree_zomb*inner group/tree_zomb*outer group/tree_zomb*g4/tree_zomb*g3/tree_zomb*g2/tree_zomb*xx/tree_zomb*yy/tree_zomb*eee/tree_zomb"
2708 allGroups.addSortCriterion(
GSC_NAME);
2712 "another group/tree_test*dup/tree_zomb*eee/tree_zomb*"
2713 "g2/tree_tree2*g2/tree_zomb*"
2714 "g3/tree_zomb*g4/tree_zomb*"
2715 "inner/tree_test*inner/tree_tree2*"
2716 "inner group/tree_zomb*inner outer group/tree_zomb*last/tree_test*"
2717 "outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*"
2718 "outer group/tree_zomb*"
2719 "test/tree_test*test/tree_test*test/tree_tree2*test/tree_tree2*"
2720 "xx/tree_test*xx/tree_tree2*xx/tree_zomb*"
2721 "yy/tree_zomb*zomb/tree_zomb*zombsub/tree_zomb"
2726 tree2.insert(
"tree_tree2");
2727 allGroups.setSearchRange(tree2);
2728 allGroups.perform_search(
GSM_FIND);
2731 TEST_EXPECTATION(resultListingIs(allGroups, GLT_NAME_TREE,
"g2/tree_tree2*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*test/tree_tree2*test/tree_tree2*xx/tree_tree2"));
2740 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"another group*outer*outer*outer*outer*inner outer group*inner group*outer group"));
2744 some.forgetQExpressions();
2747 TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE,
"outer/tree_test*inner/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
2752 const char *BY_NAME_FWD =
"inner/tree_test*inner/tree_tree2*outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2";
2753 const char *BY_NAME_REV =
"outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_test*inner/tree_tree2";
2765 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_TREENAME,
"T!N",
"outer/tree_test*outer/tree_test*inner/tree_test*outer/tree_tree2*outer/tree_tree2*inner/tree_tree2"));
2766 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_REVERSE,
"!T!N",
"inner/tree_tree2*outer/tree_tree2*outer/tree_tree2*inner/tree_test*outer/tree_test*outer/tree_test"));
2769 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_TREEORDER,
"O!T!N",
"inner/tree_test*outer/tree_test*outer/tree_test*inner/tree_tree2*outer/tree_tree2*outer/tree_tree2"));
2770 TEST_EXPECTATION(addingCriterionProduces(some,
GSC_REVERSE,
"!O!T!N",
"outer/tree_tree2*outer/tree_tree2*inner/tree_tree2*outer/tree_test*outer/tree_test*inner/tree_test"));
2772 some.forgetSortCriteria();
2778 TEST_EXPECTATION(resultListingIs(some, GLT_NAME_TREE,
"outer/tree_test*outer/tree_test*outer/tree_tree2*outer/tree_tree2"));
2781 some.forgetQExpressions();
2784 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*outer*outer*xx*xx*xx"));
2786 some.forgetQExpressions();
2789 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*outer*outer*xx*xx*xx*another group*inner*inner*inner outer group*inner group*outer group"));
2794 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*xx*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group"));
2796 const char *FIRST_XX_REMOVED =
"outer*outer*another group*inner*outer*outer*xx*inner*xx*inner outer group*inner group*outer group";
2800 some.remove_hit(-10);
TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
2801 some.remove_hit(100);
TEST_EXPECTATION(resultListingIs(some, GLT_NAME, FIRST_XX_REMOVED));
2805 some.forgetQExpressions();
2808 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"outer*outer*another group*outer*outer*inner outer group*inner group*outer group"));
2811 some.forgetQExpressions();
2814 TEST_EXPECTATION(resultListingIs(some, GLT_NAME,
"another group*inner outer group*inner group*outer group"));
2820 const char *TOP_GROUPS =
"last*another group*outer*test*outer*outer*zombsub*dup*inner outer group";
2827 keyed.forgetQExpressions();
2830 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*outer<test>*outer<xx>*outer<g2>*outer<test>*outer<inner>*outer<test>"));
2833 keyed.forgetQExpressions();
2836 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*outer<test>*outer<xx>*outer<g2>*g2<xx>*outer<test>*test<outer>*outer<inner>*outer<test>"));
2839 keyed.forgetQExpressions();
2843 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"last*test*zombsub*zombsub<zomb>*zombsub<ZOMB>*dup"));
2846 keyed.forgetQExpressions();
2851 keyed.forgetQExpressions();
2856 keyed.forgetQExpressions();
2861 keyed.forgetQExpressions();
2866 keyed.forgetQExpressions();
2869 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"outer<inner>*g2<xx>*g2<yy>*yy<eee>"));
2871 keyed.forgetQExpressions();
2875 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"g2<xx>*test<outer>*outer<inner>*outer group<g4>*outer group<g3>*outer group<g2>*g2<xx>*g2<yy>"));
2877 keyed.forgetQExpressions();
2880 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT,
"g2<xx>*test<outer>*outer group<g4>*outer group<g3>*outer group<g2>"));
2883 const char *EXPANDED_GROUPS =
"last*outer*outer<inner>*outer*outer*zombsub";
2884 keyed.forgetQExpressions();
2887 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));
2889 keyed.forgetQExpressions();
2892 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AND_PARENT, EXPANDED_GROUPS));
2895 keyed.forgetQExpressions();
2898 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_SIZE,
"another group(29)*outer(15)*outer(47)*zombsub(14)*inner outer group(19)*outer group(15)"));
2904 keyed.forgetQExpressions();
2907 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME,
"another group*outer*inner outer group*outer group"));
2909 const char *COMPLETELY_MARKED_GROUPS =
"test*xx*xx*g4*xx*eee";
2910 keyed.forgetQExpressions();
2913 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
2914 keyed.forgetQExpressions();
2917 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME, COMPLETELY_MARKED_GROUPS));
2918 keyed.forgetQExpressions();
2922 resultListingIs(keyed, GLT_NAME,
""));
2925 keyed.forgetQExpressions();
2932 keyed.forgetQExpressions();
2938 keyed.forgetQExpressions();
2941 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID,
"outer(1.0996)*outer(1.1605)"));
2943 keyed.forgetQExpressions();
2946 keyed.addSortCriterion(
GSC_AID);
2948 TEST_EXPECTATION(resultListingIs(keyed, GLT_NAME_AID,
"xx(0.0786)*xx(0.0786)*g3(0.0665)*dup(0.0399)*inner group(0.0259)"));
2958 refreshes_traced = 0;
2966 const char *ACI_add_tag =
"\"[TAG] \";dd";
2968 const char *BEFORE_RENAME =
"outer*inner*test*outer*test*outer*test*outer*inner*test*eee";
2969 const char *OUTER_PREFIXED =
"[TAG] outer*inner*test*outer*test*outer*test*outer*inner*test*eee";
2978 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"[X][TAG] outer*[X]inner*[X]test*[X]outer*[X]test*[X]outer*[X]test*[X]outer*[X]inner*[X]test*[X]eee"));
2987 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_1/11*inner_2/11*test_3/11*outer_4/11*test_5/11*outer_6/11*test_7/11*outer_8/11*inner_9/11*test_10/11*eee_11/11"));
2989 TEST_EXPECT_NO_ERROR(misc.rename_found_groups(
"command(\"/_.*$//\")|dd;\"_\";markedInGroup;\"/\";groupSize"));
2990 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_6/11*inner_4/5*test_7/7*outer_7/15*test_0/4*outer_20/47*test_6/12*outer_6/11*inner_4/5*test_2/6*eee_3/3"));
2993 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME,
"outer_L0=0.695293*inner_L1=0.269289*test_L0=0.160956*outer_L0=1.099650*test_L1=0.591923*outer_L0=1.160535*test_L1=0.726679*outer_L2=0.704352*inner_L3=0.265516*test_L1=0.303089*eee_L4=0.229693"));
3000 refreshes_traced = 0;
3009 TEST_EXPECTATION( resultListingIs(misc, GLT_NAME_FOLD,
"outer*inner*[test]*outer*[test]*outer*[test]*[outer]*[inner]*[test]*[eee]"));
3014 refreshes_traced = 0;
3027 TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*[test]*outer*[test]*[xx]*outer*[g2]*[xx]*[test]*[outer]*[inner]*[test]*zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3033 TEST_EXPECT_NO_ERROR( few.fold_found_groups(
GFM_EXPANDREC));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*[test]*outer*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*inner*[test]*" "zombsub*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3034 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_EXPANDREC));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*outer*inner*test*outer*test*[xx]*" "outer*[g2]*[xx]*test*outer*inner*test*" "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*eee"));
3035 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_COLLAPSE));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"last*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*" "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*" "zombsub*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));
3036 TEST_EXPECT_NO_ERROR( few.fold_found_groups(
GFM_EXPANDREC_COLLREST));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*outer*inner*[test]*[outer]*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*inner*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3037 TEST_EXPECT_NO_ERROR(
none.fold_found_groups(
GFM_EXPANDREC_COLLREST));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*[outer]*[inner]*[test]*[outer]*[test]*[xx]*" "[outer]*[g2]*[xx]*[test]*[outer]*[inner]*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*[inner outer group]*[inner group]*[outer group]*[g4]*[g3]*[g2]*[xx]*[yy]*[eee]"));
3038 TEST_EXPECT_NO_ERROR(misc.fold_found_groups(
GFM_EXPANDPARENTS));
TEST_EXPECTATION(resultListingIs(
all, GLT_NAME_FOLD,
"[last]*[another group]*outer*[inner]*[test]*outer*[test]*[xx]*" "outer*[g2]*[xx]*test*outer*[inner]*[test]*" "[zombsub]*[zomb]*[ZOMB]*[dup]*inner outer group*[inner group]*outer group*[g4]*[g3]*g2*[xx]*yy*[eee]"));
3041 refreshes_traced = 0;
3054 const char *INNER_SPECIES =
"McpCapri,McpMyco2,McpMycoi,McpSpeci,SpiMelli";
3058 TEST_EXPECTATION(speciesInGroupsAre(group2,
UNITE,
"AnaAbact,BacMegat,BacPaste,CloTyro2,CloTyro4,CloTyrob,StaAureu,StaEpide"));
3063 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT,
"outer*outer<inner>*test*outer*outer<test>*outer*outer<test>*test<outer>*outer<inner>*outer<test>*yy<eee>"));
3074 refreshes_traced = 0;
3076 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_AND_PARENT,
"outer*outer<inner>*test*test*outer*outer<outer>*outer<inner>*outer<test>*yy<eee>"));
3079 misc.forgetQExpressions();
3082 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_test*outer/tree_tree2*outer/tree_tree2"));
3085 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_test*outer/tree_tree2"));
3091 TEST_EXPECTATION(resultListingIs(misc, GLT_NAME_TREE,
"outer/tree_tree2"));
3097 refreshes_traced = 0;
3102 TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE,
"test/tree_test*test/tree_test*test/tree_tree2"));
3112 TEST_EXPECTATION(resultListingIs(outer, GLT_NAME_TREE,
"test/tree_test*test/tree_test"));
3119 void TEST_keeled_group_search() {
3123 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
3124 refreshes_traced = 0;
3130 GroupSearch keeledGroups(gb_main, traceRefresh_cb);
3131 GroupSearch normalGroups(gb_main, traceRefresh_cb);
3141 allGroups.perform_search(
GSM_FIND);
3142 keeledGroups.perform_search(
GSM_FIND);
3143 normalGroups.perform_search(
GSM_FIND);
3151 keeledGroups.get_results().size()+normalGroups.get_results().size());
3157 "outer/tree_tree2*g2/tree_tree2*"
3158 "outer/tree_removal*g2 [was: test]/tree_removal*"
3159 "lower/tree_groups*low2/tree_groups*twoleafs/tree_groups*low1/tree_groups*upper/tree_groups*"
3160 "twoleafs/tree_keeled*low2/tree_keeled*lower/tree_keeled*upper/tree_keeled*low1/tree_keeled*"
3161 "low2/tree_keeled_2*twoleafs/tree_keeled_2*lower/tree_keeled_2*upper/tree_keeled_2*low1/tree_keeled_2*allButOne/tree_keeled_2"
3165 "!twoleafs(L0)*!low2(L1)*?lower(L2)*"
3166 "!low2(L0)*?lower(L1)*!allButOne(L2)"
3171 keeledTrees.insert(
"tree_keeled");
3172 keeledTrees.insert(
"tree_keeled_2");
3174 allGroups.setSearchRange(keeledTrees);
3175 allGroups.perform_search(
GSM_FIND);
3179 "twoleafs*twoleafs<low2>*low2<lower>*lower<upper>*"
3193 "twoleafs*low2*lower*upper*low1*"
3194 "low2*twoleafs*lower*upper*low1*allButOne"
3201 "[twoleafs]*low2*[lower]*[upper]*low1*"
3202 "low2*twoleafs*lower*upper*low1*allButOne"
3209 "[twoleafs]*low2*lower*upper*low1*"
3210 "low2*twoleafs*lower*upper*low1*[allButOne]"
3217 "twoleafs*low2*lower*upper*low1*"
3218 "low2*twoleafs*lower*upper*low1*allButOne"
3223 "twoleafs(1.4310)*low2(1.4436)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"
3226 "low2(1.4436)*twoleafs(0.0087)*lower(1.0288)*upper(1.0288)*low1(1.1200)*"
3230 keeledTrees.insert(
"tree_groups");
3231 allGroups.setSearchRange(keeledTrees);
3232 allGroups.perform_search(
GSM_FIND);
3236 "lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*"
3239 "!twoleafs(L0)*!low2(L1)*?lower(L2)*upper(L3)*"
3245 "?lower(L1)*upper(L2)*low1(L1)*!allButOne(L2)"
3250 "lower(10)*low2(3)*twoleafs(2)*low1(7)*upper(5)*"
3255 "lower(5)*upper(5)*"
3268 "?lower(L2)*?lower(L1)*!twoleafs(L0)*!low2(L1)*!low2(L0)*!allButOne(L2)*lower(L0)*low2(L1)*twoleafs(L2)*low1(L1)*upper(L0)*upper(L3)*low1(L2)*twoleafs(L0)*upper(L2)*low1(L1)"
3285 mm.to_xy(lin, rx, ry);
3286 if (x>y)
swap(x, y);
3291 return all().ofgroup(fulfilled);
3294 void TEST_SymmetricMatrixMapper() {
3317 void TEST_group_duplicate_detection() {
3321 GroupSearchCallback traceRefresh_cb = makeGroupSearchCallback(trace_refresh_cb);
3324 refreshes_traced = 0;
3334 "1/outer/tree_test*"
3335 "1/outer/tree_test*"
3338 "3/outer/tree_tree2*"
3339 "3/outer/tree_tree2*"
3340 "4/test/tree_tree2*"
3359 "3/test/tree_tree2*"
3360 "3/test/tree_tree2*"
3361 "2/inner/tree_test*"
3362 "2/inner/tree_tree2*"
3363 "1/outer/tree_test*"
3364 "1/outer/tree_test*"
3365 "1/outer/tree_tree2*"
3366 "1/outer/tree_tree2"
3372 "0/another group/tree_test*"
3379 "0/inner group/tree_zomb*"
3380 "0/inner outer group/tree_zomb*"
3381 "0/outer group/tree_zomb*"
3384 "0/zombsub/tree_zomb"
3395 "1/outer/tree_test*"
3396 "1/outer/tree_test*"
3397 "1/outer/tree_tree2*"
3398 "1/outer/tree_tree2*"
3399 "2/inner/tree_test*"
3400 "2/inner/tree_tree2*"
3403 "3/test/tree_tree2*"
3404 "3/test/tree_tree2*"
3423 const char *word_sep =
" ";
3428 "1/another group/tree_test*"
3429 "1/inner group/tree_zomb*"
3430 "1/inner outer group/tree_zomb*"
3431 "1/outer group/tree_zomb*"
3433 "2/outer/tree_test*"
3434 "2/outer/tree_test*"
3435 "2/outer/tree_tree2*"
3436 "2/outer/tree_tree2*"
3440 "3/test/tree_tree2*"
3441 "3/test/tree_tree2*"
3447 "5/inner/tree_test*"
3448 "5/inner/tree_tree2*"
3457 "1/inner group/tree_zomb*"
3458 "1/inner outer group/tree_zomb"
3471 "1/inner outer group/tree_zomb*"
3472 "1/outer group/tree_zomb"
3481 "1/inner outer group/tree_zomb*"
3482 "1/inner,group/tree_zomb"
3486 ignore_group.insert(
"Group");
3491 "1/outer/tree_test*"
3492 "1/outer/tree_test*"
3493 "1/outer/tree_tree2*"
3494 "1/outer/tree_tree2*"
3495 "1/inner outer group/tree_zomb*"
3496 "1/outer group/tree_zomb*"
3500 "2/test/tree_tree2*"
3501 "2/test/tree_tree2*"
3503 "3/inner/tree_test*"
3504 "3/inner/tree_tree2*"
3505 "3/inner,group/tree_zomb*"
3525 "1/outer/tree_test*"
3526 "1/outer/tree_test*"
3527 "1/outer/tree_tree2*"
3528 "1/outer/tree_tree2*"
3532 "2/test/tree_tree2*"
3533 "2/test/tree_tree2*"
3539 "4/inner/tree_test*"
3540 "4/inner/tree_tree2*"
3563 "1/group inner outer/tree_test*"
3564 "1/group outer/tree_test*"
3565 "1/outer group/tree_tree2*"
3566 "1/inner outer group/tree_zomb*"
3567 "1/outer group/tree_zomb"
3575 static double bruteForce_calc_average_ingroup_distance(GroupSearchTree *node) {
3576 unsigned leafs = node->get_leaf_count();
3578 if (leafs == 1)
return 0.0;
3583 if (start == last) {
3585 start = start.next();
3589 double dist_sum = 0.0;
3591 for (
ARB_edge e1 = start; e1 != last; e1 = e1.
next()) {
3592 if (e1.is_edge_to_leaf()) {
3594 if (e2.is_edge_to_leaf()) {
3595 dist_sum += e1.dest()->intree_distance_to(e2.dest());
3602 #if defined(ASSERTION_USED)
3603 const unsigned calc_pairs = (leafs*(leafs-1))/2;
3607 return dist_sum/pairs;
3610 #define TEST_EXPECT_PROPER_AID(node) do{ \
3611 const double EPSILON = 0.000001; \
3612 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(node), \
3613 (node)->get_average_ingroup_distance(), \
3617 void TEST_ingroup_distance() {
3625 GroupSearchRoot *troot = stree.get_tree_root();
3629 GroupSearchTree *
rootNode = troot->get_root_node();
3630 GroupSearchTree *leftSon = rootNode->get_leftson();
3631 GroupSearchTree *grandSon = leftSon->get_rightson();
3633 GroupSearchTree *someLeaf = grandSon->get_leftson();
3634 while (!someLeaf->is_leaf()) {
3635 GroupSearchTree *L = someLeaf->get_leftson();
3636 GroupSearchTree *R = someLeaf->get_rightson();
3638 someLeaf = L->get_leaf_count() > R->get_leaf_count() ? L : R;
3643 GroupSearchTree *minSubtree = someLeaf->get_father();
3648 const double EPSILON = 0.000001;
3650 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(minSubtree), minSubtree->leftlen + minSubtree->rightlen, EPSILON);
3651 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(grandSon), 0.534927, EPSILON);
3652 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(leftSon), 0.976091, EPSILON);
3653 TEST_EXPECT_SIMILAR(bruteForce_calc_average_ingroup_distance(rootNode), 1.108438, EPSILON);
3657 TEST_EXPECT_PROPER_AID(someLeaf);
3658 TEST_EXPECT_PROPER_AID(minSubtree);
3659 TEST_EXPECT_PROPER_AID(grandSon);
3660 TEST_EXPECT_PROPER_AID(leftSon);
3661 TEST_EXPECT_PROPER_AID(rootNode);
3665 TEST_EXPECT_PROPER_AID(
DOWNCAST(GroupSearchTree*, e.
dest()));
3671 #endif // UNIT_TESTS
const char * get_tree_name() const
void compute_tree() OVERRIDE
bool big_enough(const GroupCluster &cluster) const
static GB_ERROR grl_hitcount(GBL_command_arguments *args)
GBDATA * GB_open(const char *path, const char *opent)
const std::string & get_hit_reason() const
const char * get_group_display(const FoundGroup &g, bool show_tree_name) const
GroupSearchRoot * get_tree_root()
void inc_to_avoid_overflow(PINT x)
compare_by_criteria(const SortCriteria &by_)
void put(const char *elem)
void forgetSortCriteria()
GroupClusterCIter begin() const
std::set< std::string > SpeciesNames
bool group_is_folded(GBDATA *gb_group)
int get_marked_pc() const
GBDATA * get_parent_group(GBDATA *gb_group) const
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
unsigned get_leaf_count() const FINAL_OVERRIDE
#define TRIGGER_UPDATE_GROUP_RESULTS
~GroupSearchRoot() FINAL_OVERRIDE
#define TEST_EXPECT_SIMILAR(expr, want, epsilon)
long GB_read_int(GBDATA *gbd)
GBDATA * GB_child(GBDATA *father)
GB_ERROR GB_add_hierarchy_callback(GBDATA *gb_main, const char *db_path, GB_CB_TYPE type, const DatabaseCallback &dbcb)
#define implicated(hypothesis, conclusion)
return string(buffer, length)
bool overlap_is_folded() const
GB_ERROR delete_group(size_t idx)
const WordSet & get_ignored_words() const
NestingLevelKey(const GroupSearch &group_search_)
static void collect_searched_trees(GBDATA *gb_main, const TreeNameSet &trees_to_search, SearchedTreeContainer &searched_tree)
bool has_group_info() const
void addSortCriterion(GroupSortCriterion gsc)
GB_ERROR GB_add_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
#define DOWNCAST_REFERENCE(totype, expr)
GroupSearchCommon * common
static void result_update_cb(GBDATA *, GroupSearchCommon *common)
void string_to_lower(string &s)
GB_ERROR delete_from_DB()
#define DEFINE_TREE_RELATIVES_ACCESSORS(TreeType)
void setDupCriteria(bool listDups, DupNameCriterionType ntype, GB_CASE sens, DupTreeCriterionType ttype, int min_cluster_size)
int get_tree_order() const
const FoundGroup * get_hit_group() const
GB_ERROR delete_found_groups()
match_expectation doesnt_report_error(const char *error)
const FoundGroup & get_group() const
GBDATA * GB_nextEntry(GBDATA *entry)
#define DEFINE_TREE_ROOT_ACCESSORS(RootType, TreeType)
const char * get_load_error() const
unsigned get_marked_count() const
bool contains_changed(GroupSearchCommon *common) const
void find_and_deliverTo(QueriedGroups &toResult)
void buildInferableClusterStartingWith(int start_idx, GroupCluster &cluster)
ARB_edge_type get_type() const
const char * get_name() const OVERRIDE
ARB_ERROR set_marks_in_found_groups(GroupMarkMode mode, CollectMode cmode)
void allow_lookup() const
double get_average_ingroup_distance() const
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
const char * get_name() const OVERRIDE
void inform_group(const GroupSearch &group_search, const string &hitReason)
bool isCorrectParent(TreeNode *node, GBDATA *gb_group, GBDATA *gb_parent_group)
static GB_ERROR grl_nesting(GBL_command_arguments *args)
int calc_nesting_level(GBDATA *gb_group) const
bool tree_matches(const GBDATA *data1, const GBDATA *data2) const
double get_average_ingroup_distance() const
const char * GBS_global_string(const char *templat,...)
const char * get_name() const OVERRIDE
int get_leaf_count() const
SmartPtr< GroupSearchRoot > GroupSearchRootPtr
void cat(const char *from)
bool isNull() const
test if SmartPtr is NULp
GroupRename_callenv(const QueriedGroups &queried_, int hit_idx_, const GBL_env &env_)
bool has_been_deleted(GBDATA *gb_node)
was_modified(GroupSearchCommon *common_)
void refresh_results_after_DBchanges()
const TreeNode * find_parent_with_groupInfo(bool skipKeeledBrothers=false) const
ARB_edge rootEdge(TreeRoot *root)
void findBestClusterBasedOnWords(int wanted_words, GroupCluster &best, arb_progress &progress_cluster, int &first_cluster_found_from_index)
bool already_delivered(int idx) const
GroupCluster(const GroupCluster &other)
static void set_marks_of(const SpeciesNames &targetSpecies, GBDATA *gb_main, GroupMarkMode mode)
#define ARRAY_ELEMS(array)
int name_matches(const GroupInfo &gi1, const GroupInfo &gi2) const
void setNull()
set SmartPtr to NULp
int max_cluster_start_index() const
GBDATA * GB_get_father(GBDATA *gbd)
const char * get_name() const OVERRIDE
GBDATA * get_gb_main() const
const GBL_call_env & get_callEnv() const
std::set< std::string > WordSet
int linear_index(int x, int y) const
GroupSearchTree(GroupSearchRoot *root)
#define DOWNCAST(totype, expr)
ARB_ERROR fold_found_groups(GroupFoldingMode mode)
DupNameCriterionType get_name_type() const
GB_ERROR check_no_parameter(GBL_command_arguments *args)
GB_ERROR GB_delete(GBDATA *&source)
int GB_string_comparator(const void *v0, const void *v1, void *)
static HelixNrInfo * start
ARB_edge parentEdge(TreeNode *son)
unsigned long permutations(int elems)
GroupInfo(const FoundGroup &g, bool prep_wordwise, GB_CASE sens, const char *wordSeparators, const WordSet &ignored_words)
GroupClusterCIter end() const
GroupCluster(int num_of_groups)
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
int get_keeledStateInfo() const
ARB_ERROR rename_by_ACI(const char *acisrt, const QueriedGroups &results, int hit_idx)
#define TEST_EXPECT(cond)
static void set_species_data(GBDATA *gb_species_data_)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
DupCriteria(bool listDups_, const DupNameCriterion &nameCrit_, DupTreeCriterionType ttype_, int minSize_)
std::set< GBDATA * > GBDATAset
const GroupSearchTree * get_clade() const
bool erase_deleted(GroupSearchCommon *common)
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
const char * get_word_separators() const
const char * get_name() const OVERRIDE
bool isSet() const
test if SmartPtr is not NULp
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
GB_CASE get_sensitivity() const
bool knows_details() const
list< Candidate > CandidateList
void addQueryExpression(CriterionOperator op, CriterionType type, CriterionMatch mtype, const char *expression)
vector< GroupInfo > GroupInfoVec
char * GBS_trim(const char *str)
GB_ERROR GBT_write_group_name(GBDATA *gb_group_name, const char *new_group_name, bool pedantic)
void deliverRest(QueriedGroups &toResult)
#define COMMAND_DROPS_INPUT_STREAMS(args)
const char * GBS_readable_size(unsigned long long size, const char *unit_suffix)
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
const QueriedGroups & queried
static void error(const char *msg)
std::set< std::string > TreeNameSet
unsigned get_group_size() const
GBDATA * GB_get_root(GBDATA *gbd)
GroupSearch(GBDATA *gb_main_, const GroupSearchCallback &redisplay_results_cb)
static void string2WordSet(const char *name, WordSet &words, const char *wordSeparators, const WordSet &ignored_words)
bool contains(int i) const
bool tree_is_loaded() const
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, bool dropEmptyTokens)
bool operator()(const FoundGroup &g)
ARB_ERROR rename_group(size_t idx, const char *acisrt)
expectation_group & add(const expectation &e)
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
has_been_deleted(GroupSearchCommon *common_)
size_t get_word_count() const
ASSERTING_CONSTEXPR_INLINE int info2bio(int infopos)
bool is_keeled_group() const
void track_max_widths(ColumnWidths &widths) const
const char * get_name() const
DupNameCriterion(DupNameCriterionType exact, GB_CASE sens_)
bool has_been_modified(GBDATA *gb_node)
set< int > GroupClusterSet
void notify_modified(GBDATA *gb_node)
bool iterate() const OVERRIDE
FoundGroupCIter end() const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
void deliverCluster(const GroupCluster &ofCluster, QueriedGroups &toResult)
GroupSearchTree * get_clade()
ARB_ERROR set_marks_in_group(size_t idx, GroupMarkMode mode)
bool wordwise_name_matching() const
GroupClusterSet::const_iterator GroupClusterCIter
GBQUARK GB_get_quark(GBDATA *gbd)
void fix_deleted_groups(const GBDATAset &deleted_groups)
int GB_read_flag(GBDATA *gbd)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
void forgetQExpressions()
void clear_notifications()
AP_tree_nlen * rootNode()
bool contains(const WordSet &ws, const string &w)
void remove_hit(size_t idx)
void track(int wName, int wReason, int nesting, int size, int marked, int clusID, double aid, bool keeled)
Candidate(GBDATA *gb_group_, GroupSearchTree *node_)
void remove(GroupSearch *gs)
ARB_ERROR group_set_folded(GBDATA *gb_group, bool folded)
static int max2width(const int &i)
char * GS_calc_resulting_groupname(GBDATA *gb_main, const QueriedGroups &queried, int hit_idx, const char *input_name, const char *acisrt, ARB_ERROR &error)
static GBL_command_definition groupRename_command_table[]
void sort_by(const SortCriteria &by)
Candidate(const FoundGroup &group_, GroupSearchTree *node_)
void append(QueryExpr *&tail)
void add_informed_group(const FoundGroup &group)
FoundGroupContainer::const_iterator FoundGroupCIter
GB_ERROR inc_and_error_if_aborted()
static GB_ERROR grl_aid(GBL_command_arguments *args)
#define TEST_EXPECTATION(EXPCTN)
SearchedTreeContainer::iterator SearchedTreeIter
const FoundGroup & get_group() const
int get_edge_iteration_count() const
char * GBT_join_strings(const CharPtrArray &strings, char separator)
static GB_ERROR grl_groupsize(GBL_command_arguments *args)
const char * get_name() const OVERRIDE
bool is_edge_to_leaf() const
int get_cluster_id() const
void set_cluster_id(int id)
GB_ERROR GB_remove_hierarchy_callback(GBDATA *gb_main, const char *db_path, GB_CB_TYPE type, const DatabaseCallback &dbcb)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
DupTreeCriterionType get_tree_type() const
GB_ERROR close(GB_ERROR error)
void GB_write_flag(GBDATA *gbd, long flag)
int min_cluster_size() const
void refresh_all_results()
#define FORMAT_2_OUT(args, fmt, value)
bool operator()(const FoundGroup &g)
GBDATA * get_tree_data() const
bool is_inferable() const
#define TEST_EXPECTATION__BROKEN(WANTED, GOT)
FoundGroupContainer::iterator FoundGroupIter
static void tree_node_deleted_cb(GBDATA *gb_node, GroupSearchCommon *common, GB_CB_TYPE cbtype)
void GB_touch(GBDATA *gbd)
GBQUARK GB_find_existing_quark(GBDATA *gbd, const char *key)
Clusterer(GBDATA *gb_main, SmartPtr< QueriedGroups > groups_, SmartPtr< DupCriteria > criteria_)
void nprintf(size_t maxlen, const char *templat,...) __ATTR__FORMAT_MEMBER(2)
int GB_read_byte(GBDATA *gbd)
bool matches(const QueryTarget &target, std::string &hit_reason) const
void forget_lookup() const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
static GB_ERROR grl_markedingroup(GBL_command_arguments *args)
GBDATA * lookupParent(GBDATA *gb_child_group) const
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
GBDATA * get_ACI_item() const
char * GB_read_string(GBDATA *gbd)
GB_ERROR GB_write_byte(GBDATA *gbd, int i)
int name_matches_wordwise(const GroupInfo &gi1, const GroupInfo &gi2) const
bool want_unique_groups() const
void GB_remove_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
~ParentGroupNameQueryKey() OVERRIDE
FoundGroupCIter begin() const
GBDATA * GBT_first_species(GBDATA *gb_main)
void GBT_get_tree_names(ConstStrArray &names, GBDATA *gb_main, bool sorted)
void GBT_message(GBDATA *gb_main, const char *msg)
std::list< GroupSortCriterion > SortCriteria
unsigned get_marked_count() const
const char * get_name() const
#define TEST_EXPECT_NO_ERROR(call)
const char * get_group_name() const
GBDATA * get_pointer() const
int get_keeledStateInfo() const
DECLARE_ASSIGNMENT_OPERATOR(GroupCluster)
const ColumnWidths & get_column_widths() const
const GroupRename_callenv & custom_env(GBL_command_arguments *args)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
const char * get_name() const OVERRIDE
DupNameCriterion(DupNameCriterionType wordwise, GB_CASE sens_, int min_words_, const WordSet &ignored_words_, const char *wordSeparators_)
void sort(CharPtrArray_compare_fun compare, void *client_data)
ARB_ERROR fold_group(size_t idx, GroupFoldingMode mode)
bool is_inner_edge() const
GBDATA * GBT_next_species(GBDATA *gb_species)
static const GBL_command_lookup_table & get_GroupRename_customized_ACI_commands()
void add(GroupSearch *gs)
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
vector< SearchedTree > SearchedTreeContainer
void add_candidate(const GroupSearch &group_search, Candidate &cand, const std::string &hit_reason)
const char * get_data() const
int get_min_wanted_words() const
RefPtr< GBDATA > gb_overlap_group
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
GBDATA * GB_nextChild(GBDATA *child)
void notify_deleted(GBDATA *gb_node)
const char * get_name() const OVERRIDE
ParentCache & get_parent_cache()
GBDATA * GBT_find_tree(GBDATA *gb_main, const char *tree_name)
GB_transaction ta(gb_var)
int calc_max_used_words(bool ignore_delivered)
const QueriedGroups & get_results()
void reset() const OVERRIDE
SymmetricMatrixMapper(int elements)
static void group_name_changed_cb(GBDATA *gb_group_name, GroupSearchCommon *common)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
TargetGroup(GBDATA *gb_main_, const char *treename_)
ParentGroupNameQueryKey(const GroupSearch &group_search_, CriterionType ctype)
bool operator()(const FoundGroup &g1, const FoundGroup &g2) const
void defineParentOf(GBDATA *gb_child_group, GBDATA *gb_parent_group)
ARB_ERROR rename_found_groups(const char *acisrt)
GroupSearchCommon * common
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
void aimTo(const Candidate &c)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
ARB_ERROR change_folding(GroupFoldingMode mode)
void perform_search(GroupSearchMode mode)
char * get_target_data(const QueryTarget &target, GB_ERROR &) const OVERRIDE
static int info[maxsites+1]
static GB_ERROR grl_hitidx(GBL_command_arguments *args)
const GroupSearchTree * get_clade() const
void set_min_wanted_words(int words)
unsigned get_zombie_count() const
SearchedTree(const char *name_, GBDATA *gb_main)
const char * get_name() const OVERRIDE
GroupMarkedKey(bool percent_)
#define TEST_EXPECT_EQUAL(expr, want)
const GBL_command_lookup_table & ACI_get_standard_commands()
SmartPtr< WordSet > words
bool failed_to_load() const
bool is_normal_group() const
GBDATA * GB_entry(GBDATA *father, const char *key)
bool legal_hit_index() const
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
TreeNode * source() const
unsigned get_zombie_count() const
static int iteration_count(int leafs_in_tree)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_write_int const char s