32 for (
int i=0; i<256; i++) {
45 for (
int i=0; i<nsets; i++)
delete sets[i];
51 void SpecSetRegistry::add(
const char *species_name) {
62 void SpecSetRegistry::add(
RSpecSet *rset) {
68 void SpecSetRegistry::dump_bitstring(
const char *tag,
unsigned char *bs) {
69 fprintf(stderr,
"%s: ", tag);
71 for (
long i = 0; i<bbytes; ++i) {
73 for (
int b = 0; b<8; ++b) {
74 fputc(val&128 ?
'X' :
'-', stderr);
81 GroupPenalty GroupMatchScorer::calcPenalty(
long removed,
long added,
long commonSpecies) {
86 long oldGroupSize = removed+commonSpecies;
87 long newGroupSize = added+commonSpecies;
88 double ingroupRatio = 1.0 - removed/double(oldGroupSize);
89 double outgroupRatio = added/double(newGroupSize);
91 if (insideLimits(ingroupRatio, outgroupRatio)) {
97 (1-ingroupRatio)*ingroupInvRelPen +
98 outgroupRatio*outgroupRelPen;
100 return GroupPenalty(penalty, ingroupRatio, outgroupRatio, oldGroupSize);
118 GroupPenalty match = calcPenalty(removed, added, commonSpecies);
123 long removed_keeled = commonSpecies;
124 long added_keeled = targetSize_keeled - commonSpecies_keeled;
126 GroupPenalty match_keeled = calcPenalty(removed_keeled, added_keeled, commonSpecies_keeled);
147 if (ingroupPep == 0.0 && ingroupInvRelPen == 0.0) {
148 return "one ingroup penalty has to be different from zero";
150 if (outgroupPep == 0.0 && outgroupRelPen == 0.0) {
151 return "one outgroup penalty has to be different from zero";
154 if (ingroupPep<0.0 || outgroupPep<0.0 || ingroupInvRelPen<0.0 || outgroupRelPen<0.0) {
155 return "invalid negative in/outgroup penalty";
159 return "invalid limits";
176 unsigned char *
const tbs = tset->
bitstring;
177 long *
const tbl = (
long*)tbs;
178 long *
const tmp_bl = (
long*)tmp_bitstring;
180 for (
long i=nsets-1; i>=0; i--) {
185 unsigned char *
const rbs = rset->
bitstring;
186 long *
const rbl = (
long*)rbs;
188 for (
long j = 0; j<blongs; ++j) {
189 tmp_bl[j] = tbl[j] & rbl[j];
191 for (
long j = 0; j<bbytes; ++j) {
192 same += set_bits[tmp_bitstring[j]];
197 min_penalty = penalty;
212 double SpecSetRegistry::search_and_remember_best_match_and_log_errors(
const TSpecSet *tset, FILE *log) {
241 if (earlierFoundGroup && log) {
243 fprintf(log,
"Group '%s' skipped (got superseded by group '%s'; same best target positions)\n",
244 earlierFoundGroup->
name,
254 fprintf(log,
"Group '%s' skipped (does NOT supersede group '%s'; same best target positions)\n",
256 earlierFoundGroup->
name);
277 bitstring[species_index/8] |= 1 << (species_index % 8);
285 const long *lbs = (
const long *)l->
bitstring;
286 const long *rbs = (
const long *)r->
bitstring;
290 dbs[j] = lbs[j] | rbs[j];
300 SpecSet(nodei, ssr, species_name),
308 SpecSet(nodei, ssr, species_name),
309 unfound_species_count(1-known_members)
314 unfound_species_count(l->unfound_species_count + r->unfound_species_count)
333 ss =
new RSpecSet(node, *
this, ls, rs);
338 if (rs->is_leaf_set())
delete rs;
354 nt_assert(contradicted(log, compare_node_info));
365 ss =
new TSpecSet(node, *
this, ls, rs);
366 if (compare_node_info) {
369 double penalty = search_and_remember_best_match_and_log_errors(ss, log);
370 int ipenalty =
int(penalty);
374 else if (ipenalty<0) node->
set_remark(
"# no match");
379 double penalty = search_and_remember_best_match_and_log_errors(ss, log);
382 fprintf(log,
"Group '%s' doesn't fit to any destination subtree.\n", node->
name);
391 if (ss && progress->
aborted()) {
413 oldGroupSize(oldGroupSize_),
414 newGroupSize(newGroupSize_)
428 #define IMPL_FORMATVALUE_CMD(args,fmt,value) \
429 COMMAND_DROPS_INPUT_STREAMS(args); \
430 GB_ERROR error = check_no_parameter(args); \
431 if (!error) FORMAT_2_OUT(args, fmt, value); \
468 if (log)
fputs(
"\nDetailed group changes:\n\n", log);
474 for (
long j=this->nsets-1; j>=0 && !
error; j--) {
475 RSpecSet *
const cset = this->sets[j];
477 char *old_group_name =
NULp;
480 bool insert_new_node = sourceNode && sourceNode->
name;
486 if (targetNode->
gb_node && (delete_old_nodes || insert_new_node)) {
487 if (!targetNode->
name) {
502 freenull(targetNode->
name);
507 if (insert_new_node) {
519 int newGroupsize = cset->
size();
521 char *new_group_name =
NULp;
525 GroupXfer_callenv callEnv(env, source_group_name, match, oldGroupsize, newGroupsize);
530 reassign(new_group_name, source_group_name);
532 free(source_group_name);
537 if (old_group_name) {
538 if (!delete_old_nodes) {
539 if (strcmp(old_group_name, new_group_name) != 0) {
541 if (log) fprintf(log,
"Destination group '%s' overwritten by '%s'", old_group_name, combined_name);
542 reassign(new_group_name, combined_name);
545 if (log) fprintf(log,
"Group '%s' remains unchanged", old_group_name);
550 if (strcmp(old_group_name, new_group_name) == 0) {
551 fprintf(log,
"Group '%s' remains unchanged", old_group_name);
554 fprintf(log,
"Destination group '%s' overwritten by '%s'", old_group_name, new_group_name);
560 if (log) fprintf(log,
"Group '%s' inserted", new_group_name);
568 fprintf(log,
" (Failed! Reason: %s)\n", error);
574 fprintf(log,
" (not placed optimal; penalty=%f; in=%.1f%%/%i; out=%.1f%%/%i%s)\n",
582 free(new_group_name);
588 if (old_group_name && log) {
589 fprintf(log,
"Destination group '%s' removed\n", old_group_name);
593 free(old_group_name);
599 if (!error) error = progress->error_if_aborted();
608 GBK_terminatef(
"invalid use of function NTREE_move_tree_info: mode=%i log_file=%s\n",
int(mode), log_file);
611 if (aci && !aci[0]) aci =
NULp;
621 log = fopen(log_file,
"w");
624 const char *overwriteMode =
NULp;
632 nt_assert(transferredGroups && overwriteMode);
635 "LOGFILE: transfer group information\n"
637 " Source tree '%s'\n"
638 "Destination tree '%s'\n"
640 "transferred groups: %s\n"
641 " overwrite mode: %s\n"
661 if (!error) error = rsource.
loadFromDB(tree_source);
667 AP_tree *source = rsource.get_root_node();
668 AP_tree *dest = rdest.get_root_node();
672 int dest_inodes = dest_nodes-dest_leafs;
676 int source_inodes = source_nodes-source_leafs;
679 size_t progress_steps = (compared_nodes + 2) * dest_inodes;
682 compare_progress.
subtitle(
"Register topology");
689 if (source_leafs < 3) error =
GB_export_errorf(
"tree '%s' has less than 3 species", tree_source);
691 compare_progress.
subtitle(
"Match subtrees");
698 compare_progress.
subtitle(
"Write group information");
709 nt_assert(new_root_rsetl && new_root_rsetr);
721 compare_progress.
subtitle(
"Save trees");
751 if (error) compare_progress.
done();
756 if (error) fprintf(log,
"\nError: %s\n", error);
757 else fputs(
"[done]\n", log);
771 void TEST_species_sets() {
789 for (
int i = 1; i<=8; ++i) {
793 for (
int i = 1; i<=8; ++i) {
802 for (
int i = 1; i<=9; ++i) {
806 for (
int i = 1; i<=9; ++i) {
816 for (
int i = 1; i<=64; ++i) {
820 for (
int i = 1; i<=64; ++i) {
829 for (
int i = 1; i<=65; ++i) {
833 for (
int i = 1; i<=65; ++i) {
GB_ERROR GB_begin_transaction(GBDATA *gbd)
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
int get_old_group_size() const
static GB_ERROR gxl_outgroup(GBL_command_arguments *args)
long bitstring_bytes() const
#define implicated(hypothesis, conclusion)
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
const TreeNode * get_root_node() const
double get_outgroup_ratio() const
#define DOWNCAST_REFERENCE(totype, expr)
AP_tree * matchedNode() const
const GroupPenalty & get_match() const
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
void storeBetterMatch(const GroupPenalty &match, AP_tree *matched_node)
GB_ERROR GB_IO_error(const char *action, const char *filename)
char * ARB_strdup(const char *str)
TreeRoot * get_tree_root() const
GB_ERROR GBT_log_to_tree_remark(GBDATA *gb_tree, const char *log_entry, bool stamp)
double get_ingroup_ratio() const
const char * GBS_global_string(const char *templat,...)
void addPenalty(double p)
void GBK_terminatef(const char *templat,...)
bool betterThan(const GroupPenalty &other) const
void GBS_free_hash(GB_HASH *hs)
static GB_ERROR gxl_keeled(GBL_command_arguments *args)
void use_as_remark(const SmartCharPtr &newRemark)
int get_groupsize() const
void finish(GB_ERROR &error)
#define ARRAY_ELEMS(array)
bool isPerfectMatch() const
const GBL_call_env & get_callEnv() const
const GroupPenalty & bestMatch() const
static GroupPenalty NoMatch()
static const GBL_command_lookup_table & get_GroupXfer_customized_ACI_commands()
void registerUnfound(const GroupMatchScorer &scorer, const TSpecSet &tset)
GB_ERROR GB_delete(GBDATA *&source)
unsigned count_leafs() const
long bitstring_longs() const
CONSTEXPR_INLINE int leafs_2_nodes(int leafs, TreeModel model)
static GBL_command_definition groupXfer_command_table[]
GB_ERROR GB_await_error()
static GB_ERROR gxl_newsize(GBL_command_arguments *args)
GBDATA * GB_create_container(GBDATA *father, const char *key)
GB_ERROR GBT_overwrite_tree(GBDATA *gb_tree, TreeNode *tree)
GBDATA * get_gb_tree() const
bool shouldHaveBeenKeeled() const
const char * get_group_name() const
static GB_ERROR gxl_ingroup(GBL_command_arguments *args)
long get_species_index(const char *species_name) const
GB_ERROR GBT_write_group_name(GBDATA *gb_group_name, const char *new_group_name, bool pedantic)
TSpecSet * find_best_matches_info(AP_tree *node, FILE *log, bool compare_node_info)
static void error(const char *msg)
RSpecSet * registerTree(AP_tree *node)
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
GB_ERROR loadFromDB(const char *name) FINAL_OVERRIDE
int get_unknown_members() const
int get_known_members() const
TSpecSet(AP_tree *nodei, const SpecSetRegistry &ssr, const char *species_name)
GroupXfer_callenv(const GBL_env &env_, const char *name_, const GroupPenalty &match_, int oldGroupSize_, int newGroupSize_)
void setScorer(const GroupMatchScorer &newScorer)
GroupPenalty matchGroups(const TSpecSet &sourceSet, const RSpecSet &targetSet, long commonSpecies, long overallSpecies)
fputs(TRACE_PREFIX, stderr)
GB_ERROR GB_export_errorf(const char *templat,...)
TYPE * ARB_calloc(size_t nelem)
const GroupPenalty & custom_match(GBL_command_arguments *args)
GB_ERROR linkToDB(int *zombies, int *duplicates) __ATTR__USERESULT
void init(AP_tree *nodei, const SpecSetRegistry &ssr)
void subtitle(const char *stitle)
bool contains_marked_species()
char * GB_read_string(GBDATA *gbd)
RSpecSet * search_best_match(const TSpecSet *tset, GroupPenalty &min_penalty)
SpecSetRegistry(long nspecies_, arb_progress *progress_, const GroupMatchScorer &scorer_)
#define IMPL_FORMATVALUE_CMD(args, fmt, value)
void set_remark(const char *newRemark)
void aw_message(const char *msg)
double calcUnknownMembersPenalty(const TSpecSet &sourceSet) const
double get_penalty() const
const GroupXfer_callenv & custom_gx_env(GBL_command_arguments *args)
RSpecSet(AP_tree *nodei, const SpecSetRegistry &ssr, const char *species_name)
static GB_ERROR gxl_groupname(GBL_command_arguments *args)
static GB_ERROR gxl_unknown(GBL_command_arguments *args)
SpecSet(AP_tree *nodei, const SpecSetRegistry &ssr, const char *species_name)
GB_ERROR check_validity() const
GB_ERROR write_node_information(FILE *log, bool delete_old_nodes, GroupsToTransfer what, const char *aci)
GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer &scorer, const char *aci)
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
unsigned char * allocate_bitstring() const
static GB_ERROR gxl_penalty(GBL_command_arguments *args)
int get_new_group_size() const
#define TEST_EXPECT_EQUAL(expr, want)
const GBL_command_lookup_table & ACI_get_standard_commands()
unsigned char * bitstring
long GBS_read_hash(const GB_HASH *hs, const char *key)
GBDATA * GB_entry(GBDATA *father, const char *key)
char * GBS_global_string_copy(const char *templat,...)
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
static GB_ERROR gxl_oldsize(GBL_command_arguments *args)