ARB
DI_compress_matrix.cxx
Go to the documentation of this file.
1 #include "di_matr.hxx"
2 
3 int DI_MATRIX::search_group(TreeNode *node, GB_HASH *hash, size_t& groupcnt, char *groupname, DI_ENTRY **groups) {
4  // returns 1 only if groupname != null and there are species for that group
5 
6  if (node->is_leaf()) {
7  if (!node->name) return 0;
8  DI_ENTRY *phentry = (DI_ENTRY *)GBS_read_hash(hash, node->name);
9  if (!phentry) { // Species is not a member of tree
10  return 0;
11  }
12  if (groupname) { // There is a group for this species
13  phentry->group_nr = groupcnt;
14  return 1;
15  }
16  return 0;
17  }
18  char *myname;
19  if (groupname) {
20  myname = groupname;
21  }
22  else {
23  myname = NULp;
24  if (node->gb_node && node->name) { // but we are a group
25  GBDATA *gb_grouped = GB_entry(node->gb_node, "grouped");
26  if (gb_grouped && GB_read_byte(gb_grouped)) {
27  myname = node->name;
28  }
29  }
30  }
31  int erg =
32  search_group(node->get_leftson(), hash, groupcnt, myname, groups) +
33  search_group(node->get_rightson(), hash, groupcnt, myname, groups);
34 
35  if (!groupname) { // we are not a sub group
36  if (myname) { // but we are a group
37  if (erg>0) { // it is used
38  groups[groupcnt] = new DI_ENTRY(myname, this);
39  groupcnt++;
40  }
41  }
42  return 0;
43  }
44  else {
45  return erg; // We are using an inherited group
46  }
47 }
48 
50  // create a hash table of species
52  char *error = NULp;
53  for (size_t i=0; i<nentries; i++) {
54  if (entries[i]->name) {
55  GBS_write_hash(hash, entries[i]->name, (long)entries[i]);
56  }
57  entries[i]->group_nr = -1;
58  }
59 
60  size_t groupcnt = 0;
61  DI_ENTRY **groups = new DI_ENTRY *[nentries];
62  search_group(tree, hash, groupcnt, NULp, groups); // search a group for all species and create groups
63 
64  DI_ENTRY **found_groups = NULp;
65 
66  if (groupcnt) { // if we found groups => make copy of group array
67  found_groups = new DI_ENTRY *[groupcnt];
68  memcpy(found_groups, groups, groupcnt*sizeof(*groups));
69  }
70 
71  int nongroupcnt = 0; // count # of species NOT in groups and copy them to 'groups'
72  for (size_t i=0; i<nentries; i++) {
73  if (entries[i]->name && entries[i]->group_nr == -1) { // species not in groups
74  groups[nongroupcnt] = new DI_ENTRY(entries[i]->name, this);
75  entries[i]->group_nr = nongroupcnt++;
76  }
77  else { // species is in group => add nentries to group_nr
79  }
80  }
81 
82  // append groups to end of 'groups'
83  if (groupcnt) {
84  memcpy(groups+nongroupcnt, found_groups, groupcnt*sizeof(*groups)); // copy groups to end of 'groups'
85  delete [] found_groups;
86  found_groups = NULp;
87 
88  for (size_t i=0; i<nentries; i++) {
89  if (entries[i]->name && entries[i]->group_nr>=int(nentries)) {
90  entries[i]->group_nr = entries[i]->group_nr - nentries + nongroupcnt; // correct group_nr's for species in groups
91  }
92  }
93  }
94 
95  groupcnt += nongroupcnt; // now groupcnt is # of groups + # of non-group-species
96 
97  AP_smatrix count(groupcnt);
98  AP_smatrix *sum = new AP_smatrix(groupcnt);
99 
100  // Now we have create a new DI_ENTRY table, let's do the matrix
101  for (size_t i=0; i<nentries; i++) {
102  for (size_t j=0; j<=i; j++) {
103  int x = entries[i]->group_nr; if (x<0) continue;
104  int y = entries[j]->group_nr; if (y<0) continue;
105  // x,y are the destination i,j
106  count.set(x, y, count.get(x, y)+1.0);
107  sum->set(x, y, sum->get(x, y)+matrix->get(i, j));
108  }
109  }
110 
111  for (size_t i=0; i<groupcnt; i++) { // get the arithmetic average
112  for (size_t j=0; j<=i; j++) {
113  AP_FLOAT c = count.get(i, j);
114  if (c > 0) {
115  sum->set(i, j, sum->get(i, j) / c);
116  }
117  }
118  }
119  delete matrix;
120  matrix = sum;
121 
122  for (size_t i=0; i<nentries; i++) { // delete everything
123  delete entries[i];
124  entries[i] = NULp;
125  }
126  free(entries);
127 
128  entries = groups;
129  nentries = groupcnt;
130  allocated_entries = groupcnt;
132 
133  GBS_free_hash(hash);
134  return error;
135 }
static char * y[maxsp+1]
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
void set(size_t i, size_t j, T val)
Definition: matrix.h:49
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
double AP_FLOAT
Definition: AP_matrix.hxx:30
static void error(const char *msg)
Definition: mkptypes.cxx:96
Definition: di_matr.hxx:75
DI_MATRIX_TYPE matrix_type
Definition: di_matr.hxx:154
int group_nr
Definition: di_matr.hxx:90
AP_smatrix * matrix
Definition: di_matr.hxx:153
bool is_leaf() const
Definition: TreeNode.h:211
DI_ENTRY ** entries
Definition: di_matr.hxx:151
size_t nentries
Definition: di_matr.hxx:152
char * name
Definition: TreeNode.h:174
int GB_read_byte(GBDATA *gbd)
Definition: arbdb.cxx:734
#define NULp
Definition: cxxforward.h:116
T get(size_t i, size_t j) const
Definition: matrix.h:52
GBDATA * gb_node
Definition: TreeNode.h:173
char * compress(TreeNode *tree)
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253