58 set<string> consistencies;
78 return consistencies.find(check_name) != consistencies.end();
83 if (was_performed(check_name)) {
84 printf(
"check '%s' already has been registered before. Duplicated check name?\n", check_name.c_str());
95 if (!error) consistencies.insert(check_name);
101 GB_ERROR (*do_check)(
GBDATA *gb_main,
size_t species,
size_t sais),
104 if (!error && !was_performed(check_name)) {
106 error = do_check(gb_main, species_count, sai_count);
107 if (!error) register_as_performed(check_name);
112 if (!was_performed(check_name)) {
113 item_checks[check_name] = item_check;
128 printf(
"Deleting check '%s'\n", check_name);
131 consistencies.erase(check_name);
151 error = chk->second(gb_item, sel);
160 if (!item_checks.empty()) {
166 if (!error && is_genome_db) {
171 error = ta.
close(error);
177 error = register_as_performed(chk->first);
180 if (!error) item_checks.clear();
191 size_t deleted_gene_datas = 0;
192 size_t generated_gene_datas = 0;
196 gb_species && !
error;
202 if (is_organism && !gb_gene_data) {
204 generated_gene_datas++;
206 else if (!is_organism && gb_gene_data) {
210 if (!error) deleted_gene_datas++;
215 "i.e. which is not regarded as full-genome organism.\n"
216 "This causes problems - please fix!",
225 if (deleted_gene_datas) {
228 if (generated_gene_datas) {
229 aw_message(
GBS_global_string(
"Re-created %zu missing 'gene_data' entries.\nThese organisms have no genes yet!", generated_gene_datas));
232 return ta.
close(error);
240 gb_field =
GB_entry(gb_gene, field);
241 if (!gb_field) data_error =
GBS_global_string(
"Expected field '%s' missing", field);
249 gb_field =
GB_entry(gb_gene, field);
250 if (gb_field) data_error =
GBS_global_string(
"Unexpected field '%s' exists (wrong value in pos_joined?)", field);
260 long fixed_genes = 0;
261 long skipped_genes = 0;
264 typedef vector<GBDATA*> GBvec;
270 gb_organism && !
error;
285 if (gb_pos_joined) parts =
GB_read_int(gb_pos_joined);
292 error =
"Neither 'pos_begin' nor 'pos_start' found - format of gene location is unknown";
296 if (!gb_pos_start && !error) {
298 if (parts<1) error =
GBS_global_string(
"Illegal value in 'pos_joined' (%i)", parts);
317 toDelete.push_back(gb_complement);
321 bool has_uncertain_fields =
false;
322 for (
int p = 1; p <= parts && !error && !data_error; ++p) {
325 const char *pos_uncertain_field =
NULp;
328 gb_pos_begin =
expectField(gb_gene,
"pos_begin", data_error);
329 gb_pos_end =
expectField(gb_gene,
"pos_end", data_error);
331 pos_uncertain_field =
"pos_uncertain";
337 gb_pos_begin =
expectField(gb_gene, pos_begin_field, data_error);
338 gb_pos_end =
expectField(gb_gene, pos_end_field, data_error);
346 int pospos = complement ? (parts-p) : (p-1);
349 GBDATA *gb_pos_uncertain =
GB_entry(gb_gene, pos_uncertain_field);
351 if (!gb_pos_uncertain) {
352 if (has_uncertain_fields) data_error =
GBS_global_string(
"Expected field '%s' missing", pos_uncertain_field);
355 if (p == 1) has_uncertain_fields =
true;
357 if (!has_uncertain_fields) {
358 data_error =
GBS_global_string(
"Found '%s' as first certainty-information", pos_uncertain_field);
371 if (gb_pos_uncertain) {
378 if (strlen(uncertain) != 2) {
379 data_error =
"wrong length";
382 for (
int up = 0; up<2; up++) {
383 if (!strchr(
"<=>", uncertain[up])) {
393 toDelete.push_back(gb_pos_uncertain);
397 toDelete.push_back(gb_pos_begin);
398 toDelete.push_back(gb_pos_end);
403 for (
int p = parts+1; p <= parts+4 && !error && !data_error; ++p) {
420 GBvec::const_iterator end = toDelete.end();
421 for (GBvec::const_iterator i = toDelete.begin(); i != end && !
error; ++i) {
426 if (!error) fixed_genes++;
433 if (data_error || error) {
452 if (skipped_genes>0) {
454 error =
"Not all gene locations were fixed.\nFix manually, save DB and restart ARB with that DB.\nMake sure you have a backup copy of the original DB!";
457 if (fixed_genes || skipped_genes) {
458 long already_fixed_genes = genes-(fixed_genes+skipped_genes);
459 if (already_fixed_genes>0)
aw_message(
GBS_global_string(
"Location entries of %li genes already were in new format.", already_fixed_genes));
472 size_t all = species_count+sai_count;
481 for (
int pass = 0; pass < 2 && !
error; ++pass) {
486 for (
int ali = 0; ali_names[ali] && !
error; ++ali) {
506 for (
int ali = 0; ali_names[ali] && !
error; ++ali) {
513 error =
GBS_global_string(
"SAI:%s has 'REF' and '_REF' in '%s' (data corrupt?!)",
514 helix_name, ali_names[ali]);
524 if (!error) error =
GB_delete(gb_old_ref);
542 return ta.
close(error);
563 if (testUse)
return true;
566 if (!decompressed)
return false;
586 bool compressionTested;
590 compressionTested =
false;
596 KeyInfo(
const char *Name, DictPtr originalDict) : name(Name), original(originalDict) {
init(); }
601 compressionTested =
true;
604 const string&
getName()
const {
return name; }
618 map<string, bool> decompressWorks;
628 bool mayBeUsedWith(
const string& key)
const {
return strcasecmp(group.c_str(), key.c_str()) == 0; }
635 if (decompressWorks.find(key) == decompressWorks.end()) {
640 decompressWorks[key] = works;
642 GB_ERROR err2 = unassignFromKey(key);
647 return decompressWorks[key];
655 typedef map<string, KeyInfoPtr>
Keys;
660 #define STATUS_PREFIX "Dictionary: "
662 template<
typename CONT,
typename KEY>
663 bool contains(
const CONT& container,
const KEY& key) {
664 return container.find(key) != container.end();
671 for (
int pass = 1; pass <= 2; ++pass) {
688 if (kcount[keyGroup]>1) {
692 keys[keyName] =
new KeyInfo(keyName, dict);
693 dicts.push_back(dict);
695 else keys[keyName] =
new KeyInfo(keyName);
697 else kcount.erase(keyGroup);
712 error =
deleteDataOfKey(gb_sub, key_quark, deletedData, deleted, notDeleted);
721 deletedData.insert(path);
768 error =
"No " GB_SYSTEM_KEY_DATA
" found.. DB corrupted?";
778 long affectedKeys = 0;
779 for (KeyCounter::iterator kci = kcount.begin(); kci != kcount.end(); ++kci) {
780 affectedKeys += kci->second;
783 if (!error && affectedKeys>0) {
789 for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) {
790 KeyInfoPtr k = ki->second;
791 k->testCompressed(gb_main);
797 long combinations = 0;
800 StringSet multiDecompressible;
802 for (
int pass = 1; pass <= 2; ++pass) {
806 for (Dicts::iterator di = dicts.begin(); di != dicts.end(); ++di) {
809 for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) {
810 KeyInfoPtr k = ki->second;
811 const string& keyname = k->getName();
824 multiDecompressible.insert(keyname);
836 StringSet notDecompressible;
837 for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) {
838 KeyInfoPtr k = ki->second;
839 const string& keyname = k->getName();
841 if (k->isCompressed()) {
842 if (!
contains(use, keyname)) notDecompressible.insert(keyname);
843 if (
contains(multiDecompressible, keyname)) use.erase(keyname);
847 bool dataLost =
false;
850 if (!notDecompressible.empty()) {
852 long nd_count = notDecompressible.size();
854 "Data of %li DB-keys is lost and will be deleted", nd_count));
858 StringSet deletedData;
862 for (StringSet::iterator ki = notDecompressible.begin(); !error && ki != notDecompressible.end(); ++ki) {
863 const string& keyname = *ki;
873 aw_message(
"see console for a list of affected keys");
875 printf(
"Deleted keys:\n");
876 for (StringSet::iterator di = deletedData.begin(); di != deletedData.end(); ++di) {
877 printf(
"* %s\n", di->c_str());
882 if (!error && !multiDecompressible.empty()) {
883 for (StringSet::iterator ki = multiDecompressible.begin(); !error && ki != multiDecompressible.end(); ++ki) {
884 const string& keyname = *ki;
886 vector<DictPtr> possibleDicts;
888 printf(
"--------------------------------------------------------------------------------\n");
890 for (Dicts::iterator di = dicts.begin(); !error && di != dicts.end(); ++di) {
899 printf(
"possibility %i = '%s'\n", possible, data);
902 possibleDicts.push_back(d);
915 "and example data was dumped to the console.\n"
916 "Please examine output and decide which is the correct possibility!",
917 possible, keyname.c_str());
919 const char *buttons =
"Abort";
920 for (
int p = 1; p <= possible; ++p) buttons =
GBS_global_string(
"%s,%i", buttons, p);
921 selected =
aw_question(
"dict_decompress_bug", question, buttons,
false,
NULp);
929 error =
"Aborted by user";
932 use[keyname] = possibleDicts[selected-1];
940 for (Keys::iterator ki = keys.begin(); !error && ki != keys.end(); ++ki) {
941 KeyInfoPtr k = ki->second;
942 const string& keyname = k->getName();
944 if (k->isCompressed()) {
946 error =
GBS_global_string(
"No dictionary detected for key '%s'", keyname.c_str());
949 DictPtr d = use[keyname];
962 if (dataLost||reassigned) {
964 ?
"We apologize for the data-loss."
965 :
"No conflicts detected in compressed data.");
967 "Please save your database with a new name.");
972 Dict::gb_main =
NULp;
973 return ta.
close(error);
993 if (!gb_next_color)
break;
997 if (!error) del_count++;
1003 if (del_count) fprintf(stderr,
ItemSelector & GEN_get_selector()
GBDATA * GBT_first_SAI(GBDATA *gb_main)
void testCompressed(GBDATA *gb_main)
unsigned char * complement
GBDATA * GEN_next_gene(GBDATA *gb_gene)
long GB_read_int(GBDATA *gbd)
bool canDecompress(const string &key)
GBDATA * GB_child(GBDATA *father)
KeyInfo(const char *Name, DictPtr originalDict)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
void GEN_free_position(GEN_position *pos)
static GB_ERROR NT_convert_gene_locations(GBDATA *gb_main, size_t species_count, size_t)
GBDATA * GB_nextEntry(GBDATA *entry)
GB_ERROR assignToKey(const string &key) const
GBDATA * GEN_findOrCreate_gene_data(GBDATA *gb_species)
long GBT_get_SAI_count(GBDATA *gb_main)
void GBT_get_alignment_names(ConstStrArray &names, GBDATA *gbd)
static GB_ERROR deleteDataOfKey(GBDATA *gbd, GBQUARK key_quark, StringSet &deletedData, long &deleted, long ¬Deleted)
char * ARB_strdup(const char *str)
char * GB_read_as_string(GBDATA *gbd)
static GBDATA * disexpectField(GBDATA *gb_gene, const char *field, GB_ERROR &data_error)
const char * GBS_global_string(const char *templat,...)
GB_ERROR unassignFromKey(const string &key) const
void GEN_use_uncertainties(GEN_position *pos)
static GBDATA * expectField(GBDATA *gb_gene, const char *field, GB_ERROR &data_error)
const string & getOriginalKey() const
Dict(const char *Group, const char *OrgKey, DictData *Data)
void register_item_check(const string &check_name, item_check_fun item_check)
static GB_ERROR NT_fix_gene_data(GBDATA *gb_main, size_t species_count, size_t)
map< string, item_check_fun > item_check_map
ItemSelector & EXP_get_selector()
GB_ERROR GB_delete(GBDATA *&source)
static GB_ERROR remove_dup_colors(GBDATA *gb_item, ItemSelector &IF_DEBUG(sel))
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
GB_ERROR GB_await_error()
GB_TYPES GB_read_type(GBDATA *gbd)
SmartPtr< KeyInfo > KeyInfoPtr
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
bool isCompressed() const
KeyInfo(const char *Name)
GB_ERROR register_as_performed(const string &check_name)
unsigned char * start_uncertain
void perform_check(const string &check_name, GB_ERROR(*do_check)(GBDATA *gb_main, size_t species, size_t sais), GB_ERROR &error)
map< string, KeyInfoPtr > Keys
GBDATA *(* get_first_item_container)(GBDATA *, AW_root *, QUERY_RANGE)
void NT_rerepair_DB(AW_window *, GBDATA *gb_main)
#define GB_COLORGROUP_ENTRY
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
GBDATA *(* get_next_item_container)(GBDATA *, QUERY_RANGE)
char * ARB_strlower(char *s)
GBQUARK GB_find_or_create_quark(GBDATA *gbd, const char *key)
static GB_ERROR NT_fix_dict_compress(GBDATA *gb_main, size_t, size_t)
const string & getName() const
GBDATA * GEN_find_gene_data(GBDATA *gb_species)
GB_ERROR GB_set_dictionary(GBDATA *gb_main, const char *key, const DictData *dd)
GBDATA * GEN_next_organism(GBDATA *gb_organism)
const string & getGroup() const
GB_ERROR GEN_write_position(GBDATA *gb_gene, const GEN_position *pos, long seqLength)
GBQUARK GB_get_quark(GBDATA *gbd)
char *(* generate_item_id)(GBDATA *gb_main, GBDATA *gb_item)
void GBS_reuse_buffer(const char *global_buffer)
GBDATA *(* get_first_item)(GBDATA *, QUERY_RANGE)
CheckedConsistencies(GBDATA *gb_main_)
static char * readFirstCompressedDataOf(GBDATA *gbd, GBQUARK key_quark)
GB_ERROR(* item_check_fun)(GBDATA *gb_item, ItemSelector &sel)
static GB_ERROR findAffectedKeys(GBDATA *gb_key_data, KeyCounter &kcount, Keys &keys, Dicts &dicts)
map< string, DictPtr > DictMap
map< string, int > KeyCounter
bool was_performed(const string &check_name) const
GEN_position * GEN_new_position(int parts, bool joinable)
GBDATA *(* get_next_item)(GBDATA *, QUERY_RANGE)
int aw_question(const char *unique_id, const char *question, const char *buttons, bool sameSizeButtons, const char *helpfile)
GBDATA * GBT_next_SAI(GBDATA *gb_sai)
char * GEN_global_gene_identifier(GBDATA *gb_gene, GBDATA *gb_organism)
GB_ERROR close(GB_ERROR error)
bool mayBeUsedWith(const string &key) const
int GB_read_byte(GBDATA *gbd)
char * GB_read_string(GBDATA *gbd)
GB_ERROR forgetDoneChecks()
ItemSelector & SPECIES_get_selector()
GBDATA * GBT_first_species(GBDATA *gb_main)
static ARB_init_perl_interface init
const char * GB_get_db_path(GBDATA *gbd)
void aw_message(const char *msg)
GB_ERROR NT_repair_DB(GBDATA *gb_main)
char * GBT_get_default_helix(GBDATA *)
GBDATA * GBT_next_species(GBDATA *gb_species)
#define GB_SYSTEM_KEY_DATA
static GB_ERROR NT_del_mark_move_REF(GBDATA *gb_main, size_t species_count, size_t sai_count)
bool contains(const CONT &container, const KEY &key)
GBDATA * GB_nextChild(GBDATA *child)
static bool testDictionaryCompression(GBDATA *gbd, GBQUARK key_quark, bool testUse)
bool GB_is_dictionary_compressed(GBDATA *gbd)
long GBT_get_species_count(GBDATA *gb_main)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
void perform_item_checks(GB_ERROR &error)
item_check_map::const_iterator item_check_iter
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GBDATA * GEN_first_organism(GBDATA *gb_main)
DictData * GB_get_dictionary(GBDATA *gb_main, const char *key)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
GBDATA * GEN_first_gene_rel_gene_data(GBDATA *gb_gene_data)
GBDATA * GB_entry(GBDATA *father, const char *key)
void inc_and_check_user_abort(GB_ERROR &error)
char * GBS_global_string_copy(const char *templat,...)
unsigned char * stop_uncertain