26 typedef set<string, NoCaseCmp>
StrSet;
29 for (StrSet::const_iterator i = src.begin(); i != src.end(); ++i) {
30 dst.put(strdup(i->c_str()));
34 for (
unsigned i = 0; i<src.size(); ++i) {
39 void RuleSet::extractUsedFields(StrArray& input, StrArray&
output)
const {
41 for (
unsigned i = 0; i<size(); ++i) {
42 const Rule& rule =
get(i);
47 for (
unsigned f = 0; f<ifield.
size(); ++f) {
48 const char *source = ifield[f];
49 if (source[0]) in.insert(source);
53 if (!srcFields.empty()) in.insert(srcFields);
56 if (!target.empty()) out.insert(target);
72 return TransportedData(asFloat);
78 return TransportedData(data);
86 TransportedData ReadRule::aciAppliedTo(
const string& toStr,
GBDATA *
gb_main,
GBDATA *gb_dest_item)
const {
96 string converted(result);
98 return TransportedData(converted);
104 return TransportedData::makeError(
GBS_global_string(
"cannot read as data ('%s' is a container)", containerName));
111 return TransportedData::makeError(
"lacking item to readFrom");
114 if (fields.empty()) {
115 return TransportedData::makeError(
"no source field(s) specified");
118 if (multiple_source_fields()) {
123 bool gotData =
false;
124 for (
size_t f = 0; f<field.
size(); ++f) {
128 if (sourceType ==
GB_DB) {
133 if (plain.
failed())
return plain;
136 if (!concat.empty()) concat += separator;
148 return aciAppliedTo(concat,
GB_get_root(gb_item), gb_dest_item);
158 if (sourceType ==
GB_DB) {
166 return readTypedFromField(sourceType, gb_field);
211 int32_t i =
int(f+.5);
213 if (i != data.
getInt() && !acceptLossyConversion) {
226 if (wantedTargetType ==
GB_INT) {
229 int i = d>0 ? (
int)(d+0.5) : (
int)(d-0.5);
233 if (d != d2 && !acceptLossyConversion) {
259 if (!gb_item)
return "lacking item to writeTo";
265 GB_TYPES usedTargetType = forcesType() ? getTargetType() : data.
getType();
274 if (data.
getType() == usedTargetType) {
278 error =
convertAndWrite(data, gb_field, usedTargetType, acceptLossyConversion);
296 else if (tdata.
exists()) {
297 error = writeTo(tdata, gb_dest, precisionLossPermitted());
311 for (r = 0; r<size() && !
error; ++r) {
312 const Rule& rule =
get(r);
323 GB_ERROR RuleSet::saveTo(
const char *filename)
const {
325 FILE *out = fopen(filename,
"wt");
331 fputs(
"# arb field transfer set; version 1.0\n", out);
339 for (
int c = 0; clines[c]; ++c) {
340 fprintf(out,
"desc:%s\n", clines[c]);
344 fprintf(out,
"transferUndef:%i\n",
int(transferUndefFields));
347 for (
size_t r = 0; r<size(); ++r) {
348 const Rule& rule =
get(r);
350 fprintf(out,
"rule:%s\n", cfg.c_str());
360 size_t leadingSpaces = line.find_first_not_of(
" \t");
361 return line[leadingSpaces] ==
'#';
372 FILE *in = fopen(filename,
"rt");
381 while (!error && reader.
getLine(line)) {
384 size_t pos = line.find(
':');
385 if (pos == string::npos) {
389 string tag = line.substr(0, pos);
390 string content = line.substr(pos+1);
403 else if (tag ==
"desc") {
404 const string& existing = ruleset->getComment();
405 ruleset->setComment(existing.empty() ? content : existing+
'\n'+content);
407 else if (tag ==
"transferUndef") {
408 ruleset->set_transferUndefFields(
bool(atoi(content.c_str())));
427 #define SOURCE "source"
429 #define TARGET "target"
434 #define PERMITTED "permitted"
440 case GB_INT: str =
"int";
break;
441 case GB_FLOAT: str =
"float";
break;
442 case GB_BITS: str =
"bits";
break;
443 case GB_NONE: str =
"auto";
break;
451 case 't':
if (strcmp(str,
"text") == 0) type =
GB_STRING;
break;
452 case 'i':
if (strcmp(str,
"int") == 0) type =
GB_INT;
break;
453 case 'f':
if (strcmp(str,
"float") == 0) type =
GB_FLOAT;
break;
454 case 'b':
if (strcmp(str,
"bits") == 0) type =
GB_BITS;
break;
455 case 'a':
if (strcmp(str,
"auto") == 0) type =
GB_NONE;
break;
472 string Rule::getConfig()
const {
475 saveReadConfig(cfgmap);
476 saveWriteConfig(cfgmap);
478 if (precisionLossPermitted()) {
495 if (!source) error =
"missing " SOURCE " entry";
496 if (!target) error =
"missing " TARGET " entry";
498 if (!sep) sep =
NOSEP;
503 rule = makeAciConverter(source, sep, aci, target);
506 rule = makeSimple(source, sep, target);
518 rule->setTargetType(type);
524 if (loss && strcmp(loss,
PERMITTED) == 0) {
525 rule->permitPrecisionLoss();
537 string ReadRule::describe()
const {
538 if (aci.empty())
return fields;
539 return fields+
"|ACI";
541 string WriteRule::describe()
const {
544 string Rule::getShortDescription()
const {
545 return ReadRule::describe() +
" -> " + WriteRule::describe();
550 string ItemClonedByRuleSet::lastReportedError;
552 GB_ERROR ItemClonedByRuleSet::overlayOrCloneSub(
const char *subName,
GBDATA *gb_sub) {
564 GB_ERROR ItemClonedByRuleSet::cloneMissingSub(
const char *subName,
GBDATA *gb_sub) {
576 GB_ERROR ItemClonedByRuleSet::copySubIfMissing(
const char *subName) {
585 error = cloneMissingSub(subName, gb_sub);
590 GB_ERROR ItemClonedByRuleSet::copyAlignments() {
601 error = overlayOrCloneSub(aliname, gb_ali);
614 const char *ItemClonedByRuleSet::get_id_field()
const {
615 const char *field =
NULp;
643 #if defined(ASSERTION_USED)
644 checked4error =
false;
645 userCallbackUsed =
false;
650 gb_clone = gb_refItem;
653 error =
"no target species specified (logic error)";
658 GBDATA *gb_item_container;
662 gb_item_container = gb_refItem;
663 if (!gb_item_container) {
664 error =
"no target item container specified (logic error)";
666 else if (gb_item_container == gb_src_item_container) {
667 error =
"source and target item containers need to differ (logic error)";
672 gb_item_container = gb_src_item_container;
688 error = ruleset->transferBy(gb_source, gb_clone);
691 const char *IDFIELD = get_id_field();
692 if (!error) error = copySubIfMissing(IDFIELD);
696 if (!error) error = copySubIfMissing(
"acc");
698 if (aliTransporter) {
700 error = copyAlignments();
703 error = aliTransporter->
transport(gb_source, gb_clone);
705 #if defined(ASSERTION_USED)
706 userCallbackUsed =
true;
710 error = copyAlignments();
717 if (!error && ruleset->shallTransferUndefFields()) {
723 ruleset->extractUsedFields(in, out);
732 for (StrSet::const_iterator field = defined.begin(); field != defined.end(); ++field) {
733 size_t slashpos = field->find_first_of(
'/');
734 if (slashpos != string::npos) {
735 string parentname = field->substr(0, slashpos);
736 parents.insert(parentname);
739 defined.insert(parents.begin(), parents.end());
745 bool keyUsed = defined.find(key) != defined.end();
748 error = copySubIfMissing(key);
756 xf_assert(correlated(aliTransporter, userCallbackUsed));
789 error = ta.
close(error);
809 fprintf(stderr,
"Failed to rename original item after temp. clone (Reason: %s)", error);
818 fprintf(stderr,
"Failed to delete temp. clone (Reason: %s)", error);
840 void TEST_type2id() {
851 void TEST_transportedData() {
863 error = errorData.getError();
868 TransportedData greet(
"hello");
869 TransportedData num(4711);
870 TransportedData fnum(0.815f);
885 void TEST_xferset() {
907 for (
size_t r = 0; r<fts.
size(); ++r) {
992 string partOfFailReason;
994 FailingRule(
const Rule& failing,
string part) :
Rule(failing), partOfFailReason(part) {}
996 const char *expectedPartOfFailure()
const {
return partOfFailReason.c_str(); }
1003 const char *target_ascii;
1009 target_ascii(
"TEST_fields_xferred.arb")
1011 gb_src =
GB_open(
"TEST_fields_ascii.arb",
"r");
1012 gb_dest =
GB_open(target_ascii,
"wc");
1046 void copyAllSpecies() {
1076 void saveAndCompare(
const char *expected_ascii,
bool allowAutoUpdate) {
1078 if (allowAutoUpdate) {
1080 #if defined(TEST_AUTO_UPDATE)
1081 TEST_COPY_FILE(target_ascii, expected_ascii);
1088 static const char *expRuleConfig[] = {
1089 "source='lat_lon';target='geolocation'",
1090 "source='seq_quality_slv';target='seq/slv_quality'",
1091 "source='homop_slv';target='slv_homop'",
1093 "source='no1';target='notTransferred'",
1095 "source='pubmed_id';target='str2int';type='int'",
1096 "source='pubmed_id';target='str2flt';type='float'",
1097 "source='stop';target='int2flt';type='float'",
1098 "source='stop';target='int2str';type='text'",
1099 "source='align_ident_slv';target='flt2str';type='text'",
1100 "loss='permitted';source='align_ident_slv';target='flt2int';type='int'",
1102 "aci='|lower|contains(partial)|isAbove(0)';source='description';target='describedAsPartial'",
1104 "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreStr'",
1105 "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscore';type='int'",
1106 "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreFlt';type='float'",
1108 "aci='|fmult(3.5)';source='homop_slv';target='multiHomopStr'",
1109 "aci='|fmult(3.5)';source='homop_slv';target='multiHomopInt';type='int'",
1110 "aci='|fmult(3.5)';source='homop_slv';target='multiHomop';type='float'",
1112 "sep='/';source='embl_class;embl_division';target='embl_class_division'",
1113 "sep='-';source='align_startpos_slv;align_stoppos_slv';target='align_range_slv'",
1114 "sep='\\'';source='no1;align_bp_score_slv;no2;rel_ltp;no3';target='missing'",
1116 "sep=';';source='NO1;no2;no3';target='skipped'",
1118 "aci='|upper';sep=':';source='embl_class;embl_division';target='emblClassDivision'",
1119 "aci='|\"<\";dd;\">\"';sep=';';source='no1;no2;no3';target='skipped2'",
// Names of the reference databases passed to saveAndCompare() by the tests.
// The pointers themselves are const as well, so a name cannot be re-pointed by accident.
static const char *const EXPECTED_ASCII        = "TEST_fields_xferred_expected.arb";
static const char *const EXPECTED_ASCII_CLONED = "TEST_fields_cloned_expected.arb";
1125 void TEST_xferBySet() {
1133 typedef std::vector<FailingRule> FailingRuleCont;
1136 FailingRuleCont failing;
1138 #define FAILING_add(rule,msg) failing.push_back(FailingRule(rule, msg))
1175 ruleset.
add(
Rule::makeSimple(
"align_startpos_slv;align_stoppos_slv",
"-",
"align_range_slv"));
1195 TEST_EXPECT_STRARRAY_CONTAINS(input,
';',
"align_bp_score_slv;align_ident_slv;align_startpos_slv;align_stoppos_slv;description;embl_class;embl_division;homop_slv;lat_lon;no1;no2;no3;pubmed_id;rel_ltp;seq_quality_slv;stop");
1196 TEST_EXPECT_STRARRAY_CONTAINS(output,
';',
"align_range_slv;describedAsPartial;emblClassDivision;embl_class_division;flt2int;flt2str;geolocation;halfBPscore;halfBPscoreFlt;halfBPscoreStr;int2flt;int2str;missing;multiHomop;multiHomopInt;multiHomopStr;notTransferred;seq/slv_quality;skipped;skipped2;slv_homop;str2flt;str2int");
1201 const size_t rulz = ruleset.
size();
1203 const size_t testableRepr =
min(cfgs, rulz);
1204 for (
size_t r = 0; r<testableRepr; ++r) {
1206 const Rule& rule = ruleset.
get(r);
1215 for (
size_t r1 = 0; r1<rulz; ++r1) {
1216 for (
size_t r2 = r1+1; r2<rulz; ++r2) {
1221 TEST_ANNOTATE(
NULp);
1223 env.transferAllSpeciesBy(ruleset);
1249 FAILING_add(
Rule::makeSimple(
"ali_16s",
NOSEP,
"whatever"),
"cannot read as data ('ali_16s' is a container)");
1251 for (FailingRuleCont::const_iterator failRule = failing.begin(); failRule != failing.end(); ++failRule) {
1252 const FailingRule& testableRule = *failRule;
1254 separated.
add(
new Rule(testableRule));
1264 gb_src_species && !error;
1269 error =
"cannot search for unnamed species";
1273 error = separated.
transferBy(gb_src_species, gb_dest_species);
1287 typedef map<string,GB_TYPES> TypedField;
1292 gb_dest_species && !error;
1306 for (
int i = 0; curr[i]; ++i) {
1307 const char *scanned = curr[i];
1308 const char *field = scanned+1;
1311 TypedField::iterator found = seen.find(field);
1312 if (found != seen.end()) {
1313 if (type != found->second) {
1314 TEST_ANNOTATE(field);
1319 fprintf(stderr,
"field='%s' type='%i'\n", field, type);
1324 if (!error) error =
GB_delete(gb_specCopy);
1326 if (!error) error =
GB_delete(gb_fake_species_data);
1333 env.saveAndCompare(EXPECTED_ASCII,
true);
1336 void TEST_LATE_ruleConfigsReadable() {
1343 struct InvalidConfig {
1347 InvalidConfig invalidCfg[] = {
1348 {
TARGET "='xxx'",
"missing source entry" },
1349 {
SOURCE "='xxx'",
"missing target entry" },
1350 {
"tag='halfquot;",
"could not find matching quote" },
1351 {
TARGET "='xxx';" SOURCE "='xxx';type='bizarre'",
"invalid type id 'bizarre'" },
1354 for (
size_t i = 0; i<
ARRAY_ELEMS(invalidCfg); ++i) {
1355 InvalidConfig& CFG = invalidCfg[i];
1362 TEST_ANNOTATE(
NULp);
1369 for (
size_t r = 0; r<cfgs; ++r) {
1370 const char *config = expRuleConfig[r];
1378 string reloadedConfig = rule->getConfig();
1386 const char *
COMMENT =
"A multi-\nline-\ntest-\ncomment.";
1396 const char *rulesetSaved =
"impexp/rulesetCurr.fts";
1397 const char *rulesetExpected =
"impexp/ruleset.fts";
1401 #if defined(TEST_AUTO_UPDATE_RS)
1402 TEST_COPY_FILE(rulesetSaved, rulesetExpected);
1419 for (
size_t r = 0; r<loadedSet.
size(); ++r) {
1420 const Rule& rule = loadedSet.
get(r);
1432 reloaded_ruleset = loadedSet;
1441 const char *noSuchFile =
"nosuch.fts";
1447 const char *unsavable =
"noSuchDir/whatever.fts";
1453 const char *emptyFile =
"general/empty.input";
1466 env.transferAllSpeciesBy(reloaded_ruleset);
1467 env.saveAndCompare(EXPECTED_ASCII,
false);
1471 #define CUSTOM_ALI_TRANSPORT_ERROR "custom ali transport error"
1475 TestAlignmentTransporter(
int mode_) : mode(mode_) {}
1483 error = CUSTOM_ALI_TRANSPORT_ERROR;
1518 void TEST_clone_by_ruleset() {
1523 const char *rulesetExpected =
"impexp/ruleset.fts";
1534 env.copyAllSpecies();
1537 char *overwrittenName =
NULp;
1548 gb_species && gb_species != gb_first_clone;
1549 gb_species = gb_next_species, ++count)
1561 ruleset->set_transferUndefFields(count == 4);
1567 gb_clone = clone.get_clone();
1568 if (!gb_first_clone) {
1570 gb_first_clone = gb_clone;
1594 int orgNameCount = 0;
1596 bool hasOrgName = strcmp(
GBT_get_name(gb_peek), orgName) == 0;
1597 orgNameCount += hasOrgName;
1604 if (hasOrgName)
TEST_EXPECT(gb_peek == gb_species);
1616 gb_overwritten_species = gb_species;
1625 GBDATA *gb_source_species;
1654 for (
int pass = 1; pass<=4; ++pass) {
1662 aliTransporter =
new TestAlignmentTransporter(pass);
1663 if (pass == 3) reverseAliTransporter = aliTransporter;
1671 ta2.close(overclone.get_error());
1672 ta1.close(overclone.get_error());
1676 TEST_EXPECT(overclone.get_clone() == gb_overwritten_species);
1712 GBDATA *gb_clone = realClone.get_clone();
1736 env.saveAndCompare(EXPECTED_ASCII_CLONED,
true);
1737 free(overwrittenName);
1741 #endif // UNIT_TESTS
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
GBDATA * GB_open(const char *path, const char *opent)
set< string, lt_field > StrSet
bool GB_TYPE_readable_as_string(GB_TYPES type)
const char * type2str(GB_TYPES type)
#define TEST_EXPECT_SIMILAR(expr, want, epsilon)
long GB_read_int(GBDATA *gbd)
GBDATA * GB_child(GBDATA *father)
GB_ERROR GB_incur_error()
bool multiple_source_fields() const
bool getLine(string &line)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
#define TEST_REJECT_ZERO(cond)
GB_TYPES getTargetType() const
size_t GB_countEntries(GBDATA *father, const char *key)
#define TEST_EXPECT_STRARRAY_CONTAINS(strings, separator, expected)
static RulePtr forceTargetType(GB_TYPES forceType, RulePtr rule)
bool shallIgnore(const string &line)
GB_ERROR GB_IO_error(const char *action, const char *filename)
char * ARB_strdup(const char *str)
char * GBT_reverseNucSequence(const char *s, int len)
char * GB_read_as_string(GBDATA *gbd)
const char * GBS_global_string(const char *templat,...)
char * GBS_string_eval(const char *insource, const char *icommand)
static TransportedData none()
int insertBefore(int idx, RulePtr rule)
const Rule & get(int idx) const
int GB_unlink(const char *path)
bool isCommentLine(const string &line)
void permitPrecisionLoss()
#define ARRAY_ELEMS(array)
void setNull()
set SmartPtr to NULp
GBDATA * GB_get_father(GBDATA *gbd)
static void StrSet2StrArray(const StrSet &src, StrArray &dst)
__ATTR__USERESULT GB_ERROR saveTo(const char *filename) const
GB_ERROR GB_incur_error_if(bool error_may_occur)
GB_ERROR GB_delete(GBDATA *&source)
#define TEST_EXPECT_CONTAINS(str, part)
GB_ERROR GB_await_error()
std::string config_string() const
GBDATA * GB_create_container(GBDATA *father, const char *key)
#define TEST_EXPECT(cond)
GB_TYPES GB_read_type(GBDATA *gbd)
__ATTR__USERESULT GB_ERROR transferBy(GBDATA *gb_source, GBDATA *gb_dest) const
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
AliDataPtr concat(AliDataPtr left, AliDataPtr right)
static RulePtr makeSimple(const string &src, const string &sep, const string &dest)
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
const string & targetField() const
GBDATA * GB_clone(GBDATA *gb_destCont, GBDATA *gb_source)
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
bool shallTransferUndefFields() const
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
const string & getString() const
void GBT_scan_db(StrArray &fieldNames, GBDATA *gbd, const char *datapath)
const string & getComment() const
GB_ERROR GB_write_lossless_int(GBDATA *gbd, int32_t i)
float GB_read_float(GBDATA *gbd)
void setComment(const string &newComment)
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
bool GB_is_container(GBDATA *gbd)
const T * content() const
convert SmartPtr to plain old pointer (also works if isNull())
int GB_read_flag(GBDATA *gbd)
static ErrorOrRulePtr makeFromConfig(const char *config)
const string & getSourceFields() const
virtual bool shallCopyBefore() const =0
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
static RulePtr makeAciConverter(const char *src, const char *sep, const char *aci, const char *dest)
GB_ERROR GB_write_float(GBDATA *gbd, float f)
GB_ERROR GB_write_int(GBDATA *gbd, long i)
GB_ERROR getError() const
#define TEST_EXPECT_ZERO(cond)
ErrorOr< RulePtr > ErrorOrRulePtr
void set_entry(const std::string &entry, const std::string &value)
fputs(TRACE_PREFIX, stderr)
__ATTR__USERESULT GB_ERROR transferBy(GBDATA *gb_source, GBDATA *gb_dest) const
#define TEST_EXPECT_NULL(n)
GB_ERROR close(GB_ERROR error)
void replace(int idx, RulePtr rule)
GB_ERROR parseFrom(const std::string &configString)
bool precisionLossPermitted() const
GBDATA * GBT_first_species(GBDATA *gb_main)
const char * get_entry(const char *entry) const
#define TEST_EXPECT_NO_ERROR(call)
GB_TYPES str2type(const char *str)
GBDATA * GBT_find_or_create_species(GBDATA *gb_main, const char *name, bool markCreated)
GBDATA * GBT_next_species(GBDATA *gb_species)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
GB_ERROR GB_write_lossless_float(GBDATA *gbd, float f)
#define TEST_EXPECT_DIFFERENT(expr, want)
void GBT_split_string(ConstStrArray &dest, const char *namelist, const char *separator, SplitMode mode)
static ErrorOrRuleSetPtr loadFrom(const char *filename)
bool ARB_strBeginsWith(const char *str, const char *with)
const char * GBT_get_name(GBDATA *gb_item)
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
void set_transferUndefFields(bool transferThem)
ARB_ERROR getError() const
GBDATA * GB_nextChild(GBDATA *child)
void extractUsedFields(StrArray &input, StrArray &output) const
static GB_ERROR convertAndWrite(const TransportedData &data, GBDATA *gb_field, GB_TYPES wantedTargetType, bool acceptLossyConversion)
static GB_ERROR unconvertedWrite(const TransportedData &data, GBDATA *gb_field)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
GB_ERROR GB_copy_overlay(GBDATA *dest, GBDATA *source)
#define TEST_EXPECT_EQUAL(expr, want)
virtual GB_ERROR transport(GBDATA *gb_src_item, GBDATA *gb_dst_item) const =0
GBDATA * GB_entry(GBDATA *father, const char *key)
void GB_close(GBDATA *gbd)
ErrorOr< RuleSetPtr > ErrorOrRuleSetPtr
static TransportedData makeError(GB_ERROR error)
static void StrArray2StrSet(const StrArray &src, StrSet &dst)
TransportedData cannotReadContainer(const char *containerName)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_ERROR GB_write_autoconv_string(GBDATA *gbd, const char *val)