ARB
util.cxx
Go to the documentation of this file.
1 #include <BufferedFileReader.h>
2 #include "fun.h"
3 #include "defs.h"
4 #include "global.h"
5 #include "reader.h"
6 
7 #include <cstdarg>
8 #include <cerrno>
9 
bool scan_token(char *to, const char *from) { // __ATTR__USERESULT
    // Copy the first whitespace-delimited token found in 'from' into 'to'.
    // Returns true if a token was read, false otherwise.
    // Note: the copy is not length-limited ("%s"), 'to' has to be big enough.
    const int converted = sscanf(from, "%s", to);
    return converted == 1;
}
13 
14 void scan_token_or_die(char *to, const char *from) {
15  if (!scan_token(to, from)) {
16  throw_error(88, "expected to see a token here");
17  }
18 }
19 void scan_token_or_die(char *to, Reader& reader, int offset) {
20  scan_token_or_die(to, reader.line()+offset);
21 }
22 
23 void throw_error(int error_num, const char *error_message) { // __ATTR__NORETURN
24  throw Convaln_exception(error_num, error_message);
25 }
26 
char *strf(const char *format, ...) { // __ATTR__FORMAT(1)
    // printf-like formatting into a newly allocated string.
    //
    // Returns a heap copy of the formatted text.
    // Ownership passes to the caller (presumably released with free() - confirm against strndup() in global.h).
    //
    // Short results are formatted into a stack buffer; longer results get
    // an exactly sized heap buffer. Writes are always bounded (vsnprintf),
    // so over-long output can no longer overflow the stack buffer.

    const int BUFSIZE = 1000;
    char      buffer[BUFSIZE];

    va_list parg;
    va_start(parg, format);
    int printed = vsnprintf(buffer, BUFSIZE, format, parg); // returns the length the full result would need
    va_end(parg);

    if (printed >= BUFSIZE) { // result did not fit into 'buffer'
        char *big = (char*)malloc(printed+1);
        if (big) {
            va_start(parg, format); // restart argument list for second pass
            vsnprintf(big, printed+1, format, parg);
            va_end(parg);
            return big;
        }
        printed = BUFSIZE-1; // out of memory -> fall back to the truncated text in 'buffer'
    }
    else if (printed < 0) { // formatting failed -> return empty string
        buffer[0] = 0;
        printed   = 0;
    }

    return strndup(buffer, printed);
}
40 
41 void throw_errorf(int error_num, const char *error_messagef, ...) { // __ATTR__FORMAT(2) __ATTR__NORETURN
42  va_list parg;
43  va_start(parg, error_messagef);
44 
45  const int BUFSIZE = 1000;
46  static char buffer[BUFSIZE];
47  int printed = vsprintf(buffer, error_messagef, parg);
48 
49  va_end(parg);
50 
51  if (printed >= BUFSIZE) {
52  throw_errorf(998, "Internal buffer overflow (while formatting error #%i '%s')", error_num, error_messagef);
53  }
54  throw_error(error_num, buffer);
55 }
56 
57 // --------------------------------------------------------------------------------
58 
59 bool Warnings::show_warnings = true;
60 
61 void warning(int warning_num, const char *warning_message) {
62  // print out warning_message and continue execution.
63  if (Warnings::shown())
64  fprintf(stderr, "WARNING(%d): %s\n", warning_num, warning_message);
65 }
66 void warningf(int warning_num, const char *warning_messagef, ...) { // __ATTR__FORMAT(2)
67  if (Warnings::shown()) {
68  va_list parg;
69  va_start(parg, warning_messagef);
70 
71  const int BUFSIZE = 1000;
72  static char buffer[BUFSIZE];
73  int printed = vsprintf(buffer, warning_messagef, parg);
74 
75  va_end(parg);
76 
77  if (printed >= BUFSIZE) {
78  throw_errorf(997, "Internal buffer overflow (while formatting warning #%i '%s')", warning_num, warning_messagef);
79  }
80  warning(warning_num, buffer);
81  }
82 }
83 
int Skip_white_space(const char *line, int index) {
    // Return the position of the first char at or behind 'index'
    // which is neither a space nor a tab
    // (this may be the position of the terminating zero byte).

    for (;;) {
        const char c = line[index];
        if (c != ' ' && c != '\t') return index;
        ++index;
    }
}
91 
void Getstr(char *line, int linenum) {
    // Read one line from stdin into 'line'.
    //
    // 'linenum' is the capacity of 'line' in bytes (including the terminating zero byte).
    // Reading stops at newline (consumed, not stored), at end of input, or when the buffer is full.
    // When the buffer is full, one further char has already been consumed and is dropped;
    // the rest of the input line remains unread.
    //
    // Does nothing if 'line' is NULL or 'linenum' is not positive.

    if (!line || linenum <= 0) return;

    int indi = 0;
    int c; // has to be int, otherwise EOF cannot be told apart from a regular char

    while ((c = getchar()) != '\n' && c != EOF && indi < (linenum - 1)) {
        line[indi++] = (char)c;
    }

    line[indi] = '\0';
}
101 
102 inline void append_known_len(char*& string1, int len1, const char *string2, int len2) {
103  ca_assert(len2); // else no need to call, string1 already correct
104  int newlen = len1+len2;
105  ARB_realloc(string1, newlen+1);
106  memcpy(string1+len1, string2, len2);
107  string1[newlen] = 0;
108 }
109 
110 void terminate_with(char*& str, char ch) {
111  // append 'ch' to end of 'str' (before \n)
112  // - if it's not already there and
113  // - 'str' contains more than just '\n'
114 
115  int len = str0len(str);
116  if (!len) return;
117 
118  ca_assert(str[len-1] == '\n');
119  if (len == 1) return;
120 
121  if (str[len-2] == ch) return;
122 
123  char temp[] = { ch, '\n' };
124  append_known_len(str, len-1, temp, 2);
125 }
126 
127 void skip_eolnl_and_append(char*& string1, const char *string2) {
128  int len1 = str0len(string1);
129  if (len1 && string1[len1-1] == '\n') len1--;
130  int len2 = str0len(string2);
131  if (len2) append_known_len(string1, len1, string2, len2);
132  else { string1[len1] = 0; }
133 }
134 
135 void skip_eolnl_and_append_spaced(char*& string1, const char *string2) {
136  int len1 = str0len(string1);
137  if (len1 && string1[len1-1] == '\n') string1[len1-1] = ' ';
138  append_known_len(string1, len1, string2, str0len(string2));
139 }
140 
141 void Append(char*& string1, const char *string2) {
142  int len2 = str0len(string2);
143  if (len2) append_known_len(string1, str0len(string1), string2, len2);
144 }
145 void Append(char*& string1, char ch) {
146  append_known_len(string1, str0len(string1), &ch, 1);
147 }
148 
void upcase(char *str) {
    // Convert all chars of 'str' to upper case (in place).
    // Accepts NULL (does nothing).

    if (!str) return;
    for (char *p = str; *p; ++p) {
        // toupper() is only defined for EOF and unsigned char values
        // -> convert first, plain char may be negative
        *p = (char)toupper((unsigned char)*p);
    }
}
154 
155 int fputs_len(const char *str, int len, Writer& write) {
156  // like fputs(), but does not print more than 'len' characters
157  // returns number of chars written or throws write error
158  for (int i = 0; i<len; ++i) {
159  if (!str[i]) return i;
160  write.out(str[i]);
161  }
162  return len;
163 }
164 
165 // -------------------------
166 // pattern matching
167 
169 
170 static int findPattern(const char *text, const char *pattern, FindMode mode) {
171  // Return offset of 'pattern' in 'text' or -1
172  // (compares case insensitive)
173  // return position after found 'pattern' in 'endPtr'
174 
175  if (text && pattern) {
176  for (int t = 0; text[t]; ++t) {
177  bool mismatch = false;
178 
179  int p = 0;
180  for (; !mismatch && pattern[p]; ++p) {
181  mismatch = tolower(text[t+p]) != tolower(pattern[p]);
182  }
183  if (!mismatch) {
184  switch (mode) {
185  case FIND_START: return t;
186  case FIND_SKIP_OVER: return t+p;
187  }
188  ca_assert(0);
189  }
190  }
191  }
192  return -1;
193 }
194 
195 static int findMultipattern(const char *str, const char** const& pattern, char expect_behind, FindMode mode) {
196  // search 'str' for the occurrence of any 'pattern'
197  // if 'expect_behind' != 0 -> expect that char behind the found pattern
198 
199  int offset = -1;
200 
201  if (str) {
202  FindMode use = expect_behind ? FIND_SKIP_OVER : mode;
203 
204  int p;
205  for (p = 0; offset == -1 && pattern[p]; ++p) {
206  offset = findPattern(str, pattern[p], use);
207  }
208 
209  if (offset != -1) {
210  if (expect_behind) {
211  if (str[offset] != expect_behind) { // mismatch
212  offset = findMultipattern(str+offset, pattern, expect_behind, mode);
213  }
214  else {
215  switch (mode) {
216  case FIND_START:
217  offset -= str0len(pattern[p]);
218  ca_assert(offset >= 0);
219  break;
220  case FIND_SKIP_OVER:
221  offset++;
222  break;
223  }
224  }
225  }
226  }
227  }
228  return offset;
229 }
230 
231 static int findSubspecies(const char *str, char expect_behind, FindMode mode) {
232  const char *subspecies_pattern[] = { "subspecies", "sub-species", "subsp.", NULp };
233  return findMultipattern(str, subspecies_pattern, expect_behind, mode);
234 }
235 
236 static int findStrain(const char *str, char expect_behind, FindMode mode) {
237  const char *strain_pattern[] = { "strain", "str.", NULp };
238  return findMultipattern(str, strain_pattern, expect_behind, mode);
239 }
240 
241 int find_pattern(const char *text, const char *pattern) { return findPattern(text, pattern, FIND_START); }
242 int skip_pattern(const char *text, const char *pattern) { return findPattern(text, pattern, FIND_SKIP_OVER); }
243 
244 int find_subspecies(const char *str, char expect_behind) { return findSubspecies(str, expect_behind, FIND_START); }
245 int skip_subspecies(const char *str, char expect_behind) { return findSubspecies(str, expect_behind, FIND_SKIP_OVER); }
246 
247 int find_strain(const char *str, char expect_behind) { return findStrain(str, expect_behind, FIND_START); }
248 int skip_strain(const char *str, char expect_behind) { return findStrain(str, expect_behind, FIND_SKIP_OVER); }
249 
250 const char *stristr(const char *str, const char *substring) {
251  int offset = find_pattern(str, substring);
252  return offset >= 0 ? str+offset : NULp;
253 }
254 
255 int ___lookup_keyword(const char *keyword, const char * const *lookup_table, int lookup_table_size) {
256  // returns the index [0..n-1] of 'keyword' in lookup_table
257  // or -1 if not found.
258 
259  for (int i = 0; i<lookup_table_size; ++i) {
260  if (str_equal(keyword, lookup_table[i])) return i;
261  }
262  return -1;
263 }
264 
265 int parse_key_word(const char *line, char *key, const char *separator) {
266  // Copy keyword starting at position 'index' of 'line' delimited by 'separator' into 'key'.
267  // Do not copy more than 'TOKENSIZE-1' characters.
268  // Returns length of keyword.
269 
270  int k = 0;
271  if (line) {
272  while (k<(TOKENSIZE-1) && line[k] && !occurs_in(line[k], separator)) {
273  key[k] = line[k];
274  ++k;
275  }
276  }
277  key[k] = 0;
278  return k;
279 }
280 
281 // ----------------------
282 // FormattedFile
283 
285  : name_(ARB_strdup(Name)),
286  type_(Type)
287 {}
289  free(name_);
290 }
291 
292 void FormattedFile::init(const char *Name, Format Type) {
293  ca_assert(!name_); // do not init twice
294  ca_assert(Name);
295  ca_assert(Type != UNKNOWN);
296 
297  name_ = nulldup(Name);
298  type_ = Type;
299 }
300 
CONSTEXPR_INLINE int str0len(const char *str)
Definition: global.h:98
void upcase(char *str)
Definition: util.cxx:149
static long linenum
Definition: mkptypes.cxx:79
Definition: reader.h:21
Format
Definition: fun.h:10
int find_subspecies(const char *str, char expect_behind)
Definition: util.cxx:244
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
FindMode
Definition: util.cxx:168
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
void Append(char *&string1, const char *string2)
Definition: util.cxx:141
void init(const char *Name, Format Type)
Definition: util.cxx:292
void skip_eolnl_and_append_spaced(char *&string1, const char *string2)
Definition: util.cxx:135
int skip_pattern(const char *text, const char *pattern)
Definition: util.cxx:242
#define ca_assert(cond)
Definition: global.h:33
void Getstr(char *line, int linenum)
Definition: util.cxx:92
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
Definition: fun.h:25
void warningf(int warning_num, const char *warning_messagef,...)
Definition: util.cxx:66
void append_known_len(char *&string1, int len1, const char *string2, int len2)
Definition: util.cxx:102
FormattedFile()
Definition: fun.h:55
int fputs_len(const char *str, int len, Writer &write)
Definition: util.cxx:155
Definition: reader.h:95
int ___lookup_keyword(const char *keyword, const char *const *lookup_table, int lookup_table_size)
Definition: util.cxx:255
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
int find_strain(const char *str, char expect_behind)
Definition: util.cxx:247
const char * stristr(const char *str, const char *substring)
Definition: util.cxx:250
int skip_strain(const char *str, char expect_behind)
Definition: util.cxx:248
void skip_eolnl_and_append(char *&string1, const char *string2)
Definition: util.cxx:127
void scan_token_or_die(char *to, const char *from)
Definition: util.cxx:14
virtual void out(char ch)=0
int Skip_white_space(const char *line, int index)
Definition: util.cxx:84
static int findStrain(const char *str, char expect_behind, FindMode mode)
Definition: util.cxx:236
int find_pattern(const char *text, const char *pattern)
Definition: util.cxx:241
int parse_key_word(const char *line, char *key, const char *separator)
Definition: util.cxx:265
bool scan_token(char *to, const char *from)
Definition: util.cxx:10
static int findSubspecies(const char *str, char expect_behind, FindMode mode)
Definition: util.cxx:231
static int findPattern(const char *text, const char *pattern, FindMode mode)
Definition: util.cxx:170
va_end(argPtr)
~FormattedFile()
Definition: util.cxx:288
char * strf(const char *format,...)
Definition: util.cxx:27
void throw_errorf(int error_num, const char *error_messagef,...)
Definition: util.cxx:41
static cstr error_message
Definition: readcfg.c:10
static int findMultipattern(const char *str, const char **const &pattern, char expect_behind, FindMode mode)
Definition: util.cxx:195
void ARB_realloc(TYPE *&tgt, size_t nelem)
Definition: arb_mem.h:43
va_start(argPtr, format)
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
Definition: global.h:95
static int pattern[maxsites+1]
#define TOKENSIZE
Definition: defs.h:18
const char * line() const
Definition: reader.h:43
static bool shown()
Definition: global.h:80
static int line
Definition: arb_a2ps.c:296
#define NULp
Definition: cxxforward.h:116
void terminate_with(char *&str, char ch)
Definition: util.cxx:110
#define offset(field)
Definition: GLwDrawA.c:73
char * strndup(const char *str, int len)
Definition: global.h:102
void throw_error(int error_num, const char *error_message)
Definition: util.cxx:23
#define BUFSIZE
int skip_subspecies(const char *str, char expect_behind)
Definition: util.cxx:245
bool occurs_in(char ch, const char *in)
Definition: global.h:111