ARB
date.cxx
Go to the documentation of this file.
1 #include "fun.h"
2 #include "global.h"
3 
4 #include <time.h>
5 #include <sys/time.h>
6 
7 #define SIZE 128 // default buffer size for generated dates
8 
// Placeholder returned by genbank_date() when a date cannot be converted.
// The question marks are escaped to keep them from being read as trigraphs.
static const char *ERROR_DATE = "\?\?-\?\?\?-\?\?\?\?";

// Upper-case three-letter month abbreviations, indexed by month-1.
static const char *const MON[12] = {
    "JAN", "FEB", "MAR",
    "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP",
    "OCT", "NOV", "DEC"
};

// Full month names, indexed by month-1.
static const char *const Month[12] = {
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December"
};

// Maximum number of days per month, indexed by month (1..12).
// Index 0 is an unused sentinel. February is listed with 29 days,
// i.e. leap years are not distinguished.
static const unsigned char days_in_month[12+1] = {
    0xFF,
    31, 29, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31
};
30 
inline bool two_char(const char *str, char determ) {
    // Return true if 'str' contains exactly two occurrences of the
    // delimiter character 'determ'.
    //
    // A delimited date such as "19-APR-1999" consists of three fields and
    // therefore of exactly two delimiters; any other count is not a date
    // of that form.
    int count = 0;
    for (const char *d = strchr(str, determ); d; d = strchr(d+1, determ)) count++;
    return count == 2;
}
37 
38 inline int ismonth(const char *str) {
39  // Return [1..12] if the char Str is one of 12 months. Case insensitive.
40  for (int i = 0; i<12; i++) {
41  if (str_iequal(str, MON[i])) {
42  return i+1;
43  }
44  }
45  return 0;
46 }
47 
48 
49 __ATTR__USERESULT static bool find_date(const char *date_string, int *month, int *day, int *year) {
50  // Find day, month, year from date Str.
51  char determ = ' ';
52  if (two_char(date_string, '.')) determ = '.';
53  else if (two_char(date_string, '/')) determ = '/';
54  else if (two_char(date_string, '-')) determ = '-';
55 
56  if (determ == ' ') return false;
57 
58  char token[20];
59  int nums[3] = { 0, 0, 0 };
60  int count = 0;
61  int index = 0;
62 
63  int len = str0len(date_string);
64  for (int indi = 0; indi <= len; indi++) {
65  if (date_string[indi] == determ || indi == len) {
66  token[index++] = '\0';
67  if (count == 1) {
68  nums[count++] = ismonth(token);
69  }
70  else {
71  nums[count++] = atoi(token);
72  }
73  index = 0;
74  }
75  else {
76  token[index++] = date_string[indi];
77  }
78  }
79  *day = nums[0];
80  *month = nums[1];
81  *year = nums[2];
82 
83  return true;
84 }
85 
86 static int isdatenum(char *Str) {
87  // Return number of day or year the Str represents.
88  // If not day or year, return 0.
89  int length, num, indi;
90 
91  length = str0len(Str);
92  if (length > 4 || length < 1)
93  return 0;
94  for (indi = 0, num = 1; indi < length && num == 1; indi++) {
95  if (!isdigit(Str[indi])) {
96  num = 0;
97  }
98  }
99  if (num == 1)
100  num = atoi(Str);
101  return num;
102 }
103 
104 class SetOnce {
105  int num_;
106  bool set_;
107  bool is_set() const { return set_; }
108 public:
109  SetOnce() : num_(-1), set_(false) {}
110 
111  bool operator!() const { return !set_; }
112 
113  int value() const { ca_assert(is_set()); return num_; }
114  void set(int val) { ca_assert(!is_set()); num_ = val; set_ = true; }
115  void replace(int val) { ca_assert(is_set()); num_ = val; }
116 };
117 
118 __ATTR__USERESULT static bool find_date_long_form(const char *date_string, int *monthPtr, int *dayPtr, int *yearPtr) {
119  // Find day, month, year in the long term date Str like day-of-week, month, day, time, year.
120 
121  int length = str0len(date_string);
122  SetOnce day, month, year;
123 
124  char token[SIZE];
125  for (int indi = 0, index = 0; index <= length; index++) {
126  if (index == length || isspace(date_string[index]) || strchr("(),", date_string[index])) {
127  if (indi == 0) continue; // empty token
128  token[indi] = '\0';
129 
130  int num = ismonth(token);
131  if (num>0) {
132  if (!month) month.set(num);
133  else if (!day) {
134  day.set(month.value()); // day has been misinterpreted as month
135  month.replace(num);
136  }
137  }
138  else if ((num = isdatenum(token)) > 0) {
139  if (!month && num <= 12) { month.set(num); }
140  else if (!day && num <= 31) { day.set(num); }
141  else if (!year) { year.set(num); }
142  }
143  indi = 0;
144  }
145  else token[indi++] = date_string[index];
146  }
147 
148  if (!day || !month || !year ||
149  day.value()>days_in_month[month.value()]) return false;
150 
151  *monthPtr = month.value();
152  *dayPtr = day.value();
153  *yearPtr = year.value();
154 
155  return true;
156 }
157 
158 inline bool is_genbank_date(const char *str) {
159  // Return true if it is genbank form of date,
160  // which is day(2 digits)-MONTH(in letters)-year(4 digits).
161  return str0len(str) >= 11 && str[2] == '-' && str[6] == '-';
162 }
163 
164 const char *genbank_date(const char *other_date) {
165  // Convert the date to be in genbank date form.
166  const char *result;
167  int length = str0len(other_date);
168 
169  if (other_date[length - 1] == '\n') {
170  char *dup = nulldup(other_date);
171  dup[--length] = '\0';
172  result = genbank_date(dup);
173  free(dup);
174  }
175  else {
176  static char gdate[SIZE];
177  gdate[0] = 0;
178 
179  int day = -1, month = -1, year = -1;
180  bool ok = false;
181  if (length > 10) {
182  if (is_genbank_date(other_date)) {
183  strncpy(gdate, other_date, 11);
184  gdate[11] = 0;
185  ok = true;
186  }
187  else ok = find_date_long_form(other_date, &month, &day, &year);
188  }
189 
190  if (!ok) ok = find_date(other_date, &month, &day, &year);
191 
192  if (!ok) {
193  warningf(146, "Unknown date format: %s, cannot convert.", other_date);
194  strcpy(gdate, ERROR_DATE);
195  }
196 
197  if (!gdate[0]) {
198  if (day <= 0 || month <= 0 || year <= 0 || month > 12 || day > days_in_month[month]) {
199  warningf(147, "Wrong date format: %s", other_date);
200  strcpy(gdate, ERROR_DATE);
201  }
202  else {
203  if (year<100) year += 1900;
204  sprintf(gdate, "%02d-%s-%d", day, MON[month - 1], year);
205  }
206  }
207 
208  ca_assert(gdate[0]);
209  result = gdate;
210  }
211  return result;
212 }
213 
214 const char *today_date() {
215  // Get today's date.
216  static char line[SIZE] = "";
217  if (!line[0]) {
218  struct timeval tp;
219  struct timezone tzp;
220  (void)gettimeofday(&tp, &tzp);
221 
222  strcpy(line, ctime(&(tp.tv_sec)));
223 
224  int len = strlen(line);
225  if (line[len-1] == '\n') {
226  line[len-1] = 0;
227  }
228  }
229  return line;
230 }
231 
232 const char *gcg_date(const char *input) {
233  // Create gcg format of date.
234  static char date[2*SIZE];
235 
236  ca_assert(strlen(input) >= 8);
237 
238  const int MONTH_POS = 4;
239  const int MONTH_LEN = 3;
240  const int DAY_POS = MONTH_POS+MONTH_LEN+1;
241 
242  const char *monthname = "";
243  {
244  char part[MONTH_LEN+1];
245  memcpy(part, input+MONTH_POS, MONTH_LEN);
246  part[MONTH_LEN] = 0;
247 
248  int month = ismonth(part);
249  if (month) monthname = Month[month-1];
250  }
251 
252  char time[SIZE];
253  int day, year;
254  IF_ASSERTION_USED(int scanned = )
255  sscanf(input+DAY_POS, "%d %s %d", &day, time, &year);
256  ca_assert(scanned == 3);
257 
258  sprintf(date, "%s %d, %d %s", monthname, day, year, time);
259  return date;
260 }
261 
262 // --------------------------------------------------------------------------------
263 
264 #ifdef UNIT_TESTS
265 #include <test_unit.h>
266 
// Apply CONVERT to 'input' and hand the result together with 'expect' to ASSERTION.
267 #define TEST_EXPECT_CONVERT(input,expect,CONVERT,ASSERTION) ASSERTION(CONVERT(input), expect);
268 
// Shortcuts checking the results of genbank_date() and gcg_date().
269 #define TEST_EXPECT_GENBANK_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL)
270 #define TEST_EXPECT_GENBANK_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, genbank_date, TEST_EXPECT_EQUAL__BROKEN)
271 #define TEST_EXPECT_GCG_DATE(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL)
272 #define TEST_EXPECT_GCG_DATE__BROKEN(input,expect) TEST_EXPECT_CONVERT(input, expect, gcg_date, TEST_EXPECT_EQUAL__BROKEN)
273 
// Expect that 'finder' returns false for 'input'.
274 #define TEST_EXPECT_INVALID_ANYDATE(input,finder) \
275  do { \
276  int day_, month_, year_; \
277  ASSERT_RESULT(bool, false, \
278  finder(input, &month_, &day_, &year_)); \
279  } while(0)
280 
281 #define TEST_EXPECT_INVALID_LONGDATE(input) TEST_EXPECT_INVALID_ANYDATE(input, find_date_long_form)
282 
// Expect that 'finder' succeeds on a heap copy of 'input' and reports day 'd', month 'm' and year 'y'.
283 #define TEST_EXPECT_FIND_ANYDATE(input,d,m,y,finder) \
284  do { \
285  char *dup_ = ARB_strdup(input); \
286  int day_, month_, year_; \
287  TEST_EXPECT(finder(dup_, &month_, &day_, &year_)); \
288  TEST_EXPECT_EQUAL(day_, d); \
289  TEST_EXPECT_EQUAL(month_, m); \
290  TEST_EXPECT_EQUAL(year_, y); \
291  free(dup_); \
292  } while (0)
293 
// Variants for find_date() (delimited dates) and find_date_long_form().
294 #define TEST_EXPECT_FIND_____DATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date)
295 #define TEST_EXPECT_FIND_LONGDATE(input,d,m,y) TEST_EXPECT_FIND_ANYDATE(input, d, m, y, find_date_long_form)
296 
297 // #define TEST_EXPECT_FIND_DATE(str,d,m,y) TEST_EXPECT_FIND_DATE_IMPL(str,d,m,y,TEST_EXPECT_EQUAL)
298 
// Unit test covering ismonth(), find_date(), find_date_long_form(), genbank_date() and gcg_date().
299 __ATTR__REDUCED_OPTIMIZE void TEST_BASIC_conv_date() {
300  TEST_EXPECT_EQUAL(ismonth("Apr"), 4);
301 
// delimited dates with two-digit years (find_date reports the year as written)
302  TEST_EXPECT_FIND_____DATE("19-APR-99", 19, 4, 99);
303  TEST_EXPECT_FIND_____DATE("22-JUN-65", 22, 6, 65);
304  TEST_EXPECT_FIND_____DATE("5-SEP-10", 5, 9, 10);
305  TEST_EXPECT_FIND_____DATE("05-SEP-10", 5, 9, 10);
306 
307  TEST_EXPECT_FIND_____DATE("19-APR-1999", 19, 4, 1999);
308  TEST_EXPECT_FIND_____DATE("22-JUN-1965", 22, 6, 1965); // date before the Unix epoch
309  TEST_EXPECT_FIND_____DATE("5-SEP-2010", 5, 9, 2010);
310  TEST_EXPECT_FIND_____DATE("05-SEP-2010", 5, 9, 2010);
311 
312  // --------------------
313 
// long form dates; tokens that are neither a month name nor a 1-4 digit number (weekday, time, zone) are ignored
314  TEST_EXPECT_FIND_LONGDATE("05 Sep 2010", 5, 9, 2010);
315  TEST_EXPECT_FIND_LONGDATE("Sep, 05 2010", 5, 9, 2010);
316  TEST_EXPECT_FIND_LONGDATE("Sep 05 2010", 5, 9, 2010);
317 
318  TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 99", 19, 4, 99);
319  TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 65", 22, 6, 65);
320  TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 10", 5, 9, 10);
321  TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 10", 5, 9, 10);
322 
323  TEST_EXPECT_FIND_LONGDATE("Mon Apr 19 25:46:19 CEST 1999", 19, 4, 1999);
324  TEST_EXPECT_FIND_LONGDATE("Tue Jun 22 05:11:00 CEST 1965", 22, 6, 1965);
325  TEST_EXPECT_FIND_LONGDATE("Wed Sep 5 19:46:25 CEST 2010", 5, 9, 2010);
326  TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 CEST 2010", 5, 9, 2010);
327  TEST_EXPECT_FIND_LONGDATE("Wed Sep 05 19:46:25 2010", 5, 9, 2010);
328 
329  TEST_EXPECT_FIND_LONGDATE("Sun Oct 31 08:37:14 2010", 31, 10, 2010);
330 
331  // --------------------
332 
// conversion into genbank format; unparsable or invalid dates yield ERROR_DATE
333  TEST_EXPECT_GENBANK_DATE("19 Apr 1999", "19-APR-1999");
334  TEST_EXPECT_GENBANK_DATE("19-APR-1999", "19-APR-1999");
335  TEST_EXPECT_GENBANK_DATE("22-JUN-1965", "22-JUN-1965");
336  TEST_EXPECT_GENBANK_DATE("5-SEP-2010", "05-SEP-2010");
337  TEST_EXPECT_GENBANK_DATE("05-SEP-2010", "05-SEP-2010");
338  TEST_EXPECT_GENBANK_DATE("crap", ERROR_DATE);
339 
340  TEST_EXPECT_GENBANK_DATE("Mon Apr 19 25:46:19 CEST 1999", "19-APR-1999");
341  TEST_EXPECT_GENBANK_DATE("Tue Jun 22 05:11:00 CEST 1965", "22-JUN-1965");
342  TEST_EXPECT_GENBANK_DATE("Wed Sep 5 19:46:25 CEST 2010", "05-SEP-2010");
343  TEST_EXPECT_GENBANK_DATE("Wed Sep 05 19:46:25 CEST 2010", "05-SEP-2010");
344  TEST_EXPECT_GENBANK_DATE("Wed Sep 31 19:46:25 CEST 2010", ERROR_DATE);
345 
346  TEST_EXPECT_GENBANK_DATE("Sun Oct 31 08:37:14 2010", "31-OCT-2010");
347  TEST_EXPECT_GENBANK_DATE("Sun 10 31 08:37:14 2010", "31-OCT-2010");
348  TEST_EXPECT_GENBANK_DATE("Sun 31 10 08:37:14 2010", "31-OCT-2010");
349  TEST_EXPECT_GENBANK_DATE("Sun Oct 32 08:37:14 2010", ERROR_DATE);
350 
351  TEST_EXPECT_GENBANK_DATE("Fri Dec 31 08:37:14 2010", "31-DEC-2010");
352  TEST_EXPECT_GENBANK_DATE("Fri 12 31 08:37:14 2010", "31-DEC-2010");
353  TEST_EXPECT_GENBANK_DATE("Fri 31 12 08:37:14 2010", "31-DEC-2010");
354  TEST_EXPECT_GENBANK_DATE("Fri 13 31 08:37:14 2010", ERROR_DATE);
355  TEST_EXPECT_GENBANK_DATE("Fri 31 13 08:37:14 2010", ERROR_DATE);
356 
357  TEST_EXPECT_GENBANK_DATE("Tue Feb 28 08:37:14 2011", "28-FEB-2011");
358  TEST_EXPECT_GENBANK_DATE("Tue Feb 29 08:37:14 2011", "29-FEB-2011"); // accepted although 2011 is no leap year (existence not checked)
359  TEST_EXPECT_GENBANK_DATE("Tue Feb 30 08:37:14 2011", ERROR_DATE); // rejected: days_in_month allows at most 29 days in February
360 
362 
363  // --------------------
364 
// conversion into gcg format
365  TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 99", "April 19, 99 25:46:19");
366 
367  TEST_EXPECT_GCG_DATE("Mon Apr 19 25:46:19 1999", "April 19, 1999 25:46:19");
368  TEST_EXPECT_GCG_DATE("Tue Jun 22 05:11:00 1965", "June 22, 1965 05:11:00");
369  TEST_EXPECT_GCG_DATE("Wed Sep 5 19:46:25 2010", "September 5, 2010 19:46:25");
370  TEST_EXPECT_GCG_DATE("Wed Sep 05 19:46:25 2010", "September 5, 2010 19:46:25");
371 
372  TEST_REJECT_NULL(gcg_date(today_date())); // currently gcg_date is only used like this
373 }
374 
375 #endif // UNIT_TESTS
CONSTEXPR_INLINE int str0len(const char *str)
Definition: global.h:98
string result
CONSTEXPR_INLINE bool str_iequal(const char *s1, const char *s2)
Definition: global.h:96
static unsigned char days_in_month[12+1]
Definition: date.cxx:25
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
Definition: util.cxx:66
#define ca_assert(cond)
Definition: global.h:33
static int isdatenum(char *Str)
Definition: date.cxx:86
int ismonth(const char *str)
Definition: date.cxx:38
bool is_genbank_date(const char *str)
Definition: date.cxx:158
const char * genbank_date(const char *other_date)
Definition: date.cxx:164
void set(int val)
Definition: date.cxx:114
#define false
Definition: ureadseq.h:13
static __ATTR__USERESULT bool find_date_long_form(const char *date_string, int *monthPtr, int *dayPtr, int *yearPtr)
Definition: date.cxx:118
bool two_char(const char *str, char determ)
Definition: date.cxx:31
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1309
#define SIZE
Definition: date.cxx:7
const char * gcg_date(const char *input)
Definition: date.cxx:232
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
static const char * ERROR_DATE
Definition: date.cxx:9
static const char * Month[12]
Definition: date.cxx:18
static __ATTR__USERESULT bool find_date(const char *date_string, int *month, int *day, int *year)
Definition: date.cxx:49
#define IF_ASSERTION_USED(x)
Definition: arb_assert.h:308
const char * today_date()
Definition: date.cxx:214
#define __ATTR__USERESULT
Definition: attributes.h:58
SetOnce()
Definition: date.cxx:109
static int line
Definition: arb_a2ps.c:296
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1290
int value() const
Definition: date.cxx:113
bool operator!() const
Definition: date.cxx:111
size_t length
static const char * MON[12]
Definition: date.cxx:11
void replace(int val)
Definition: date.cxx:115
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283