41 if (diff)
return diff;
42 if (*a ==
'\0') done = 1;
55 if (diff)
return diff;
56 if (*a ==
'\0') done = 1;
73 #define kStartLength 500000
75 const char *
aminos =
"ABCDEFGHIKLMNPQRSTVWXYZ*";
81 const char allsymbols[32] =
"_.-*?<>{}[]()!@#$%^&=+;:'/|`~\"\\";
117 return (isalpha(c) || strchr(seqsymbols,c));
122 return (isalnum(c) || strchr(seqsymbols,c));
135 *linestart= ftell(f);
136 if (NULL == fgets(s, 256, f))
139 cp = strchr(s,
'\n');
140 if (cp != NULL) *cp = 0;
160 for (;*s != 0;s++,count++) {
161 if (count < 9 && V->isseqcharfirst8) {
169 ptr = (
char *) realloc(V->
seq, V->
maxseq + 1);
200 while (*s ==
' ') s++;
201 sprintf(si,
" %d) %s\n", V->
nseq, s);
215 boolean (*endTest)(
boolean *addend,
boolean *ungetend,
struct ReadSeqVars *V),
218 boolean addend =
false;
219 boolean ungetend =
false;
229 V->
done = feof(V->
f);
230 V->
done |= (*endTest)( &addend, &ungetend, V);
231 if (V->
addit && (addend || !V->
done) && (strlen(V->
s) > (unsigned)margin)) {
249 return((strchr(V->
s,
'1')!=NULL) || (strchr(V->
s,
'2')!=NULL));
260 for (si= V->
s; *si != 0 && *si <
' '; si++) *si=
' ';
261 if (*si == 0) *V->
s= 0;
262 }
while (! (feof(V->
f) || ((*V->
s != 0) && (*V->
s !=
';') ) ));
278 return (strstr( V->
s,
"//") != NULL);
286 if (strstr(V->
s,
"; DNA sequence ") == V->
s)
287 strcpy(V->
seqid, (V->
s)+16);
289 strcpy(V->
seqid, (V->
s)+1);
290 while ((!feof(V->
f)) && (*V->
s ==
';')) {
302 *ungetend= (strstr(V->
s,
"ENTRY") == V->
s);
303 return ((strstr(V->
s,
"///") != NULL) || *ungetend);
310 while (! (feof(V->
f) || strstr(V->
s,
"ENTRY") || strstr(V->
s,
"SEQUENCE")) )
312 strcpy(V->
seqid, (V->
s)+16);
313 while (! (feof(V->
f) || strstr(V->
s,
"SEQUENCE") == V->
s))
318 while (! (feof(V->
f) || ((*V->
s != 0)
319 && (strstr( V->
s,
"ENTRY") == V->
s))))
330 *ungetend= (strstr(V->
s,
"LOCUS") == V->
s);
331 return ((strstr(V->
s,
"//") != NULL) || *ungetend);
341 strcpy(V->
seqid, (V->
s)+12);
342 while (! (feof(V->
f) || strstr(V->
s,
"ORIGIN") == V->
s))
347 while (! (feof(V->
f) || ((*V->
s != 0)
348 && (strstr( V->
s,
"LOCUS") == V->
s))))
363 if ((a = strchr(V->
s,
'*')) != NULL) {
370 else if (*V->
s ==
'>') {
382 strcpy(V->
seqid, (V->
s)+4);
386 while (!(feof(V->
f) || (*V->
s != 0 && *V->
s ==
'>')))
399 return(*V->
s ==
'>');
405 strcpy(V->
seqid, (V->
s)+1);
408 while (!(feof(V->
f) || ((*V->
s != 0) && (*V->
s ==
'>'))))
420 *ungetend= (strstr(V->
s,
"ID ") == V->
s);
421 return ((strstr(V->
s,
"//") != NULL) || *ungetend);
427 strcpy(V->
seqid, (V->
s)+5);
430 }
while (!(feof(V->
f) | (strstr(V->
s,
"SQ ") == V->
s)));
434 while (!(feof(V->
f) |
435 ((*V->
s !=
'\0') & (strstr(V->
s,
"ID ") == V->
s))))
448 return( *V->
s ==
'(' );
457 strcpy(V->
seqid, (V->
s)+6);
460 while (!(feof(V->
f) |
461 ((*V->
s !=
'\0') & (*V->
s ==
'('))))
477 return( *V->
s !=
' ' );
486 if (!first) strcpy(V->
seqid, V->
s);
501 else sprintf(V->
seqid,
" [Unknown form]");
504 V->
done = feof(V->
f);
527 if ((si = strstr(V->
seqid,
" Length: "))) *si = 0;
528 else if ((si = strstr(V->
seqid,
".."))) *si = 0;
530 V->
done = feof(V->
f);
542 char *si, *sj, *sk, *sm, sid[40], snum[20];
543 boolean indata =
false;
548 rewind(V->
f); V->
nseq= 0;
551 V->
done = feof(V->
f);
553 if (V->
done && !(*V->
s))
break;
555 if ( (si= strstr(V->
s, sid))
557 && (sm= strstr(V->
s, snum)) && (sm < si - snumlen) ) {
572 if ((sk = strstr(si, sid))) *(sk-2) = 0;
573 for (sk = si; *sk != 0; sk++) {
574 if (*sk ==
' ') *sk =
'.';
576 else if (isdigit(*sk)) *sk= nonummask[*sk -
'0'];
583 else if ((sk = strstr(V->
s,
"): "))) {
588 si = 1 + strchr(V->
s,
'(');
592 strcpy(V->
seqid, si);
593 sj = strchr(V->
seqid,
':');
594 while (*(--sj) ==
' ') ;
595 while (--sj != V->
seqid) {
if (*sj ==
' ') *sj =
'_'; }
598 while (*(--sk) ==
' ') *sk = 0;
602 while ((*si <=
' ') && (*si != 0)) si++;
604 while (si[snumlen] >
' ' && snumlen<20)
605 { snum[snumlen]= si[snumlen]; snumlen++; }
611 else if (strstr(V->
s,
"identity: Data:")) {
625 char *si, *sj, sid[128];
626 boolean indata =
false;
631 rewind(V->
f); V->
nseq= 0;
634 V->
done = feof(V->
f);
636 if (V->
done && !(*V->
s))
break;
644 for (sj= si; *sj >
' '; sj++) ;
647 if ( (0==strcmp(si, sid)) ) {
654 else if (NULL != (si = strstr(V->
s,
"Name: "))) {
661 strcpy(V->
seqid, si);
665 for (sj= si; *sj >
' '; sj++) ;
671 else if ( strstr(V->
s,
"//") ) {
688 char *si, *sj, *send, sid[40], sid1[40], saveseq[255];
689 boolean first =
true, indata =
false, domatch;
690 int iline= 0, ifmc, saveseqlen=0;
692 #define fixmatchchar(s) { \
693 for (ifmc=0; ifmc<saveseqlen; ifmc++) \
694 if (s[ifmc] == V->matchchar) s[ifmc]= saveseq[ifmc]; }
705 V->
done = feof(V->
f);
707 if (V->
done && !(*V->
s))
break;
715 if (strchr(si,
';')) indata=
false;
722 for (sj = si; isalnum(*sj); sj++) ;
731 if (V->
nseq == 1) { strcpy( saveseq, sj); saveseqlen= strlen(saveseq); }
736 strcpy(V->
seqid, si);
738 if (V->
nseq == 1) strcpy(sid1, sid);
742 else if ( (strstr(si, sid) == si) ){
743 while (isalnum(*si)) si++;
746 if (V->
nseq == 1) { strcpy( saveseq, si); saveseqlen= strlen(saveseq); }
752 else if (domatch && (strstr(si, sid1) == si)) {
753 strcpy( saveseq, si);
754 saveseqlen= strlen(saveseq);
761 else if ( strstr(V->
s,
"matrix") ) {
777 boolean atname =
true, indata =
false;
786 V->
done = feof(V->
f);
788 if (V->
done && !(*V->
s))
break;
798 if (strchr(si,
';')) indata=
false;
806 while (isalnum(*sj)) sj++;
819 strcpy(V->
seqid, si);
839 else if ( strstr(V->
s,
"matrix") ) {
854 boolean first =
true;
864 while (isdigit(*si)) si++;
871 V->
done = feof(V->
f);
873 if (V->
done && !(*V->
s))
break;
889 strcpy(V->
seqid, si);
920 while (isdigit(*si)) si++;
926 strncpy(V->
seqid, (V->
s), 10);
928 for (i=0; i<10 && V->
s[i]; i++) V->
s[i]=
' ';
939 const long skiplines_,
942 #define tolowerstr(s) { long Itlwr, Ntlwr= strlen(s); \
943 for (Itlwr=0; Itlwr<Ntlwr; Itlwr++) s[Itlwr]= to_lower(s[Itlwr]); }
954 for (l = skiplines_; l > 0; l--)
GetLine( V);
958 for (l= strlen(V->
s); (l > 0) && (V->
s[l] ==
' '); l--) ;
959 }
while ((l == 0) && !feof(V->
f));
963 else switch (format_) {
984 if (strstr( V->
s,
"matrix")) done=
true;
985 if (strstr( V->
s,
"interleav")) interleaved=
true;
986 if (NULL != (cp=strstr( V->
s,
"ntax=")) ) V->
topnseq= atoi(cp+5);
987 if (NULL != (cp=strstr( V->
s,
"nchar=")) ) V->
topseqlen= atoi(cp+6);
988 if (NULL != (cp=strstr( V->
s,
"matchchar=")) ) {
991 else if (*cp==
'"') cp++;
1020 gotuw = (strstr(V->
s,
"..") != NULL);
1023 }
while (!(feof(V->
f) || V->
allDone));
1034 const short whichEntry_,
1036 const long skiplines_,
1037 const short format_,
1045 if (format_ < kMinFormat || format_ >
kMaxFormat) {
1062 else V.
nseq= *nseq_;
1083 const short whichEntry_,
1084 const char *filename_,
1085 const long skiplines_,
1086 const short format_,
1094 if (format_ < kMinFormat || format_ >
kMaxFormat) {
1101 V.
fname = filename_;
1118 V.
f = fopen(V.
fname,
"r");
1123 if (V.
f != NULL) fclose(V.
f);
1135 const char *filename_,
1136 const long skiplines_,
1137 const short format_,
1145 &seqlen, nseq_, error_, seqid);
1152 const char *filename,
1159 fseq = fopen(filename,
"r");
1161 if (fseq!=NULL) fclose(fseq);
1170 boolean foundIG=
false, foundStrider=
false,
1171 foundGB=
false, foundPIR=
false, foundEMBL=
false, foundNBRF=
false,
1172 foundPearson=
false, foundFitch=
false, foundPhylip=
false, foundZuker=
false,
1173 gotolsen=
false, gotpaup =
false, gotasn1 =
false, gotuw=
false, gotMSF=
false,
1174 isfitch=
false, isphylip=
false,
done=
false;
1176 int nlines= 0, k, splen= 0, otherlines= 0, aminolines= 0, dnalines= 0;
1179 int maxlines2check=500;
1181 #define ReadOneLine(sp) \
1182 { done |= (feof(fseq)); \
1183 readline( fseq, sp, &linestart); \
1184 if (!done) { splen = strlen(sp); ++nlines; } }
1194 if (nlines < 4 && !
done) {
1195 if ((strstr(sp,
"From ") == sp) || (strstr(sp,
"Received:") == sp)) {
1199 if (!
done)
for (k=0; (k<splen) && (sp[k]==
' '); k++) ;
1200 }
while ((!
done) && (k < splen));
1201 *skiplines = nlines;
1205 if (sp==NULL || *sp==0)
1210 else if ( strstr(sp,
"MSF:") && strstr(sp,
"Type:") && strstr(sp,
"Check:") )
1213 else if ((strstr(sp,
"..") != NULL) && (strstr(sp,
"Check:") != NULL))
1216 else if (strstr(sp,
"identity: Data:") != NULL)
1219 else if ( strstr(sp,
"::=") &&
1220 (strstr(sp,
"Bioseq") ||
1221 strstr(sp,
"Seq-entry") ||
1222 strstr(sp,
"Seq-submit") ) )
1225 else if ( strstr(sp,
"#NEXUS") ==
sp )
1230 else if (*sp ==
';') {
1231 if (strstr(sp,
"Strider") !=NULL) foundStrider=
true;
1235 else if (strstr(sp,
"LOCUS") ==
sp)
1237 else if (strstr(sp,
"ORIGIN") ==
sp)
1240 else if (strstr(sp,
"ENTRY ") ==
sp)
1242 else if (strstr(sp,
"SEQUENCE") ==
sp)
1245 else if (*sp ==
'>') {
1246 if (sp[3] ==
';') foundNBRF=
true;
1247 else foundPearson=
true;
1250 else if (strstr(sp,
"ID ") ==
sp)
1252 else if (strstr(sp,
"SQ ") ==
sp)
1255 else if (*sp ==
'(')
1259 if (nlines - *skiplines == 1) {
1260 int ispp= 0, ilen= 0;
1261 sscanf( sp,
"%d%d", &ispp, &ilen);
1262 if (ispp > 0 && ilen > 0) isphylip=
true;
1264 else if (isphylip && nlines - *skiplines == 2) {
1272 for (k=0, isfitch=
true; isfitch & (k < splen); k++) {
1273 if (k % 4 == 0) isfitch &= (sp[k] ==
' ');
1274 else isfitch &= (sp[k] !=
' ');
1276 if (isfitch & (splen > 20)) foundFitch=
true;
1281 case kAmino :
if (splen>20) aminolines++;
break;
1283 case kRNA :
if (splen>20) dnalines++;
break;
1307 if (strstr(sp,
"Bioseq-set")) format=
kASNseqset;
1318 if (foundIG) format=
kIG;
1323 else if ((dnalines > 1) ||
done || (nlines > maxlines2check)) {
1326 if (foundStrider) format=
kStrider;
1327 else if (foundGB) format=
kGenBank;
1328 else if (foundPIR) format=
kPIR;
1329 else if (foundEMBL) format=
kEMBL;
1330 else if (foundNBRF) format=
kNBRF;
1332 else if (foundIG) format=
kIG;
1333 else if (foundPearson) format=
kPearson;
1334 else if (foundZuker) format=
kZuker;
1336 else if (foundPhylip) format=
kPhylip;
1338 else if (foundFitch) format=
kFitch;
1340 else if (otherlines > 0) format=
kUnknown;
1341 else if (dnalines > 1) format=
kPlain;
1342 else if (aminolines > 1) format=
kPlain;
1349 else if (strstr(sp,
"): ") != NULL)
1356 long i, j, nspp= 0, nlen= 0, ilen, leaf= 0,
seq= 0;
1363 sscanf( sp,
"%ld%ld", &nspp, &nlen);
1365 for (ps= sp+10, ilen=0; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1367 for (i= 1; i<nspp; i++) {
1372 for (j=0, ps= sp; isspace(*ps) && j<10; ps++, j++);
1373 for (ps= sp; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1376 if (j>=9)
seq += 10;
1378 if (tseq != tname) leaf++;
else seq++;
1379 if (tname ==
kDNA || tname ==
kRNA)
seq++;
else leaf++;
1382 if (ilen <= nlen && j<9) {
1386 else if (ilen > nlen) {
1390 for ( nspp *= 2 ; i<nspp; i++) {
1395 for (ps= sp; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1396 for (j=0, ps= sp; isspace(*ps) && j<10; ps++, j++);
1399 if (tseq != tname)
seq++;
else leaf++;
1400 if (tname ==
kDNA || tname ==
kRNA) leaf++;
else seq++;
1403 if (j>9) leaf += 10;
1423 long i, check = 0, count = 0;
1425 for (i = 0; i <
seqlen; i++) {
1428 if (count == 57) count = 0;
1431 *checktotal += check;
1432 *checktotal %= 10000;
1438 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
1439 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
1440 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
1441 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
1442 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
1443 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
1444 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
1445 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
1446 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
1447 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
1448 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
1449 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
1450 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
1451 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
1452 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
1453 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
1454 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
1455 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
1456 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
1457 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
1458 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
1459 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
1460 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
1461 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
1462 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
1463 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
1464 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
1465 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
1466 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
1467 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
1468 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
1469 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
1470 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
1471 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
1472 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
1473 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
1474 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
1475 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
1476 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
1477 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
1478 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
1479 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
1480 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
1481 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
1482 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
1483 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
1484 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
1485 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
1486 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
1487 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
1488 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
1495 unsigned long c = 0xffffffffL;
1500 c = crctab[((
int)c ^ (
to_upper(*seq))) & 0xff] ^ (c >> 8);
1515 short na = 0, aa = 0, po = 0, nt = 0, nu = 0, ns = 0,
no = 0;
1517 maxtest =
min(300, seqlen);
1518 for (i = 0; i < maxtest; i++) {
1520 if (strchr(protonly, c)) po++;
1521 else if (strchr(primenuc,c)) {
1524 else if (c ==
'U') nu++;
1526 else if (strchr(aminos,c)) aa++;
1527 else if (strchr(seqsymbols,c)) ns++;
1528 else if (isalpha(c))
no++;
1531 if ((
no > 0) || (po+aa+na == 0))
return kOtherSeq;
1535 else if (po > 0)
return kAmino;
1537 if (nu > nt)
return kRNA;
1552 if (!seq)
return NULL;
1553 newseq = (
char*) malloc(seqlen+1);
1554 if (!newseq)
return NULL;
1555 for (a= (
char*)
seq, b=newseq, i=0; *a!=0; a++)
1561 newseq = (
char*) realloc(newseq, i+1);
1587 const short kSpaceAll = -9;
1588 #define kMaxseqwidth 250
1590 boolean baseonlynum=
false;
1592 boolean numright =
false, numleft =
false;
1593 boolean nameright =
false, nameleft =
false;
1594 short namewidth = 8, numwidth = 8;
1595 short spacer = 0, width = 50,
tab = 0;
1598 short linesout = 0, seqtype =
kNucleic;
1599 long i, j, l, l1, ibase;
1600 char idword[31], endstr[14];
1601 char seqnamestore[128], *seqname = seqnamestore;
1603 char nameform[10], numform[10], nocountsymbols[10];
1604 unsigned long checksum = 0, checktotal = 0;
1608 l =
min(128, strlen(seqid));
1609 strncpy( seqnamestore, seqid, l);
1612 sscanf( seqname,
"%30s", idword);
1613 sprintf(numform,
"%ld", seqlen);
1614 numwidth= strlen(numform)+1;
1617 if (strstr(seqname,
"checksum") != NULL) {
1618 cp = strstr(seqname,
"bases");
1620 for ( ; (cp!=seqname) && (*cp!=
','); cp--) ;
1621 if (cp!=seqname) *cp=0;
1628 if (outform ==
kGCG || outform ==
kMSF)
1637 strcpy(endstr,
"\n");
1642 fprintf(outf,
"LOCUS %s %ld bp\n", idword, seqlen);
1643 fprintf(outf,
"DEFINITION %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1645 fprintf(outf,
"ORIGIN \n");
1649 strcpy(endstr,
"\n//");
1656 fprintf(outf,
"ENTRY %s \n", idword);
1657 fprintf(outf,
"TITLE %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1659 fprintf(outf,
"SEQUENCE \n");
1664 strcpy(endstr,
"\n///");
1666 for (j=0; j<numwidth; j++)
fputc(
' ',outf);
1667 for (j= 5; j<=width; j += 5) fprintf(outf,
"%10ld",j);
1674 fprintf(outf,
">P1;%s\n", idword);
1676 fprintf(outf,
">DL;%s\n", idword);
1677 fprintf(outf,
"%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1679 strcpy(endstr,
"*\n");
1684 fprintf(outf,
"ID %s\n", idword);
1686 fprintf(outf,
"DE %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1687 fprintf(outf,
"SQ %ld BP\n", seqlen);
1688 strcpy(endstr,
"\n//");
1696 fprintf(outf,
"%s\n", seqname);
1698 fprintf(outf,
" %s Length: %ld (today) Check: %ld ..\n", idword, seqlen, checksum);
1701 strcpy(endstr,
"\n");
1706 fprintf(outf,
"; ### from DNA Strider ;-)\n");
1707 fprintf(outf,
"; DNA sequence %s, %ld bases, %lX checksum.\n;\n", seqname, seqlen, checksum);
1708 strcpy(endstr,
"\n//");
1713 fprintf(outf,
"%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1724 if (strlen(idword)>10) idword[10] = 0;
1725 fprintf(outf,
"%-10s ",idword);
1734 case kDNA : cp= (
char*)
"dna";
break;
1735 case kRNA : cp= (
char*)
"rna";
break;
1736 case kNucleic : cp= (
char*)
"na";
break;
1737 case kAmino : cp= (
char*)
"aa";
break;
1738 case kOtherSeq: cp= (
char*)
"not-set";
break;
1740 fprintf(outf,
" seq {\n");
1741 fprintf(outf,
" id { local id %d },\n", gPretty.
atseq);
1742 fprintf(outf,
" descr { title \"%s\" },\n", seqid);
1743 fprintf(outf,
" inst {\n");
1744 fprintf(outf,
" repr raw, mol %s, length %ld, topology linear,\n", cp, seqlen);
1745 fprintf(outf,
" seq-data\n");
1747 fprintf(outf,
" iupacaa \"");
1749 fprintf(outf,
" iupacna \"");
1754 strcpy(endstr,
"\"\n } } ,");
1766 fprintf(outf,
"[Name: %-16s Len:%6ld Check: %8lX]\n", idword, seqlen, checksum);
1778 spacer = gPretty.
spacer + 1;
1783 fprintf(outf,
"Name: %-16s Len:%6ld Check: %8lX\n", idword, seqlen, checksum);
1789 fprintf(outf,
" Name: %-16s Len:%6ld Check: %5ld Weight: 1.00\n",
1790 idword, seqlen, checksum);
1794 sprintf(nameform,
"%%+%ds ",namewidth);
1801 fprintf(outf,
";%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1802 fprintf(outf,
"%s\n", idword);
1810 fprintf(outf,
">%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1815 if (*nameform==0) sprintf(nameform,
"%%%d.%ds ",namewidth,namewidth);
1816 if (numline) sprintf(numform,
"%%%ds ",numwidth);
1817 else sprintf(numform,
"%%%dd ",numwidth);
1818 strcpy( nocountsymbols, kNocountsymbols);
1820 if (strchr(nocountsymbols,gPretty.
gapchar)==NULL) {
1821 strcat(nocountsymbols,
" ");
1822 nocountsymbols[strlen(nocountsymbols)-1]= gPretty.
gapchar;
1824 if (gPretty.
domatch && (cp=strchr(nocountsymbols,gPretty.
matchchar))!=NULL) {
1834 for (i=0, l=0, ibase = 1; i <
seqlen; ) {
1838 if (nameleft) fprintf(outf, nameform, idword);
1839 if (numleft) {
if (numline) fprintf(outf, numform,
"");
1840 else fprintf(outf, numform, ibase);}
1841 for (j=0; j<
tab; j++)
fputc(
' ',outf);
1846 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1)) {
1847 if (numline==1)
fputc(
' ',outf);
1850 if (l1 % 10 == 1 || l1 == width) {
1851 if (numline==1) fprintf(outf,
"%-9ld ",i+1);
1859 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1))
1861 if (!baseonlynum) ibase++;
1862 else if (0==strchr(nocountsymbols,seq[i])) ibase++;
1866 if (l1 == width || i == seqlen) {
1867 if (outform==
kPretty)
for ( ; l1<width; l1++) {
1868 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1))
1876 if (numline==2) fprintf(outf,
"%s",s);
1879 if (i == seqlen) fprintf(outf,
"%s%s",s,endstr);
1880 else fprintf(outf,
"%s",s);
1881 if (numright || nameright)
fputc(
' ',outf);
1882 if (numright) fprintf(outf,numform, ibase-1);
1883 if (nameright) fprintf(outf, nameform,idword);
char * readSeqFp(const short whichEntry_, FILE *fp_, const long skiplines_, const short format_, long *seqlen_, short *nseq_, short *error_, char *seqid_)
#define skipwhitespace(string)
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
const unsigned long crctab[]
Local void readEMBL(struct ReadSeqVars *V)
Local void readOlsen(struct ReadSeqVars *V)
Local void readMSF(struct ReadSeqVars *V)
Local boolean endPearson(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local boolean endFitch(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void addseq(char *s, struct ReadSeqVars *V)
Local void readLoop(short margin, boolean addfirst, boolean(*endTest)(boolean *addend, boolean *ungetend, struct ReadSeqVars *V), struct ReadSeqVars *V)
Local void readPAUPinterleaved(struct ReadSeqVars *V)
Local void addinfo(char *s, struct ReadSeqVars *V)
Local void readline(FILE *f, char *s, long *linestart)
char * compressSeq(const char gapc, const char *seq, const long seqlen, long *newlen)
Local void readPhylipSequential(struct ReadSeqVars *V)
int Strncasecmp(const char *a, const char *b, long maxn)
int Strcasecmp(const char *a, const char *b)
char * listSeqs(const char *filename_, const long skiplines_, const short format_, short *nseq_, short *error_)
static const char * seqsymbols
Local void readStrider(struct ReadSeqVars *V)
Local void countseq(char *s, struct ReadSeqVars *V)
Local boolean endIG(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readZuker(struct ReadSeqVars *V)
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Local void readNBRF(struct ReadSeqVars *V)
Local void readIG(struct ReadSeqVars *V)
Local void readPIR(struct ReadSeqVars *V)
Local void readSeqMain(struct ReadSeqVars *V, const long skiplines_, const short format_)
Local boolean endPIR(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
const char kNocountsymbols[5]
Local boolean endPhylipSequential(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
static void error(const char *msg)
Local void readFitch(struct ReadSeqVars *V)
Local boolean endStrider(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local boolean endZuker(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPAUPsequential(struct ReadSeqVars *V)
short writeSeq(FILE *outf, const char *seq, const long seqlen, const short outform, const char *seqid)
unsigned long CRC32checksum(const char *seq, const long seqlen, unsigned long *checktotal)
Local void readGenBank(struct ReadSeqVars *V)
Local void GetLine(struct ReadSeqVars *V)
Local void readPlain(struct ReadSeqVars *V)
short seqFileFormatFp(FILE *fseq, long *skiplines, short *error)
short seqFileFormat(const char *filename, long *skiplines, short *error)
unsigned long GCGchecksum(const char *seq, const long seqlen, unsigned long *checktotal)
Local void unGetLine(struct ReadSeqVars *V)
Local boolean endGB(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPearson(struct ReadSeqVars *V)
const char allsymbols[32]
Local boolean endNBRF(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPhylipInterleaved(struct ReadSeqVars *V)
char * readSeq(const short whichEntry_, const char *filename_, const long skiplines_, const short format_, long *seqlen_, short *nseq_, short *error_, char *seqid_)
short getseqtype(const char *seq, const long seqlen)
Local boolean endEMBL(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readUWGCG(struct ReadSeqVars *V)
GB_write_int const char s