33 #pragma segment ureadseq
43 if (diff)
return diff;
44 if (*a ==
'\0') done = 1;
57 if (diff)
return diff;
58 if (*a ==
'\0') done = 1;
75 #define kStartLength 500000
77 const char *
aminos =
"ABCDEFGHIKLMNPQRSTVWXYZ*";
83 const char allsymbols[32] =
"_.-*?<>{}[]()!@#$%^&=+;:'/|`~\"\\";
119 return (isalpha(c) || strchr(seqsymbols,c));
124 return (isalnum(c) || strchr(seqsymbols,c));
137 *linestart= ftell(f);
138 if (NULL == fgets(s, 256, f))
141 cp = strchr(s,
'\n');
142 if (cp != NULL) *cp = 0;
162 for (;*s != 0;s++,count++) {
163 if (count < 9 && V->isseqcharfirst8) {
171 ptr = (
char *) realloc(V->
seq, V->
maxseq + 1);
202 while (*s ==
' ') s++;
203 sprintf(si,
" %d) %s\n", V->
nseq, s);
217 boolean (*endTest)(
boolean *addend,
boolean *ungetend,
struct ReadSeqVars *V),
220 boolean addend =
false;
221 boolean ungetend =
false;
231 V->
done = feof(V->
f);
232 V->
done |= (*endTest)( &addend, &ungetend, V);
233 if (V->
addit && (addend || !V->
done) && (strlen(V->
s) > (unsigned)margin)) {
251 return((strchr(V->
s,
'1')!=NULL) || (strchr(V->
s,
'2')!=NULL));
262 for (si= V->
s; *si != 0 && *si <
' '; si++) *si=
' ';
263 if (*si == 0) *V->
s= 0;
264 }
while (! (feof(V->
f) || ((*V->
s != 0) && (*V->
s !=
';') ) ));
280 return (strstr( V->
s,
"//") != NULL);
288 if (strstr(V->
s,
"; DNA sequence ") == V->
s)
289 strcpy(V->
seqid, (V->
s)+16);
291 strcpy(V->
seqid, (V->
s)+1);
292 while ((!feof(V->
f)) && (*V->
s ==
';')) {
304 *ungetend= (strstr(V->
s,
"ENTRY") == V->
s);
305 return ((strstr(V->
s,
"///") != NULL) || *ungetend);
312 while (! (feof(V->
f) || strstr(V->
s,
"ENTRY") || strstr(V->
s,
"SEQUENCE")) )
314 strcpy(V->
seqid, (V->
s)+16);
315 while (! (feof(V->
f) || strstr(V->
s,
"SEQUENCE") == V->
s))
320 while (! (feof(V->
f) || ((*V->
s != 0)
321 && (strstr( V->
s,
"ENTRY") == V->
s))))
332 *ungetend= (strstr(V->
s,
"LOCUS") == V->
s);
333 return ((strstr(V->
s,
"//") != NULL) || *ungetend);
343 strcpy(V->
seqid, (V->
s)+12);
344 while (! (feof(V->
f) || strstr(V->
s,
"ORIGIN") == V->
s))
349 while (! (feof(V->
f) || ((*V->
s != 0)
350 && (strstr( V->
s,
"LOCUS") == V->
s))))
365 if ((a = strchr(V->
s,
'*')) != NULL) {
372 else if (*V->
s ==
'>') {
384 strcpy(V->
seqid, (V->
s)+4);
388 while (!(feof(V->
f) || (*V->
s != 0 && *V->
s ==
'>')))
401 return(*V->
s ==
'>');
407 strcpy(V->
seqid, (V->
s)+1);
410 while (!(feof(V->
f) || ((*V->
s != 0) && (*V->
s ==
'>'))))
422 *ungetend= (strstr(V->
s,
"ID ") == V->
s);
423 return ((strstr(V->
s,
"//") != NULL) || *ungetend);
429 strcpy(V->
seqid, (V->
s)+5);
432 }
while (!(feof(V->
f) | (strstr(V->
s,
"SQ ") == V->
s)));
436 while (!(feof(V->
f) |
437 ((*V->
s !=
'\0') & (strstr(V->
s,
"ID ") == V->
s))))
450 return( *V->
s ==
'(' );
459 strcpy(V->
seqid, (V->
s)+6);
462 while (!(feof(V->
f) |
463 ((*V->
s !=
'\0') & (*V->
s ==
'('))))
479 return( *V->
s !=
' ' );
488 if (!first) strcpy(V->
seqid, V->
s);
503 else sprintf(V->
seqid,
" [Unknown form]");
506 V->
done = feof(V->
f);
529 if ((si = strstr(V->
seqid,
" Length: "))) *si = 0;
530 else if ((si = strstr(V->
seqid,
".."))) *si = 0;
532 V->
done = feof(V->
f);
544 char *si, *sj, *sk, *sm, sid[40], snum[20];
545 boolean indata =
false;
550 rewind(V->
f); V->
nseq= 0;
553 V->
done = feof(V->
f);
555 if (V->
done && !(*V->
s))
break;
557 if ( (si= strstr(V->
s, sid))
559 && (sm= strstr(V->
s, snum)) && (sm < si - snumlen) ) {
574 if ((sk = strstr(si, sid))) *(sk-2) = 0;
575 for (sk = si; *sk != 0; sk++) {
576 if (*sk ==
' ') *sk =
'.';
578 else if (isdigit(*sk)) *sk= nonummask[*sk -
'0'];
585 else if ((sk = strstr(V->
s,
"): "))) {
590 si = 1 + strchr(V->
s,
'(');
594 strcpy(V->
seqid, si);
595 sj = strchr(V->
seqid,
':');
596 while (*(--sj) ==
' ') ;
597 while (--sj != V->
seqid) {
if (*sj ==
' ') *sj =
'_'; }
600 while (*(--sk) ==
' ') *sk = 0;
604 while ((*si <=
' ') && (*si != 0)) si++;
606 while (si[snumlen] >
' ' && snumlen<20)
607 { snum[snumlen]= si[snumlen]; snumlen++; }
613 else if (strstr(V->
s,
"identity: Data:")) {
627 char *si, *sj, sid[128];
628 boolean indata =
false;
633 rewind(V->
f); V->
nseq= 0;
636 V->
done = feof(V->
f);
638 if (V->
done && !(*V->
s))
break;
646 for (sj= si; *sj >
' '; sj++) ;
649 if ( (0==strcmp(si, sid)) ) {
656 else if (NULL != (si = strstr(V->
s,
"Name: "))) {
663 strcpy(V->
seqid, si);
667 for (sj= si; *sj >
' '; sj++) ;
673 else if ( strstr(V->
s,
"//") ) {
690 char *si, *sj, *send, sid[40], sid1[40], saveseq[255];
691 boolean first =
true, indata =
false, domatch;
692 int iline= 0, ifmc, saveseqlen=0;
694 #define fixmatchchar(s) { \
695 for (ifmc=0; ifmc<saveseqlen; ifmc++) \
696 if (s[ifmc] == V->matchchar) s[ifmc]= saveseq[ifmc]; }
707 V->
done = feof(V->
f);
709 if (V->
done && !(*V->
s))
break;
717 if (strchr(si,
';')) indata=
false;
724 for (sj = si; isalnum(*sj); sj++) ;
733 if (V->
nseq == 1) { strcpy( saveseq, sj); saveseqlen= strlen(saveseq); }
738 strcpy(V->
seqid, si);
740 if (V->
nseq == 1) strcpy(sid1, sid);
744 else if ( (strstr(si, sid) == si) ){
745 while (isalnum(*si)) si++;
748 if (V->
nseq == 1) { strcpy( saveseq, si); saveseqlen= strlen(saveseq); }
754 else if (domatch && (strstr(si, sid1) == si)) {
755 strcpy( saveseq, si);
756 saveseqlen= strlen(saveseq);
763 else if ( strstr(V->
s,
"matrix") ) {
779 boolean atname =
true, indata =
false;
788 V->
done = feof(V->
f);
790 if (V->
done && !(*V->
s))
break;
800 if (strchr(si,
';')) indata=
false;
808 while (isalnum(*sj)) sj++;
821 strcpy(V->
seqid, si);
841 else if ( strstr(V->
s,
"matrix") ) {
856 boolean first =
true;
866 while (isdigit(*si)) si++;
873 V->
done = feof(V->
f);
875 if (V->
done && !(*V->
s))
break;
891 strcpy(V->
seqid, si);
922 while (isdigit(*si)) si++;
928 strncpy(V->
seqid, (V->
s), 10);
930 for (i=0; i<10 && V->
s[i]; i++) V->
s[i]=
' ';
941 const long skiplines_,
944 #define tolowerstr(s) { long Itlwr, Ntlwr= strlen(s); \
945 for (Itlwr=0; Itlwr<Ntlwr; Itlwr++) s[Itlwr]= to_lower(s[Itlwr]); }
956 for (l = skiplines_; l > 0; l--)
GetLine( V);
960 for (l= strlen(V->
s); (l > 0) && (V->
s[l] ==
' '); l--) ;
961 }
while ((l == 0) && !feof(V->
f));
965 else switch (format_) {
986 if (strstr( V->
s,
"matrix")) done=
true;
987 if (strstr( V->
s,
"interleav")) interleaved=
true;
988 if (NULL != (cp=strstr( V->
s,
"ntax=")) ) V->
topnseq= atoi(cp+5);
989 if (NULL != (cp=strstr( V->
s,
"nchar=")) ) V->
topseqlen= atoi(cp+6);
990 if (NULL != (cp=strstr( V->
s,
"matchchar=")) ) {
993 else if (*cp==
'"') cp++;
1022 gotuw = (strstr(V->
s,
"..") != NULL);
1025 }
while (!(feof(V->
f) || V->
allDone));
1036 const short whichEntry_,
1038 const long skiplines_,
1039 const short format_,
1047 if (format_ < kMinFormat || format_ >
kMaxFormat) {
1064 else V.
nseq= *nseq_;
1085 const short whichEntry_,
1086 const char *filename_,
1087 const long skiplines_,
1088 const short format_,
1096 if (format_ < kMinFormat || format_ >
kMaxFormat) {
1103 V.
fname = filename_;
1120 V.
f = fopen(V.
fname,
"r");
1125 if (V.
f != NULL) fclose(V.
f);
1137 const char *filename_,
1138 const long skiplines_,
1139 const short format_,
1147 &seqlen, nseq_, error_, seqid);
1154 const char *filename,
1161 fseq = fopen(filename,
"r");
1163 if (fseq!=NULL) fclose(fseq);
1172 boolean foundIG=
false, foundStrider=
false,
1173 foundGB=
false, foundPIR=
false, foundEMBL=
false, foundNBRF=
false,
1174 foundPearson=
false, foundFitch=
false, foundPhylip=
false, foundZuker=
false,
1175 gotolsen=
false, gotpaup =
false, gotasn1 =
false, gotuw=
false, gotMSF=
false,
1176 isfitch=
false, isphylip=
false,
done=
false;
1178 int nlines= 0, k, splen= 0, otherlines= 0, aminolines= 0, dnalines= 0;
1181 int maxlines2check=500;
1183 #define ReadOneLine(sp) \
1184 { done |= (feof(fseq)); \
1185 readline( fseq, sp, &linestart); \
1186 if (!done) { splen = strlen(sp); ++nlines; } }
1196 if (nlines < 4 && !
done) {
1197 if ((strstr(sp,
"From ") == sp) || (strstr(sp,
"Received:") == sp)) {
1201 if (!
done)
for (k=0; (k<splen) && (sp[k]==
' '); k++) ;
1202 }
while ((!
done) && (k < splen));
1203 *skiplines = nlines;
1207 if (sp==NULL || *sp==0)
1212 else if ( strstr(sp,
"MSF:") && strstr(sp,
"Type:") && strstr(sp,
"Check:") )
1215 else if ((strstr(sp,
"..") != NULL) && (strstr(sp,
"Check:") != NULL))
1218 else if (strstr(sp,
"identity: Data:") != NULL)
1221 else if ( strstr(sp,
"::=") &&
1222 (strstr(sp,
"Bioseq") ||
1223 strstr(sp,
"Seq-entry") ||
1224 strstr(sp,
"Seq-submit") ) )
1227 else if ( strstr(sp,
"#NEXUS") ==
sp )
1232 else if (*sp ==
';') {
1233 if (strstr(sp,
"Strider") !=NULL) foundStrider=
true;
1237 else if (strstr(sp,
"LOCUS") ==
sp)
1239 else if (strstr(sp,
"ORIGIN") ==
sp)
1242 else if (strstr(sp,
"ENTRY ") ==
sp)
1244 else if (strstr(sp,
"SEQUENCE") ==
sp)
1247 else if (*sp ==
'>') {
1248 if (sp[3] ==
';') foundNBRF=
true;
1249 else foundPearson=
true;
1252 else if (strstr(sp,
"ID ") ==
sp)
1254 else if (strstr(sp,
"SQ ") ==
sp)
1257 else if (*sp ==
'(')
1261 if (nlines - *skiplines == 1) {
1262 int ispp= 0, ilen= 0;
1263 sscanf( sp,
"%d%d", &ispp, &ilen);
1264 if (ispp > 0 && ilen > 0) isphylip=
true;
1266 else if (isphylip && nlines - *skiplines == 2) {
1274 for (k=0, isfitch=
true; isfitch & (k < splen); k++) {
1275 if (k % 4 == 0) isfitch &= (sp[k] ==
' ');
1276 else isfitch &= (sp[k] !=
' ');
1278 if (isfitch & (splen > 20)) foundFitch=
true;
1283 case kAmino :
if (splen>20) aminolines++;
break;
1285 case kRNA :
if (splen>20) dnalines++;
break;
1309 if (strstr(sp,
"Bioseq-set")) format=
kASNseqset;
1320 if (foundIG) format=
kIG;
1325 else if ((dnalines > 1) ||
done || (nlines > maxlines2check)) {
1328 if (foundStrider) format=
kStrider;
1329 else if (foundGB) format=
kGenBank;
1330 else if (foundPIR) format=
kPIR;
1331 else if (foundEMBL) format=
kEMBL;
1332 else if (foundNBRF) format=
kNBRF;
1334 else if (foundIG) format=
kIG;
1335 else if (foundPearson) format=
kPearson;
1336 else if (foundZuker) format=
kZuker;
1338 else if (foundPhylip) format=
kPhylip;
1340 else if (foundFitch) format=
kFitch;
1342 else if (otherlines > 0) format=
kUnknown;
1343 else if (dnalines > 1) format=
kPlain;
1344 else if (aminolines > 1) format=
kPlain;
1351 else if (strstr(sp,
"): ") != NULL)
1358 long i, j, nspp= 0, nlen= 0, ilen, leaf= 0,
seq= 0;
1365 sscanf( sp,
"%ld%ld", &nspp, &nlen);
1367 for (ps= sp+10, ilen=0; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1369 for (i= 1; i<nspp; i++) {
1374 for (j=0, ps= sp; isspace(*ps) && j<10; ps++, j++);
1375 for (ps= sp; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1378 if (j>=9)
seq += 10;
1380 if (tseq != tname) leaf++;
else seq++;
1381 if (tname ==
kDNA || tname ==
kRNA)
seq++;
else leaf++;
1384 if (ilen <= nlen && j<9) {
1388 else if (ilen > nlen) {
1392 for ( nspp *= 2 ; i<nspp; i++) {
1397 for (ps= sp; *ps!=0; ps++)
if (isprint(*ps)) ilen++;
1398 for (j=0, ps= sp; isspace(*ps) && j<10; ps++, j++);
1401 if (tseq != tname)
seq++;
else leaf++;
1402 if (tname ==
kDNA || tname ==
kRNA) leaf++;
else seq++;
1405 if (j>9) leaf += 10;
1425 long i, check = 0, count = 0;
1427 for (i = 0; i <
seqlen; i++) {
1430 if (count == 57) count = 0;
1433 *checktotal += check;
1434 *checktotal %= 10000;
1440 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
1441 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
1442 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
1443 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
1444 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
1445 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
1446 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
1447 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
1448 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
1449 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
1450 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
1451 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
1452 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
1453 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
1454 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
1455 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
1456 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
1457 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
1458 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
1459 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
1460 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
1461 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
1462 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
1463 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
1464 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
1465 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
1466 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
1467 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
1468 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
1469 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
1470 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
1471 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
1472 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
1473 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
1474 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
1475 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
1476 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
1477 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
1478 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
1479 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
1480 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
1481 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
1482 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
1483 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
1484 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
1485 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
1486 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
1487 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
1488 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
1489 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
1490 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
1497 unsigned long c = 0xffffffffL;
1502 c = crctab[((
int)c ^ (
to_upper(*seq))) & 0xff] ^ (c >> 8);
1517 short na = 0, aa = 0, po = 0, nt = 0, nu = 0, ns = 0,
no = 0;
1519 maxtest =
min(300, seqlen);
1520 for (i = 0; i < maxtest; i++) {
1522 if (strchr(protonly, c)) po++;
1523 else if (strchr(primenuc,c)) {
1526 else if (c ==
'U') nu++;
1528 else if (strchr(aminos,c)) aa++;
1529 else if (strchr(seqsymbols,c)) ns++;
1530 else if (isalpha(c))
no++;
1533 if ((
no > 0) || (po+aa+na == 0))
return kOtherSeq;
1537 else if (po > 0)
return kAmino;
1539 if (nu > nt)
return kRNA;
1554 if (!seq)
return NULL;
1555 newseq = (
char*) malloc(seqlen+1);
1556 if (!newseq)
return NULL;
1557 for (a= (
char*)
seq, b=newseq, i=0; *a!=0; a++)
1563 newseq = (
char*) realloc(newseq, i+1);
1589 const short kSpaceAll = -9;
1590 #define kMaxseqwidth 250
1592 boolean baseonlynum=
false;
1594 boolean numright =
false, numleft =
false;
1595 boolean nameright =
false, nameleft =
false;
1596 short namewidth = 8, numwidth = 8;
1597 short spacer = 0, width = 50,
tab = 0;
1600 short linesout = 0, seqtype =
kNucleic;
1601 long i, j, l, l1, ibase;
1602 char idword[31], endstr[10];
1603 char seqnamestore[128], *seqname = seqnamestore;
1605 char nameform[10], numform[10], nocountsymbols[10];
1606 unsigned long checksum = 0, checktotal = 0;
1610 l =
min(128, strlen(seqid));
1611 strncpy( seqnamestore, seqid, l);
1614 sscanf( seqname,
"%30s", idword);
1615 sprintf(numform,
"%ld", seqlen);
1616 numwidth= strlen(numform)+1;
1619 if (strstr(seqname,
"checksum") != NULL) {
1620 cp = strstr(seqname,
"bases");
1622 for ( ; (cp!=seqname) && (*cp!=
','); cp--) ;
1623 if (cp!=seqname) *cp=0;
1630 if (outform ==
kGCG || outform ==
kMSF)
1639 strcpy(endstr,
"\n");
1644 fprintf(outf,
"LOCUS %s %ld bp\n", idword, seqlen);
1645 fprintf(outf,
"DEFINITION %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1647 fprintf(outf,
"ORIGIN \n");
1651 strcpy(endstr,
"\n//");
1658 fprintf(outf,
"ENTRY %s \n", idword);
1659 fprintf(outf,
"TITLE %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1661 fprintf(outf,
"SEQUENCE \n");
1666 strcpy(endstr,
"\n///");
1668 for (j=0; j<numwidth; j++)
fputc(
' ',outf);
1669 for (j= 5; j<=width; j += 5) fprintf(outf,
"%10ld",j);
1676 fprintf(outf,
">P1;%s\n", idword);
1678 fprintf(outf,
">DL;%s\n", idword);
1679 fprintf(outf,
"%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1681 strcpy(endstr,
"*\n");
1686 fprintf(outf,
"ID %s\n", idword);
1688 fprintf(outf,
"DE %s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1689 fprintf(outf,
"SQ %ld BP\n", seqlen);
1690 strcpy(endstr,
"\n//");
1698 fprintf(outf,
"%s\n", seqname);
1700 fprintf(outf,
" %s Length: %ld (today) Check: %ld ..\n", idword, seqlen, checksum);
1703 strcpy(endstr,
"\n");
1708 fprintf(outf,
"; ### from DNA Strider ;-)\n");
1709 fprintf(outf,
"; DNA sequence %s, %ld bases, %lX checksum.\n;\n", seqname, seqlen, checksum);
1710 strcpy(endstr,
"\n//");
1715 fprintf(outf,
"%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1726 if (strlen(idword)>10) idword[10] = 0;
1727 fprintf(outf,
"%-10s ",idword);
1736 case kDNA : cp= (
char*)
"dna";
break;
1737 case kRNA : cp= (
char*)
"rna";
break;
1738 case kNucleic : cp= (
char*)
"na";
break;
1739 case kAmino : cp= (
char*)
"aa";
break;
1740 case kOtherSeq: cp= (
char*)
"not-set";
break;
1742 fprintf(outf,
" seq {\n");
1743 fprintf(outf,
" id { local id %d },\n", gPretty.
atseq);
1744 fprintf(outf,
" descr { title \"%s\" },\n", seqid);
1745 fprintf(outf,
" inst {\n");
1746 fprintf(outf,
" repr raw, mol %s, length %ld, topology linear,\n", cp, seqlen);
1747 fprintf(outf,
" seq-data\n");
1749 fprintf(outf,
" iupacaa \"");
1751 fprintf(outf,
" iupacna \"");
1756 strcpy(endstr,
"\"\n } } ,");
1768 fprintf(outf,
"[Name: %-16s Len:%6ld Check: %8lX]\n", idword, seqlen, checksum);
1780 spacer = gPretty.
spacer + 1;
1785 fprintf(outf,
"Name: %-16s Len:%6ld Check: %8lX\n", idword, seqlen, checksum);
1791 fprintf(outf,
" Name: %-16s Len:%6ld Check: %5ld Weight: 1.00\n",
1792 idword, seqlen, checksum);
1796 sprintf(nameform,
"%%+%ds ",namewidth);
1803 fprintf(outf,
";%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1804 fprintf(outf,
"%s\n", idword);
1812 fprintf(outf,
">%s, %ld bases, %lX checksum.\n", seqname, seqlen, checksum);
1817 if (*nameform==0) sprintf(nameform,
"%%%d.%ds ",namewidth,namewidth);
1818 if (numline) sprintf(numform,
"%%%ds ",numwidth);
1819 else sprintf(numform,
"%%%dd ",numwidth);
1820 strcpy( nocountsymbols, kNocountsymbols);
1822 if (strchr(nocountsymbols,gPretty.
gapchar)==NULL) {
1823 strcat(nocountsymbols,
" ");
1824 nocountsymbols[strlen(nocountsymbols)-1]= gPretty.
gapchar;
1826 if (gPretty.
domatch && (cp=strchr(nocountsymbols,gPretty.
matchchar))!=NULL) {
1836 for (i=0, l=0, ibase = 1; i <
seqlen; ) {
1840 if (nameleft) fprintf(outf, nameform, idword);
1841 if (numleft) {
if (numline) fprintf(outf, numform,
"");
1842 else fprintf(outf, numform, ibase);}
1843 for (j=0; j<
tab; j++)
fputc(
' ',outf);
1848 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1)) {
1849 if (numline==1)
fputc(
' ',outf);
1852 if (l1 % 10 == 1 || l1 == width) {
1853 if (numline==1) fprintf(outf,
"%-9ld ",i+1);
1861 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1))
1863 if (!baseonlynum) ibase++;
1864 else if (0==strchr(nocountsymbols,seq[i])) ibase++;
1868 if (l1 == width || i == seqlen) {
1869 if (outform==
kPretty)
for ( ; l1<width; l1++) {
1870 if (spacer==kSpaceAll || (spacer != 0 && (l+1) % spacer == 1))
1878 if (numline==2) fprintf(outf,
"%s",s);
1881 if (i == seqlen) fprintf(outf,
"%s%s",s,endstr);
1882 else fprintf(outf,
"%s",s);
1883 if (numright || nameright)
fputc(
' ',outf);
1884 if (numright) fprintf(outf,numform, ibase-1);
1885 if (nameright) fprintf(outf, nameform,idword);
char * readSeqFp(const short whichEntry_, FILE *fp_, const long skiplines_, const short format_, long *seqlen_, short *nseq_, short *error_, char *seqid_)
#define skipwhitespace(string)
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
const unsigned long crctab[]
Local void readEMBL(struct ReadSeqVars *V)
Local void readOlsen(struct ReadSeqVars *V)
Local void readMSF(struct ReadSeqVars *V)
Local boolean endPearson(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local boolean endFitch(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void addseq(char *s, struct ReadSeqVars *V)
Local void readLoop(short margin, boolean addfirst, boolean(*endTest)(boolean *addend, boolean *ungetend, struct ReadSeqVars *V), struct ReadSeqVars *V)
Local void readPAUPinterleaved(struct ReadSeqVars *V)
Local void addinfo(char *s, struct ReadSeqVars *V)
Local void readline(FILE *f, char *s, long *linestart)
char * compressSeq(const char gapc, const char *seq, const long seqlen, long *newlen)
Local void readPhylipSequential(struct ReadSeqVars *V)
int Strncasecmp(const char *a, const char *b, long maxn)
int Strcasecmp(const char *a, const char *b)
char * listSeqs(const char *filename_, const long skiplines_, const short format_, short *nseq_, short *error_)
static const char * seqsymbols
Local void readStrider(struct ReadSeqVars *V)
Local void countseq(char *s, struct ReadSeqVars *V)
Local boolean endIG(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readZuker(struct ReadSeqVars *V)
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Local void readNBRF(struct ReadSeqVars *V)
Local void readIG(struct ReadSeqVars *V)
Local void readPIR(struct ReadSeqVars *V)
Local void readSeqMain(struct ReadSeqVars *V, const long skiplines_, const short format_)
Local boolean endPIR(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
const char kNocountsymbols[5]
Local boolean endPhylipSequential(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
static void error(const char *msg)
Local void readFitch(struct ReadSeqVars *V)
Local boolean endStrider(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local boolean endZuker(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPAUPsequential(struct ReadSeqVars *V)
short writeSeq(FILE *outf, const char *seq, const long seqlen, const short outform, const char *seqid)
unsigned long CRC32checksum(const char *seq, const long seqlen, unsigned long *checktotal)
Local void readGenBank(struct ReadSeqVars *V)
Local void GetLine(struct ReadSeqVars *V)
Local void readPlain(struct ReadSeqVars *V)
short seqFileFormatFp(FILE *fseq, long *skiplines, short *error)
short seqFileFormat(const char *filename, long *skiplines, short *error)
unsigned long GCGchecksum(const char *seq, const long seqlen, unsigned long *checktotal)
Local void unGetLine(struct ReadSeqVars *V)
Local boolean endGB(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPearson(struct ReadSeqVars *V)
const char allsymbols[32]
Local boolean endNBRF(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readPhylipInterleaved(struct ReadSeqVars *V)
char * readSeq(const short whichEntry_, const char *filename_, const long skiplines_, const short format_, long *seqlen_, short *nseq_, short *error_, char *seqid_)
short getseqtype(const char *seq, const long seqlen)
Local boolean endEMBL(boolean *addend, boolean *ungetend, struct ReadSeqVars *V)
Local void readUWGCG(struct ReadSeqVars *V)
GB_write_int const char s