ARB
ureadasn.c
Go to the documentation of this file.
1 /* ureadasn.c
2  -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading
3  -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit)
4 */
5 
6 #ifdef NCBI
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <string.h>
11 
12 /* NCBI toolkit :include: must be on lib path */
13 #include <ncbi.h>
14 #include <seqport.h>
15 
16 #define UREADASN
17 #include "ureadseq.h"
18 
19 #pragma segment ureadasn
20 
21 /* this stuff is hacked up from tofasta.c of ncbitools */
22 #define kBaseAny 0
23 #define kBaseNucleic 1
24 #define kBaseAmino 2
25 
26 typedef struct tofasta {
27  Boolean idonly;
28  short *seqnum;
29  short whichSeq;
30  char **seq, **seqid;
31  long *seqlen;
32 } FastaDat, PNTR FastaPtr;
33 
34 
35 void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
36  short whichSeq, short *seqnum,
37  char **seq, char **seqid, long *seqlen)
38 {
39  SeqPortPtr spp;
40  SeqIdPtr bestid;
41  Uint1 repr, code, residue;
42  CharPtr tmp, title;
43  long outlen, outmax;
44  char localid[256], *sp;
45 
46  /* !!! this may be called several times for a single sequence
47  because SeqEntryExplore looks for parts and joins them...
48  assume seq, seqid, seqlen may contain data (or NULL)
49  */
50  if (bsp == NULL) return;
51  repr = Bioseq_repr(bsp);
52  if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;
53 
54  (*seqnum)++;
55  if (!(whichSeq == *seqnum || whichSeq == 0)) return;
56 
57  bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
58  title = BioseqGetTitle(bsp);
59  if (idonly) {
60  sprintf(localid, " %d) ", *seqnum);
61  tmp= localid + strlen(localid)-1;
62  }
63  else {
64  strcpy(localid," ");
65  tmp= localid;
66  }
67  tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
68  tmp = StringMove(tmp, " ");
69  StringNCpy(tmp, title, 200);
70 /* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */
71 
72  /* < seqid is fixed storage */
73  /* strcpy( *seqid, localid); */
74  /* < seqid is variable sized */
75  outmax= strlen(localid) + 3;
76  if (*seqid==NULL) {
77  *seqid= (char*) malloc(outmax);
78  if (*seqid==NULL) return;
79  strcpy(*seqid, localid);
80  }
81  else {
82  outmax += strlen(*seqid) + 2;
83  *seqid= (char*) realloc( *seqid, outmax);
84  if (*seqid==NULL) return;
85  if (!idonly) strcat(*seqid, "; ");
86  strcat(*seqid, localid);
87  }
88 
89  if (idonly) {
90  strcat(*seqid,"\n");
91  return;
92  }
93 
94  if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
95  else code = Seq_code_iupacaa;
96  spp = SeqPortNew(bsp, 0, -1, 0, code);
97  SeqPortSeek(spp, 0, SEEK_SET);
98 
99  sp= *seq;
100  if (sp==NULL) { /* this is always true now !? */
101  outlen= 0;
102  outmax= 500;
103  sp= (char*) malloc(outmax);
104  }
105  else {
106  outlen= strlen(sp);
107  outmax= outlen + 500;
108  sp= (char*) realloc( sp, outmax);
109  }
110  if (sp==NULL) return;
111 
112  while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
113  if (outlen>=outmax) {
114  outmax= outlen + 500;
115  sp= (char*) realloc(sp, outmax);
116  if (sp==NULL) return;
117  }
118  sp[outlen++] = residue;
119  }
120  sp= (char*) realloc(sp, outlen+1);
121  if (sp!=NULL) sp[outlen]= '\0';
122  *seq= sp;
123  *seqlen= outlen;
124  SeqPortFree(spp);
125  return;
126 }
127 
128 
129 static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
130 {
131  FastaPtr tfa;
132  BioseqPtr bsp;
133 
134  if (!IS_Bioseq(sep)) return;
135  bsp = (BioseqPtr)sep->data.ptrvalue;
136  tfa = (FastaPtr) data;
137  BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum,
138  tfa->seq, tfa->seqid, tfa->seqlen);
139 }
140 
141 void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum,
142  char **seq, char **seqid, long *seqlen)
143 {
144  FastaDat tfa;
145 
146  if (sep == NULL) return;
147  tfa.idonly= idonly;
148  tfa.seqnum= seqnum;
149  tfa.whichSeq= whichSeq;
150  tfa.seq = seq;
151  tfa.seqid = seqid;
152  tfa.seqlen= seqlen;
153  SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq);
154 }
155 
156 
157 
158 
159 char *listASNSeqs(const char *filename, const long skiplines,
160  const short format, /* note: this is kASNseqentry or kASNseqset */
161  short *nseq, short *error )
162 {
163  AsnIoPtr aip = NULL;
164  SeqEntryPtr the_set;
165  AsnTypePtr atp, atp2;
166  AsnModulePtr amp;
167  Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
168  char *seq = NULL;
169  char *seqid = NULL, stemp[256];
170  long seqlen;
171  int i, count;
172 
173  *nseq= 0;
174  *error= 0;
175 
176  /* asn dictionary setups */
177 /*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/
178  if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
179  amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
180  if (amp == NULL) goto errxit;
181  atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
182  if (atp == NULL) goto errxit;
183  atp2 = AsnFind("Bioseq-set.seq-set.E");
184  if (atp2 == NULL) goto errxit;
185 
186 /*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/
187  /* open the ASN.1 input file in the right mode */
188  /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) (:folder:filename) */
189  if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
190  for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
191 
192  if (! ErrSetLog ("stderr")) goto errxit;
193  else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
194 
195  if (format == kASNseqentry) { /* read one Seq-entry */
196 /*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/
197  the_set = SeqEntryAsnRead(aip, NULL);
198  SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
199  if (seq) free(seq); seq= NULL;
200  SeqEntryFree(the_set);
201  }
202  else { /* read Seq-entry's from a Bioseq-set */
203  count = 0;
204 /*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/
205  while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
206  if (atp == atp2) { /* top level Seq-entry */
207  the_set = SeqEntryAsnRead(aip, atp);
208  SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
209  SeqEntryFree(the_set);
210  if (seq) free(seq); seq= NULL;
211  }
212  else
213  AsnReadVal(aip, atp, NULL);
214  count++;
215  }
216  }
217 
218  AsnIoClose(aip);
219  *error= 0;
220  return seqid;
221 
222 errxit:
223  AsnIoClose(aip);
224  if (seqid) free(seqid);
225  *error= eASNerr;
226  return NULL;
227 }
228 
229 
230 char *readASNSeq(const short whichEntry, const char *filename,
231  const long skiplines,
232  const short format, /* note: this is kASNseqentry or kASNseqset */
233  long *seqlen, short *nseq,
234  short *error, char **seqid )
235 {
236  AsnIoPtr aip = NULL;
237  SeqEntryPtr the_set;
238  AsnTypePtr atp, atp2;
239  AsnModulePtr amp;
240  Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
241  char *seq, stemp[200];
242  int i, count;
243 
244  *seqlen= 0;
245  *nseq= 0;
246  *error= 0;
247  seq= NULL;
248 
249 /*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/
250  /* asn dictionary setups */
251  if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
252  amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
253  if (amp == NULL) goto errxit;
254  atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
255  if (atp == NULL) goto errxit;
256  atp2 = AsnFind("Bioseq-set.seq-set.E");
257  if (atp2 == NULL) goto errxit;
258 
259  /* open the ASN.1 input file in the right mode */
260 /*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/
261  if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
262  for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
263 
264  if (! ErrSetLog ("stderr")) goto errxit;
265  else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
266 
267  seq= NULL;
268  if (format == kASNseqentry) { /* read one Seq-entry */
269 /*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/
270  the_set = SeqEntryAsnRead(aip, NULL);
271  SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
272  SeqEntryFree(the_set);
273  goto goodexit;
274  }
275 
276  else { /* read Seq-entry's from a Bioseq-set */
277  count = 0;
278 /*fprintf(stderr,"readASNseq: AsnReadId\n");*/
279  while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
280  if (atp == atp2) { /* top level Seq-entry */
281  the_set = SeqEntryAsnRead(aip, atp);
282  SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
283  SeqEntryFree(the_set);
284  if (*nseq >= whichEntry) goto goodexit;
285  }
286  else
287  AsnReadVal(aip, atp, NULL);
288  count++;
289  }
290  }
291 
292 goodexit:
293  AsnIoClose(aip);
294  *error= 0;
295  return seq;
296 
297 errxit:
298  AsnIoClose(aip);
299  *error= eASNerr;
300  if (seq) free(seq);
301  return NULL;
302 }
303 
304 
305 #endif /*NCBI*/
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
long whichSeq
Definition: readseq.c:466
#define FALSE
Definition: defs.h:21
const char * title
Definition: readseq.c:22
#define eASNerr
Definition: ureadseq.h:36
FILE * seq
Definition: rns.c:46
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define kASNseqentry
Definition: ureadseq.h:77
Definition: output.h:122