Datei: NDODLL/NDOql/Scanner.cs
Last Commit (f96ae8c)
1 | |
2 | // |
3 | // This is generated code. Do not change it directly, change the Scanner.frame file and rebuild the grammar. |
4 | // |
5 | |
6 | #pragma warning disable 1591 // No XML comments needed |
7 | |
8 | using System; |
9 | using System.IO; |
10 | using System.Collections; |
11 | |
12 | namespace NDOql { |
13 | |
// A single lexical token produced by the Scanner.
// Tokens are chained through `next`, so tokens that were only peeked
// can later be handed out again by Scan().
public class Token {
    // token kind (numeric code assigned by the scanner)
    public int kind;
    // token position in bytes in the source text (starting at 0)
    public int pos;
    // token position in characters in the source text (starting at 0)
    public int charPos;
    // token column (starting at 1)
    public int col;
    // token line (starting at 1)
    public int line;
    // token value (the text of the token)
    public string val;
    // ML 2005-03-11 Tokens are kept in linked list
    public Token next;
}
23 | |
24 | //----------------------------------------------------------------------------------- |
25 | // Buffer |
26 | //----------------------------------------------------------------------------------- |
// Byte-oriented input buffer with random access via the Pos property.
public class Buffer {
    // This Buffer supports the following cases:
    // 1) seekable stream (file)
    //    a) whole stream in buffer
    //    b) part of stream in buffer
    // 2) non seekable stream (network, console)

    public const int EOF = char.MaxValue + 1; // returned by Read() at end of input; outside the char range
    const int MIN_BUFFER_LENGTH = 1024; // 1KB
    const int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
    byte[] buf;         // input buffer
    int bufStart;       // position of first byte in buffer relative to input stream
    int bufLen;         // length of buffer
    int fileLen;        // length of input stream (may change if the stream is no file)
    int bufPos;         // current position in buffer
    Stream stream;      // input stream; null once closed or handed over to another Buffer
    bool isUserStream;  // was the stream opened by the user?

    // Creates a buffer over stream s.
    // isUserStream: true if the caller owns the stream; such a stream is never closed here.
    public Buffer (Stream s, bool isUserStream) {
        stream = s; this.isUserStream = isUserStream;

        if (stream.CanSeek) {
            fileLen = (int) stream.Length;
            bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH);
            bufStart = Int32.MaxValue; // nothing in the buffer so far
        } else {
            fileLen = bufLen = bufStart = 0;
        }

        buf = new byte[(bufLen>0) ? bufLen : MIN_BUFFER_LENGTH];
        if (fileLen > 0) Pos = 0; // setup buffer to position 0 (start)
        else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
        // the whole stream fits into the buffer, so the stream is not needed any longer
        if (bufLen == fileLen && stream.CanSeek) Close();
    }

    // Takes over the complete state (including the stream) of buffer b.
    protected Buffer(Buffer b) { // called in UTF8Buffer constructor
        buf = b.buf;
        bufStart = b.bufStart;
        bufLen = b.bufLen;
        fileLen = b.fileLen;
        bufPos = b.bufPos;
        stream = b.stream;
        // keep destructor from closing the stream
        b.stream = null;
        isUserStream = b.isUserStream;
    }

    ~Buffer() { Close(); }

    // Closes the stream unless it belongs to the user.
    protected void Close() {
        if (!isUserStream && stream != null) {
            stream.Close();
            stream = null;
        }
    }

    // Returns the next byte and advances the position, or EOF at the end of the input.
    public virtual int Read () {
        if (bufPos < bufLen) {
            return buf[bufPos++];
        } else if (Pos < fileLen) {
            Pos = Pos; // shift buffer start to Pos
            return buf[bufPos++];
        } else if (stream != null && !stream.CanSeek && ReadNextStreamChunk() > 0) {
            return buf[bufPos++];
        } else {
            return EOF;
        }
    }

    // Returns what Read() would return, without changing the position.
    public int Peek () {
        int curPos = Pos;
        int ch = Read();
        Pos = curPos;
        return ch;
    }

    // beg .. begin, zero-based, inclusive, in byte
    // end .. end, zero-based, exclusive, in byte
    // The current position is restored afterwards. If end lies behind the end
    // of the input, the text up to the end of the input is returned.
    public string GetString (int beg, int end) {
        int len = 0;
        char[] buf = new char[end - beg];
        int oldPos = Pos;
        Pos = beg;
        while (Pos < end) {
            int before = Pos;
            int ch = Read();
            // Read() did not advance: the input is exhausted before 'end'.
            // Without this check the loop would never reach 'end' and would
            // overrun the array.
            if (Pos == before) break;
            buf[len++] = (char) ch;
        }
        Pos = oldPos;
        return new String(buf, 0, len);
    }

    // Absolute read position in bytes, relative to the start of the input.
    // Setting a value outside 0..fileLen throws FatalError.
    public int Pos {
        get { return bufPos + bufStart; }
        set {
            if (value >= fileLen && stream != null && !stream.CanSeek) {
                // Wanted position is after buffer and the stream
                // is not seek-able e.g. network or console,
                // thus we have to read the stream manually till
                // the wanted position is in sight.
                while (value >= fileLen && ReadNextStreamChunk() > 0);
            }

            if (value < 0 || value > fileLen) {
                throw new FatalError("buffer out of bounds access, position: " + value);
            }

            if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
                bufPos = value - bufStart;
            } else if (stream != null && stream.CanSeek) { // must be swapped in
                // Only seekable streams may be repositioned. A non seekable stream
                // gets here only with value == fileLen (end of input) and must be
                // handled by the branch below, Seek() would throw for it.
                stream.Seek(value, SeekOrigin.Begin);
                bufLen = stream.Read(buf, 0, buf.Length);
                bufStart = value; bufPos = 0;
            } else {
                // set the position to the end of the file, Pos will return fileLen.
                bufPos = fileLen - bufStart;
            }
        }
    }

    // Read the next chunk of bytes from the stream, increases the buffer
    // if needed and updates the fields fileLen and bufLen.
    // Returns the number of bytes read.
    private int ReadNextStreamChunk() {
        int free = buf.Length - bufLen;
        if (free == 0) {
            // in the case of a growing input stream
            // we can neither seek in the stream, nor can we
            // foresee the maximum length, thus we must adapt
            // the buffer size on demand.
            byte[] newBuf = new byte[bufLen * 2];
            Array.Copy(buf, newBuf, bufLen);
            buf = newBuf;
            free = bufLen;
        }
        int read = stream.Read(buf, bufLen, free);
        if (read > 0) {
            fileLen = bufLen = (bufLen + read);
            return read;
        }
        // end of stream reached
        return 0;
    }
}
167 | |
168 | //----------------------------------------------------------------------------------- |
169 | // UTF8Buffer |
170 | //----------------------------------------------------------------------------------- |
// Buffer variant whose Read() decodes UTF-8 byte sequences into code points.
public class UTF8Buffer: Buffer {
    public UTF8Buffer(Buffer b): base(b) {}

    // Returns the next decoded character, or EOF at the end of the input.
    public override int Read() {
        // skip bytes until a utf8 start (0xxxxxxx or 11xxxxxx) or EOF shows up
        int lead = base.Read();
        while (lead >= 128 && (lead & 0xC0) != 0xC0 && lead != EOF) {
            lead = base.Read();
        }

        // 0xxxxxxx or end of file character:
        // the first 127 chars are the same in ascii and utf8
        if (lead < 128 || lead == EOF) return lead;

        int code;       // payload bits collected so far
        int followers;  // number of 10xxxxxx bytes that follow the lead byte
        if ((lead & 0xF0) == 0xF0) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            code = lead & 0x07; followers = 3;
        } else if ((lead & 0xE0) == 0xE0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            code = lead & 0x0F; followers = 2;
        } else {
            // 110xxxxx 10xxxxxx (the only remaining 11xxxxxx pattern)
            code = lead & 0x1F; followers = 1;
        }

        // every following byte contributes its lower six bits
        for (int i = 0; i < followers; i++) {
            code = (code << 6) | (base.Read() & 0x3F);
        }
        return code;
    }
}
205 | |
206 | //----------------------------------------------------------------------------------- |
207 | // Scanner |
208 | //----------------------------------------------------------------------------------- |
// Generated scanner: splits the input into Tokens. Input characters are
// lower-cased for matching, while token values keep the original spelling.
public class Scanner {
    const char EOL = '\n';
    const int eofSym = 0; /* pdt */
    const int maxT = 39;
    const int noSym = 39;
    char valCh;       // current input character in its original case (for token.val)

    public Buffer buffer; // scanner buffer

    Token t;          // current token
    int ch;           // current input character (lower-cased)
    int pos;          // byte position of current character
    int charPos;      // position by unicode characters starting with 0
    int col;          // column number of current character
    int line;         // line number of current character
    int oldEols;      // EOLs that appeared in a comment;
    static readonly Hashtable start; // maps first token character to start state

    Token tokens;     // list of tokens already peeked (first token is a dummy)
    Token pt;         // current peek token

    char[] tval = new char[128]; // text of current token
    int tlen;         // length of current token

    // Fills the table that maps the first character of a token to the start
    // state of the automaton in NextToken().
    static Scanner() {
        start = new Hashtable(128);
        start[95] = 1;                                   // '_'
        for (int i = 97; i <= 122; ++i) start[i] = 1;    // 'a'..'z'
        start[170] = 1;
        start[181] = 1;
        start[186] = 1;
        for (int i = 192; i <= 214; ++i) start[i] = 1;
        for (int i = 216; i <= 246; ++i) start[i] = 1;
        for (int i = 248; i <= 255; ++i) start[i] = 1;
        for (int i = 48; i <= 57; ++i) start[i] = 12;    // '0'..'9'
        start[46] = 2;    // '.'
        start[45] = 31;   // '-'
        start[39] = 7;    // '\''
        start[123] = 9;   // '{'
        start[33] = 32;   // '!'
        start[60] = 33;   // '<'
        start[61] = 15;   // '='
        start[40] = 16;   // '('
        start[41] = 17;   // ')'
        start[62] = 34;   // '>'
        start[124] = 22;  // '|'
        start[94] = 23;   // '^'
        start[38] = 24;   // '&'
        start[43] = 25;   // '+'
        start[42] = 26;   // '*'
        start[47] = 27;   // '/'
        start[37] = 28;   // '%'
        start[126] = 29;  // '~'
        start[44] = 30;   // ','
        start[Buffer.EOF] = -1;
    }

    // Scans the file with the given name. The file is opened (and later closed) by the scanner.
    // Throws FatalError if the file cannot be opened.
    public Scanner (string fileName) {
        try {
            Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            buffer = new Buffer(stream, false);
            Init();
        } catch (IOException) {
            throw new FatalError("Cannot open file " + fileName);
        }
    }

    // Scans a stream owned by the caller; the scanner does not close it.
    public Scanner (Stream s) {
        buffer = new Buffer(s, true);
        Init();
    }

    // Reads the first character, switches to UTF-8 decoding if the input starts
    // with a UTF-8 byte order mark and sets up the token list.
    void Init() {
        pos = -1; line = 1; col = 0; charPos = -1;
        oldEols = 0;
        NextCh();
        if (ch == 0xEF) { // check optional byte order mark for UTF-8
            NextCh(); int ch1 = ch;
            NextCh(); int ch2 = ch;
            if (ch1 != 0xBB || ch2 != 0xBF) {
                throw new FatalError(String.Format("illegal byte order mark: EF {0,2:X} {1,2:X}", ch1, ch2));
            }
            buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
            NextCh();
        }
        pt = tokens = new Token();  // first token is a dummy
    }

    // Advances to the next input character. Afterwards ch holds the lower-cased
    // character (or Buffer.EOF) and valCh the character in its original case.
    void NextCh() {
        if (oldEols > 0) { ch = EOL; oldEols--; }
        else {
            pos = buffer.Pos;
            // buffer reads unicode chars, if UTF8 has been detected
            ch = buffer.Read(); col++; charPos++;
            // replace isolated '\r' by '\n' in order to make
            // eol handling uniform across Windows, Unix and Mac
            if (ch == '\r' && buffer.Peek() != '\n') ch = EOL;
            if (ch == EOL) { line++; col = 0; }
        }
        if (ch != Buffer.EOF) {
            valCh = (char) ch;
            // Invariant lower-casing: the automaton compares against 'a'..'z',
            // so the mapping must not depend on the current culture
            // (a Turkish culture would map 'I' to the dotless 'ı').
            ch = char.ToLowerInvariant((char) ch);
        }
    }

    // Appends the current character (original case) to the token text and
    // advances to the next character. Does nothing at end of input.
    void AddCh() {
        if (tlen >= tval.Length) {
            char[] newBuf = new char[2 * tval.Length];
            Array.Copy(tval, 0, newBuf, 0, tval.Length);
            tval = newBuf;
        }
        if (ch != Buffer.EOF) {
            tval[tlen++] = valCh;
            NextCh();
        }
    }

    // Called with ch == '-'. If a second '-' follows, everything up to and
    // including the end of the line is skipped and true is returned.
    // Otherwise the scanner is reset to the first '-' and false is returned.
    // A comment that hits the end of the input also yields false.
    bool Comment0() {
        int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
        NextCh();
        if (ch == '-') {
            NextCh();
            for(;;) {
                if (ch == 10) {
                    level--;
                    if (level == 0) { oldEols = line - line0; NextCh(); return true; }
                    NextCh();
                } else if (ch == Buffer.EOF) return false;
                else NextCh();
            }
        } else {
            buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;
        }
        return false;
    }

    // Replaces the kind of the current token if its text is a keyword.
    // Keywords are matched case-insensitively.
    void CheckLiteral() {
        // culture-independent lower-casing, so that keywords containing 'I'
        // ("IN", "IS", "LIKE", ...) are recognised under every culture
        switch (t.val.ToLowerInvariant()) {
            case "and": t.kind = 5; break;
            case "or": t.kind = 6; break;
            case "not": t.kind = 7; break;
            case "like": t.kind = 8; break;
            case "escape": t.kind = 9; break;
            case "between": t.kind = 10; break;
            case "is": t.kind = 11; break;
            case "null": t.kind = 12; break;
            case "true": t.kind = 13; break;
            case "mod": t.kind = 14; break;
            case "in": t.kind = 15; break;
            case "false": t.kind = 38; break;
            default: break;
        }
    }

    // Skips white space and comments, then runs the automaton to recognise the
    // next token. Unrecognised input yields a token of kind noSym, the end of
    // the input a token of kind eofSym.
    Token NextToken() {
        while (ch == ' ' ||
            ch >= 9 && ch <= 10 || ch == 13
        ) NextCh();
        if (ch == '-' && Comment0()) return NextToken();
        int recKind = noSym;
        int recEnd = pos;
        t = new Token();
        t.pos = pos; t.col = col; t.line = line; t.charPos = charPos;
        // single lookup: the Hashtable indexer returns null for unknown characters
        object startState = start[ch];
        int state = (startState == null) ? 0 : (int) startState;
        tlen = 0; AddCh();

        switch (state) {
            case -1: { t.kind = eofSym; break; } // NextCh already done
            case 0: {
                // no transition possible: fall back to the last recognised
                // token (if any) and reposition the scanner behind it
                if (recKind != noSym) {
                    tlen = recEnd - t.pos;
                    SetScannerBehindT();
                }
                t.kind = recKind; break;
            } // NextCh already done
            case 1:
                recEnd = pos; recKind = 1;
                if (ch == '.' || ch >= '0' && ch <= '9' || ch == '_' || ch >= 'a' && ch <= 'z' || ch == 128 || ch >= 160 && ch <= 179 || ch == 181 || ch == 186 || ch >= 192 && ch <= 214 || ch >= 216 && ch <= 246 || ch >= 248 && ch <= 255) {AddCh(); goto case 1;}
                else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}
            case 2:
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 3;}
                else {goto case 0;}
            case 3:
                recEnd = pos; recKind = 2;
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 3;}
                else if (ch == 'e') {AddCh(); goto case 4;}
                else {t.kind = 2; break;}
            case 4:
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
                else if (ch == '+' || ch == '-') {AddCh(); goto case 5;}
                else {goto case 0;}
            case 5:
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
                else {goto case 0;}
            case 6:
                recEnd = pos; recKind = 2;
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
                else {t.kind = 2; break;}
            case 7:
                // inside a quoted literal: everything except LF, CR, the quote (39) and EOF
                if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= 65535) {AddCh(); goto case 7;}
                else if (ch == 39) {AddCh(); goto case 8;}
                else {goto case 0;}
            case 8:
                {t.kind = 4; break;}
            case 9:
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;}
                else {goto case 0;}
            case 10:
                if (ch == '}') {AddCh(); goto case 11;}
                else {goto case 0;}
            case 11:
                {t.kind = 16; break;}
            case 12:
                recEnd = pos; recKind = 3;
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 12;}
                else if (ch == '.') {AddCh(); goto case 2;}
                else {t.kind = 3; break;}
            case 13:
                {t.kind = 17; break;}
            case 14:
                {t.kind = 18; break;}
            case 15:
                {t.kind = 19; break;}
            case 16:
                {t.kind = 20; break;}
            case 17:
                {t.kind = 21; break;}
            case 18:
                {t.kind = 24; break;}
            case 19:
                {t.kind = 25; break;}
            case 20:
                {t.kind = 26; break;}
            case 21:
                {t.kind = 27; break;}
            case 22:
                {t.kind = 28; break;}
            case 23:
                {t.kind = 29; break;}
            case 24:
                {t.kind = 30; break;}
            case 25:
                {t.kind = 31; break;}
            case 26:
                {t.kind = 33; break;}
            case 27:
                {t.kind = 34; break;}
            case 28:
                {t.kind = 35; break;}
            case 29:
                {t.kind = 36; break;}
            case 30:
                {t.kind = 37; break;}
            case 31:
                recEnd = pos; recKind = 32;
                if (ch >= '0' && ch <= '9') {AddCh(); goto case 12;}
                else if (ch == '.') {AddCh(); goto case 2;}
                else {t.kind = 32; break;}
            case 32:
                if (ch == '=') {AddCh(); goto case 13;}
                else if (ch == '<') {AddCh(); goto case 20;}
                else if (ch == '>') {AddCh(); goto case 21;}
                else {goto case 0;}
            case 33:
                recEnd = pos; recKind = 22;
                if (ch == '>') {AddCh(); goto case 14;}
                else if (ch == '=') {AddCh(); goto case 18;}
                else {t.kind = 22; break;}
            case 34:
                recEnd = pos; recKind = 23;
                if (ch == '=') {AddCh(); goto case 19;}
                else {t.kind = 23; break;}

        }
        t.val = new String(tval, 0, tlen);
        return t;
    }

    // Repositions the scanner directly behind the first tlen characters of token t.
    private void SetScannerBehindT() {
        buffer.Pos = t.pos;
        NextCh();
        line = t.line; col = t.col; charPos = t.charPos;
        for (int i = 0; i < tlen; i++) NextCh();
    }

    // get the next token (possibly a token already seen during peeking)
    public Token Scan () {
        if (tokens.next == null) {
            return NextToken();
        } else {
            pt = tokens = tokens.next;
            return tokens;
        }
    }

    // peek for the next token, ignore pragmas
    public Token Peek () {
        do {
            if (pt.next == null) {
                pt.next = NextToken();
            }
            pt = pt.next;
        } while (pt.kind > maxT); // skip pragmas

        return pt;
    }

    // make sure that peeking starts at the current scan position
    public void ResetPeek () { pt = tokens; }

} // end Scanner
527 | } |
New Commit (fe801ed)
1 | |
2 | // |
3 | // This is generated code. Do not change it directly, change the Scanner.frame file and rebuild the grammar. |
4 | // |
5 | |
6 | #pragma warning disable 1591 // No XML comments needed |
7 | |
8 | using System; |
9 | using System.IO; |
10 | using System.Collections; |
11 | |
12 | namespace NDOql { |
13 | |
// A single lexical token produced by the Scanner.
// Tokens are chained through `next`, so tokens that were only peeked
// can later be handed out again by Scan().
public class Token {
    // token kind (numeric code assigned by the scanner)
    public int kind;
    // token position in bytes in the source text (starting at 0)
    public int pos;
    // token position in characters in the source text (starting at 0)
    public int charPos;
    // token column (starting at 1)
    public int col;
    // token line (starting at 1)
    public int line;
    // token value (the text of the token)
    public string val;
    // ML 2005-03-11 Tokens are kept in linked list
    public Token next;
}
23 | |
24 | //----------------------------------------------------------------------------------- |
25 | // Buffer |
26 | //----------------------------------------------------------------------------------- |
// Byte-oriented input buffer with random access via the Pos property.
public class Buffer {
    // This Buffer supports the following cases:
    // 1) seekable stream (file)
    //    a) whole stream in buffer
    //    b) part of stream in buffer
    // 2) non seekable stream (network, console)

    public const int EOF = char.MaxValue + 1; // returned by Read() at end of input; outside the char range
    const int MIN_BUFFER_LENGTH = 1024; // 1KB
    const int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
    byte[] buf;         // input buffer
    int bufStart;       // position of first byte in buffer relative to input stream
    int bufLen;         // length of buffer
    int fileLen;        // length of input stream (may change if the stream is no file)
    int bufPos;         // current position in buffer
    Stream stream;      // input stream; null once closed or handed over to another Buffer
    bool isUserStream;  // was the stream opened by the user?

    // Creates a buffer over stream s.
    // isUserStream: true if the caller owns the stream; such a stream is never closed here.
    public Buffer (Stream s, bool isUserStream) {
        stream = s; this.isUserStream = isUserStream;

        if (stream.CanSeek) {
            fileLen = (int) stream.Length;
            bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH);
            bufStart = Int32.MaxValue; // nothing in the buffer so far
        } else {
            fileLen = bufLen = bufStart = 0;
        }

        buf = new byte[(bufLen>0) ? bufLen : MIN_BUFFER_LENGTH];
        if (fileLen > 0) Pos = 0; // setup buffer to position 0 (start)
        else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
        // the whole stream fits into the buffer, so the stream is not needed any longer
        if (bufLen == fileLen && stream.CanSeek) Close();
    }

    // Takes over the complete state (including the stream) of buffer b.
    protected Buffer(Buffer b) { // called in UTF8Buffer constructor
        buf = b.buf;
        bufStart = b.bufStart;
        bufLen = b.bufLen;
        fileLen = b.fileLen;
        bufPos = b.bufPos;
        stream = b.stream;
        // keep destructor from closing the stream
        b.stream = null;
        isUserStream = b.isUserStream;
    }

    ~Buffer() { Close(); }

    // Closes the stream unless it belongs to the user.
    protected void Close() {
        if (!isUserStream && stream != null) {
            stream.Close();
            stream = null;
        }
    }

    // Returns the next byte and advances the position, or EOF at the end of the input.
    public virtual int Read () {
        if (bufPos < bufLen) {
            return buf[bufPos++];
        } else if (Pos < fileLen) {
            Pos = Pos; // shift buffer start to Pos
            return buf[bufPos++];
        } else if (stream != null && !stream.CanSeek && ReadNextStreamChunk() > 0) {
            return buf[bufPos++];
        } else {
            return EOF;
        }
    }

    // Returns what Read() would return, without changing the position.
    public int Peek () {
        int curPos = Pos;
        int ch = Read();
        Pos = curPos;
        return ch;
    }

    // beg .. begin, zero-based, inclusive, in byte
    // end .. end, zero-based, exclusive, in byte
    // The current position is restored afterwards. If end lies behind the end
    // of the input, the text up to the end of the input is returned.
    public string GetString (int beg, int end) {
        int len = 0;
        char[] buf = new char[end - beg];
        int oldPos = Pos;
        Pos = beg;
        while (Pos < end) {
            int before = Pos;
            int ch = Read();
            // Read() did not advance: the input is exhausted before 'end'.
            // Without this check the loop would never reach 'end' and would
            // overrun the array.
            if (Pos == before) break;
            buf[len++] = (char) ch;
        }
        Pos = oldPos;
        return new String(buf, 0, len);
    }

    // Absolute read position in bytes, relative to the start of the input.
    // Setting a value outside 0..fileLen throws FatalError.
    public int Pos {
        get { return bufPos + bufStart; }
        set {
            if (value >= fileLen && stream != null && !stream.CanSeek) {
                // Wanted position is after buffer and the stream
                // is not seek-able e.g. network or console,
                // thus we have to read the stream manually till
                // the wanted position is in sight.
                while (value >= fileLen && ReadNextStreamChunk() > 0);
            }

            if (value < 0 || value > fileLen) {
                throw new FatalError("buffer out of bounds access, position: " + value);
            }

            if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
                bufPos = value - bufStart;
            } else if (stream != null && stream.CanSeek) { // must be swapped in
                // Only seekable streams may be repositioned. A non seekable stream
                // gets here only with value == fileLen (end of input) and must be
                // handled by the branch below, Seek() would throw for it.
                stream.Seek(value, SeekOrigin.Begin);
                bufLen = stream.Read(buf, 0, buf.Length);
                bufStart = value; bufPos = 0;
            } else {
                // set the position to the end of the file, Pos will return fileLen.
                bufPos = fileLen - bufStart;
            }
        }
    }

    // Read the next chunk of bytes from the stream, increases the buffer
    // if needed and updates the fields fileLen and bufLen.
    // Returns the number of bytes read.
    private int ReadNextStreamChunk() {
        int free = buf.Length - bufLen;
        if (free == 0) {
            // in the case of a growing input stream
            // we can neither seek in the stream, nor can we
            // foresee the maximum length, thus we must adapt
            // the buffer size on demand.
            byte[] newBuf = new byte[bufLen * 2];
            Array.Copy(buf, newBuf, bufLen);
            buf = newBuf;
            free = bufLen;
        }
        int read = stream.Read(buf, bufLen, free);
        if (read > 0) {
            fileLen = bufLen = (bufLen + read);
            return read;
        }
        // end of stream reached
        return 0;
    }
}
167 | |
168 | //----------------------------------------------------------------------------------- |
169 | // UTF8Buffer |
170 | //----------------------------------------------------------------------------------- |
// Buffer variant whose Read() decodes UTF-8 byte sequences into code points.
public class UTF8Buffer: Buffer {
    public UTF8Buffer(Buffer b): base(b) {}

    // Returns the next decoded character, or EOF at the end of the input.
    public override int Read() {
        // skip bytes until a utf8 start (0xxxxxxx or 11xxxxxx) or EOF shows up
        int lead = base.Read();
        while (lead >= 128 && (lead & 0xC0) != 0xC0 && lead != EOF) {
            lead = base.Read();
        }

        // 0xxxxxxx or end of file character:
        // the first 127 chars are the same in ascii and utf8
        if (lead < 128 || lead == EOF) return lead;

        int code;       // payload bits collected so far
        int followers;  // number of 10xxxxxx bytes that follow the lead byte
        if ((lead & 0xF0) == 0xF0) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            code = lead & 0x07; followers = 3;
        } else if ((lead & 0xE0) == 0xE0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            code = lead & 0x0F; followers = 2;
        } else {
            // 110xxxxx 10xxxxxx (the only remaining 11xxxxxx pattern)
            code = lead & 0x1F; followers = 1;
        }

        // every following byte contributes its lower six bits
        for (int i = 0; i < followers; i++) {
            code = (code << 6) | (base.Read() & 0x3F);
        }
        return code;
    }
}
205 | |
206 | //----------------------------------------------------------------------------------- |
207 | // Scanner |
208 | //----------------------------------------------------------------------------------- |
209 | public class Scanner { |
// ----- constants -----
private const char EOL = '\n';    // uniform end-of-line character
private const int eofSym = 0;     // token kind reported at end of input
private const int maxT = 39;      // Peek skips tokens whose kind is above this value
private const int noSym = 39;     // token kind used when no token was recognized

// maps the first character of a token (as int) to its start state
private static readonly Hashtable start;

// ----- input -----
public Buffer buffer;             // scanner buffer

private char valCh;               // current input character as read (copied into token.val)
private int ch;                   // current input character, lower-cased by NextCh
private int pos;                  // byte position of current character
private int charPos;              // position by unicode characters starting with 0
private int col;                  // column number of current character
private int line;                 // line number of current character
private int oldEols;              // EOLs that appeared in a comment

// ----- tokens -----
private Token t;                  // current token
private Token tokens;             // list of tokens already peeked (first token is a dummy)
private Token pt;                 // current peek token

private char[] tval = new char[128];  // text of current token
private int tlen;                     // length of current token
232 | ···· |
// Fills the table that maps the first character of a token to the start
// state used by NextToken. Keys are stored as boxed ints on purpose:
// NextToken looks them up with the int field ch, and a boxed char key
// would never match. Upper-case letters have no entries because NextCh
// lower-cases every character before the lookup.
static Scanner() {
    Hashtable table = new Hashtable(128);

    // characters that may start an identifier -> state 1 (inclusive code ranges)
    int[][] identifierStarts = {
        new int[] {  95,  95 },   // '_'
        new int[] {  97, 122 },   // 'a'..'z'
        new int[] { 170, 170 },
        new int[] { 181, 181 },
        new int[] { 186, 186 },
        new int[] { 192, 214 },
        new int[] { 216, 246 },
        new int[] { 248, 255 }
    };
    foreach (int[] range in identifierStarts) {
        for (int code = range[0]; code <= range[1]; ++code) {
            table[code] = 1;
        }
    }

    // '0'..'9' -> state 11
    for (int code = 48; code <= 57; ++code) {
        table[code] = 11;
    }

    // single characters with a start state of their own: { code, state }
    int[][] singles = {
        new int[] {  46,  2 },   // '.'
        new int[] {  45, 31 },   // '-'
        new int[] {  39,  7 },   // '\''
        new int[] { 123,  8 },   // '{'
        new int[] {  33, 32 },   // '!'
        new int[] {  60, 33 },   // '<'
        new int[] {  61, 15 },   // '='
        new int[] {  40, 16 },   // '('
        new int[] {  41, 17 },   // ')'
        new int[] {  62, 34 },   // '>'
        new int[] { 124, 22 },   // '|'
        new int[] {  94, 23 },   // '^'
        new int[] {  38, 24 },   // '&'
        new int[] {  43, 25 },   // '+'
        new int[] {  42, 26 },   // '*'
        new int[] {  47, 27 },   // '/'
        new int[] {  37, 28 },   // '%'
        new int[] { 126, 29 },   // '~'
        new int[] {  44, 30 }    // ','
    };
    foreach (int[] entry in singles) {
        table[entry[0]] = entry[1];
    }

    // end of input -> pseudo state -1
    table[Buffer.EOF] = -1;

    start = table;
}
266 | ···· |
// Creates a scanner that reads from the file named fileName.
// The file is opened read-only with shared read access.
// Throws FatalError("Cannot open file ...") when an IOException occurs
// while opening the file or while reading its first characters; other
// exceptions (e.g. the FatalError raised by Init for a broken byte order
// mark) propagate unchanged.
public Scanner (string fileName) {
    Stream stream = null;
    bool initialized = false;
    try {
        stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
        buffer = new Buffer(stream, false);
        Init();
        initialized = true;
    } catch (IOException) {
        throw new FatalError("Cannot open file " + fileName);
    } finally {
        // If construction failed, no usable scanner exists that could ever
        // release the file, so close the handle here instead of leaking it.
        // Closing a stream that has already been closed is harmless.
        if (!initialized && stream != null) stream.Close();
    }
}
276 | ···· |
// Creates a scanner that reads from a stream supplied by the caller.
// The stream is wrapped in a Buffer constructed with 'true' as second
// argument (presumably marking it as a user-owned stream; confirm against
// the Buffer constructor), then the scanner state is initialized.
public Scanner (Stream s) {
    this.buffer = new Buffer(s, true);
    this.Init();
}
281 | ···· |
// Resets the position counters, loads the first input character and
// switches the buffer to UTF-8 decoding when the input starts with the
// byte order mark EF BB BF. Finally creates the dummy head of the token list.
// Throws FatalError when the input starts with EF but the following two
// bytes are not BB BF.
void Init() {
    pos = -1; line = 1; col = 0; charPos = -1;
    oldEols = 0;
    NextCh();
    // NextCh lower-cases ch, which turns 0xCF into 0xEF. Test the unmodified
    // character in valCh so that only a real 0xEF byte is taken as the
    // start of a byte order mark.
    if (ch != Buffer.EOF && valCh == 0xEF) { // check optional byte order mark for UTF-8
        NextCh(); int ch1 = ch;
        NextCh(); int ch2 = ch;
        if (ch1 != 0xBB || ch2 != 0xBF) {
            throw new FatalError(String.Format("illegal byte order mark: EF {0,2:X} {1,2:X}", ch1, ch2));
        }
        // restart column and character counting behind the byte order mark
        buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
        NextCh();
    }
    pt = tokens = new Token();  // first token is a dummy
}
297 | ···· |
// Advances to the next input character.
// Updates pos, col, charPos and line, stores the character as read in
// valCh and its lower-case form in ch (the scanner is case-insensitive).
// While oldEols is positive, a pending end-of-line is delivered instead of
// reading from the buffer.
void NextCh() {
    if (oldEols > 0) { ch = EOL; oldEols--; }
    else {
        pos = buffer.Pos;
        // buffer reads unicode chars, if UTF8 has been detected
        ch = buffer.Read(); col++; charPos++;
        // replace isolated '\r' by '\n' in order to make
        // eol handling uniform across Windows, Unix and Mac
        if (ch == '\r' && buffer.Peek() != '\n') ch = EOL;
        if (ch == EOL) { line++; col = 0; }
    }
    if (ch != Buffer.EOF) {
        valCh = (char) ch;
        // Culture-independent lowering: with char.ToLower the result depends
        // on the current culture (e.g. 'I' becomes a dotless i under Turkish
        // settings), which would make letters fall outside the ranges the
        // scanner tables expect.
        ch = char.ToLowerInvariant(valCh);
    }
}
315 | |
// Appends the current character (in its original case, valCh) to the token
// text and advances to the next character. Nothing is appended at EOF.
// The token text array is doubled beforehand whenever it is full.
void AddCh() {
    int capacity = tval.Length;
    if (tlen >= capacity) {
        char[] grown = new char[capacity * 2];
        Array.Copy(tval, grown, capacity);
        tval = grown;
    }

    if (ch == Buffer.EOF) {
        return;
    }

    tval[tlen] = valCh;
    tlen++;
    NextCh();
}
327 | |
328 | |
329 | |
// Tries to skip a comment that starts with "--" and ends at the next line
// feed. Called while ch is the first '-'.
// Returns true when a comment terminated by a line feed was skipped.
// Returns false when the second character is not '-' (the scanner is then
// put back onto the first '-') or when EOF is reached inside the comment.
bool Comment0() {
    int savedPos = pos, savedLine = line, savedCol = col, savedCharPos = charPos;

    NextCh();
    if (ch != '-') {
        // not a comment: rewind to the first '-' and restore the counters
        buffer.Pos = savedPos; NextCh();
        line = savedLine; col = savedCol; charPos = savedCharPos;
        return false;
    }

    NextCh();
    while (ch != 10) {
        if (ch == Buffer.EOF) {
            return false;
        }
        NextCh();
    }

    // remember the line ends consumed by the comment so that NextCh
    // delivers them again
    oldEols = line - savedLine;
    NextCh();
    return true;
}
348 | |
349 | |
// Turns the current token into a keyword token when its text matches one
// of the keywords below, ignoring case; otherwise t.kind is left unchanged.
void CheckLiteral() {
    // ToLowerInvariant keeps keyword recognition independent of the current
    // culture; with the culture-sensitive ToLower, texts such as "IN", "IS"
    // or "LIKE" do not lower to the ASCII keywords under Turkish settings.
    switch (t.val.ToLowerInvariant()) {
        case "and": t.kind = 5; break;
        case "or": t.kind = 6; break;
        case "not": t.kind = 7; break;
        case "like": t.kind = 8; break;
        case "escape": t.kind = 9; break;
        case "between": t.kind = 10; break;
        case "is": t.kind = 11; break;
        case "null": t.kind = 12; break;
        case "true": t.kind = 13; break;
        case "mod": t.kind = 14; break;
        case "in": t.kind = 15; break;
        case "false": t.kind = 38; break;
        default: break;
    }
}
367 | |
// Scans the next token from the input and returns it.
// White space and "--" comments are skipped first. The token is then
// recognized by a state machine whose start state is looked up from the
// first character. States that set recKind/recEnd mark a point where a
// complete token has been seen; if scanning fails later, state 0 falls back
// to that point. A token that cannot be recognized gets kind noSym.
Token NextToken() {
    // skip blanks, tabs (9), line feeds (10) and carriage returns (13)
    while (ch == ' ' ||
        ch >= 9 && ch <= 10 || ch == 13
    ) NextCh();
    // a comment was skipped: start over behind it
    if (ch == '-' && Comment0()) return NextToken();
    int recKind = noSym;   // kind of the last complete token seen so far
    int recEnd = pos;      // byte position of the first character behind it
    t = new Token();
    t.pos = pos; t.col = col; t.line = line; t.charPos = charPos;
    // single table lookup; the indexer yields null for characters that
    // have no start state
    object entry = start[ch];
    int state = (entry == null) ? 0 : (int) entry;
    tlen = 0; AddCh();

    switch (state) {
        case -1: { t.kind = eofSym; break; } // NextCh already done
        case 0: {
            // no further progress: go back to the last complete token, if any
            if (recKind != noSym) {
                tlen = recEnd - t.pos;
                SetScannerBehindT();
            }
            t.kind = recKind; break;
        } // NextCh already done
        case 1:
            // identifier or keyword (kind 1, refined by CheckLiteral)
            recEnd = pos; recKind = 1;
            if (ch == '.' || ch >= '0' && ch <= '9' || ch == '_' || ch >= 'a' && ch <= 'z' || ch == 128 || ch >= 160 && ch <= 179 || ch == 181 || ch == 186 || ch >= 192 && ch <= 214 || ch >= 216 && ch <= 246 || ch >= 248 && ch <= 255) {AddCh(); goto case 1;}
            else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}
        case 2:
            // behind a '.': at least one digit must follow
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 3;}
            else {goto case 0;}
        case 3:
            // fraction digits, optionally followed by an exponent (kind 2)
            recEnd = pos; recKind = 2;
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 3;}
            else if (ch == 'e') {AddCh(); goto case 4;}
            else {t.kind = 2; break;}
        case 4:
            // behind 'e': digit or sign expected
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
            else if (ch == '+' || ch == '-') {AddCh(); goto case 5;}
            else {goto case 0;}
        case 5:
            // behind the exponent sign: digit expected
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
            else {goto case 0;}
        case 6:
            // exponent digits (kind 2)
            recEnd = pos; recKind = 2;
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 6;}
            else {t.kind = 2; break;}
        case 7:
            // inside a quoted literal: anything except line feed (10),
            // carriage return (13), apostrophe (39) and EOF continues it
            if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= 65535) {AddCh(); goto case 7;}
            else if (ch == 39) {AddCh(); goto case 12;}
            else {goto case 0;}
        case 8:
            // behind '{': a digit must follow
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 9;}
            else {goto case 0;}
        case 9:
            // behind '{' digit: the closing brace must follow
            if (ch == '}') {AddCh(); goto case 10;}
            else {goto case 0;}
        case 10:
            {t.kind = 16; break;}
        case 11:
            // integer digits (kind 3); a '.' continues as a fraction
            recEnd = pos; recKind = 3;
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 11;}
            else if (ch == '.') {AddCh(); goto case 2;}
            else {t.kind = 3; break;}
        case 12:
            // behind a closing apostrophe (kind 4); a second apostrophe
            // continues the literal
            recEnd = pos; recKind = 4;
            if (ch == 39) {AddCh(); goto case 7;}
            else {t.kind = 4; break;}
        case 13:
            {t.kind = 17; break;}
        case 14:
            {t.kind = 18; break;}
        case 15:
            {t.kind = 19; break;}
        case 16:
            {t.kind = 20; break;}
        case 17:
            {t.kind = 21; break;}
        case 18:
            {t.kind = 24; break;}
        case 19:
            {t.kind = 25; break;}
        case 20:
            {t.kind = 26; break;}
        case 21:
            {t.kind = 27; break;}
        case 22:
            {t.kind = 28; break;}
        case 23:
            {t.kind = 29; break;}
        case 24:
            {t.kind = 30; break;}
        case 25:
            {t.kind = 31; break;}
        case 26:
            {t.kind = 33; break;}
        case 27:
            {t.kind = 34; break;}
        case 28:
            {t.kind = 35; break;}
        case 29:
            {t.kind = 36; break;}
        case 30:
            {t.kind = 37; break;}
        case 31:
            // '-' alone (kind 32) or the sign of a number
            recEnd = pos; recKind = 32;
            if (ch >= '0' && ch <= '9') {AddCh(); goto case 11;}
            else if (ch == '.') {AddCh(); goto case 2;}
            else {t.kind = 32; break;}
        case 32:
            // behind '!': only "!=", "!<" and "!>" are tokens
            if (ch == '=') {AddCh(); goto case 13;}
            else if (ch == '<') {AddCh(); goto case 20;}
            else if (ch == '>') {AddCh(); goto case 21;}
            else {goto case 0;}
        case 33:
            // '<' (kind 22), "<>" or "<="
            recEnd = pos; recKind = 22;
            if (ch == '>') {AddCh(); goto case 14;}
            else if (ch == '=') {AddCh(); goto case 18;}
            else {t.kind = 22; break;}
        case 34:
            // '>' (kind 23) or ">="
            recEnd = pos; recKind = 23;
            if (ch == '=') {AddCh(); goto case 19;}
            else {t.kind = 23; break;}

    }
    t.val = new String(tval, 0, tlen);
    return t;
}
495 | ···· |
// Repositions the scanner directly behind the current token t:
// rewinds the buffer to the start of t, restores the position counters
// recorded in t and then re-reads tlen characters.
private void SetScannerBehindT() {
    buffer.Pos = t.pos;
    NextCh();

    line = t.line;
    col = t.col;
    charPos = t.charPos;

    int remaining = tlen;
    while (remaining > 0) {
        NextCh();
        remaining--;
    }
}
502 | ···· |
// Returns the next token. If tokens were already read ahead by Peek, the
// first of them is consumed (and peeking is reset to it); otherwise a new
// token is scanned from the input.
public Token Scan () {
    Token queued = tokens.next;
    if (queued == null) {
        return NextToken();
    }

    tokens = queued;
    pt = queued;
    return queued;
}
512 | |
// Looks ahead by one more token without consuming it and returns that
// token. Tokens scanned on the way are appended to the token list; tokens
// whose kind is greater than maxT (pragmas) are skipped.
public Token Peek () {
    for (;;) {
        Token following = pt.next;
        if (following == null) {
            following = NextToken();
            pt.next = following;
        }
        pt = following;

        if (pt.kind <= maxT) {
            return pt;
        }
    }
}
524 | |
// Lets the next call of Peek start again at the current scan position.
public void ResetPeek () {
    pt = tokens;
}
527 | |
528 | } // end Scanner |
529 | } |