1use std::str::FromStr;
36use std::error::Error;
37use std::io;
38
39mod int;
40use int::Int;
41
42mod error;
43pub use error::ScanError;
44
45mod token;
46pub use token::Token;
47
/// A character-level lexical scanner over a borrowed string.
///
/// One character of look-ahead is kept in `ch`; the value `'\0'` is used
/// throughout as the "no more input" marker.
pub struct Scanner<'a> {
    /// Remaining input, positioned just after `ch`.
    iter: ::std::str::Chars<'a>,
    /// Current look-ahead character, or `'\0'` once the input is exhausted.
    ch: char,
    /// Line number the scanner is currently on.
    pub lineno: u32,
    /// When set, `get` does not treat signs, hex prefixes, decimal points or
    /// exponents as part of a number.
    no_float: bool,
    /// Character that starts a comment running to the end of the line, if any.
    line_comment: Option<char>,
}
56
/// Renders `chars` as a comma-separated list of single-quoted characters.
///
/// For example `[',', ':']` becomes `',',':'`; an empty slice yields an
/// empty string.
fn expecting_chars(chars: &[char]) -> String {
    chars
        .iter()
        .map(|c| format!("'{}'", c))
        .collect::<Vec<_>>()
        .join(",")
}
66
67impl<'a> Iterator for Scanner<'a> {
68 type Item = Token;
69
70 fn next(&mut self) -> Option<Token> {
71 match self.get() {
72 Token::End => None,
73 t => Some(t)
74 }
75 }
76}
77
78impl<'a> Scanner<'a> {
79 pub fn new(s: &'a str) -> Scanner<'a> {
84 Scanner::new_ex(s,1)
85 }
86
87 fn new_ex(s: &'a str, lineno: u32) -> Scanner<'a> {
88 let mut iter = s.chars();
89 let mch = iter.next();
90 Scanner {
91 iter: iter,
92 ch: match mch {Some(c) => c, None => '\0'},
93 lineno: lineno,
94 no_float: false,
95 line_comment: None,
96 }
97 }
98
99 pub fn no_float(mut self) -> Scanner<'a> {
103 self.no_float = true;
104 self
105 }
106
107 pub fn line_comment(mut self, c: char) -> Scanner<'a> {
109 self.line_comment = Some(c);
110 self
111 }
112
113
114 pub fn scan_error(&self, msg: &str, cause: Option<&dyn Error>) -> ScanError {
115 ScanError{
116 details: format!("{}{}", msg,
117 match cause {
118 Some(c) => format!(": caused by {}",c),
119 None => "".into()
120 }
121 ),
122 lineno: self.lineno
123 }
124 }
125
126 fn update_lineno(&self, mut err: ScanError) -> ScanError {
127 err.lineno = self.lineno;
128 err
129 }
130
131 fn token_error(&self, msg: &str, cause: Option<&dyn Error>) -> Token {
132 Token::Error(self.scan_error(msg,cause))
133 }
134
135 fn check_line_comment(&mut self) -> bool {
136 if let Some(lc) = self.line_comment {
137 if self.ch == lc {
138 self.skip_until(|c| c=='\n');
139 return true;
140 }
141 }
142 return false;
143
144 }
145
146 pub fn skip_whitespace(&mut self) -> bool {
148 loop {
149 self.check_line_comment();
150 if self.ch.is_whitespace() {
151 if self.ch == '\n' {
152 self.lineno += 1;
153 }
154 while let Some(c) = self.iter.next() {
155 if c == '\n' {
156 self.lineno += 1;
157 }
158 if ! c.is_whitespace() {
159 self.ch = c;
160 if self.check_line_comment() {
161 continue;
162 } else {
163 return true;
164 }
165 }
166 }
167 self.ch = '\0';
169 break;
170 } else {
171 break;
172 }
173 }
174 if self.ch == '\0' {
175 false
176 } else {
177 true
178 }
179 }
180
181 pub fn peek(&self) -> char {
183 self.ch
184 }
185
186 pub fn nextch(&mut self) -> char {
188 let old_ch = self.ch;
189 self.ch = match self.iter.next() {
190 Some(c) => c,
191 None => '\0'
192 };
193 old_ch
194 }
195
196 fn either_plus_or_minus(&self) -> Option<char> {
197 if self.ch == '+' || self.ch == '-' {
198 Some(self.ch)
199 } else {
200 None
201 }
202 }
203
204 fn is_digit(&self) -> bool {
205 self.ch.is_digit(10)
206 }
207
208 pub fn get(&mut self) -> Token {
210 use self::Token::*;
211 if ! self.skip_whitespace() {
212 return End;
213 }
214
215 let plusminus = if ! self.no_float {self.either_plus_or_minus()} else {None};
217 if self.is_digit() || plusminus.is_some() {
218 let mut s = String::new();
219 if plusminus.is_some() {
220 s.push(plusminus.unwrap());
221 }
222 if ! self.no_float {
223 let mut maybe_hex = self.ch == '0';
224 if plusminus.is_some() || maybe_hex {
225 self.nextch();
227 if maybe_hex { maybe_hex = self.ch == 'X' || self.ch == 'x';
229 if ! maybe_hex {
230 s.push('0');
231 if ! self.is_digit() && self.ch != '.' { self.ch = '\0'; }
232 }
233 } else
234 if ! self.is_digit() { return Char(plusminus.unwrap());
236 }
237 }
238 if maybe_hex { self.nextch(); self.take_while_into(&mut s,|c| c.is_digit(16));
242 return match i64::from_str_radix(&s,16) {
243 Ok(n) => Int(n),
244 Err(e) => self.token_error("bad hex constant",Some(&e))
245 }
246 }
247 }
248
249 if self.ch != '.' { self.take_digits_into(&mut s);
251 }
252
253 if ! self.no_float && (self.ch == '.' || self.ch == 'e' || self.ch == 'E') {
255 if self.ch == '.' {
256 self.take_digits_into(&mut s);
257 }
258 if self.ch == 'e' || self.ch == 'E' {
259 s.push(self.nextch());
260 if self.is_digit() || self.either_plus_or_minus().is_some() {
261 self.take_digits_into(&mut s);
262 }
263 }
264 return if self.ch.is_alphabetic() {
265 self.token_error("bad floating-point number: letter follows",None)
266 } else {
267 match f64::from_str(&s) {
268 Ok(x) => Num(x),
269 Err(e) => self.token_error(&format!("bad floating-point number {:?}",s),Some(&e))
270 }
271 }
272 } else {
273 return if ! self.no_float && self.ch.is_alphabetic() {
274 self.token_error("bad integer: letter follows",None)
275 } else {
276 match i64::from_str(&s) {
277 Ok(x) => Int(x),
278 Err(e) => self.token_error(&format!("bad integer {:?}",s),Some(&e))
279 }
280 }
281 }
282 } else
283 if self.ch == '\'' || self.ch == '\"' {
284 let endquote = self.ch;
285 self.nextch(); let s = self.grab_while(|c| c != endquote);
287 self.nextch(); Str(s)
290 } else
291 if self.ch.is_alphabetic() || self.ch == '_' {
292 let s = self.grab_while(|c| c.is_alphanumeric() || c == '_');
293 Iden(s)
294 } else {
295 Char(self.nextch())
296 }
297 }
298
299 pub fn grab_while<F>(&mut self, pred: F ) -> String
305 where F: Fn(char) -> bool {
306 let mut s = String::new();
307 self.take_while_into(&mut s,pred);
308 s
309 }
310
311 pub fn take_while_into<F>(&mut self, s: &mut String, pred: F )
313 where F: Fn(char) -> bool {
314 if self.ch != '\0' {
315 s.push(self.ch);
316 }
317 while let Some(c) = self.iter.next() {
318 if ! pred(c) { self.ch = c; return; }
319 s.push(c);
320 }
321 self.ch = '\0';
322 }
323
324 fn take_digits_into(&mut self, s: &mut String) {
325 self.take_while_into(s, |c| c.is_digit(10));
326 }
327
328 pub fn skip_until<F>(&mut self, pred: F ) -> bool
336 where F: Fn(char) -> bool {
337 while let Some(c) = self.iter.next() {
338 if pred(c) { self.ch = c; return true; }
339 }
340 self.ch = '\0';
341 false
342 }
343
344 pub fn take_rest(&mut self) -> String {
354 self.grab_while(|c| c != '\0')
355 }
356
357 pub fn take_until (&mut self, chars: &[char]) -> String {
359 self.grab_while(|c| ! chars.contains(&c))
360 }
361
362 pub fn get_string(&mut self) -> Result<String,ScanError> {
364 self.get().to_string_result().map_err(|e| self.update_lineno(e))
365 }
366
367 pub fn get_iden(&mut self) -> Result<String,ScanError> {
374 self.get().to_iden_result().map_err(|e| self.update_lineno(e))
375 }
376
377 pub fn get_number(&mut self) -> Result<f64,ScanError> {
385 self.get().to_number_result().map_err(|e| self.update_lineno(e))
386 }
387
388 pub fn get_integer(&mut self) -> Result<i64,ScanError> {
390 self.get().to_integer_result().map_err(|e| self.update_lineno(e))
391 }
392
393 pub fn get_int<I: Int>(&mut self) -> Result<I::Type,ScanError> {
395 self.get().to_int_result::<I>().map_err(|e| self.update_lineno(e))
396 }
397
398 pub fn get_float(&mut self) -> Result<f64,ScanError> {
400 self.get().to_float_result().map_err(|e| self.update_lineno(e))
401 }
402
403 pub fn get_char(&mut self) -> Result<char,ScanError> {
405 self.get().to_char_result().map_err(|e| self.update_lineno(e))
406 }
407
408 pub fn get_ch_matching(&mut self, chars: &[char]) -> Result<char,ScanError> {
410 let c = self.get_char()?;
411 if chars.contains(&c) {
412 Ok(c)
413 } else {
414 let s = expecting_chars(chars);
415 Err(self.scan_error(&format!("expected one of {}, got {}",s,c),None))
416 }
417 }
418
419 pub fn skip_chars(&mut self, chars: &str) -> Result<(),ScanError> {
421 for ch in chars.chars() {
422 let c = self.get_char()?;
423 if c != ch {
424 return Err(self.scan_error(&format!("expected '{}' got '{}'",ch,c),None));
425 }
426 }
427 Ok(())
428 }
429
430 pub fn grab_brackets(&mut self, pair: &str) -> Result<String,ScanError> {
432 let mut chars = pair.chars();
433 let open = chars.next().expect("provide open bracket");
434 let close = chars.next().expect("provide close bracket");
435 self.skip_whitespace();
436 let mut s = String::new();
437 if self.ch != '\0' {
438 s.push(self.ch);
439 }
440 let mut level = 1;
441 while let Some(c) = self.iter.next() {
442 if c == open {
443 level += 1;
444 } else
445 if c == close {
446 level -= 1;
447 }
448 s.push(c);
449 if level == 0 {
450 self.nextch();
451 return Ok(s);
452 }
453 }
454 Err(self.scan_error("expect close bracket",None))
455
456 }
457
458}
459
460use std::io::prelude::*;
461
/// Reads a source line by line, handing out a `Scanner` for each line.
pub struct ScanLines<R>
where
    R: Read,
{
    /// Buffered reader over the underlying source.
    rdr: io::BufReader<R>,
    /// Buffer holding the most recently read line.
    line: String,
    /// Number of lines read so far.
    lineno: u32,
}
468
469impl <'a, R: Read> ScanLines<R> {
470
471 pub fn new(f: R) -> ScanLines<R> {
484 ScanLines {
485 rdr: io::BufReader::new(f),
486 line: String::new(),
487 lineno: 0,
488 }
489 }
490
491
492 pub fn next(&'a mut self) -> Option<io::Result<Scanner<'a>>> {
494 self.line.clear();
495 match self.rdr.read_line(&mut self.line) {
496 Ok(nbytes) => if nbytes == 0 {
497 return None;
498 },
499 Err(e) => return Some(Err(e))
500 }
501 self.lineno += 1;
502 Some(Ok(Scanner::new_ex(&self.line,self.lineno)))
503 }
504
505}
506
507
#[cfg(test)]
mod tests {
    use super::*;

    /// `skip_until` leaves the scanner on the matching character.
    #[test]
    fn skipping() {
        let mut scan = Scanner::new("here we go\nand more *yay*");
        scan.skip_until(|c| c == '\n');
        assert_eq!(scan.get(), Token::Iden("and".to_string()));
        scan.skip_until(|c| c == '*');
        assert_eq!(scan.get(), Token::Char('*'));
        assert_eq!(scan.get(), Token::Iden("yay".to_string()));
    }

    /// Mixed token stream: strings, numbers, chars, hex and error tokens.
    #[test]
    fn getting() {
        use Token::*;
        let mut scan = Scanner::new("'hello' 42 * / -10 24B 2.0e6 0xFF-\"yay\"");
        assert_eq!(scan.get_string().unwrap(), "hello");
        assert_eq!(scan.get_number().unwrap(), 42.0);
        assert_eq!(scan.get_ch_matching(&['*']).unwrap(), '*');
        assert_eq!(
            scan.get_ch_matching(&[',', ':']).err().unwrap(),
            ScanError::new("expected one of ',',':', got /")
        );
        assert_eq!(scan.get(), Int(-10));
        assert_eq!(scan.get(), Error(ScanError::new("bad integer: letter follows")));
        assert_eq!(scan.get(), Iden("B".to_string()));
        assert_eq!(scan.get(), Num(2000000.0));
        assert_eq!(scan.get(), Int(255));
        assert_eq!(scan.get(), Char('-'));
        assert_eq!(scan.get(), Str("yay".to_string()));
    }

    /// Exercises the `?`-based accessors on a simple `name: value` input.
    fn try_scan_err() -> Result<(), ScanError> {
        let mut scan = Scanner::new("hello: 42");
        let s = scan.get_iden()?;
        let ch = scan.get_char()?;
        let n = scan.get_integer()?;
        assert_eq!(s, "hello");
        assert_eq!(ch, ':');
        assert_eq!(n, 42);
        Ok(())
    }

    #[test]
    fn try_scan_test() {
        // Check the result: an early `Err` return must fail the test rather
        // than silently skipping the assertions inside the helper.
        assert_eq!(try_scan_err(), Ok(()));
    }

    /// Parses `(name)=number` using `skip_chars` for the punctuation.
    fn try_skip_chars(test: &str) -> Result<(), ScanError> {
        let mut scan = Scanner::new(test);
        scan.skip_chars("(")?;
        let name = scan.get_iden()?;
        scan.skip_chars(")=")?;
        let num = scan.get_integer()?;
        assert_eq!(name, "hello");
        assert_eq!(num, 42);
        Ok(())
    }

    #[test]
    fn skip_chars() {
        // As above, an `Err` from the helper must fail the test.
        assert_eq!(try_skip_chars("(hello)=42"), Ok(()));
        assert_eq!(try_skip_chars(" ( hello ) = 42 "), Ok(()));
    }

    #[test]
    fn numbers() {
        let mut scan = Scanner::new("10 0.0 1.0e1 1e1 0 ");
        assert_eq!(scan.get_integer(), Ok(10));
        assert_eq!(scan.get_number(), Ok(0.0));
        assert_eq!(scan.get_number(), Ok(10.0));
        assert_eq!(scan.get_float(), Ok(10.0));
        assert_eq!(scan.get_integer(), Ok(0));
    }

    /// With `no_float`, only plain digit runs are numbers.
    #[test]
    fn no_float() {
        use Token::*;
        let scan = Scanner::new("0.0 1e4").no_float();
        let c: Vec<_> = scan.collect();
        assert_eq!(c, &[Int(0), Char('.'), Int(0), Int(1), Iden("e4".into())]);
    }

    #[test]
    fn classifying_tokens() {
        let mut s = Scanner::new("10 2.0 'hello' hello?");
        let t = s.get();
        assert!(t.is_integer());
        assert!(t.is_number());
        assert!(s.get().is_float());
        assert!(s.get().is_string());
        assert!(s.get().is_iden());
        assert!(s.get().is_char());
    }

    #[test]
    fn collecting_tokens_of_type() {
        let s = Scanner::new("if let Some(a) = Bonzo::Dog {}");
        let c: Vec<_> = s.filter_map(|t| t.to_iden()).collect();
        assert_eq!(c, &["if", "let", "Some", "a", "Bonzo", "Dog"]);
    }

    #[test]
    fn collecting_same_tokens_or_error() {
        let s = Scanner::new("10 1.5 20.0 30.1");
        let c: Result<Vec<_>, _> = s.map(|t| t.to_number_result()).collect();
        assert_eq!(c.unwrap(), &[10.0, 1.5, 20.0, 30.1]);
    }

    /// Text after the comment character is ignored up to the end of the line.
    #[test]
    fn line_comments() {
        let text = "
 one # some comment
 20
 ";
        let mut scan = Scanner::new(text).line_comment('#');
        assert_eq!(scan.get_iden(), Ok("one".into()));
        assert_eq!(scan.get_number(), Ok(20.0));
    }
}