From a47dfd3d1417412f4d55ffa964d9fc98d8d53a9d Mon Sep 17 00:00:00 2001 From: johannst Date: Mon, 13 Sep 2021 22:26:20 +0000 Subject: deploy: 743f301eef632a41bd870529a9e838ce4974f9eb --- src/llvm_kaleidoscope_rs/lexer.rs.html | 367 +++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 src/llvm_kaleidoscope_rs/lexer.rs.html (limited to 'src/llvm_kaleidoscope_rs/lexer.rs.html') diff --git a/src/llvm_kaleidoscope_rs/lexer.rs.html b/src/llvm_kaleidoscope_rs/lexer.rs.html new file mode 100644 index 0000000..3524648 --- /dev/null +++ b/src/llvm_kaleidoscope_rs/lexer.rs.html @@ -0,0 +1,367 @@ +lexer.rs - source + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+
/// A lexical token produced by [`Lexer::gettok`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// End of input.
    Eof,
    /// The `def` keyword.
    Def,
    /// The `extern` keyword.
    Extern,
    /// An identifier matching `[a-zA-Z][a-zA-Z0-9]*` that is not a keyword.
    Identifier(String),
    /// A numeric literal built from the characters `[0-9.]+`.
    Number(f64),
    /// Any other single character, such as `+` or `-`.
    Char(char),
}

/// Lexer turning a stream of characters into [`Token`]s.
pub struct Lexer<I>
where
    I: Iterator<Item = char>,
{
    /// Input that has not been inspected yet.
    input: I,
    /// One character of lookahead: the next character to be lexed, or `None`
    /// once `input` returned `None`.
    last_char: Option<char>,
}

impl<I> Lexer<I>
where
    I: Iterator<Item = char>,
{
    /// Create a lexer over `input`, priming the lookahead with its first character.
    pub fn new(mut input: I) -> Lexer<I> {
        let last_char = input.next();
        Lexer { input, last_char }
    }

    /// Advance the lookahead by one character and return the new lookahead.
    fn step(&mut self) -> Option<char> {
        self.last_char = self.input.next();
        self.last_char
    }

    /// Collect `first` plus every directly following character accepted by `keep`.
    ///
    /// `first` must be the current lookahead. On return the lookahead is the
    /// first rejected character, or `None` if the input ended.
    fn lex_while(&mut self, first: char, keep: impl Fn(char) -> bool) -> String {
        let mut text = first.to_string();
        while let Some(c) = self.step() {
            if !keep(c) {
                break;
            }
            text.push(c);
        }
        text
    }

    /// Lex an identifier or keyword starting at `first`: `[a-zA-Z][a-zA-Z0-9]*`.
    fn lex_identifier(&mut self, first: char) -> Token {
        let ident = self.lex_while(first, |c| c.is_ascii_alphanumeric());
        match ident.as_str() {
            "def" => Token::Def,
            "extern" => Token::Extern,
            _ => Token::Identifier(ident),
        }
    }

    /// Lex a number starting at `first`: `[0-9.]+`.
    ///
    /// The whole run of digits and dots is consumed as one token. If that run
    /// is not a valid `f64` literal (e.g. `12.34.56` or a lone `.`), the token
    /// value falls back to `0.0`.
    fn lex_number(&mut self, first: char) -> Token {
        let text = self.lex_while(first, |c| c.is_ascii_digit() || c == '.');
        Token::Number(text.parse().unwrap_or_default())
    }

    /// Lex and return the next token.
    ///
    /// Implement `int gettok();` from the tutorial.
    ///
    /// Leading ASCII whitespace and `#` comments (running up to the next `\r`
    /// or `\n`) are skipped. Once the input is exhausted, [`Token::Eof`] is
    /// returned.
    pub fn gettok(&mut self) -> Token {
        // Iterate instead of recursing after a comment: Rust does not guarantee
        // tail-call elimination, so a recursive call per comment line would
        // grow the stack with the number of consecutive comment lines.
        loop {
            // Eat up whitespaces.
            while matches!(self.last_char, Some(c) if c.is_ascii_whitespace()) {
                self.step();
            }

            // Unpack last char or return EOF.
            let last_char = match self.last_char {
                Some(c) => c,
                None => return Token::Eof,
            };

            // Eat up comment, then start over with whatever follows the line.
            if last_char == '#' {
                loop {
                    match self.step() {
                        Some('\r') | Some('\n') => break,
                        None => return Token::Eof,
                        Some(_) => { /* consume comment */ }
                    }
                }
                continue;
            }

            if last_char.is_ascii_alphabetic() {
                return self.lex_identifier(last_char);
            }

            if last_char.is_ascii_digit() || last_char == '.' {
                return self.lex_number(last_char);
            }

            // Anything else is a single-character token; advance past it.
            self.step();
            return Token::Char(last_char);
        }
    }
}
+
#[cfg(test)]
mod test {
    use super::{Lexer, Token};

    /// Shorthand for building an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.into())
    }

    /// Lex `src` and check that it yields the tokens in `expected`, in order,
    /// followed by `Token::Eof`.
    fn check(src: &str, expected: &[Token]) {
        let mut lex = Lexer::new(src.chars());
        for want in expected {
            assert_eq!(*want, lex.gettok());
        }
        assert_eq!(Token::Eof, lex.gettok());
    }

    #[test]
    fn test_identifier() {
        check("a b c", &[ident("a"), ident("b"), ident("c")]);
    }

    #[test]
    fn test_keyword() {
        check("def extern", &[Token::Def, Token::Extern]);
    }

    #[test]
    fn test_number() {
        check("12.34", &[Token::Number(12.34f64)]);

        check(
            " 1.0   2.0 3.0",
            &[
                Token::Number(1.0f64),
                Token::Number(2.0f64),
                Token::Number(3.0f64),
            ],
        );

        // A run of digits and dots that is not a valid float lexes to zero.
        check("12.34.56", &[Token::Number(0f64)]);
    }

    #[test]
    fn test_comment() {
        // A comment running to the end of the input yields no tokens at all.
        check("# some comment", &[]);

        check("abc # some comment \n xyz", &[ident("abc"), ident("xyz")]);
    }

    #[test]
    fn test_chars() {
        check(
            "a+b-c",
            &[
                ident("a"),
                Token::Char('+'),
                ident("b"),
                Token::Char('-'),
                ident("c"),
            ],
        );
    }

    #[test]
    fn test_whitespaces() {
        check(
            "    +a  b      c!    ",
            &[
                Token::Char('+'),
                ident("a"),
                ident("b"),
                ident("c"),
                Token::Char('!'),
            ],
        );

        check(
            "\n    a \n\r  b \r \n     c \r\r  \n   ",
            &[ident("a"), ident("b"), ident("c")],
        );
    }
}
+
+
\ No newline at end of file -- cgit v1.2.3