Info
This post is auto-generated from RSS feed The Rust Programming Language Forum - Latest topics. Source: Why does the lexer cause the program to hang when trying to tokenize a quote
I am working on a Lisp interpreter in Rust. I have run into a small problem: when the lexer finds a quote, it hangs, no matter where the quote is placed:
use std::io::{self, Write};

fn main() {
    let expr = "'(1 2 3)";
    println!("The result is {:#?}", tokenize(clean(expr.clone()), "<FILENAME>"))
}
#[derive(Clone, Debug, PartialEq)]
struct Token {
    expr: Expr,
    row: usize,
    col: usize,
    file: String
}
/* Expr enum: the lisp expression */
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Symbol(String),
    QSymbol(String), // Quoted symbols
    Number(f64),
    String(String),
    List(Vec<Expr>),
    Bool(bool),
    Lambda(Vec<Expr>),
    Function(Vec<Expr>)
}
/* clean function: clear comments */
pub fn clean(expr: &str) -> String {
    let mut mexpr = String::new(); // Modified expression
    let mut inString = false;      // in a String
    let mut inComment = false;     // in a Comment
    for i in expr.chars() { // Loop over chars
        match i {
            '"' => { // enter or exit a string
                inString = !inString;
                mexpr.push(i)
            }
            ';' => {
                if !inString { // start a comment
                    inComment = true
                } else {
                    mexpr.push(i)
                }
            }
            '\n' => { // end a comment
                inComment = false;
                mexpr.push(i)
            }
            _ => { // push
                if !inComment {
                    mexpr.push(i)
                }
            }
        }
    }
    mexpr
}
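// Illustrative only (not part of the original program): a quick check of what
// clean produces. Text after ';' is dropped, and the newline ending the comment is kept:
//     assert_eq!(clean("(print 1 2) ; demo\n"), "(print 1 2) \n");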
/* Turn strings into vec tokens */
pub fn tokenize(expr: String, loc: &str) -> Vec<Token> {
    let mut tokens = Vec::new();     // tokens
    let mut current = String::new(); // current expr
    let mut in_string = false;       // in a string
    let mut row = 1;                 // file line
    let mut col = 0;                 // file col
    let mut isQuoted = false;        // is there quote
    let chars: Vec<char> = expr.chars().collect(); // chars
    let mut i = 0; // index
    while i < chars.len() {
        let c = chars[i];
        col += 1;
        if in_string { // Handle strings
            current.push(c);
            if c == '"' {
                in_string = false;
                let mut modified = parse_str(&current);
                tokens.push(Token {
                    expr: Expr::String(modified),
                    row,
                    col,
                    file: loc.to_string(),
                });
                current.clear();
            }
            i += 1;
            continue;
        }
        match c {
            '"' => {
                in_string = true;
                current.push(c);
                i += 1;
            }
            '\'' => {
                isQuoted = true
            }
            '(' => {
                if !current.trim().is_empty() {
                    tokens.push(parse(&current, row, col, loc, isQuoted));
                    current.clear();
                }
                // Find matching closing paren
                let mut depth = 1;
                let start = i + 1;
                let mut end = start;
                while end < chars.len() && depth > 0 {
                    if chars[end] == '(' {
                        depth += 1;
                    } else if chars[end] == ')' {
                        depth -= 1;
                    }
                    end += 1;
                }
                if depth != 0 {
                    panic!("Unmatched '('");
                }
                // Get substring inside parens
                let inside = chars[start..end - 1].iter().collect::<String>();
                // Recursive tokenize inside
                let inner_tokens = tokenize(inside, loc);
                let inner_exprs = inner_tokens.into_iter().map(|t| t.expr).collect();
                if isQuoted {
                    tokens.push(Token {
                        expr: Expr::List(inner_exprs),
                        row,
                        col,
                        file: loc.to_string(),
                    });
                } else {
                    tokens.push(Token {
                        expr: Expr::Function(inner_exprs),
                        row,
                        col,
                        file: loc.to_string(),
                    })
                }
                i = end; // skip past closing ')'
            }
            ')' => {
                // Unexpected closing paren alone, skip it or panic
                i += 1;
            }
            ' ' | '\n' | '\t' => {
                isQuoted = false;
                if c == '\n' {
                    row += 1;
                    col = 0;
                }
                if !current.trim().is_empty() {
                    tokens.push(parse(&current, row, col, loc, isQuoted));
                    current.clear();
                }
                i += 1;
            }
            _ => {
                current.push(c);
                i += 1;
            }
        }
    }
    if !current.trim().is_empty() {
        tokens.push(parse(&current, row, col, loc, isQuoted));
    }
    tokens
}
/* parse: parses a string into a token atom */
fn parse(s: &str, row: usize, col: usize, file: &str, is_quoted: bool) -> Token {
    let expr = if let Ok(n) = s.parse::<f64>() {
        Expr::Number(n)
    } else if s == "t" || s == "nil" {
        if s == "t" {
            Expr::Bool(true)
        } else {
            Expr::Bool(false)
        }
    } else {
        if is_quoted {
            Expr::List(vec![Expr::Symbol(s.to_string())])
        } else {
            Expr::Symbol(s.to_string())
        }
    };
    Token {
        expr,
        row,
        col,
        file: file.to_string(),
    }
}
/* parse_str: escape special characters in strings */
fn parse_str(string: &str) -> String {
    let chars: Vec<char> = string.trim_matches('"').chars().collect();
    let mut result = String::new();
    let mut i = 0;
    while i < chars.len() {
        match chars[i] {
            '\0' => result.push_str("\\0"),
            '\n' => result.push_str("\\n"),
            '\r' => result.push_str("\\r"),
            '\t' => result.push_str("\\t"),
            '"' => result.push_str("\\\""),
            '\'' => result.push_str("\\\'"),
            '\\' => result.push_str("\\\\"),
            'x' => {
                // Check for \xNN
                if i >= 1 && chars[i - 1] == '\\' && i + 2 < chars.len() {
                    let hi = chars[i + 1];
                    let lo = chars[i + 2];
                    if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() {
                        result.push('x');
                        result.push(hi);
                        result.push(lo);
                        i += 2; // skip hex digits
                    } else {
                        result.push('x');
                    }
                } else {
                    result.push('x');
                }
            }
            'u' => {
                // Check for \u{NNNN}
                if i >= 1 && chars[i - 1] == '\\' && i + 1 < chars.len() && chars[i + 1] == '{' {
                    result.push('u');
                    result.push('{');
                    i += 2;
                    let mut count = 0;
                    while i < chars.len() && chars[i] != '}' && count < 6 {
                        if chars[i].is_ascii_hexdigit() {
                            result.push(chars[i]);
                            i += 1;
                            count += 1;
                        } else {
                            break;
                        }
                    }
                    if i < chars.len() && chars[i] == '}' {
                        result.push('}');
                    }
                } else {
                    result.push('u');
                }
            }
            c => result.push(c),
        }
        i += 1;
    }
    result
}
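// Illustrative only: parse_str strips the surrounding quotes and replaces control
// characters with their escape sequences, e.g.
//     assert_eq!(parse_str("\"a\tb\""), "a\\tb");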
With the expression (print 1 2) it works. With '(1 2 3 4 5) it just hangs with no output. What is the problem?
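A minimal main that reproduces both cases (a sketch replacing the one above; "<repl>" is just a placeholder file name):

fn main() {
    // Returns and prints the tokens:
    println!("{:#?}", tokenize(clean("(print 1 2)"), "<repl>"));
    // Never returns, no output:
    println!("{:#?}", tokenize(clean("'(1 2 3 4 5)"), "<repl>"));
}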