Why does the lexer cause the program to hang when trying to tokenize a quote?

⚓ Rust    📅 2025-08-16    👤 surdeus    👁️ 4      

surdeus

I am working on a Lisp interpreter in Rust. I have run into a small problem: when the lexer finds a quote, it hangs, no matter where the quote is placed:

use std::io::{self, Write};

/// Demo entry point: strips comments from a sample expression, tokenizes it
/// and pretty-prints the resulting tokens.
fn main() {
    let expr = "'(1 2 3)";
    // `expr` is already a `&str`; cloning a shared reference only copies the
    // pointer, so it is passed directly.
    println!("The result is {:#?}", tokenize(clean(expr), "<FILENAME>"))
}

/// A lexed item: an expression together with the position information the
/// tokenizer recorded for it.
#[derive(Clone, Debug, PartialEq)]
struct Token {
    /// The expression carried by this token.
    expr: Expr,
    /// Line counter value recorded by the tokenizer (its counter starts at 1).
    row: usize,
    /// Column counter value recorded by the tokenizer (incremented once per
    /// character it examines).
    col: usize,
    /// Source location label; the tokenizer stores its `loc` argument here.
    file: String
}

/// A Lisp expression as produced by the lexer.
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    /// A bare symbol: any atom that is not a number, `t` or `nil`.
    Symbol(String),
    /// A quoted symbol. NOTE(review): no visible code constructs this variant;
    /// quoted symbols are currently wrapped in `List` instead.
    QSymbol(String), // Quoted symbols
    /// An atom that parses as an `f64`.
    Number(f64),
    /// A string literal, after processing by `parse_str`.
    String(String),
    /// A quoted parenthesised form, or a single quoted symbol wrapped in a
    /// one-element list.
    List(Vec<Expr>),
    /// `t` (true) or `nil` (false).
    Bool(bool),
    /// NOTE(review): not constructed anywhere in the visible code; presumably
    /// reserved for the evaluator.
    Lambda(Vec<Expr>),
    /// An unquoted parenthesised form.
    Function(Vec<Expr>) 
}

/// Removes `;` line comments from Lisp source text.
///
/// A comment starts at a `;` that is not inside a double-quoted string and
/// runs up to (but not including) the next newline. Newlines are always kept
/// so that line numbers in the cleaned text match the original.
///
/// Everything inside a comment is ignored, including `"` characters: a quote
/// mark in a comment is neither emitted nor allowed to flip the in-string
/// state, which would otherwise corrupt the handling of all following text.
///
/// Escaped quotes (`\"`) are not recognised; every `"` outside a comment
/// toggles the string state.
pub fn clean(expr: &str) -> String {
    let mut cleaned = String::with_capacity(expr.len());
    let mut in_string = false;
    let mut in_comment = false;

    for ch in expr.chars() {
        if in_comment {
            // Only a newline is meaningful inside a comment: it ends it.
            if ch == '\n' {
                in_comment = false;
                cleaned.push(ch);
            }
            continue;
        }

        match ch {
            '"' => {
                in_string = !in_string;
                cleaned.push(ch);
            }
            // A `;` inside a string literal is ordinary text.
            ';' if !in_string => in_comment = true,
            _ => cleaned.push(ch),
        }
    }

    cleaned
}

/* Turn strings into vec tokens*/
pub fn tokenize(expr: String, loc: &str) -> Vec<Token> {
    let mut tokens = Vec::new(); // tokens
    let mut current = String::new(); // current expr
    let mut in_string = false; //  in a string
    let mut row = 1; // file line
    let mut col = 0; // file col
    let mut isQuoted = false; // is there quote
    let chars: Vec<char> = expr.chars().collect(); // chars
    let mut i = 0; // index

    while i < chars.len() {
        let c = chars[i];
        col += 1;

        if in_string { // Handle strings
            current.push(c);
            if c == '"' {
                in_string = false;
		let mut modified = parse_str(&current);
                tokens.push(Token {
                    expr: Expr::String(modified),
                    row,
                    col,
                    file: loc.to_string(),
                });
                current.clear();
            }
            i += 1;
            continue;
        }

        match c {
            '"' => {
                in_string = true;
                current.push(c);
                i += 1;
            }
	    '\'' => {
		isQuoted = true
	    }
            '(' => {
                if !current.trim().is_empty() {
                    tokens.push(parse(&current, row, col, loc,isQuoted));
                    current.clear();
                }

                // Find matching closing paren
                let mut depth = 1;
                let start = i + 1;
                let mut end = start;
                while end < chars.len() && depth > 0 {
                    if chars[end] == '(' {
                        depth += 1;
                    } else if chars[end] == ')' {
                        depth -= 1;
                    }
                    end += 1;
                }

                if depth != 0 {
                    panic!("Unmatched '('");
                }

                // Get substring inside parens
                let inside = chars[start..end - 1].iter().collect::<String>();

                // Recursive tokenize inside
                let inner_tokens = tokenize(inside, loc);
                let inner_exprs = inner_tokens.into_iter().map(|t| t.expr).collect();
		if isQuoted {
		    tokens.push(Token {
			expr: Expr::List(inner_exprs),
			row,
			col,
			file: loc.to_string(),
                });
		} else {
		    tokens.push(Token {
			expr: Expr::Function(inner_exprs),
			row,
			col,
			file: loc.to_string(), 
		    })
		}

                i = end; // skip past closing ')'
            }
            ')' => {
                // Unexpected closing paren alone, skip it or panic
                i += 1;
            }
            ' ' | '\n' | '\t' => {
		isQuoted = false;
                if c == '\n' {
                    row += 1;
                    col = 0;
                }
                if !current.trim().is_empty() {
                    tokens.push(parse(&current, row, col, loc,isQuoted));
                    current.clear();
                }
                i += 1;
            }
            _ => {
                current.push(c);
                i += 1;
            }
        }
    }

    if !current.trim().is_empty() {
        tokens.push(parse(&current, row, col, loc,isQuoted));
    }
    tokens
}
/* par: parses string into token atom*/
fn parse(s: &str, row: usize, col: usize, file: &str, is_quoted: bool) -> Token {
    let expr = if let Ok(n) = s.parse::<f64>() {
        Expr::Number(n)
    } else if s == "t" || s == "nil" {
        if s == "t" {
	    Expr::Bool(true)
	} else {
	    Expr::Bool(false)
	}
    } else {
	if is_quoted {;
	    Expr::List(vec![Expr::Symbol(s.to_string())])
	} else {
	    Expr::Symbol(s.to_string())
	}
    };

    Token {
        expr,
        row,
        col,
        file: file.to_string(),
    }
}

/// Escapes special characters in a string literal.
///
/// Leading and trailing `"` characters are stripped, then each remaining
/// character is copied to the result, with these replaced by a two-character
/// backslash escape: NUL, newline, carriage return, tab, `"`, `'` and `\`.
///
/// Text that looks like a `\xNN` or `\u{...}` escape gets no special
/// treatment: its backslash is doubled like any other and the rest is copied
/// verbatim. No character is ever dropped, including after a malformed or
/// over-long `\u{` sequence.
fn parse_str(string: &str) -> String {
    let inner = string.trim_matches('"');
    let mut result = String::with_capacity(inner.len());

    for c in inner.chars() {
        match c {
            '\0' => result.push_str("\\0"),
            '\n' => result.push_str("\\n"),
            '\r' => result.push_str("\\r"),
            '\t' => result.push_str("\\t"),
            '"' => result.push_str("\\\""),
            '\'' => result.push_str("\\'"),
            '\\' => result.push_str("\\\\"),
            other => result.push(other),
        }
    }

    result
}

With the expression (print 1 2), it works. With '(1 2 3 4 5), it just hangs with no output. What is the problem?

7 posts - 3 participants

Read full topic

🏷️ Rust_feed