;;
;; om-core.scm -- Low-level parser generator routines
;;
;; These are general routines that every grammar needs.
;; Stuff that's specific to e.g. a Python parser shouldn't be here.
;;


;; -----------------------------------------------
;; Settings

(define +MAX-INPUT-FILESIZE+ 100000)   ;plenty for a grammar file

(define *LOGLEVEL* 2)
;; 0  Report problems only
;; 1  Verbose status messages
;; 2  Display source & output for each rule
;; 3  Debugging info  
;; ...
;; 5  Test


;; -----------------------------------------------
;; Helpful macros

(define-syntax-rule (setf x y)
  ; "Lisp-like SETF (returns the new value, unlike SET! in Scheme)"
  (let ((temp y))
    (set! x temp)
    temp))

(define-syntax-rule (r* expr)
  ; "Kleene star '*' regex operation.
  ;  Collects 0 or more expr's as long as expr is true."
  (do ( (acc '())
        (v expr expr) )
    ((not v) (reverse acc))
    (set! acc (cons v acc)) ) )

(define-syntax-rule (r+ expr)
  ; "Kleene plus '+' regex operation.
  ;  Collects 1 or more expr's.
  ;  Returns #f if no matches."
  (do ( (acc '())
        (v expr expr) )
    ((not v) (and (pair? acc)
                  (reverse acc)) )
    (set! acc (cons v acc)) ) )

(define-syntax-rule (dprint loglevel heading expr)
  ; "Debug Print: prints & returns its argument."
  (when (>= *LOGLEVEL* loglevel)
    (printf "~a ~s~%" heading expr)
    expr))

;; -----------------------------------------------
;; Input state
;;
(define *s* "")
(define *i* 0)
(define *end* 0)
(define (eof) (>= *i* *end*))

(define (parse-from-string input-string)
  (set! *s* input-string)
  (set! *end* (string-length input-string))
  (set! *i* 0)
  (grammar))

(define (parse-from-file filename)
  (let ((infile (open-input-file filename)))
    (parse-from-string
     (read-string +MAX-INPUT-FILESIZE+ infile))))


;; -----------------------------------------------
;; OMeta axioms -- the stuff we can't define in OMeta itself
;;

(define (lit)
  "Match a single-quoted string (literal text)"
  (ws)
  (let ((start *i*))
    (and (literal "'")
         (do () ((not   ; match anything up to next quote:
                  (match (lambda (c) (not (eq? c #\')))) ) ))
         (literal "'")
         `(literal
           ,(substring *s* (1+ start) (1- *i*)) ) ) ) )


(define (match fn)
  "Match a single char (stops at EOF)"
  (cond ((and (< *i* *end*) (fn (string-ref *s* *i*)))
         (set! *i* (1+ *i*))
         (string-ref *s* (1- *i*)) )
        (else #f) ) )

(define (match-char c)
  "Match a single char"
  (cond ((and (< *i* *end*) (char=? c (string-ref *s* *i*)))
         (set! *i* (1+ *i*))
         (string-ref *s* (1- *i*)) )
        (else #f)))

(define (literal word)
  "Match a certain sequence of chars"
  (let ((len (string-length word)))
    (if (and (<= (+ *i* len) *end*)
             (string=? word (substring *s* *i* (+ *i* len))))
        (begin (set! *i* (+ *i* len)) word)
        #f)))

(define (name)
  "Match a variable name -- return it as a symbol"
  (ws)
  (let ((start *i*))
    (and (match char-alphabetic_?)
         (do () ((not (match char-alphanumeric_?))))
          (string->symbol
           (substring *s* start *i*)) ) ) )
(define (char-alphabetic_? c)    (or (char=? c #\_) (char-alphabetic? c)))
(define (char-alphanumeric_? c)  (or (char-alphabetic_? c) (char-numeric? c)))

(define (ws)
  "Skip horizontal whitespace"
  ;(do () ((not (match char-blank?))))
  (r* (match char-blank?))
  #t)

(define (ws_)
  "Require horizontal whitespace (at least 1 char)"
  (and (match char-blank?)
       (do () ((not (match char-blank?)) #t)) ))
  ;(r+ (match char-blank?)) )

(define (nl)  (match (lambda (c) (eq? c #\newline))))
(define (not_nl) (match (lambda (c) (not (eq? c #\newline)))))
;(define (dot) (match (lambda (c) (eq? c #\.))))

(defmacro parsed-text ()       '(substring *s* __start *i*))
(defmacro read-parsed-text ()  '(read-from-string (substring *s* __start *i*)))


(define (scheme)
  "Match a rule defined as embedded Scheme code.
   This version only accounts for nested parens, which should suffice,
   but a real Scheme parser would be best."
  (let ((__start *i*))
    (and (ws)
         (_scheme)
;         (read-from-string (substring *s* __start *i*))
         (read-parsed-text)
         )))

(define (_scheme)
  (and (literal "(")
       (r* (or (_scheme)
               (match (lambda (c) (not ( eq? c #\) ) ))) ))
       (literal ")") ))
   