diff --git a/doc/csv.clj b/doc/csv.clj new file mode 100644 index 0000000..76dbdc7 --- /dev/null +++ b/doc/csv.clj @@ -0,0 +1,28 @@ +(ns csv + (:use + [eu.dnetlib.clojure.clarsec] + [eu.dnetlib.clojure.monad])) + +;; Parsing simplified XML: +;; +;; Sample Input: + +(def input + "Year,Make,Model,Length +1997,Ford,Model-350,234 +2000,Mercury,\"Model 800\",238") + +(def cell (<|> stringLiteral + (stringify (many (none-of ",\n"))))) +(def line (sep-by cell comma)) +(def csv (sep-by line eol)) + +(defn -main [] + (prn (:value (parse csv input)))) + +;; Output: +;; +;; (("Year" "Make" "Model" "Length") +;; ("1997" "Ford" "Model-350" "234") +;; ("2000" "Mercury" "Model 800" "238")) + diff --git a/doc/xml.clj b/doc/xml.clj new file mode 100644 index 0000000..6d0f14c --- /dev/null +++ b/doc/xml.clj @@ -0,0 +1,49 @@ +(ns xml + (:use + [eu.dnetlib.clojure.clarsec] + [eu.dnetlib.clojure.monad])) + +;; Parsing simplified XML: +;; +;; Sample Input: + +(def input + " + + Joy of Clojure + Fogus + + + Structured and Interpretation of Computer Programs + MIT + + ") + +(defn arrows [p] (between (symb "<") (symb ">") p)) + +(def open-tag (arrows identifier)) +(defn close-tag [expect-name] (arrows (symb (str "/" expect-name)))) + +(defn element [p] + (let-bind [tag-name open-tag + contents p + _ (close-tag tag-name)] + (result {(keyword tag-name) contents}))) + +(def xml + (let [list$ #(flatten (list %&))] + (element + (<|> (<$> #(apply merge-with list$ %) (many1 (lazy xml))) + (stringify (many (<|> letter space))))))) + +(defn -main [] + (prn (:value (parse xml input)))) + +;; (-main) + +;; Output: +;; {:library +;; {:book ({:author "Fogus", +;; :title "Joy of Clojure"} +;; {:author "MIT", +;; :title "Structured and Interpretation of Computer Programs"})}} diff --git a/project.clj b/project.clj index 9bfd0ad..c8c5b61 100644 --- a/project.clj +++ b/project.clj @@ -1,5 +1,10 @@ (defproject clarsec "0.0.1-SNAPSHOT" :description "Parsec ported to clojure" + :dependencies [[org.clojure/clojure "1.2.1"] [org.clojure/clojure-contrib "1.2.0"]] - :dev-dependencies [[lein-clojars "0.5.0-SNAPSHOT"]]) + + :dev-dependencies [[lein-clojars "0.5.0-SNAPSHOT"] + [swank-clojure "1.2.1"]] + + :test-path "src/test") diff --git a/src/eu/dnetlib/clojure/clarsec.clj b/src/eu/dnetlib/clojure/clarsec.clj index e027b58..6e25169 100644 --- a/src/eu/dnetlib/clojure/clarsec.clj +++ b/src/eu/dnetlib/clojure/clarsec.clj @@ -31,14 +31,14 @@ (defmethod bind 'Parser [dm dfunc] - (let [m (force dm) - func (force dfunc)] + (let [m dm + func dfunc] (make-monad (monad-type m) (fn [strn] (let [parser (monad m) result (parser strn)] (if (consumed? result) - ((force (monad (force (func (:value result))))) (:rest result)) + ((monad (func (:value result))) (:rest result)) result)))))) (defn result [v] @@ -48,7 +48,7 @@ (make-monad 'Parser (fn opt-plus [strn] (failback - (first (drop-while failed? (map #((monad (force %)) strn) parsers))) + (first (drop-while failed? (map #((monad %) strn) parsers))) (failed))))) (defn >> [p1 p2] @@ -62,6 +62,9 @@ (defn <$> [f p] (bind p #(result (f %)))) +;; Applicative Programming +(defn <* [a b] (let-bind [r a _ b] (result r))) + (def any-token (make-monad 'Parser @@ -71,13 +74,6 @@ (consumed (first strn) (. strn (substring 1))))))) -(def eof - (make-monad 'Parser - (fn p-eof [strn] - (if (= "" strn) - (consumed "" "") - (failed))))) - (def fail (make-monad 'Parser (fn p-fail [strn] (failed)))) (defn satisfy [pred] @@ -104,7 +100,7 @@ (def many1) (defn many [parser] - (>>== (optional (delay (many1 parser))) + (>>== (optional (many1 parser)) #(if (nil? %) () %))) (defn many1 [parser] @@ -146,6 +142,11 @@ (let [str-chars (into #{} target-strn)] (satisfy #(contains? str-chars %)))) +(defn none-of [exclusion-strn] + (let [str-chars (into #{} exclusion-strn)] + (satisfy #(not (contains? str-chars %))))) + + (def space (one-of " \r\n\t")) (def spaces (many space)) @@ -194,8 +195,39 @@ (def stringLiteral (stringify (lexeme (between (is-char \") (is-char \") (many (not-char \")))))) +(def eof + (make-monad 'Parser + (fn p-eof [strn] + (if (= "" strn) + (consumed "" "") + (failed))))) + +(def eol + (>> (optional (satisfy #(= % \return))) + (satisfy #(= % \newline)))) + + +(defn force-during-parse [d] + (make-monad 'Parser + (fn [strn] + ((monad (force d)) strn)))) + +(defmacro lazy [sexp] + (let [lazy-p-fn-fn + (fn [p-fn-fn & args] + `(force-during-parse + (delay (@(force (delay (var ~p-fn-fn))) ~@args)))) + lazy-p-fn + (fn [p-fn] + `(force-during-parse + (delay @(force (delay (var ~p-fn))))))] + (cond + (seq? sexp) (apply lazy-p-fn-fn sexp) + (symbol? sexp) (lazy-p-fn sexp) + :else (throw (Exception. (str "Unsupported use of lazy. " + "Proper use: (lazy identifier) or " + "(lazy (symb \"foo\"))")))))) + (defn parse [parser input] - ((monad (force parser)) input)) + ((monad parser) input)) -;;(defn -main [] -;; (println (parse (>> (delay letter) (delay letter)) "ca."))) diff --git a/src/eu/dnetlib/clojure/monad.clj b/src/eu/dnetlib/clojure/monad.clj index 310c7d3..86eb05d 100644 --- a/src/eu/dnetlib/clojure/monad.clj +++ b/src/eu/dnetlib/clojure/monad.clj @@ -62,7 +62,7 @@ #^{:doc "bind makes the value of the given monad available to a function. The function may act on the value, but it must return another monad. Although this cannot be enforced in Clojure."} - bind (fn [m _] (monad-type (force m)))) + bind (fn [m _] (monad-type m))) (defmethod bind `MZero [m _] m) (defmethod bind `Monad [m f] (f (monad m))) diff --git a/src/test/clarsec.clj b/src/test/clarsec.clj new file mode 100644 index 0000000..571a81c --- /dev/null +++ b/src/test/clarsec.clj @@ -0,0 +1,59 @@ +(ns test.clarsec + (:use + [eu.dnetlib.clojure.clarsec] + [eu.dnetlib.clojure.monad] + [clojure.test])) + +(deftest test-recur1 + (def recur1 + (<|> (symb "foo") + (braces (lazy recur1)))) + + (let [parse$ #(or (:value (parse recur1 %)) :fail)] + (is (= (parse$ "foo") "foo")) + (is (= (parse$ "{foo}") "foo")) + (is (= (parse$ "{{foo}}") "foo")) + (is (= (parse$ "{{{foo}}}") "foo")) + (is (= (parse$ "bar") :fail)) + (is (= (parse$ "{bar}") :fail)))) + +(deftest test-recur2 + (defn recur2 [x] + (<|> (symb x) + (braces (lazy (recur2 x))))) + + (let [parse$ #(or (:value (parse (recur2 "bar") %)) :fail)] + (is (= (parse$ "bar") "bar")) + (is (= (parse$ "{bar}") "bar")) + (is (= (parse$ "{{bar}}") "bar")) + (is (= (parse$ "{{{bar}}}") "bar")) + (is (= (parse$ "foo") :fail)) + (is (= (parse$ "{foo}") :fail)))) + +(deftest test-recur3 + (defn recur3 [x] + (<|> (symb (if (= (mod x 2) 0) "foo" "bar")) + (braces (lazy (recur3 (inc x)))))) + + (let [parse$ #(or (:value (parse (recur3 0) %)) :fail)] + (is (= (parse$ "foo") "foo")) + (is (= (parse$ "bar") :fail)) + + (is (= (parse$ "{foo}") :fail)) + (is (= (parse$ "{bar}") "bar")) + + (is (= (parse$ "{{foo}}") "foo")) + (is (= (parse$ "{{bar}}") :fail)) + + (is (= (parse$ "{{{foo}}}") :fail)) + (is (= (parse$ "{{{bar}}}") "bar")))) + + +(deftest test-unbound-var + (def unbound-var) + (def fwdref1 (lazy unbound-var)) + (def unbound-var identifier) + + (let [parse$ #(or (:value (parse fwdref1 %)) :fail)] + (is (= (parse$ "foo") "foo")) + (is (= (parse$ "9foo") :fail))))