Commit 531ad920 authored by Jonathan Poalses

changed the word predicate to be functions instead of a var containing an...

Changed the word predicates to be functions instead of vars containing anonymous functions; started the main function
parent 99155e50
...@@ -53,9 +53,17 @@ ...@@ -53,9 +53,17 @@
(def american-words #{"like"}) (def american-words #{"like"})
(def australian-word-predicate #(some australian-words (dl/text (dl/tokens %)))) (defn australian-word-predicate
(def scottish-word-predicate #(some scottish-words (dl/text (dl/tokens %)))) [sentence]
(def american-word-predicate #(some american-words (dl/text (dl/tokens %)))) (some australian-words (dl/text (dl/tokens sentence))))
(defn scottish-word-predicate
[sentence]
(some scottish-words (dl/text (dl/tokens sentence))))
(defn american-word-predicate
[sentence]
(some american-words (dl/text (dl/tokens sentence))))
(defn fake-test (defn fake-test
[fake] [fake]
...@@ -63,11 +71,14 @@ ...@@ -63,11 +71,14 @@
;; Predicate vectors to check a sentence and see if it grammatically matches a dialect ;; Predicate vectors to check a sentence and see if it grammatically matches a dialect
(def australian-predicates [fake-test australian-word-predicate]) (def australian-predicates
[fake-test australian-word-predicate])
(def scottish-predicates [fake-test scottish-word-predicate]) (def scottish-predicates
[fake-test scottish-word-predicate])
(def american-predicates [fake-test american-word-predicate]) (def american-predicates
[fake-test american-word-predicate])
;; Take a sentence and figure out its dialect ;; Take a sentence and figure out its dialect
...@@ -83,7 +94,6 @@ ...@@ -83,7 +94,6 @@
(cond) (cond)
;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect ;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialects can return a collection, ;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialects can return a collection,
;;when no dialect can be detected it defaults to standard. IE if there's a sample with 3 sentences, one reads as scottish, ;;when no dialect can be detected it defaults to standard. IE if there's a sample with 3 sentences, one reads as scottish,
...@@ -138,6 +148,15 @@ ...@@ -138,6 +148,15 @@
(defn show-dependencies [] (defn show-dependencies []
(clojure.pprint/pprint (dl/dependency-graph (nth sentences-one 1)))) (clojure.pprint/pprint (dl/dependency-graph (nth sentences-one 1))))
;; detect-dialects: entry point for checking dialect detection against labelled samples.
;; NOTE: currently a stub -- the body is empty, so calling it returns nil.
;; Intended behaviour:
;;   1. Slurp the file named by `filename`, which contains a map of all the samples
;;      and their expected dialects.
;;   2. Run detection on each sample, collect the results, and compare each one
;;      against the expected dialect.
;;   3. Write the results to a file as a map from each sample to whether detection
;;      was successful.
;;   4. Print the overall success rate on the command line as a fraction and a
;;      percentage, with a breakdown per dialect.
(defn detect-dialects
[filename])
(comment (comment
;; Test every annotator in the pipeline ;; Test every annotator in the pipeline
...@@ -342,6 +361,8 @@ ...@@ -342,6 +361,8 @@
(slurp "test.txt") (slurp "test.txt")
(read-string (slurp "test2.txt")) (read-string (slurp "test2.txt"))
(clojure.edn/read-string)
;; Don't use eval with arbitrary input ;; Don't use eval with arbitrary input
(def horror (eval (read-string (slurp "test2.txt")))) (def horror (eval (read-string (slurp "test2.txt"))))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment