Commit 531ad920 authored by Jonathan Poalses

changed the word predicate to be functions instead of a var containing an...

Changed the word predicates to be named functions (defn) instead of vars containing anonymous functions; started the main function.
parent 99155e50
......@@ -53,9 +53,17 @@
(def american-words #{"like"})
;; Word predicates held in plain vars: each one feeds (dl/text (dl/tokens sentence))
;; through `some`, using the dialect's word collection as the test function.
(def australian-word-predicate
  (fn [sentence]
    (->> sentence dl/tokens dl/text (some australian-words))))
(def scottish-word-predicate
  (fn [sentence]
    (->> sentence dl/tokens dl/text (some scottish-words))))
(def american-word-predicate
  (fn [sentence]
    (->> sentence dl/tokens dl/text (some american-words))))
(defn australian-word-predicate
  "Looks up each value produced by `(dl/text (dl/tokens sentence))` in
  `australian-words` and returns the first truthy result, or nil if there
  is none."
  [sentence]
  (->> sentence
       dl/tokens
       dl/text
       (some australian-words)))
(defn scottish-word-predicate
  "Looks up each value produced by `(dl/text (dl/tokens sentence))` in
  `scottish-words` and returns the first truthy result, or nil if there
  is none."
  [sentence]
  (->> sentence
       dl/tokens
       dl/text
       (some scottish-words)))
(defn american-word-predicate
  "Looks up each value produced by `(dl/text (dl/tokens sentence))` in
  `american-words` and returns the first truthy result, or nil if there
  is none."
  [sentence]
  (->> sentence
       dl/tokens
       dl/text
       (some american-words)))
(defn fake-test
[fake]
......@@ -63,11 +71,14 @@
;; Predicate vectors to check a sentence and see if it grammatically matches a dialect
;; Each *-predicates var below appears twice in this listing (a one-line form
;; followed by a two-line form) with the same contents; when loaded in order,
;; the later def simply rebinds the var to an equal vector.
;; Every vector holds the shared fake-test predicate plus that dialect's word predicate.
(def australian-predicates [fake-test australian-word-predicate])
(def australian-predicates
[fake-test australian-word-predicate])
(def scottish-predicates [fake-test scottish-word-predicate])
(def scottish-predicates
[fake-test scottish-word-predicate])
(def american-predicates [fake-test american-word-predicate])
(def american-predicates
[fake-test american-word-predicate])
;; Take a sentence and figure out its dialect
......@@ -83,7 +94,6 @@
(cond)
;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
;; A sentence can have an indeterminate number of dialects associated with it, because detect-sentence-dialects can return a collection.
;; When no dialect can be detected it defaults to standard. E.g. if there's a sample with 3 sentences, one reads as scottish,
......@@ -138,6 +148,15 @@
(defn show-dependencies
  "Pretty-prints the result of `dl/dependency-graph` for the element at
  index 1 of `sentences-one`."
  []
  (-> sentences-one
      (nth 1)
      dl/dependency-graph
      clojure.pprint/pprint))
;; Slurp a file containing a map of all the samples and their expected dialects.
;; Detect the dialect of each sample, collect the results, and compare them against the expected dialects.
;; Output the results both to a file, as a map from each sample to whether detection succeeded,
;; and to the command line as a fraction and percentage, with a breakdown per dialect.
;; NOTE(review): stub only — the body is empty, so this currently returns nil
;; and never reads `filename`.
(defn detect-dialects
[filename])
(comment
;; Test every annotator in the pipeline
......@@ -342,6 +361,8 @@
(slurp "test.txt")
;; clojure.core/read-string can execute code embedded in its input (the #= reader
;; macro) unless *read-eval* is bound to false, so only use it on trusted files.
(read-string (slurp "test2.txt"))
;; clojure.edn/read-string reads data only and never evaluates it; prefer it for
;; files that are not fully trusted. It requires the string to read as an argument.
(clojure.edn/read-string (slurp "test2.txt"))
;; Don't use eval with arbitrary input: this runs whatever code test2.txt contains.
(def horror (eval (read-string (slurp "test2.txt"))))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment