Commit 4c257f45 authored by Jonathan Poalses's avatar Jonathan Poalses

Improved detect-sentence-dialect, so it can take a vector of predicates, and...

Improved detect-sentence-dialect so it can take a vector of predicates and test the given sentence against them, returning as soon as any of them gives a truthy response
parent a9ba2fdc
......@@ -50,29 +50,33 @@
(def american-words #{"like"})
;; Predicate sets to check a sentence and see if it grammatically matches a dialect
;; Placeholder predicate: ignores its single argument and always answers false.
(defn fake-test [fake] false)
;; Predicate vectors to check a sentence and see if it grammatically matches a dialect.
;; Each predicate is called with the raw sentence; a truthy result marks the
;; sentence as belonging to that dialect. Every vector currently holds only the
;; always-false fake-test placeholder.
(def australian-predicates [fake-test])
(def scottish-predicates [fake-test])
(def american-predicates [fake-test])
;; Take a sentence and figure out its dialect
(defn detect-sentence-dialect
  "Returns the dialects detected in `sentence`.

  A dialect is reported when some token text of the sentence is a member of
  that dialect's word set or, failing that, when some predicate in that
  dialect's predicate collection returns a truthy value for the raw sentence.

  The result is a sequence containing any of :australian, :scottish and
  :american (in that order), or [:standard] when no dialect matched."
  [sentence]
  (let [tokens (dl/tokens sentence)
        ;; Extracted once and shared by all three checks
        ;; (assumes dl/text is a pure function of its tokens -- TODO confirm).
        words (dl/text tokens)
        ;; `some` over the predicate collection is used instead of
        ;; (apply some-fn predicates): some-fn needs at least one function,
        ;; so an empty collection would throw an ArityException, whereas
        ;; `some` simply yields nil. The predicates are only consulted when
        ;; the word-set check fails, because `or` short-circuits.
        dialect-when (fn [dialect dialect-words predicates]
                       (when (or (some dialect-words words)
                                 (some #(% sentence) predicates))
                         dialect))
        dialects (remove nil?
                         [(dialect-when :australian australian-words australian-predicates)
                          (dialect-when :scottish scottish-words scottish-predicates)
                          (dialect-when :american american-words american-predicates)])]
    (if (empty? dialects) [:standard] dialects)))
;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialect can return a collection,
;;when no dialect can be detected it defaults to standard. (IE if there's a sample with 3 sentences, one reads as scottish,
;;when no dialect can be detected it defaults to standard. IE if there's a sample with 3 sentences, one reads as scottish,
;;one reads as scottish and australian, and the last reads as nothing, it will return a collection containing 2 scottish keys,
;;one australian key, and one standard key, meaning it would be seen as a scottish sample.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment