Commit 3c59b3d4 authored by Jonathan Poalses's avatar Jonathan Poalses

Implemented the word-based dialect detection

parent 8c917db7
...@@ -47,11 +47,11 @@ ...@@ -47,11 +47,11 @@
;; Word sets that will show a sentence as being of that dialect ;; Word sets that will show a sentence as being of that dialect
(def australian-words #{}) (def australian-words #{"incorrect" "why"})
(def scottish-words #{}) (def scottish-words #{"hence"})
(def american-words #{}) (def american-words #{"like"})
;; Predicate sets to check a sentence and see if it grammatically matches a dialect ;; Predicate sets to check a sentence and see if it grammatically matches a dialect
...@@ -65,7 +65,29 @@ ...@@ -65,7 +65,29 @@
;; Take a sentence and figure out its dialect ;; Take a sentence and figure out its dialect
;; Detect which dialects a single sentence shows signs of.
;;
;; sentence - a sentence object accepted by dl/tokens.
;;
;; Returns a vector of dialect keywords, always in the order
;; :australian, :scottish, :american, containing one entry for every word set
;; that matches at least one word of the sentence. When no word set matches,
;; returns [:standard].
(defn detect-sentence-dialect [sentence]
  ;; Extract the words once instead of once per dialect.
  ;; NOTE(review): presumably dl/text yields the token strings of the
  ;; sentence -- confirm against the dl namespace.
  (let [words (dl/text (dl/tokens sentence))
        ;; cond-> threads the vector through every clause whose test is truthy,
        ;; so the result is always a flat vector in a fixed order (conj-ing
        ;; optional vectors onto a possibly-nil value gave a reversed list or a
        ;; nested vector depending on which checks matched).
        dialects (cond-> []
                   (some australian-words words) (conj :australian)
                   (some scottish-words words) (conj :scottish)
                   (some american-words words) (conj :american))]
    (if (empty? dialects) [:standard] dialects)))
;; Another failed attempt
;(defn detect-sentence-dialect [sentence]
; (let [dialects []
; tokens (dl/tokens sentence)]
; (when (some australian-words (dl/text (dl/tokens tokens)))
; (let [dialects (conj dialects :australian)]
; (when (some scottish-words (dl/text (dl/tokens tokens)))
; (let [dialects (conj dialects :scottish)]
; (when (some american-words (dl/text (dl/tokens tokens)))
; (let [ dialects (conj dialects :american)]
; (if (empty? dialects) (conj dialects :standard))
; dialects))))))))
;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect ;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialect can return a collection, ;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialect can return a collection,
...@@ -149,6 +171,15 @@ ...@@ -149,6 +171,15 @@
(def rats (datafy (dl/dependency-graph (nth sentences-one 1)))) (def rats (datafy (dl/dependency-graph (nth sentences-one 1))))
;; REPL checks: for each test text, split it into sentences, detect the
;; dialects of every sentence, count how often each dialect occurs, and take
;; the key of the entry that sorts last by count.
(->> (nlp test-sentence-one) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(->> (nlp test-sentence-two) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(->> (nlp test-sentence-three) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(->> (nlp test-sentence-four) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(->> (nlp test-sentence-five) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(->> (nlp test-sentence-six) dl/sentences (map detect-sentence-dialect) flatten frequencies (sort-by val) last first)
(last (vals rats)) (last (vals rats))
(.getTarget (first (last (vals rats)))) (.getTarget (first (last (vals rats))))
(bean (first (last (vals rats)))) (bean (first (last (vals rats))))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment