Commit a9ba2fdc authored by Jonathan Poalses

Cleanup

parent 3c59b3d4
...@@ -42,9 +42,6 @@ ...@@ -42,9 +42,6 @@
"ner"] "ner"]
:quote {:extractUnclosedQuotes "true"}})) :quote {:extractUnclosedQuotes "true"}}))
(def bad-words #{"why" "cause"})
;; Word sets that will show a sentence as being of that dialect ;; Word sets that will show a sentence as being of that dialect
(def australian-words #{"incorrect" "why"}) (def australian-words #{"incorrect" "why"})
...@@ -73,22 +70,6 @@ ...@@ -73,22 +70,6 @@
(if (empty? dialects) [:standard] dialects))) (if (empty? dialects) [:standard] dialects)))
;; Another failed attempt
;(defn detect-sentence-dialect [sentence]
; (let [dialects []
; tokens (dl/tokens sentence)]
; (when (some australian-words (dl/text (dl/tokens tokens)))
; (let [dialects (conj dialects :australian)]
; (when (some scottish-words (dl/text (dl/tokens tokens)))
; (let [dialects (conj dialects :scottish)]
; (when (some american-words (dl/text (dl/tokens tokens)))
; (let [ dialects (conj dialects :american)]
; (if (empty? dialects) (conj dialects :standard))
; dialects))))))))
;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect ;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialects can return a collection, ;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialects can return a collection,
;;when no dialect can be detected it defaults to standard. (IE if there's a sample with 3 sentences, one reads as scottish, ;;when no dialect can be detected it defaults to standard. (IE if there's a sample with 3 sentences, one reads as scottish,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment