Commit bb026b57 authored by Jonathan Poalses

Added initial functions for handling dialect detection

parent 85952bbe
...@@ -30,6 +30,27 @@ ...@@ -30,6 +30,27 @@
"ner"] "ner"]
:quote {:extractUnclosedQuotes "true"}})) :quote {:extractUnclosedQuotes "true"}}))
;; Take a dependency graph object and change it into a map of tokens and
;; part-of-speech labels.
;; NOTE: unimplemented stub -- the body is empty, so calling this currently
;; returns nil regardless of `graph`.
(defn dependency-to-token-pos-map [graph])
;; Take a dependency graph object and change it into a map of tokens and
;; relation labels.
;; NOTE: unimplemented stub -- the body is empty, so calling this currently
;; returns nil regardless of `graph`.
(defn dependency-to-token-relation-map [graph])
;; Take a text sample and separate it into its sentences, then find the
;; dialect of each sentence and return the most common dialect.
;; NOTE: unimplemented stub -- the body is empty, so calling this currently
;; returns nil regardless of `sample`.
(defn detect-sample-dialect [sample])
;; Take a sentence and figure out its dialect.
;; NOTE: unimplemented stub -- the body is empty, so calling this currently
;; returns nil regardless of `sentence`.
(defn detect-sentence-dialect [sentence])
(def annotated-example (def annotated-example
(delay (nlp example))) (delay (nlp example)))
...@@ -48,8 +69,8 @@ ...@@ -48,8 +69,8 @@
(def sentences-two (def sentences-two
(delay (dl/sentences @annotated-example-two))) (delay (dl/sentences @annotated-example-two)))
(defn show-mentions [] (defn show-dependencies []
(clojure.pprint/pprint (map dl/mentions @sentences))) (clojure.pprint/pprint (dl/dependency-graph (nth sentences-one 1))))
(comment (comment
...@@ -67,14 +88,16 @@ ...@@ -67,14 +88,16 @@
(map dl/lemma @sentences) (map dl/lemma @sentences)
(map dl/lemma sentences-one) (map dl/lemma sentences-one)
(map dl/lemma @sentences-two) (map dl/lemma @sentences-two)
(dl/tokens (nth sentences-one 1)) (dl/text (dl/tokens (nth sentences-one 1)))
(map dl/tokens @sentences-two) (map dl/tokens @sentences-two)
(->> (mapcat dl/triples @sentences) (map triple->datalog)) (->> (mapcat dl/triples @sentences) (map triple->datalog))
(dl/annotation CorefCoreAnnotations$CorefChainAnnotation @annotated-example) (dl/annotation CorefCoreAnnotations$CorefChainAnnotation @annotated-example)
(show-mentions) (show-dependencies)
sentences-one (dl/text sentences-one)
(dl/triples sentences-one)
(dl/triples (dl/dependency-graph (nth sentences-one 1)))
;; Datafy the annotations. Retrieves direct annotations for every sentence. ;; Datafy the annotations. Retrieves direct annotations for every sentence.
;; Keep in mind that `dl/recur-datafy` currently doesn't work in this instance ;; Keep in mind that `dl/recur-datafy` currently doesn't work in this instance
......
...@@ -71,7 +71,7 @@ ...@@ -71,7 +71,7 @@
(let [shutdown-trigger (promise) (let [shutdown-trigger (promise)
_bye-testing-hack (future (Thread/sleep 6000) (deliver shutdown-trigger true))] _bye-testing-hack (future (Thread/sleep 6000) (deliver shutdown-trigger true))]
(log/info "Dialect Detector started up.") (log/info "Dialect Detector started up.")
(nlp/show-mentions) (nlp/show-dependencies)
@shutdown-trigger @shutdown-trigger
(log/info "Dialect Detector shutting down...")) (log/info "Dialect Detector shutting down..."))
(catch Exception e (catch Exception e
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment