Improved detect-sentence-dialect, so it can take a vector of predicates, and...

Improved detect-sentence-dialect, so it can take a vector of predicates, and test the given sentence against them, returning once any of them gives a truthy response

Improved detect-sentence-dialect, so it can take a vector of predicates, and...
Improved detect-sentence-dialect, so it can take a vector of predicates, and test the given sentence against them, returning once any of them gives a truthy response
4c257f45 · Jonathan Poalses · a9ba2fdc · 4c257f45
Commit 4c257f45 authored May 03, 2023 by Jonathan Poalses
Show whitespace changes
Inline Side-by-side

Showing with 16 additions and 12 deletions

dialect_nlp.clj src/poalses/jonathan/dialect/dialect_nlp.clj +16 -12

No files found.
--- a/src/poalses/jonathan/dialect/dialect_nlp.clj
+++ b/src/poalses/jonathan/dialect/dialect_nlp.clj
@@ -50,29 +50,33 @@

 (def american-words #{"like"})

-;; Predicate sets to check a sentence and see if it grammatically matches a dialect
+(defn fake-test
+  [fake]
+  false)

-(def australian-predicates #{})
+;; Predicate vectors to check a sentence and see if it grammatically matches a dialect

-(def scottish-predicates #{})
+(def australian-predicates [fake-test])

-(def american-predicates #{})
+(def scottish-predicates [fake-test])
+
+(def american-predicates [fake-test])


 ;; Take a sentence and figure out its dialect

 (defn detect-sentence-dialect [sentence]
  (let [tokens    (dl/tokens sentence)
-        dialects1 (when (some australian-words (dl/text tokens)) [:australian])
-        dialects2 (when (some scottish-words (dl/text tokens)) [:scottish])
-        dialects3 (when (some american-words (dl/text tokens)) [:american])
-        dialects (remove nil? (flatten (conj dialects1 dialects2 dialects3)))]
+        dialects1 (if (some australian-words (dl/text tokens)) :australian (when ((apply some-fn australian-predicates) sentence) :australian))
+        dialects2 (if (some scottish-words (dl/text tokens)) :scottish (when ((apply some-fn scottish-predicates) sentence) :scottish))
+        dialects3 (if (some american-words (dl/text tokens)) :american (when ((apply some-fn american-predicates) sentence) :american))
+        dialects  (remove nil? [dialects1 dialects2 dialects3])]
    (if (empty? dialects) [:standard] dialects)))


 ;; Take a text sample and separate it into its sentences, then for each sentence find its dialects, and return the most common dialect
 ;; A sentence can have an indeterminate number of dialects associated with it, as detect-sentence-dialect can return a collection,
-;;when no dialect can be detected it defaults to standard. (IE if there's a sample with 3 sentences, one reads as scottish,
+;;when no dialect can be detected it defaults to standard. IE if there's a sample with 3 sentences, one reads as scottish,
 ;;one reads as scottish and australian, and the last reads as nothing, it will return a collection containing 2 scottish keys,
 ;;one australian key, and one standard key, meaning it would be seen as a scottish sample.