hacker-quotes/english.go

527 lines
11 KiB
Go

package hacker
import (
"encoding/json"
"fmt"
"io/ioutil"
"math/rand"
"strings"
"github.com/zorchenhimer/hacker-quotes/database"
"github.com/zorchenhimer/hacker-quotes/models"
)
// InitialData is the shape of the JSON seed file decoded by InitData.
//
// Every row of the four word lists is expected to hold two strings: a set of
// single-letter flags followed by the word itself. InitData looks for these
// flag letters:
//
//	Adjectives: "a" (Absolute), "e" (AppendEst), "m" (AppendMore)
//	Nouns:      "m" (Multiple), "b" (Begin), "e" (End), "a" (Alone), "r" (Regular)
//	Verbs:      "r" (Regular)
//	Pronouns:   "p" (Plural)
type InitialData struct {
	Adjectives, Nouns, Verbs, Pronouns [][]string

	// Sentences holds the sentence templates handed to the database as-is.
	Sentences []string
}
// english generates English sentences. Its methods fetch word IDs, words
// and sentence templates from db.
type english struct {
// db is the store queried by the random* helpers and seeded by InitData.
db database.DB
}
// NewEnglish builds an English generator backed by db. The returned error is
// always nil in the current implementation.
func NewEnglish(db database.DB) (HackerQuotes, error) {
	generator := new(english)
	generator.db = db
	return generator, nil
}
/*
Sentence format

{word_type:options}
{{word_type:new word:properties}}

Example templates:

{pronoun} can't {verb:i,present} {noun_phrase}, it {verb:it,future} {noun_phrase}!
{verb:you,present} {noun_phrase:definite}, then you can {verb:you,present} {noun_phrase:definite}!
{noun_phrase} {verb}. With {noun_phrase:indefinite,noadj,compound}!
*/
// Hack picks a random sentence template via randomSentence and returns the
// result of expanding it with HackThis. A failure to fetch the template is
// returned unchanged.
func (g *english) Hack() (string, error) {
	template, err := g.randomSentence()
	if err != nil {
		return "", err
	}

	return g.HackThis(template)
}
// HackThis expands the placeholders in fmtString and returns the resulting
// text, capitalized by toCap.
//
// The template is scanned left to right. Text outside braces is copied
// verbatim by consumeRaw, "{...}" is handed to consumeWord, and "{{...}}" is
// handed to consumeNewWord. The first error from any of those aborts the
// expansion and is returned with an empty string.
//
// An empty template, or one that expands to nothing, yields "" and a nil
// error.
func (g *english) HackThis(fmtString string) (string, error) {
	output := &strings.Builder{}

	idx := 0
	for idx < len(fmtString) {
		var (
			next int
			err  error
		)

		switch {
		case fmtString[idx] != '{':
			next, err = g.consumeRaw(fmtString, idx, output)
		// Only look at the following byte when there is one; a template
		// ending in a lone "{" falls through to consumeWord, which reports
		// the unclosed definition instead of indexing out of range here.
		case idx+1 < len(fmtString) && fmtString[idx+1] == '{':
			next, err = g.consumeNewWord(fmtString, idx, output)
		default:
			next, err = g.consumeWord(fmtString, idx, output)
		}
		if err != nil {
			return "", err
		}
		idx = next
	}

	// Nothing to capitalize; avoid handing an empty string to toCap.
	if output.Len() == 0 {
		return "", nil
	}
	return toCap(output.String()), nil
}
// consumeRaw copies literal template text, starting at idx and stopping just
// before the next '{', into output. It returns the index of that '{', or
// len(fmtString) when no further '{' exists. The error is always nil.
func (g *english) consumeRaw(fmtString string, idx int, output *strings.Builder) (int, error) {
	rest := fmtString[idx:]

	brace := strings.IndexByte(rest, '{')
	if brace < 0 {
		output.WriteString(rest)
		return len(fmtString), nil
	}

	output.WriteString(rest[:brace])
	return idx + brace, nil
}
// consumeNewWord is the handler for "{{...}}" definitions. It is a stub:
// it writes nothing and always fails with "not implemented".
func (g *english) consumeNewWord(fmtString string, idx int, output *strings.Builder) (int, error) {
	err := fmt.Errorf("not implemented")
	return 0, err
}
// consumeWord expands a single "{word_type}" or "{word_type:opt1,opt2}"
// definition whose opening brace sits at idx, writes the generated text to
// output, and returns the index just past the closing brace.
//
// Recognized word types and the options they look for:
//
//	pronoun      plural
//	verb         i | you | it | we | they   (first match wins, default i)
//	             present | past | future    (first match wins, default present)
//	             invert
//	noun         plural, compound
//	noun_phrase  indefinite, noadj, plural, compound
//	adjective    (none)
//
// Options are compared exactly as written between the commas; anything not
// listed above is ignored. An unclosed definition, an empty word type, an
// unknown word type, or a failure in the underlying generator returns 0 and
// an error.
func (g *english) consumeWord(fmtString string, idx int, output *strings.Builder) (int, error) {
	// Step past the opening brace; positions in error messages refer to the
	// first byte after it.
	idx++

	closing := strings.Index(fmtString[idx:], "}")
	if closing == -1 {
		return 0, fmt.Errorf("[consumeWord] Unclosed definition starting at %d", idx)
	}
	end := idx + closing

	// Split "type:options" at the first colon; without one the whole
	// definition is the word type.
	definition := fmtString[idx:end]
	wordtype, options := definition, ""
	if colon := strings.Index(definition, ":"); colon != -1 {
		wordtype = definition[:colon]
		options = definition[colon+1:]
	}
	if wordtype == "" {
		return 0, fmt.Errorf("[consumeWord] Missing word type at idx: %d", idx)
	}

	opts := strings.Split(options, ",")
	has := func(name string) bool { return sliceContains(opts, name) }

	var (
		word string
		err  error
	)
	switch wordtype {
	case "pronoun":
		word, err = g.randomPronoun(has("plural"))

	case "verb":
		var ct models.ConjugationType = models.CT_I
		switch {
		case has("i"):
			ct = models.CT_I
		case has("you"):
			ct = models.CT_You
		case has("it"):
			ct = models.CT_It
		case has("we"):
			ct = models.CT_We
		case has("they"):
			ct = models.CT_They
		}

		var cm models.ConjugationTime = models.CM_Present
		switch {
		case has("present"):
			cm = models.CM_Present
		case has("past"):
			cm = models.CM_Past
		case has("future"):
			cm = models.CM_Future
		}

		word, err = g.randomVerb(ct, cm, has("invert"))

	case "noun":
		word, err = g.randomNoun(has("plural"), has("compound"))

	case "noun_phrase":
		// Phrases are definite and carry an adjective unless told otherwise.
		word, err = g.nounPhrase(!has("indefinite"), !has("noadj"), has("plural"), has("compound"))

	case "adjective":
		word, err = g.randomAdjective()

	default:
		return 0, fmt.Errorf("[consumeWord] Invalid word type %s at %d", wordtype, idx)
	}
	if err != nil {
		return 0, err
	}

	output.WriteString(word)
	return end + 1, nil
}
// nounPhrase builds "[article] [adjective] noun".
//
// hasAdj adds a random adjective in front of the noun; plural and compound
// are forwarded to randomNoun. Plural phrases never get an article. Singular
// phrases get "the" when definite, otherwise the article chosen by ana.
func (g *english) nounPhrase(definite, hasAdj, plural, compound bool) (string, error) {
	var adjective string
	if hasAdj {
		picked, err := g.randomAdjective()
		if err != nil {
			return "", err
		}
		adjective = picked
	}

	phrase, err := g.randomNoun(plural, compound)
	if err != nil {
		return "", err
	}
	if adjective != "" {
		phrase = adjective + " " + phrase
	}

	switch {
	case plural:
		return phrase, nil
	case definite:
		return "the " + phrase, nil
	default:
		return g.ana(phrase), nil
	}
}
// randomAdjective returns the Word of an adjective chosen at random from the
// IDs reported by the database. It fails when the ID lookup fails, when no
// IDs are returned, or when the chosen adjective cannot be loaded.
func (g *english) randomAdjective() (string, error) {
	adjIDs, err := g.db.GetAdjectiveIds()
	if err != nil {
		return "", fmt.Errorf("[adj] get IDs error: %v", err)
	}
	if len(adjIDs) == 0 {
		return "", fmt.Errorf("No adjective IDs returned from database")
	}

	chosen := adjIDs[rand.Int63n(int64(len(adjIDs)))]
	adjective, err := g.db.GetAdjective(chosen)
	if err != nil {
		return "", fmt.Errorf("[adj] ID: %d; %v", chosen, err)
	}
	return adjective.Word, nil
}
// randomNoun returns a noun chosen at random from the IDs reported by the
// database: noun.Plural() when plural is set, noun.Word otherwise.
//
// compound selects which ID set is requested: GetNounIds(true, true, true)
// when set, GetNounIds(true, false, false) when not.
func (g *english) randomNoun(plural, compound bool) (string, error) {
	nounIDs, err := g.db.GetNounIds(true, compound, compound)
	if err != nil {
		return "", fmt.Errorf("[noun] get IDs error: %v", err)
	}
	if len(nounIDs) == 0 {
		return "", fmt.Errorf("No noun IDs returned from database")
	}

	chosen := nounIDs[rand.Int63n(int64(len(nounIDs)))]
	noun, err := g.db.GetNoun(chosen)
	if err != nil {
		return "", fmt.Errorf("[noun] ID: %d; %v", chosen, err)
	}

	if !plural {
		return noun.Word, nil
	}
	return noun.Plural(), nil
}
// randomVerb picks a verb at random from the IDs reported by the database and
// returns the result of verb.Conjugate(ctype, ctime, invert). It fails when
// the ID lookup fails, when no IDs are returned, or when the chosen verb
// cannot be loaded.
func (g *english) randomVerb(ctype models.ConjugationType, ctime models.ConjugationTime, invert bool) (string, error) {
	verbIDs, err := g.db.GetVerbIds()
	if err != nil {
		return "", fmt.Errorf("[verb] get IDs error: %v", err)
	}
	if len(verbIDs) == 0 {
		return "", fmt.Errorf("No verb IDs returned from database")
	}

	chosen := verbIDs[rand.Int63n(int64(len(verbIDs)))]
	verb, err := g.db.GetVerb(chosen)
	if err != nil {
		return "", fmt.Errorf("[verb] ID: %d; %v", chosen, err)
	}
	return verb.Conjugate(ctype, ctime, invert), nil
}
// randomPronoun returns the Word of a pronoun chosen at random from the IDs
// the database reports for the given plural flag. It fails when the ID lookup
// fails, when no IDs are returned, or when the chosen pronoun cannot be
// loaded.
func (g *english) randomPronoun(plural bool) (string, error) {
	pronounIDs, err := g.db.GetPronounIds(plural)
	if err != nil {
		return "", fmt.Errorf("[pronoun] get IDs error: %v", err)
	}
	if len(pronounIDs) == 0 {
		return "", fmt.Errorf("No pronoun IDs returned from database")
	}

	chosen := pronounIDs[rand.Int63n(int64(len(pronounIDs)))]
	pronoun, err := g.db.GetPronoun(chosen)
	if err != nil {
		return "", fmt.Errorf("[pronoun] ID: %d; %v", chosen, err)
	}
	return pronoun.Word, nil
}
// randomSentence returns a sentence template chosen at random from the IDs
// reported by the database. It fails when the ID lookup fails, when no IDs
// are returned, or when the chosen sentence cannot be loaded.
func (g *english) randomSentence() (string, error) {
	sentenceIDs, err := g.db.GetSentenceIds()
	if err != nil {
		return "", fmt.Errorf("[sentence] get IDs error: %v", err)
	}
	if len(sentenceIDs) == 0 {
		return "", fmt.Errorf("[sentence] No sentence IDs returned from database")
	}

	chosen := sentenceIDs[rand.Int63n(int64(len(sentenceIDs)))]
	template, err := g.db.GetSentence(chosen)
	if err != nil {
		return "", fmt.Errorf("[sentence] ID: %d, %v", chosen, err)
	}
	return template, nil
}
// InitData seeds the database from the JSON file at filename.
//
// The file is decoded into InitialData. Every row of the adjective, noun,
// verb and pronoun lists must hold at least two strings: a set of
// single-letter flags followed by the word. The flags are translated into the
// boolean fields of the corresponding models type, and the converted lists
// are passed to the database's InitData together with the sentence templates.
//
// An error is returned when the database is nil, the file cannot be read or
// decoded, any of the five lists is missing or empty, a row has fewer than
// two elements, or the database rejects the data.
func (g *english) InitData(filename string) error {
	fmt.Printf("Initializing database with data in %q\n", filename)

	if g.db == nil {
		return fmt.Errorf("databse is nil!")
	}

	raw, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}

	data := InitialData{}
	if err = json.Unmarshal(raw, &data); err != nil {
		return err
	}

	// splitRow validates one [flags, word] row so that malformed input is
	// reported as an error instead of panicking on an out-of-range index.
	splitRow := func(kind string, pos int, row []string) (string, string, error) {
		if len(row) < 2 {
			return "", "", fmt.Errorf("malformed %s entry at index %d: expected [flags, word], got %d value(s)", kind, pos, len(row))
		}
		return row[0], row[1], nil
	}

	if len(data.Adjectives) == 0 {
		return fmt.Errorf("Missing Adjectives in input data")
	}
	adjectives := make([]models.Adjective, 0, len(data.Adjectives))
	for i, row := range data.Adjectives {
		flags, word, err := splitRow("adjective", i, row)
		if err != nil {
			return err
		}

		a := models.Adjective{Word: word}
		if strings.Contains(flags, "a") {
			a.Absolute = true
		}
		if strings.Contains(flags, "e") {
			a.AppendEst = true
		}
		if strings.Contains(flags, "m") {
			a.AppendMore = true
		}
		adjectives = append(adjectives, a)
	}

	if len(data.Nouns) == 0 {
		return fmt.Errorf("Missing nouns key in data")
	}
	nouns := make([]models.Noun, 0, len(data.Nouns))
	for i, row := range data.Nouns {
		flags, word, err := splitRow("noun", i, row)
		if err != nil {
			return err
		}

		n := models.Noun{Word: word}
		if strings.Contains(flags, "m") {
			n.Multiple = true
		}
		if strings.Contains(flags, "b") {
			n.Begin = true
		}
		if strings.Contains(flags, "e") {
			n.End = true
		}
		if strings.Contains(flags, "a") {
			n.Alone = true
		}
		if strings.Contains(flags, "r") {
			n.Regular = true
		}
		nouns = append(nouns, n)
	}

	if len(data.Verbs) == 0 {
		return fmt.Errorf("Missing verbs key in data")
	}
	verbs := make([]models.Verb, 0, len(data.Verbs))
	for i, row := range data.Verbs {
		flags, word, err := splitRow("verb", i, row)
		if err != nil {
			return err
		}

		v := models.Verb{Word: word}
		if strings.Contains(flags, "r") {
			v.Regular = true
		}
		verbs = append(verbs, v)
	}

	if len(data.Pronouns) == 0 {
		return fmt.Errorf("Missing pronouns key in data")
	}
	pronouns := make([]models.Pronoun, 0, len(data.Pronouns))
	for i, row := range data.Pronouns {
		flags, word, err := splitRow("pronoun", i, row)
		if err != nil {
			return err
		}

		p := models.Pronoun{Word: word}
		if strings.Contains(flags, "p") {
			p.Plural = true
		}
		pronouns = append(pronouns, p)
	}

	if len(data.Sentences) == 0 {
		return fmt.Errorf("Missing sentences key in data")
	}

	return g.db.InitData(adjectives, nouns, verbs, pronouns, data.Sentences)
}
// ana prepends an indefinite article to phrase: "an " when the phrase starts
// with an ASCII vowel (either case), "a " otherwise. An empty phrase is
// returned unchanged, with no article.
//
// The choice is made on spelling alone, so words whose sound disagrees with
// their first letter (for example "hour") get the spelling-based article.
func (g *english) ana(phrase string) string {
	if phrase == "" {
		return phrase
	}

	if strings.IndexByte("aeiouAEIOU", phrase[0]) >= 0 {
		return "an " + phrase
	}
	return "a " + phrase
}
// toCap capitalizes the first character of the input and the first character
// following every ". " (period, space) in it. Other sentence terminators are
// not treated as boundaries.
//
// The first character is handled as a whole rune, so multi-byte UTF-8
// characters are upper-cased rather than split. An empty input yields an
// empty string.
func toCap(words string) string {
	const boundary = ". "

	// upperFirst upper-cases only the leading rune of s.
	upperFirst := func(s string) string {
		for i := range s {
			// The second rune start marks the end of the first rune.
			if i > 0 {
				return strings.ToUpper(s[:i]) + s[i:]
			}
		}
		// Zero or one rune: upper-case all of it.
		return strings.ToUpper(s)
	}

	var out strings.Builder
	out.Grow(len(words))

	// Peel off one sentence (including its trailing ". ") per iteration.
	for rest := words; ; {
		cut := strings.Index(rest, boundary)
		if cut == -1 {
			out.WriteString(upperFirst(rest))
			return out.String()
		}
		cut += len(boundary)
		out.WriteString(upperFirst(rest[:cut]))
		rest = rest[cut:]
	}
}
// sliceContains reports whether needle is equal to any element of haystack.
// A nil or empty haystack never contains anything.
func sliceContains(haystack []string, needle string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}