Some great work on translation

This commit is contained in:
xigoi 2020-08-13 19:04:59 +02:00
parent ed1db72046
commit e9fe0c5af8
4 changed files with 72 additions and 18 deletions

View File

@ -1,4 +1,4 @@
import re, sequtils, strutils
import re, sequtils, strscans, strutils
import words
const
@ -10,6 +10,7 @@ const
kili = fruit/fruit
lili = small thing
meli = woman/women
mi = I/we/me/us
mije = man/men
suli = big thing
telo = liquid
@ -35,7 +36,7 @@ const
"""
let
  # Dictionary line formats. Group 1 is always the source-language (`tp`)
  # word; the remaining groups are slash-separated English forms.
  #
  # Nouns: `tp = singular[/plural[/object singular[/object plural]]]`.
  # Only one binding is kept: the earlier two-form pattern was shadowed by
  # this one (a redefinition error) and could never fill groups 4 and 5.
  nounPattern = re"(.+?) = ([^/]+)(?:/([^/]+))?(?:/([^/]+))?(?:/([^/]+))?"
  # Verbs: `tp = form[/form[/form]]`.
  verbPattern = re"(.+?) = ([^/]+)(?:/([^/]+))?(?:/([^/]+))?"
  # Captures whatever follows a leading "be ".
  copulaPattern = re"be (.*)"
@ -47,6 +48,14 @@ proc parseNoun(line: string): Noun =
matches[1] & "s"
else:
matches[2]
result.enSgAcc = if matches[3] == "":
result.enSg
else:
matches[3]
result.enPlAcc = if matches[4] == "":
result.enPl
else:
matches[4]
else:
raise newException(ValueError, "Invalid noun")
@ -78,10 +87,6 @@ proc parseVerb(line: string): Verb =
raise newException(ValueError, "Invalid verb")
# Dictionary tables, parsed once when the module is loaded. Each raw table
# holds one entry per line; `splitLines` also accepts CR-LF line endings.
# The stale `split("\n")` bindings of the same names (a redefinition error)
# and the debug `echo`s that dumped every table on import have been removed.
let
  nouns* = nounsRaw.strip.splitLines.map(parseNoun)
  verbsTr* = verbsTrRaw.strip.splitLines.map(parseVerb)
  verbsIn* = verbsInRaw.strip.splitLines.map(parseVerb)

View File

@ -1,3 +1,3 @@
import translator
# Manual smoke checks: print the translation of two sample texts.
echo translate("mi jo e kili. kili li lili.")
echo translate("mi jo e kili. kili li lili. mi moku e kili. kili li pona.")

View File

@ -1,4 +1,9 @@
import re, sequtils, strutils
import re, sequtils, sets, strutils
import dictionary, words
type
Context = ref object
## Mutable state shared by every sentence of one `translate` call.
seenNouns: HashSet[string] ## `tp` words of the nouns that already received an article
let
sentenceSeparator = re"(?<=[.?!])\s+"
@ -12,19 +17,57 @@ let
(?:\se\s(.+?))?
$"""
proc translateSentence(sentence: string): string =
proc translateNounPhrase(phrase: string, context: Context, form: NounForm): string =
  ## Translates a source-language noun phrase into English.
  ##
  ## The first entry of `nouns` whose `tp` word heads `phrase` as a whole
  ## word is rendered in `form`. It is preceded by "a" the first time that
  ## noun is met in `context` (which is then recorded) and by "the" on every
  ## later occurrence. Whatever follows the head word is not translated yet;
  ## it is passed through unchanged and placed between article and noun.
  ##
  ## Returns `phrase` unchanged when no dictionary noun heads it, which
  ## includes the empty phrase.
  for noun in nouns:
    let headLen = noun.tp.len
    # A bare prefix test is not enough: "mi" would claim "mije" and leave
    # "je" behind as a bogus modifier. Require end of phrase or whitespace
    # right after the head word.
    if phrase.startsWith(noun.tp) and
        (phrase.len == headLen or phrase[headLen] in Whitespace):
      let
        nounTranslated = noun[form]
        # Drop the separating space so the join below cannot double it.
        restTranslated = phrase[headLen..^1].strip
        article =
          if noun.tp in context.seenNouns:
            "the"
          else:
            context.seenNouns.incl(noun.tp)
            "a"
      return [article, restTranslated, nounTranslated]
        .filterIt(it != "")
        .join(" ")
  phrase
proc translateVerbPhrase(phrase: string, context: Context, form: VerbForm, transitive: bool): string =
  ## Translates a source-language verb phrase into English.
  ##
  ## Looks the head word up in `verbsTr` when `transitive` is true and in
  ## `verbsIn` otherwise, and renders the first matching verb in `form`.
  ## Whatever follows the head word is not translated yet; it is passed
  ## through unchanged and placed in front of the verb. `context` is accepted
  ## for symmetry with the noun translation but is not consulted here.
  ##
  ## Returns `phrase` unchanged when no dictionary verb heads it.
  let verbs = if transitive: verbsTr else: verbsIn
  for verb in verbs:
    let headLen = verb.tp.len
    # Match whole words only: a bare prefix test would let a short entry
    # claim a longer word that merely begins with the same letters.
    if phrase.startsWith(verb.tp) and
        (phrase.len == headLen or phrase[headLen] in Whitespace):
      let
        verbTranslated = verb[form]
        # Drop the separating space so the join below cannot double it.
        restTranslated = phrase[headLen..^1].strip
      return [restTranslated, verbTranslated]
        .filterIt(it != "")
        .join(" ")
  phrase
proc translateSentence(sentence: string, context: Context): string =
  ## Translates one sentence into English.
  ##
  ## `ignoredChars` is removed first, then the sentence is matched against
  ## `sentenceStructure`. The subject is capture group 1, or group 2 when
  ## group 1 is empty; the predicate is group 3 and the direct object is
  ## group 4. The parts are translated in that order (subject, predicate,
  ## object), which matters because noun translation records seen nouns in
  ## `context`. The verb is looked up as transitive exactly when a direct
  ## object is present. Only the singular / third-person forms are produced.
  ##
  ## A sentence that does not match the structure is returned with only the
  ## ignored characters removed.
  let sentence = sentence.replace(ignoredChars, "")
  if sentence =~ sentenceStructure:
    # The debug `echo`s and the dead `result = sentence` that used to sit in
    # the middle of this section are gone; all bindings share one `let`.
    let
      subject = if matches[0] == "": matches[1] else: matches[0]
      predicate = matches[2]
      directObject = matches[3]
      subjectTranslated = subject.translateNounPhrase(context, nfSg)
      predicateTranslated = predicate.translateVerbPhrase(
        context, vf3S, directObject != "")
      directObjectTranslated = directObject.translateNounPhrase(
        context, nfSgAcc)
    # Empty parts (for example a missing direct object) are dropped.
    result = [subjectTranslated, predicateTranslated, directObjectTranslated]
      .filterIt(it != "")
      .join(" ")
  else:
    result = sentence
proc translate*(text: string): string =
  ## Translates `text` sentence by sentence and joins the results with " / ".
  ##
  ## The text is split on `sentenceSeparator`. One fresh `Context` is shared
  ## by all sentences of the call, so a noun introduced in an earlier
  ## sentence is treated as already seen in the later ones.
  # The stale `map(translateSentence)` call was removed: it used the old
  # one-argument signature and its result was discarded.
  let context = Context()
  text.split(sentenceSeparator)
    .mapIt(it.translateSentence(context))
    .join(" / ")

View File

@ -3,9 +3,13 @@ type
tp*: string
enSg*: string
enPl*: string
enSgAcc*: string
enPlAcc*: string
NounForm* = enum
## Selects which English form of a `Noun` the `[]` accessor returns.
nfSg ## `enSg`
nfPl ## `enPl`
nfSgAcc ## `enSgAcc` (presumably the object-case singular, e.g. "me" -- confirm)
nfPlAcc ## `enPlAcc` (presumably the object-case plural, e.g. "us" -- confirm)
Verb* = object
tp*: string
en1S*: string
@ -13,7 +17,7 @@ type
enPl*: string
enPastSg*: string
enPastPl*: string
VerbForm = enum
VerbForm* = enum
vf1S
vf3S
vfPl
@ -24,6 +28,8 @@ func `[]`*(noun: Noun, form: NounForm): string =
case form
of nfSg: noun.enSg
of nfPl: noun.enPl
of nfSgAcc: noun.enSgAcc
of nfPlAcc: noun.enPlAcc
func `[]`*(verb: Verb, form: VerbForm): string =
case form