Some great work on translation
This commit is contained in:
parent
ed1db72046
commit
e9fe0c5af8
|
@ -1,4 +1,4 @@
|
|||
import re, sequtils, strutils
|
||||
import re, sequtils, strscans, strutils
|
||||
import words
|
||||
|
||||
const
|
||||
|
@ -10,6 +10,7 @@ const
|
|||
kili = fruit/fruit
|
||||
lili = small thing
|
||||
meli = woman/women
|
||||
mi = I/we/me/us
|
||||
mije = man/men
|
||||
suli = big thing
|
||||
telo = liquid
|
||||
|
@ -35,7 +36,7 @@ const
|
|||
"""
|
||||
|
||||
let
  # Dictionary line formats. Every pattern captures the toki pona word before
  # " = " and then a slash-separated list of English forms; trailing forms are
  # optional and come back as "" when absent.
  #
  # Noun entry: word = first[/second[/third[/fourth]]]
  # (only this four-form pattern is kept; the earlier two-form `nounPattern`
  # bound the same name a second time inside this section).
  nounPattern = re"(.+?) = ([^/]+)(?:/([^/]+))?(?:/([^/]+))?(?:/([^/]+))?"
  # Verb entry: word = first[/second[/third]]
  verbPattern = re"(.+?) = ([^/]+)(?:/([^/]+))?(?:/([^/]+))?"
  # Matches text starting with "be " and captures whatever follows it.
  copulaPattern = re"be (.*)"
|
||||
|
||||
|
@ -47,6 +48,14 @@ proc parseNoun(line: string): Noun =
|
|||
matches[1] & "s"
|
||||
else:
|
||||
matches[2]
|
||||
result.enSgAcc = if matches[3] == "":
|
||||
result.enSg
|
||||
else:
|
||||
matches[3]
|
||||
result.enPlAcc = if matches[4] == "":
|
||||
result.enPl
|
||||
else:
|
||||
matches[4]
|
||||
else:
|
||||
raise newException(ValueError, "Invalid noun")
|
||||
|
||||
|
@ -78,10 +87,6 @@ proc parseVerb(line: string): Verb =
|
|||
raise newException(ValueError, "Invalid verb")
|
||||
|
||||
let
  # Exported dictionary tables, built once at module initialisation by
  # stripping each raw text block and parsing it line by line.
  # `splitLines` is used instead of `split("\n")` so CR and CRLF line endings
  # do not leak into the parsed entries. The earlier `split("\n")` definitions
  # and the debug `echo` dumps of the tables are dropped: they bound the same
  # names twice and printed the whole dictionary whenever the module was
  # imported.
  nouns* = nounsRaw.strip.splitLines.map(parseNoun)
  # Transitive verbs.
  verbsTr* = verbsTrRaw.strip.splitLines.map(parseVerb)
  # Intransitive verbs.
  verbsIn* = verbsInRaw.strip.splitLines.map(parseVerb)
|
||||
|
|
2
test.nim
2
test.nim
|
@ -1,3 +1,3 @@
|
|||
import translator
|
||||
|
||||
echo translate("mi jo e kili. kili li lili.")
|
||||
echo translate("mi jo e kili. kili li lili. mi moku e kili. kili li pona.")
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
import re, sequtils, strutils
|
||||
import re, sequtils, sets, strutils
|
||||
import dictionary, words
|
||||
|
||||
type Context = ref object
  ## Translation state shared by reference between the phrase translators
  ## while one text is being processed.
  seenNouns: HashSet[string]
    ## toki pona nouns that have already been translated once; consulted to
    ## choose between the articles "a" and "the".
|
||||
|
||||
let
|
||||
sentenceSeparator = re"(?<=[.?!])\s+"
|
||||
|
@ -12,19 +17,57 @@ let
|
|||
(?:\se\s(.+?))?
|
||||
$"""
|
||||
|
||||
proc translateSentence(sentence: string): string =
|
||||
proc translateNounPhrase(phrase: string, context: Context,
                         form: NounForm): string =
  ## Translates a toki pona noun phrase into English.
  ##
  ## The first entry of `nouns` whose toki pona word is the leading *whole
  ## word* of `phrase` is used as the head noun. The result is
  ## "<article> <remainder> <noun>" with empty parts left out, where:
  ##
  ## * `<article>` is "a" the first time the noun is encountered through
  ##   `context` and "the" on every later encounter (the noun is recorded in
  ##   `context.seenNouns` as a side effect);
  ## * `<remainder>` is whatever follows the head noun in `phrase`, trimmed
  ##   but otherwise passed through untranslated;
  ## * `<noun>` is the English form of the noun selected by `form`.
  ##
  ## If no dictionary noun matches, `phrase` is returned unchanged.
  for noun in nouns:
    let headLen = noun.tp.len
    # Match whole words only: a bare prefix test would let a short entry
    # such as `mi` swallow the start of a longer word such as `mije`.
    if phrase.startsWith(noun.tp) and
        (phrase.len == headLen or phrase[headLen] in Whitespace):
      let
        nounTranslated = noun[form]
        # Trim the separator left behind after the head noun so the joined
        # result does not contain doubled spaces.
        restTranslated = phrase[headLen..^1].strip
        article =
          if noun.tp in context.seenNouns:
            "the"
          else:
            # First mention: remember the noun so later mentions get "the".
            context.seenNouns.incl(noun.tp)
            "a"
      return [article, restTranslated, nounTranslated]
        .filterIt(it != "")
        .join(" ")
  phrase
|
||||
|
||||
proc translateVerbPhrase(phrase: string, context: Context, form: VerbForm,
                         transitive: bool): string =
  ## Translates a toki pona verb phrase into English.
  ##
  ## The verb is looked up in `verbsTr` when `transitive` is true and in
  ## `verbsIn` otherwise. The first entry whose toki pona word is the leading
  ## *whole word* of `phrase` is used. The result is "<remainder> <verb>" with
  ## empty parts left out, where `<remainder>` is the trimmed, untranslated
  ## text following the verb and `<verb>` is the English form selected by
  ## `form`.
  ##
  ## `context` is accepted for symmetry with `translateNounPhrase` and is not
  ## read here. If no dictionary verb matches, `phrase` is returned unchanged.
  let verbs =
    if transitive: verbsTr
    else: verbsIn
  for verb in verbs:
    let headLen = verb.tp.len
    # Match whole words only, so a short verb cannot match the beginning of
    # a longer, unrelated word.
    if phrase.startsWith(verb.tp) and
        (phrase.len == headLen or phrase[headLen] in Whitespace):
      let
        verbTranslated = verb[form]
        # Trim the separator left after the verb to avoid doubled spaces.
        restTranslated = phrase[headLen..^1].strip
      return [restTranslated, verbTranslated]
        .filterIt(it != "")
        .join(" ")
  phrase
|
||||
|
||||
proc translateSentence(sentence: string, context: Context): string =
  ## Translates a single toki pona sentence into English.
  ##
  ## Everything matched by `ignoredChars` is removed first. If the cleaned
  ## sentence matches `sentenceStructure`, its subject, predicate and optional
  ## direct object are translated separately and joined with single spaces,
  ## skipping empty parts. A sentence that does not match is returned as the
  ## cleaned text, untranslated.
  ##
  ## `context` is passed to the phrase translators, which use it to track the
  ## nouns that have already been mentioned.
  let sentence = sentence.replace(ignoredChars, "")
  if sentence =~ sentenceStructure:
    let
      # The pattern exposes the subject through two capture groups; use
      # whichever one is non-empty.
      subject = if matches[0] == "": matches[1] else: matches[0]
      predicate = matches[2]
      directObject = matches[3]
      # NOTE(review): the subject is always rendered in the singular and the
      # verb in the third person singular, whatever the subject is - confirm
      # this is the intended interim behaviour.
      subjectTranslated = subject.translateNounPhrase(context, nfSg)
      # A verb counts as transitive exactly when a direct object was captured.
      predicateTranslated = predicate.translateVerbPhrase(
        context, vf3S, directObject != "")
      directObjectTranslated = directObject.translateNounPhrase(
        context, nfSgAcc)
    result = [subjectTranslated, predicateTranslated, directObjectTranslated]
      .filterIt(it != "")
      .join(" ")
  else:
    result = sentence
|
||||
|
||||
proc translate*(text: string): string =
  ## Translates a toki pona text into English.
  ##
  ## `text` is split into sentences at `sentenceSeparator`, each sentence is
  ## translated with `translateSentence`, and the results are joined with
  ## " / ".
  ##
  ## A fresh `Context` is created per call and shared by all sentences of the
  ## text, so a noun introduced in one sentence is treated as already seen in
  ## the following ones.
  let context = Context()
  text.split(sentenceSeparator)
    .mapIt(it.translateSentence(context))
    .join(" / ")
|
||||
|
|
|
@ -3,9 +3,13 @@ type
|
|||
tp*: string
|
||||
enSg*: string
|
||||
enPl*: string
|
||||
enSgAcc*: string
|
||||
enPlAcc*: string
|
||||
NounForm* = enum
|
||||
nfSg
|
||||
nfPl
|
||||
nfSgAcc
|
||||
nfPlAcc
|
||||
Verb* = object
|
||||
tp*: string
|
||||
en1S*: string
|
||||
|
@ -13,7 +17,7 @@ type
|
|||
enPl*: string
|
||||
enPastSg*: string
|
||||
enPastPl*: string
|
||||
VerbForm = enum
|
||||
VerbForm* = enum
|
||||
vf1S
|
||||
vf3S
|
||||
vfPl
|
||||
|
@ -24,6 +28,8 @@ func `[]`*(noun: Noun, form: NounForm): string =
|
|||
case form
|
||||
of nfSg: noun.enSg
|
||||
of nfPl: noun.enPl
|
||||
of nfSgAcc: noun.enSgAcc
|
||||
of nfPlAcc: noun.enPlAcc
|
||||
|
||||
func `[]`*(verb: Verb, form: VerbForm): string =
|
||||
case form
|
||||
|
|
Loading…
Reference in New Issue