Txostenaren lehen bertsioa
This commit is contained in:
parent
810f9aefbb
commit
ea7cb3ade7
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
|
@ -0,0 +1,123 @@
|
|||
% Aho and Ullman (1972), volume 1 of the parsing/compiling textbook.
@book{Aho:72,
  author    = {Aho, Alfred V. and Ullman, Jeffrey D.},
  title     = {The Theory of Parsing, Translation and Compiling},
  volume    = {1},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1972},
}
|
||||
|
||||
% Corporate author: the inner braces keep the organisation name as one unit.
@book{APA:83,
  author    = {{American Psychological Association}},
  title     = {Publications Manual},
  publisher = {American Psychological Association},
  address   = {Washington, DC},
  year      = {1983},
}
|
||||
|
||||
% Chandra, Kozen and Stockmeyer (1981), journal article with DOI.
@article{Chandra:81,
  author  = {Chandra, Ashok K. and Kozen, Dexter C. and Stockmeyer, Larry J.},
  title   = {Alternation},
  journal = {Journal of the Association for Computing Machinery},
  volume  = {28},
  number  = {1},
  pages   = {114--133},
  year    = {1981},
  doi     = {10.1145/322234.322243},
}
|
||||
|
||||
% Andrew and Gao (2007), ICML paper; L1 is brace-protected against recasing.
@inproceedings{andrew2007scalable,
  author    = {Andrew, Galen and Gao, Jianfeng},
  title     = {Scalable training of {L1}-regularized log-linear models},
  booktitle = {Proceedings of the 24th International Conference on Machine Learning},
  pages     = {33--40},
  year      = {2007},
}
|
||||
|
||||
% Gusfield (1997), string algorithms textbook.
@book{Gusfield:97,
  author    = {Gusfield, Dan},
  title     = {Algorithms on Strings, Trees and Sequences},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {1997},
}
|
||||
|
||||
% Rasooli and Tetreault (2015), arXiv preprint cited as a CoRR article.
% The citation key is left exactly as it was so existing citations still resolve.
@article{rasooli-tetrault-2015,
  author  = {Rasooli, Mohammad Sadegh and Tetreault, Joel R.},
  title   = {Yara Parser: {A} Fast and Accurate Dependency Parser},
  journal = {Computing Research Repository},
  volume  = {arXiv:1503.06733},
  year    = {2015},
  url     = {http://arxiv.org/abs/1503.06733},
  note    = {version 2},
}
|
||||
|
||||
% Ando and Zhang (2005), JMLR article. Field names are lower-cased (BibTeX
% treats them case-insensitively); the extra export fields are kept as-is.
@article{Ando2005,
  author     = {Ando, Rie Kubota and Zhang, Tong},
  title      = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
  journal    = {Journal of Machine Learning Research},
  volume     = {6},
  pages      = {1817--1853},
  month      = dec,
  year       = {2005},
  publisher  = {JMLR.org},
  issn       = {1532-4435},
  acmid      = {1194905},
  issue_date = {12/1/2005},
  numpages   = {37},
}
|
||||
|
||||
% Blog post, so it is typed as misc: the article type requires a journal field
% (missing here) and ignores publisher. The venue moves to howpublished and the
% month is taken from the date embedded in the URL path (2020/01).
@misc{meena2020,
  author       = {Adiwardana, Daniel and Luong, Thang},
  title        = {Towards a Conversational Agent that Can Chat About...Anything},
  howpublished = {Google AI Blog},
  month        = jan,
  year         = {2020},
  url          = {https://ai.googleblog.com/2020/01/towards-conversational-agent-that-can.html},
}
|
||||
|
||||
% Course slides. "slides" is not a standard BibTeX entry type, so the entry is
% typed as misc and the hosting platform moves from publisher (ignored by misc)
% to howpublished. The author is written in "Last, First" form so the compound
% surname is not split after its first word.
% NOTE(review): assumes "Oronoz Anchordoqui" is the full surname -- confirm.
@misc{maitehp2020,
  author       = {Oronoz Anchordoqui, Maite},
  title        = {Itzulpen automatikoa},
  howpublished = {Lecture slides, Egela},
  year         = {2020},
}
|
||||
|
||||
% arXiv preprint. The original was typed as an article without a journal, named
% "Google Blog" as publisher, and carried the arXiv id in the volume field.
% It now uses the same eprint/archivePrefix/primaryClass layout as the
% bahdanau2014neural entry in this file; the URL is kept unchanged.
@misc{sutskever2014,
  author        = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc Viet},
  title         = {Sequence to sequence learning with neural networks},
  year          = {2014},
  eprint        = {1409.3215},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {http://arxiv.org/abs/1409.3215},
}
|
||||
|
||||
% BLEU paper (ACL 2002). The required booktitle field was empty, which makes
% BibTeX warn and prints a venue-less reference; venue, publisher, month and
% DOI are filled in and the acronym is brace-protected against recasing.
@inproceedings{Papineni02bleu:a,
  author    = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
  title     = {{BLEU}: a Method for Automatic Evaluation of Machine Translation},
  booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
  address   = {Philadelphia, Pennsylvania, USA},
  publisher = {Association for Computational Linguistics},
  month     = jul,
  year      = {2002},
  pages     = {311--318},
  doi       = {10.3115/1073083.1073135},
}
|
||||
|
||||
% Bahdanau, Cho and Bengio (2014), arXiv preprint described via eprint fields.
@misc{bahdanau2014neural,
  author        = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title         = {Neural Machine Translation by Jointly Learning to Align and Translate},
  year          = {2014},
  eprint        = {1409.0473},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
|
||||
|
||||
% News article. Fixes the "techcruch.com" typo, moves the site name from
% publisher (ignored by misc) to howpublished, and drops the tracking query
% string from the URL: it is not needed to reach the page and its ampersand and
% underscore characters are fragile in styles that do not wrap URLs verbatim.
% The month comes from the date in the URL path (2016/04/12).
@misc{facebook2016,
  author       = {Constine, Josh},
  title        = {{Facebook} launches {Messenger} platform with chatbots},
  howpublished = {techcrunch.com},
  month        = apr,
  year         = {2016},
  url          = {https://techcrunch.com/2016/04/12/agents-on-messenger/},
}
|
||||
|
||||
% Web article with no named author. A key field is added so author-year styles
% have something to build the label and sort order from; the site name moves
% from publisher (ignored by misc) to howpublished; the " - marutitech.com"
% page-title suffix is removed from the title since the site is already given.
@misc{customerservice2019,
  key          = {marutitech.com},
  title        = {How Chatbots are transforming {Wall Street} and {Main Street} banks?},
  howpublished = {marutitech.com},
  year         = {2019},
  url          = {https://marutitech.com/chatbots-transforming-wall-street-main-street-banks/},
}
|
|
@ -0,0 +1,560 @@
|
|||
% This is the LaTeX style file for ACL 2020, based off of ACL 2019.
|
||||
|
||||
% Addressing bibtex issues mentioned in https://github.com/acl-org/acl-pub/issues/2
|
||||
% Other major modifications include
|
||||
% changing the color of the line numbers to a light gray; changing font size of abstract to be 10pt; changing caption font size to be 10pt.
|
||||
% -- M Mitchell and Stephanie Lukin
|
||||
|
||||
% 2017: modified to support DOI links in bibliography. Now uses
|
||||
% natbib package rather than defining citation commands in this file.
|
||||
% Use with acl_natbib.bst bib style. -- Dan Gildea
|
||||
|
||||
% This is the LaTeX style for ACL 2016. It contains Margaret Mitchell's
|
||||
% line number adaptations (ported by Hai Zhao and Yannick Versley).
|
||||
|
||||
% It is nearly identical to the style files for ACL 2015,
|
||||
% ACL 2014, EACL 2006, ACL2005, ACL 2002, ACL 2001, ACL 2000,
|
||||
% EACL 95 and EACL 99.
|
||||
%
|
||||
% Changes made include: adapt layout to A4 and centimeters, widen abstract
|
||||
|
||||
% This is the LaTeX style file for ACL 2000. It is nearly identical to the
|
||||
% style files for EACL 95 and EACL 99. Minor changes include editing the
|
||||
% instructions to reflect use of \documentclass rather than \documentstyle
|
||||
% and removing the white space before the title on the first page
|
||||
% -- John Chen, June 29, 2000
|
||||
|
||||
% This is the LaTeX style file for EACL-95. It is identical to the
|
||||
% style file for ANLP '94 except that the margins are adjusted for A4
|
||||
% paper. -- abney 13 Dec 94
|
||||
|
||||
% The ANLP '94 style file is a slightly modified
|
||||
% version of the style used for AAAI and IJCAI, using some changes
|
||||
% prepared by Fernando Pereira and others and some minor changes
|
||||
% by Paul Jacobs.
|
||||
|
||||
% Papers prepared using the aclsub.sty file and acl.bst bibtex style
|
||||
% should be easily converted to final format using this style.
|
||||
% (1) Submission information (\wordcount, \subject, and \makeidpage)
|
||||
% should be removed.
|
||||
% (2) \summary should be removed. The summary material should come
|
||||
% after \maketitle and should be in the ``abstract'' environment
|
||||
% (between \begin{abstract} and \end{abstract}).
|
||||
% (3) Check all citations. This style should handle citations correctly
|
||||
% and also allows multiple citations separated by semicolons.
|
||||
% (4) Check figures and examples. Because the final format is double-
|
||||
% column, some adjustments may have to be made to fit text in the column
|
||||
% or to choose full-width (\figure*) figures.
|
||||
|
||||
% Place this in a file called aclap.sty in the TeX search path.
|
||||
% (Placing it in the same directory as the paper should also work.)
|
||||
|
||||
% Prepared by Peter F. Patel-Schneider, liberally using the ideas of
|
||||
% other style hackers, including Barbara Beeton.
|
||||
% This style is NOT guaranteed to work. It is provided in the hope
|
||||
% that it will make the preparation of papers easier.
|
||||
%
|
||||
% There are undoubtedly bugs in this style. If you make bug fixes,
|
||||
% improvements, etc. please let me know. My e-mail address is:
|
||||
% pfps@research.att.com
|
||||
|
||||
% Papers are to be prepared using the ``acl_natbib'' bibliography style,
|
||||
% as follows:
|
||||
% \documentclass[11pt]{article}
|
||||
% \usepackage{acl2020}
|
||||
% \title{Title}
|
||||
% \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
|
||||
% Author 3 \\ Address line \\ Address line}
|
||||
% \begin{document}
|
||||
% ...
|
||||
% \bibliography{bibliography-file}
|
||||
% \bibliographystyle{acl_natbib}
|
||||
% \end{document}
|
||||
|
||||
% Author information can be set in various styles:
|
||||
% For several authors from the same institution:
|
||||
% \author{Author 1 \and ... \and Author n \\
|
||||
% Address line \\ ... \\ Address line}
|
||||
% if the names do not fit well on one line use
|
||||
% Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
|
||||
% For authors from different institutions:
|
||||
% \author{Author 1 \\ Address line \\ ... \\ Address line
|
||||
% \And ... \And
|
||||
% Author n \\ Address line \\ ... \\ Address line}
|
||||
% To start a separate ``row'' of authors use \AND, as in
|
||||
% \author{Author 1 \\ Address line \\ ... \\ Address line
|
||||
% \AND
|
||||
% Author 2 \\ Address line \\ ... \\ Address line \And
|
||||
% Author 3 \\ Address line \\ ... \\ Address line}
|
||||
|
||||
% If the title and author information does not fit in the area allocated,
|
||||
% place \setlength\titlebox{<new height>} right after
|
||||
% \usepackage{acl2020}
|
||||
% where <new height> can be something larger than 5cm
|
||||
|
||||
% include hyperref, unless user specifies nohyperref option like this:
|
||||
% \usepackage[nohyperref]{acl2020}
|
||||
% Package options: `hyperref' (the default) loads hyperref and colours all
% links dark blue; `nohyperref' skips hyperref and installs a fallback \href.
% Dependencies are loaded with \RequirePackage, the command intended for use
% inside package files, instead of \usepackage.
\newif\ifacl@hyperref
\DeclareOption{hyperref}{\acl@hyperreftrue}
\DeclareOption{nohyperref}{\acl@hyperreffalse}
\ExecuteOptions{hyperref} % default is to use hyperref
\ProcessOptions\relax
\ifacl@hyperref
  \RequirePackage{hyperref}
  \RequirePackage{xcolor} % make links dark blue
  \definecolor{darkblue}{rgb}{0, 0, 0.5}
  \hypersetup{colorlinks=true,citecolor=darkblue, linkcolor=darkblue, urlcolor=darkblue}
\else
  % This definition is used if the hyperref package is not loaded.
  % It provides a backup, no-op definition of \href that simply typesets its
  % second argument (the link text) and discards the first (the target).
  % This is necessary because \href command is used in the acl_natbib.bst file.
  \def\href#1#2{{#2}}
  % We still need to load xcolor in this case because the lighter line numbers require it. (SC/KG/WL)
  \RequirePackage{xcolor}
\fi
|
||||
|
||||
% Announce the style in the log; the year matches the ACL 2020 naming used in
% this file's header comment and in the \confidential banner text.
\typeout{Conference Style for ACL 2020}
|
||||
|
||||
% NOTE: Some laser printers have a serious problem printing TeX output.
|
||||
% These printing devices, commonly known as ``write-white'' laser
|
||||
% printers, tend to make characters too light. To get around this
|
||||
% problem, a darker set of fonts must be created for these devices.
|
||||
%
|
||||
|
||||
% \Thanks{<text>}: wrapper around \thanks that puts an explicit space (\ )
% in front of the footnote text.
\newcommand{\Thanks}[1]{\thanks{\ #1}}
|
||||
|
||||
% A4 modified by Eneko; again modified by Alexander for 5cm titlebox
|
||||
\setlength{\paperwidth}{21cm} % A4
|
||||
\setlength{\paperheight}{29.7cm}% A4
|
||||
\setlength\topmargin{-0.5cm}
|
||||
\setlength\oddsidemargin{0cm}
|
||||
\setlength\textheight{24.7cm}
|
||||
\setlength\textwidth{16.0cm}
|
||||
\setlength\columnsep{0.6cm}
|
||||
\newlength\titlebox
|
||||
\setlength\titlebox{5cm}
|
||||
\setlength\headheight{5pt}
|
||||
\setlength\headsep{0pt}
|
||||
\thispagestyle{empty}
|
||||
\pagestyle{empty}
|
||||
|
||||
|
||||
\flushbottom \twocolumn \sloppy
|
||||
|
||||
% We're never going to need a table of contents, so just flush it to
|
||||
% save space --- suggested by drstrip@sandia-2
|
||||
\def\addcontentsline#1#2#3{}
|
||||
|
||||
% Boolean \ifaclfinal selects between the two layouts this style produces:
% it starts out false, and calling \aclfinalcopy sets it to true globally.
\newif\ifaclfinal
|
||||
\aclfinalfalse
|
||||
\def\aclfinalcopy{\global\aclfinaltrue}
|
||||
|
||||
%% ----- Set up hooks to repeat content on every page of the output doc,
|
||||
%% necessary for the line numbers in the submitted version. --MM
|
||||
%%
|
||||
%% Copied from CVPR 2015's cvpr_eso.sty, which appears to be largely copied from everyshi.sty.
|
||||
%%
|
||||
%% Original cvpr_eso.sty available at: http://www.pamitc.org/cvpr15/author_guidelines.php
|
||||
%% Original everyshi.sty available at: https://www.ctan.org/pkg/everyshi
|
||||
%%
|
||||
%% Copyright (C) 2001 Martin Schr\"oder:
|
||||
%%
|
||||
%% Martin Schr"oder
|
||||
%% Cr"usemannallee 3
|
||||
%% D-28213 Bremen
|
||||
%% Martin.Schroeder@ACM.org
|
||||
%%
|
||||
%% This program may be redistributed and/or modified under the terms
|
||||
%% of the LaTeX Project Public License, either version 1.0 of this
|
||||
%% license, or (at your option) any later version.
|
||||
%% The latest version of this license is in
|
||||
%% CTAN:macros/latex/base/lppl.txt.
|
||||
%%
|
||||
%% Happy users are requested to send [Martin] a postcard. :-)
|
||||
%%
|
||||
% Shipout hook machinery. Two token-list macros collect code to run at
% shipout: \@EveryShipoutACL@Hook (kept across pages) and
% \@EveryShipoutACL@AtNextHook (emptied again after each run, see
% \@EveryShipoutACL@Output below).
\newcommand{\@EveryShipoutACL@Hook}{}
|
||||
\newcommand{\@EveryShipoutACL@AtNextHook}{}
|
||||
% \EveryShipoutACL{<code>}: globally append <code> to the persistent hook.
\newcommand*{\EveryShipoutACL}[1]
|
||||
{\g@addto@macro\@EveryShipoutACL@Hook{#1}}
|
||||
% \AtNextShipoutACL@{<code>}: globally append <code> to the one-shot hook.
\newcommand*{\AtNextShipoutACL@}[1]
|
||||
{\g@addto@macro\@EveryShipoutACL@AtNextHook{#1}}
|
||||
% Replacement for \shipout (installed by \@EveryShipoutACL@Init): stores the
% box that follows into \@cclv and arranges, via \afterassignment, for
% \@EveryShipoutACL@Test to run once that assignment has been made.
\newcommand{\@EveryShipoutACL@Shipout}{%
|
||||
\afterassignment\@EveryShipoutACL@Test
|
||||
\global\setbox\@cclv= %
|
||||
}
|
||||
% If \@cclv is still void, defer \@EveryShipoutACL@Output with \aftergroup;
% otherwise run it immediately.
\newcommand{\@EveryShipoutACL@Test}{%
|
||||
\ifvoid\@cclv\relax
|
||||
\aftergroup\@EveryShipoutACL@Output
|
||||
\else
|
||||
\@EveryShipoutACL@Output
|
||||
\fi%
|
||||
}
|
||||
% Run both hooks, reset the one-shot hook to empty, then hand \box\@cclv to
% the saved original \shipout.
\newcommand{\@EveryShipoutACL@Output}{%
|
||||
\@EveryShipoutACL@Hook%
|
||||
\@EveryShipoutACL@AtNextHook%
|
||||
\gdef\@EveryShipoutACL@AtNextHook{}%
|
||||
\@EveryShipoutACL@Org@Shipout\box\@cclv%
|
||||
}
|
||||
% Placeholder; \@EveryShipoutACL@Init overwrites it with the real \shipout.
\newcommand{\@EveryShipoutACL@Org@Shipout}{}
|
||||
% Save the current \shipout and redirect \shipout to the wrapper above.
% Executed at \begin{document} through \AtBeginDocument.
\newcommand*{\@EveryShipoutACL@Init}{%
|
||||
\message{ABD: EveryShipout initializing macros}%
|
||||
\let\@EveryShipoutACL@Org@Shipout\shipout
|
||||
\let\shipout\@EveryShipoutACL@Shipout
|
||||
}
|
||||
\AtBeginDocument{\@EveryShipoutACL@Init}
|
||||
|
||||
%% ----- Set up for placing additional items into the submitted version --MM
|
||||
%%
|
||||
%% Based on eso-pic.sty
|
||||
%%
|
||||
%% Original available at: https://www.ctan.org/tex-archive/macros/latex/contrib/eso-pic
|
||||
%% Copyright (C) 1998-2002 by Rolf Niepraschk <niepraschk@ptb.de>
|
||||
%%
|
||||
%% Which may be distributed and/or modified under the conditions of
|
||||
%% the LaTeX Project Public License, either version 1.2 of this license
|
||||
%% or (at your option) any later version. The latest version of this
|
||||
%% license is in:
|
||||
%%
|
||||
%% http://www.latex-project.org/lppl.txt
|
||||
%%
|
||||
%% and version 1.2 or later is part of all distributions of LaTeX version
|
||||
%% 1999/12/01 or later.
|
||||
%%
|
||||
%% In contrast to the original, we do not include the definitions for/using:
|
||||
%% gridpicture, div[2], isMEMOIR[1], gridSetup[6][], subgridstyle{dotted}, labelfactor{}, gap{}, gridunitname{}, gridunit{}, gridlines{\thinlines}, subgridlines{\thinlines}, the {keyval} package, evenside margin, nor any definitions with 'color'.
|
||||
%%
|
||||
%% These are beyond what is needed for the NAACL/ACL style.
|
||||
%%
|
||||
% \LenToUnit{<length>}: expands to <length> followed by \@gobble, so the token
% that comes right after the expansion is discarded. Used below inside \put
% coordinates. NOTE(review): presumably the gobbled token is the \unitlength
% that the picture-mode \put appends to each coordinate -- confirm against the
% LaTeX kernel.
\newcommand\LenToUnit[1]{#1\@gobble}
|
||||
\newcommand\AtPageUpperLeft[1]{%
|
||||
\begingroup
|
||||
\@tempdima=0pt\relax\@tempdimb=\ESO@yoffsetI\relax
|
||||
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
|
||||
\endgroup
|
||||
}
|
||||
\newcommand\AtPageLowerLeft[1]{\AtPageUpperLeft{%
|
||||
\put(0,\LenToUnit{-\paperheight}){#1}}}
|
||||
\newcommand\AtPageCenter[1]{\AtPageUpperLeft{%
|
||||
\put(\LenToUnit{.5\paperwidth},\LenToUnit{-.5\paperheight}){#1}}}
|
||||
\newcommand\AtPageLowerCenter[1]{\AtPageUpperLeft{%
|
||||
\put(\LenToUnit{.5\paperwidth},\LenToUnit{-\paperheight}){#1}}}%
|
||||
\newcommand\AtPageLowishCenter[1]{\AtPageUpperLeft{%
|
||||
\put(\LenToUnit{.5\paperwidth},\LenToUnit{-.96\paperheight}){#1}}}
|
||||
\newcommand\AtTextUpperLeft[1]{%
|
||||
\begingroup
|
||||
\setlength\@tempdima{1in}%
|
||||
\advance\@tempdima\oddsidemargin%
|
||||
\@tempdimb=\ESO@yoffsetI\relax\advance\@tempdimb-1in\relax%
|
||||
\advance\@tempdimb-\topmargin%
|
||||
\advance\@tempdimb-\headheight\advance\@tempdimb-\headsep%
|
||||
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
|
||||
\endgroup
|
||||
}
|
||||
\newcommand\AtTextLowerLeft[1]{\AtTextUpperLeft{%
|
||||
\put(0,\LenToUnit{-\textheight}){#1}}}
|
||||
\newcommand\AtTextCenter[1]{\AtTextUpperLeft{%
|
||||
\put(\LenToUnit{.5\textwidth},\LenToUnit{-.5\textheight}){#1}}}
|
||||
\newcommand{\ESO@HookI}{} \newcommand{\ESO@HookII}{}
|
||||
\newcommand{\ESO@HookIII}{}
|
||||
\newcommand{\AddToShipoutPicture}{%
|
||||
\@ifstar{\g@addto@macro\ESO@HookII}{\g@addto@macro\ESO@HookI}}
|
||||
\newcommand{\ClearShipoutPicture}{\global\let\ESO@HookI\@empty}
|
||||
\newcommand{\@ShipoutPicture}{%
|
||||
\bgroup
|
||||
\@tempswafalse%
|
||||
\ifx\ESO@HookI\@empty\else\@tempswatrue\fi%
|
||||
\ifx\ESO@HookII\@empty\else\@tempswatrue\fi%
|
||||
\ifx\ESO@HookIII\@empty\else\@tempswatrue\fi%
|
||||
\if@tempswa%
|
||||
\@tempdima=1in\@tempdimb=-\@tempdima%
|
||||
\advance\@tempdimb\ESO@yoffsetI%
|
||||
\unitlength=1pt%
|
||||
\global\setbox\@cclv\vbox{%
|
||||
\vbox{\let\protect\relax
|
||||
\pictur@(0,0)(\strip@pt\@tempdima,\strip@pt\@tempdimb)%
|
||||
\ESO@HookIII\ESO@HookI\ESO@HookII%
|
||||
\global\let\ESO@HookII\@empty%
|
||||
\endpicture}%
|
||||
\nointerlineskip%
|
||||
\box\@cclv}%
|
||||
\fi
|
||||
\egroup
|
||||
}
|
||||
\EveryShipoutACL{\@ShipoutPicture}
|
||||
\newif\ifESO@dvips\ESO@dvipsfalse
|
||||
\newif\ifESO@grid\ESO@gridfalse
|
||||
\newif\ifESO@texcoord\ESO@texcoordfalse
|
||||
\newcommand*\ESO@griddelta{}\newcommand*\ESO@griddeltaY{}
|
||||
\newcommand*\ESO@gridDelta{}\newcommand*\ESO@gridDeltaY{}
|
||||
\newcommand*\ESO@yoffsetI{}\newcommand*\ESO@yoffsetII{}
|
||||
\ifESO@texcoord
|
||||
\def\ESO@yoffsetI{0pt}\def\ESO@yoffsetII{-\paperheight}
|
||||
\edef\ESO@griddeltaY{-\ESO@griddelta}\edef\ESO@gridDeltaY{-\ESO@gridDelta}
|
||||
\else
|
||||
\def\ESO@yoffsetI{\paperheight}\def\ESO@yoffsetII{0pt}
|
||||
\edef\ESO@griddeltaY{\ESO@griddelta}\edef\ESO@gridDeltaY{\ESO@gridDelta}
|
||||
\fi
|
||||
|
||||
|
||||
%% ----- Submitted version markup: Page numbers, ruler, and confidentiality. Using ideas/code from cvpr.sty 2015. --MM
|
||||
|
||||
\font\aclhv = phvb at 8pt
|
||||
|
||||
%% Define vruler %%
|
||||
|
||||
%\makeatletter
|
||||
\newbox\aclrulerbox
|
||||
\newcount\aclrulercount
|
||||
\newdimen\aclruleroffset
|
||||
\newdimen\cv@lineheight
|
||||
\newdimen\cv@boxheight
|
||||
\newbox\cv@tmpbox
|
||||
\newcount\cv@refno
|
||||
\newcount\cv@tot
|
||||
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
|
||||
\newcount\cv@tmpc@ \newcount\cv@tmpc
|
||||
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
|
||||
\cv@tmpc=1 %
|
||||
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
|
||||
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
|
||||
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
|
||||
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
|
||||
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
|
||||
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
|
||||
\def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip
|
||||
\textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
|
||||
\global\setbox\aclrulerbox=\vbox to \textheight{%
|
||||
{\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
|
||||
\color{gray}
|
||||
\cv@lineheight=#1\global\aclrulercount=#2%
|
||||
\cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
|
||||
\cv@refno1\vskip-\cv@lineheight\vskip1ex%
|
||||
\loop\setbox\cv@tmpbox=\hbox to0cm{{\aclhv\hfil\fillzeros[#4]\aclrulercount}}%
|
||||
\ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
|
||||
\advance\cv@refno1\global\advance\aclrulercount#3\relax
|
||||
\ifnum\cv@refno<\cv@tot\repeat}}\endgroup}%
|
||||
%\makeatother
|
||||
|
||||
|
||||
% Default paper ID placeholder; the expansion of \aclpaperid is inserted into
% the \confidential banner below.
\def\aclpaperid{***}
|
||||
% Banner text for the review copy, set in black and including \aclpaperid.
\def\confidential{\textcolor{black}{ACL 2020 Submission~\aclpaperid. Confidential Review Copy. DO NOT DISTRIBUTE.}}
|
||||
|
||||
%% Page numbering, Vruler and Confidentiality %%
|
||||
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
|
||||
|
||||
% SC/KG/WL - changed line numbering to gainsboro
|
||||
\definecolor{gainsboro}{rgb}{0.8, 0.8, 0.8}
|
||||
%\def\aclruler#1{\makevruler[14.17pt][#1][1][3][\textheight]\usebox{\aclrulerbox}} %% old line
|
||||
\def\aclruler#1{\textcolor{gainsboro}{\makevruler[14.17pt][#1][1][3][\textheight]\usebox{\aclrulerbox}}}
|
||||
|
||||
% Horizontal positions of the left and right line-number rulers; both are
% passed as the x coordinate of a \put inside \AtTextUpperLeft further down.
\def\leftoffset{-2.1cm} %original: -45pt
|
||||
\def\rightoffset{17.5cm} %original: 500pt
|
||||
\ifaclfinal\else\pagenumbering{arabic}
|
||||
\AddToShipoutPicture{%
|
||||
\ifaclfinal\else
|
||||
\AtPageLowishCenter{\textcolor{black}{\thepage}}
|
||||
\aclruleroffset=\textheight
|
||||
\advance\aclruleroffset4pt
|
||||
\AtTextUpperLeft{%
|
||||
\put(\LenToUnit{\leftoffset},\LenToUnit{-\aclruleroffset}){%left ruler
|
||||
\aclruler{\aclrulercount}}
|
||||
\put(\LenToUnit{\rightoffset},\LenToUnit{-\aclruleroffset}){%right ruler
|
||||
\aclruler{\aclrulercount}}
|
||||
}
|
||||
\AtTextUpperLeft{%confidential
|
||||
\put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\aclhv\confidential}}
|
||||
}
|
||||
\fi
|
||||
}
|
||||
|
||||
%%%% ----- End settings for placing additional items into the submitted version --MM ----- %%%%
|
||||
|
||||
%%%% ----- Begin settings for both submitted and camera-ready version ----- %%%%
|
||||
|
||||
%% Title and Authors %%
|
||||
|
||||
% \outauthor: typesets the author block as a top-aligned, centred one-column
% tabular. With \ifaclfinal true it prints \@author in bold; otherwise it
% prints the fixed text "Anonymous ACL submission" instead of the real authors.
\newcommand\outauthor{
|
||||
\begin{tabular}[t]{c}
|
||||
\ifaclfinal
|
||||
\bf\@author
|
||||
\else
|
||||
% Avoiding common accidental de-anonymization issue. --MM
|
||||
\bf Anonymous ACL submission
|
||||
\fi
|
||||
\end{tabular}}
|
||||
|
||||
% Changing the expanded titlebox for submissions to 2.5 in (rather than 6.5cm)
|
||||
% and moving it to the style sheet, rather than within the example tex file. --MM
|
||||
\ifaclfinal
|
||||
\else
|
||||
\addtolength\titlebox{.25in}
|
||||
\fi
|
||||
% Mostly taken from deproc.
|
||||
\def\maketitle{\par
|
||||
\begingroup
|
||||
\def\thefootnote{\fnsymbol{footnote}}
|
||||
\def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
|
||||
\twocolumn[\@maketitle] \@thanks
|
||||
\endgroup
|
||||
\setcounter{footnote}{0}
|
||||
\let\maketitle\relax \let\@maketitle\relax
|
||||
\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
|
||||
\def\@maketitle{\vbox to \titlebox{\hsize\textwidth
|
||||
\linewidth\hsize \vskip 0.125in minus 0.125in \centering
|
||||
{\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
|
||||
{\def\and{\unskip\enspace{\rm and}\enspace}%
|
||||
\def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
|
||||
\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
|
||||
\def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
|
||||
\vskip 0.25in plus 1fil minus 0.125in
|
||||
\hbox to \linewidth\bgroup\large \hfil\hfil
|
||||
\hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
|
||||
\hbox to \linewidth\bgroup\large \hfil\hfil
|
||||
\hbox to 0pt\bgroup\hss
|
||||
\outauthor
|
||||
\hss\egroup
|
||||
\hfil\hfil\egroup}
|
||||
\vskip 0.3in plus 2fil minus 0.1in
|
||||
}}
|
||||
|
||||
% margins and font size for abstract
|
||||
\renewenvironment{abstract}%
|
||||
{\centerline{\large\bf Abstract}%
|
||||
\begin{list}{}%
|
||||
{\setlength{\rightmargin}{0.6cm}%
|
||||
\setlength{\leftmargin}{0.6cm}}%
|
||||
\item[]\ignorespaces%
|
||||
\@setsize\normalsize{12pt}\xpt\@xpt
|
||||
}%
|
||||
{\unskip\end{list}}
|
||||
|
||||
%\renewenvironment{abstract}{\centerline{\large\bf
|
||||
% Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
|
||||
|
||||
% Resizing figure and table captions - SL
|
||||
\newcommand{\figcapfont}{\rm}
|
||||
\newcommand{\tabcapfont}{\rm}
|
||||
\renewcommand{\fnum@figure}{\figcapfont Figure \thefigure}
|
||||
\renewcommand{\fnum@table}{\tabcapfont Table \thetable}
|
||||
\renewcommand{\figcapfont}{\@setsize\normalsize{12pt}\xpt\@xpt}
|
||||
\renewcommand{\tabcapfont}{\@setsize\normalsize{12pt}\xpt\@xpt}
|
||||
% Support for interacting with the caption, subfigure, and subcaption packages - SL
|
||||
% Load the caption package with \RequirePackage (the command intended for use
% inside package files), declare a caption font named `10pt' (10pt type on
% 12pt leading) and make it the font for all captions.
\RequirePackage{caption}
\DeclareCaptionFont{10pt}{\fontsize{10pt}{12pt}\selectfont}
\captionsetup{font=10pt}
|
||||
|
||||
\RequirePackage{natbib}
|
||||
% for citation commands in the .tex, authors can use:
|
||||
% \citep, \citet, and \citeyearpar for compatibility with natbib, or
|
||||
% \cite, \newcite, and \shortcite for compatibility with older ACL .sty files
|
||||
\renewcommand\cite{\citep} % to get "(Author Year)" with natbib
|
||||
\newcommand\shortcite{\citeyearpar}% to get "(Year)" with natbib
|
||||
\newcommand\newcite{\citet} % to get "Author (Year)" with natbib
|
||||
|
||||
% DK/IV: Workaround for annoying hyperref pagewrap bug
|
||||
\RequirePackage{etoolbox}
|
||||
\patchcmd\@combinedblfloats{\box\@outputbox}{\unvbox\@outputbox}{}{\errmessage{\noexpand patch failed}}
|
||||
|
||||
% bibliography
|
||||
|
||||
% \@up{<material>}: puts <material> in an \hbox raised by 0.2ex.
\def\@up#1{\raise.2ex\hbox{#1}}
|
||||
|
||||
% Don't put a label in the bibliography at all. Just use the unlabeled format
|
||||
% instead.
|
||||
\def\thebibliography#1{\vskip\parskip%
|
||||
\vskip\baselineskip%
|
||||
\def\baselinestretch{1}%
|
||||
\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
|
||||
\vskip-\parskip%
|
||||
\vskip-\baselineskip%
|
||||
\section*{References\@mkboth
|
||||
{References}{References}}\list
|
||||
{}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
|
||||
\setlength{\itemindent}{-\parindent}}
|
||||
\def\newblock{\hskip .11em plus .33em minus -.07em}
|
||||
\sloppy\clubpenalty4000\widowpenalty4000
|
||||
\sfcode`\.=1000\relax}
|
||||
\let\endthebibliography=\endlist
|
||||
|
||||
|
||||
% Allow for a bibliography of sources of attested examples
|
||||
\def\thesourcebibliography#1{\vskip\parskip%
|
||||
\vskip\baselineskip%
|
||||
\def\baselinestretch{1}%
|
||||
\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
|
||||
\vskip-\parskip%
|
||||
\vskip-\baselineskip%
|
||||
\section*{Sources of Attested Examples\@mkboth
|
||||
{Sources of Attested Examples}{Sources of Attested Examples}}\list
|
||||
{}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
|
||||
\setlength{\itemindent}{-\parindent}}
|
||||
\def\newblock{\hskip .11em plus .33em minus -.07em}
|
||||
\sloppy\clubpenalty4000\widowpenalty4000
|
||||
\sfcode`\.=1000\relax}
|
||||
\let\endthesourcebibliography=\endlist
|
||||
|
||||
% sections with less space
|
||||
\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
|
||||
-0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
|
||||
\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
|
||||
-0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
|
||||
%% changed by KO to - values to get the initial parindent right
|
||||
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex plus
|
||||
-0.5ex minus -.2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
|
||||
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
|
||||
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
|
||||
\def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
|
||||
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
|
||||
|
||||
% Footnotes
|
||||
\footnotesep 6.65pt %
|
||||
\skip\footins 9pt plus 4pt minus 2pt
|
||||
\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
|
||||
\setcounter{footnote}{0}
|
||||
|
||||
% Lists and paragraphs
|
||||
\parindent 1em
|
||||
\topsep 4pt plus 1pt minus 2pt
|
||||
\partopsep 1pt plus 0.5pt minus 0.5pt
|
||||
\itemsep 2pt plus 1pt minus 0.5pt
|
||||
\parsep 2pt plus 1pt minus 0.5pt
|
||||
|
||||
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
|
||||
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
|
||||
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
|
||||
|
||||
\def\@listi{\leftmargin\leftmargini}
|
||||
\def\@listii{\leftmargin\leftmarginii
|
||||
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
|
||||
\topsep 2pt plus 1pt minus 0.5pt
|
||||
\parsep 1pt plus 0.5pt minus 0.5pt
|
||||
\itemsep \parsep}
|
||||
\def\@listiii{\leftmargin\leftmarginiii
|
||||
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
|
||||
\topsep 1pt plus 0.5pt minus 0.5pt
|
||||
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
|
||||
\itemsep \topsep}
|
||||
\def\@listiv{\leftmargin\leftmarginiv
|
||||
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
|
||||
\def\@listv{\leftmargin\leftmarginv
|
||||
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
|
||||
\def\@listvi{\leftmargin\leftmarginvi
|
||||
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
|
||||
|
||||
\abovedisplayskip 7pt plus2pt minus5pt%
|
||||
\belowdisplayskip \abovedisplayskip
|
||||
\abovedisplayshortskip 0pt plus3pt%
|
||||
\belowdisplayshortskip 4pt plus3pt minus3pt%
|
||||
|
||||
% Less leading in most fonts (due to the narrow columns)
|
||||
% The choices were between 1-pt and 1.5-pt leading
|
||||
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
|
||||
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
|
||||
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
|
||||
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
|
||||
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
|
||||
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
|
||||
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
|
||||
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
|
||||
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
|
||||
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Binary file not shown.
|
@ -0,0 +1,448 @@
|
|||
%
|
||||
% File acl2020.tex
|
||||
%
|
||||
%% Based on the style files for ACL 2020, which were
|
||||
%% Based on the style files for ACL 2018, NAACL 2018/19, which were
|
||||
%% Based on the style files for ACL-2015, with some improvements
|
||||
%% taken from the NAACL-2016 style
|
||||
%% Based on the style files for ACL-2014, which were, in turn,
|
||||
%% based on ACL-2013, ACL-2012, ACL-2011, ACL-2010, ACL-IJCNLP-2009,
|
||||
%% EACL-2009, IJCNLP-2008...
|
||||
%% Based on the style files for EACL 2006 by
|
||||
%%e.agirre@ehu.es or Sergi.Balari@uab.es
|
||||
%% and that of ACL 08 by Joakim Nivre and Noah Smith
|
||||
|
||||
\documentclass[11pt,a4paper]{article}
|
||||
\usepackage[spanish, basque]{babel} % Add basque or english
|
||||
\selectlanguage{basque}
|
||||
\usepackage[hyperref]{acl2020}
|
||||
\usepackage{times}
|
||||
\usepackage{latexsym}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{graphicx}
|
||||
\renewcommand{\UrlFont}{\ttfamily\small}
|
||||
|
||||
% This is not strictly necessary, and may be commented out,
|
||||
% but it will improve the layout of the manuscript,
|
||||
% and will typically save some space.
|
||||
\usepackage{microtype}
|
||||
|
||||
\aclfinalcopy % Uncomment this line for the final submission
|
||||
%\def\aclpaperid{***} % Enter the acl Paper ID here
|
||||
|
||||
%\setlength\titlebox{5cm}
|
||||
% You can expand the titlebox if you need extra space
|
||||
% to show all the authors. Please do not make the titlebox
|
||||
% smaller than 5cm (the original size); we will check this
|
||||
% in the camera-ready version and ask you to change it back.
|
||||
|
||||
% \BibTeX: typesets the BibTeX logo as "B", small-caps "ib", then \TeX.
\newcommand\BibTeX{B\textsc{ib}\TeX}
|
||||
|
||||
\title{DIAL: Telegram bidezko galdera-erantzun sistema}
|
||||
|
||||
\author{Jon Ander Campos \\
|
||||
EHU / Tutorea \\
|
||||
\texttt{jcampos004@ikasle.ehu.eus} \\\And
|
||||
Iñigo Ortega \\
|
||||
EHU / Ikaslea \\
|
||||
\texttt{iortega045@ikasle.ehu.eus} \\}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\begin{abstract}
|
||||
Artikulu hau Telegram bidezko bot baten bidez elkarrizketak izateko eredu bati
|
||||
buruz jarduten da. Hori egiteko Seq2Seq sare neuronalen eredua erabiltzen da GRU
|
||||
motako sare errekurrenteen bidez eta atentzio sistema bat erabilita.
|
||||
|
||||
Eraikitako ereduarekin, Telegrameko bot bat egin daiteke eta elkarrizketak
|
||||
Telegrametik bertatik izan bere bot-en API-a dagokion moduan erabilita.
|
||||
\end{abstract}
|
||||
|
||||
\section{Sarrera}
|
||||
\label{sec:sarrera}
|
||||
Elkarrizketarako makina automatikoak Turing-en garaitik hasi ziren dagokien
|
||||
garrantzia izaten, honek berak proposatutako Turing probaren ondorioz,
|
||||
elkarrizketa bateko ondoko partaidea gizakia den edo ez egiaztatzeko erabili
|
||||
daitekeena. Hain zuzen, 1950etik dago elkarrizketen partaideak gizakiak diren edo
|
||||
ez egiaztatzeko interesa, ez proba huts bat bezala, baizik eta gizaki bat eta
|
||||
makina bat desberdintzeko zailagoa izan dadin.
|
||||
|
||||
Helburu horretarako, sistema asko eraiki dira, horien artean ospetsuenak, IBM
|
||||
Watson\texttrademark, Alexa\texttrademark\ edo Siri\texttrademark. Hasieran,
|
||||
erregeletan oinarritutako ereduak erabiltzen ziren hauen inplementaziorako,
|
||||
ordea, gaur egun, eta lan honen eredurako, ikasketa automatikoa erabiltzen da
|
||||
haren moldakortasunagatik.
|
||||
|
||||
Hori horrela izanik, azken urteotan helburu antzekoak dituzten aplikazioak sortu
|
||||
dira: Meena (Googleko zientzialari batzuek sortutako elkarrizketa bot bat
|
||||
\cite{meena2020}), Facebook Messenger-eko bot txertatuak \cite{facebook2016} edo
|
||||
bezeroaren arretarako zerbitzuak eskaintzeko \cite{customerservice2019} milioika
|
||||
Euro aurreztuz.
|
||||
|
||||
Kasu honetan, baita ere, ikasketa automatiko bidezko elkarrizketen eredu bat
|
||||
erabili eta inplementatu da. Eredu horren, bere hobekuntzen eta emaitzei buruzko
|
||||
jarduna da artikulu hau. Honen egiazkotasuna eta lekukoa hurrengo
|
||||
inplementazioa da: \href{https://git.disroot.org/i.ortega/hp-dial}{Git biltegia}.
|
||||
|
||||
|
||||
\section{Erlazionatutako lanak}
|
||||
\label{sec:erlaz-lanak}
|
||||
Lan honetan Seq2Seq sare neuronalen arkitektura erabiltzen da. Hau, lehen aldiz,
|
||||
Googlen hasi zen erabiltzen. Hain zuzen, Sutskever et al. 2014an ikertzaileek
|
||||
hasi ziren erabiltzen \cite{sutskever2014}. Lehen eredu honek kodetzaile eta dekodetzaile bat lotzea
|
||||
proposatu zuen (Figure \ref{fig:seq2seq}).
|
||||
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=7cm]{../presentation/Screenshot-2020-05-11.png}
|
||||
\caption{Sutskever et al. (2014): Lehen Seq2Seq eredua.}
|
||||
\label{fig:seq2seq}
|
||||
\end{figure}
|
||||
|
||||
Eredu honen erabilpen nagusienetako bat itzulpen automatikoa da
|
||||
\cite{maitehp2020}. Ordea, badirudi azken urteotan elkarrizketa sistemak
|
||||
inplementatzeko ere erabili dela nahiko emaitza onekin \cite{meena2020}.
|
||||
|
||||
\section{Sistema}
|
||||
\label{sec:sistema}
|
||||
Sistema honek aipatutako Seq2Seq eredua erabiltzen du.
|
||||
Honetan hitzen sekuentzia bat, beste batean bihurtzen da sare errekurrenteetan
|
||||
oinarritutako prozedura bat erabiliz. Hain zuzen, 2 zati nagusiz osatutako
|
||||
metodo bat erabiltzen da: Kodetzaile eta Dekodetzaile bat; biak GRU motako sare
|
||||
errekurrente bikoitzak izanik (BiGRU).
|
||||
Gainera, atentzio sistema bat erabiltzen da dekodetzailea, uneoro, kodetzaileak
|
||||
sortutako errepresentazio ezkutuez baliatu ahal izateko
|
||||
\cite{bahdanau2014neural}.
|
||||
|
||||
Sistemaren funtzionamendua honelakoa litzateke: Sarrera bezala esaldi bat
|
||||
emanda, GRU kodetzaileek jasotzen dute. Batek hasieratik bukaerara aztertzen du
|
||||
esaldia \textit{aurreranzko} egoera ezkutuak sortuz eta besteak, alderantzizko
|
||||
zentzuan berdina egiten du \textit{atzeranzko} egoera ezkutuak sortuz.
|
||||
Sarreraren hitz bakoitzaren \textit{embedding}-a \textit{aurreranzko} eta
|
||||
\textit{atzeranzko} \textit{embedding}-en konkatenazioak definitzen du.
|
||||
Honela, dekodetzeko garaian, atentzio mekanismoari esker, unean interesgarrien
|
||||
diren \textit{embedding}-ak hartzen dira gehien kontuan. Kasu honetan ere, bi
|
||||
GRU erabiltzen dira eta, kodetzailean egin den moduan, batek sarrera hasieratik
|
||||
aztertzen du eta besteak bukaeratik, baina, kasu honetan, sarrera atentzio
|
||||
mekanismoak itzulitako irteera da. Honek lengoaia naturaleko hitzak itzuliko
|
||||
lituzke (Figure \ref{fig:encoder}).
|
||||
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=5.5cm]{Screenshot-2020-05-21-2.png}
|
||||
\caption{Erabilitako ereduaren adierazpen grafikoa. $X_i$ sarrerako hitzak
|
||||
izanik.}
|
||||
\label{fig:encoder}
|
||||
\end{figure}
|
||||
|
||||
Azaldutako sistema hau, Jon Ander Campos tutoreak eskainitako inplementazioan
|
||||
oinarrituta erabili da. Ordea, hurrengo azpi-ataletan aurkeztuko diren zenbait
|
||||
hobekuntza ere egin zaizkio.
|
||||
|
||||
\subsection{Euskarazko tokenizazioa}
|
||||
\label{sec:eusk-token}
|
||||
Sistemak, jatorriz, ingeleseko tokenizatzaile bat erabiltzen zuen ikasketan,
|
||||
ingeleseko elkarrizketak izateko prestatuta baitzegoen. Ordea, kasu honetan,
|
||||
euskarazko elkarrizketak eduki nahi dira. Horretarako, \textit{SpaCy}
|
||||
liburutegiak eskainitako euskarazko oinarrizko tokenizatzaile bat erabili da.
|
||||
|
||||
|
||||
\subsection{Erantzun motzen aurkako neurriak}
|
||||
\label{sec:erantz-motz-aurk}
|
||||
|
||||
Aipatu den bezala, ereduak erantzun oso motzak emateko joera dauka, hau da,
|
||||
``Bai'' edo ``Ez'' erantzunak ematen ditu gehienbat, izan ere, elkarrizketetan
|
||||
gehien agertzen diren hitzak dira. Ondorioz, hitz horiek hainbeste alditan ez
|
||||
agertzeko zenbait neurri hartu behar dira.
|
||||
|
||||
Kasu honetan, 2 neurri hartu dira, ``Brevity Penalization'' (BP) izeneko
|
||||
penalizazioa eta honen gainean aplikatutako Rayleigh distribuzio
|
||||
probabilistikoa. Bi hauek ikasketan aplikatu dira, hain zuzen, galera funtzioan.
|
||||
|
||||
BP itzulpen automatikoan erabiltzen den penalizazio mota bat da (normalean
|
||||
\textit{BLEU} izeneko metodo batenpean \cite{Papineni02bleu:a}). Honen helburua,
|
||||
kasu horretan, ereduak egindako itzulpen baten luzera errealarenarekin
|
||||
konparatzea da, ereduak itzultzen duena motzagoa balitz penalizazio bat
|
||||
aplikatuz.
|
||||
|
||||
Honelakoa litzateke bere ekuazioa:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:1}
|
||||
\mathrm{BP}(c, r)=
|
||||
\begin{cases}
|
||||
1, & \text{if } c > r\\
|
||||
{\rm e}^{1-\frac{r}{c}},& \text{otherwise}
|
||||
\end{cases}
|
||||
\end{equation}
|
||||
|
||||
non $c$ sistemak sortutako itzulpenaren luzera den eta $r$ itzulpen errealaren
|
||||
luzera.
|
||||
|
||||
Ordea, honek arazo bat eragiten du: Ereduak bakarrik erantzun luzeenak itzultzen
|
||||
ikasten du. Hau, beti erantzun erreala baino luzera handiagoa edo berdineko
|
||||
itzulpen bat sortu nahi duelako gertatzen da. Orduan, luzera besterik ez
|
||||
optimizatzea eragiten du, hain zuzen, luzera handitzeko.
|
||||
|
||||
Horrek, printzipioz, ez luke arazorik eragin beharko, erantzun luzeak ere
|
||||
onargarriak izan daitezkeelako. Ordea, ereduak pauso bat gehiago ematen du:
|
||||
Hitzen konbinazio hoberenak erabiltzen ditu erantzun luzeenak sortzeko, zentzua
|
||||
izan edo ez. Honela, beti erantzun zentzugabeak itzultzen ditu, hitz luzeenak
|
||||
besterik ez baititu erabiltzen.
|
||||
|
||||
Arazo hori konpontzeko, Rayleigh distribuzioa erabili da. Hain zuzen, Rayleigh
|
||||
distribuzioaren ekuazioa hau bada:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:2}
|
||||
\operatorname{Rayleigh}(x, \sigma) = \dfrac{x}{\sigma ^{2}} \cdot \mathrm{e}^{-\frac{x^{2}}{2 \sigma^{2}}}
|
||||
\end{equation}
|
||||
|
||||
Parametroak horrela definitu dira:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:3}
|
||||
\sigma = 20
|
||||
\end{equation}
|
||||
\begin{equation}
|
||||
\label{eq:4}
|
||||
0 \leq y \leq 1, \quad
|
||||
x = y \cdot 5.8 + 0.2
|
||||
\end{equation}
|
||||
|
||||
Honelako kurba bat lortuz:
|
||||
|
||||
\begin{figure}[h]
|
||||
\centering
|
||||
\includegraphics[width=7cm]{../presentation/Figure_1.png}
|
||||
\caption[Rayleigh kurba]{Rayleigh distribuzioaren kurba \ref{eq:4} ekuazioan
|
||||
adierazitako parametroak erabiliz.}
|
||||
\label{fig:rayleigh-kurba}
|
||||
\end{figure}
|
||||
|
||||
Distribuzio hau BP-ren irteerari aplikatu zaio, honela, ereduak ez du erantzunen
|
||||
luzerarekiko optimizatzen, orain BP-k $0.75$ aldera dagoen balio bat itzultzen
|
||||
duenean ematen baita balio maximoa ($1$), beti erantzun luzeagoak lortzea
|
||||
eragotziz eta motzagoak (probabilitate txikiagoarekin izan arren) onartuz.
|
||||
|
||||
\section{Telegram zerbitzura esleipena}
|
||||
\label{sec:telegr-zerb-esle}
|
||||
|
||||
Lan honen helburua elkarrizketarako sistema bat sortzea da, baina horrentzako
|
||||
interfaze bat behar da. Hori Telegram-ek eskaintzen duen \textit{Bot}en API-a
|
||||
erabiliz egin da. Hain zuzen, Telegram-eko \textit{Bot} bat inplementatu da
|
||||
sistemak emandako irteera eredu bat erabiliz bidalitako mezuei erantzunak
|
||||
emateko.
|
||||
|
||||
\textit{Bot} honen inplementazioa \textit{python-telegram-bot} Telegrameko
|
||||
API-aren inplementazioaren bidez egin da (\href{https://github.com/python-telegram-bot/python-telegram-bot}{github}).
|
||||
|
||||
\section{Datuak}
|
||||
\label{sec:datuak}
|
||||
Sare neuronalak ikasteko datuak behar ditu. Horretarako, filmen azpitituluak
|
||||
erabili nahi izan dira, baina, euskaraz, ez dago hauen datubase formalik,
|
||||
ondorioz, normalizazio prozesu bat egin behar izan da erabilitako azpitituluei
|
||||
erabili ahal izateko.
|
||||
|
||||
\subsection{Azpitituluen iturria}
|
||||
\label{sec:azpitituluen-iturria}
|
||||
Azpitituluak \textit{\href{http://www.opensubtitles.org/}{OpenSubtitles}} webgunetik hartu dira. Bertan hizkuntza
|
||||
eta film askotako azpitituluak daude eskuragarri.
|
||||
|
||||
Kasu honetan, euskarazko azpitituluen datubasea erabili da, galdera erantzun
|
||||
sistema euskarazkoa egin nahi delako.
|
||||
|
||||
Datu base honetan azpitituluak lerroetan daude banatuta filmetan agertzen diren
|
||||
ordena berdinean.
|
||||
|
||||
\subsection{Azpitituluen normalizazioa}
|
||||
\label{sec:azpit-norm}
|
||||
Film azpitituluak ezin dira nolanahi erabili ikasketarako, izan ere, ez dakigu
|
||||
bertan zein esaldi izango diren galderak eta zeintzuk erantzunak. Gainera,
|
||||
azpitituluetan karaktere asko soberan daude elkarrizketei dagokionez, adibidez,
|
||||
lerroen hasieran agertzen diren ``-'' motako karaktereak.
|
||||
|
||||
Ondorioz, zer nolako normalizazio egin behar den jakiteko, ikerketa bat egin da
|
||||
azpitituluetan zehar. Honen emaitzak \ref{sec:karakt-garb}, \ref{sec:zenb-lerr-esald} eta
|
||||
\ref{sec:gald-eta-erantz} ataletan aurkezten dira.
|
||||
|
||||
\subsubsection{Karaktereen garbiketa}
|
||||
\label{sec:karakt-garb}
|
||||
Aipatu bezala, karaktere asko elkarrizketetan ez dira esanguratsuak. Ondorioz,
|
||||
karaktere horiek ezabatu beharra dago. Ordea, ezabatu ahal izateko karaktere
|
||||
horiek zeintzuk diren jakin beharra dago.
|
||||
|
||||
Ikerketa egin ondoren, garbi gelditu da lerroen hasieran erabilgarriak ez diren
|
||||
zenbait karaktere daudela. Horiek ondorengoak dira: ``-'' (marra), ``\_''
|
||||
(azpi-marra) eta `` '' (espazioa). Ohiko marra karakterea erabiliena izan da,
|
||||
baina antzeko UTF-8 motako beste zenbait karaktere ere daude (``–'', ``—'' eta ``―'').
|
||||
|
||||
Karaktere horiek zeintzuk diren jakinda, horien ezabaketa egin da.
|
||||
|
||||
\subsubsection{Zenbait lerroko esaldien identifikazioa}
|
||||
\label{sec:zenb-lerr-esald}
|
||||
Azpitituluak ez daude galdera eta erantzun modura ordenatuta, izan ere, filmean
|
||||
esaldiak datozen moduan agertzen dira datubasean. Hau da, lerro bakoitiak ez dute
|
||||
zertan galderak izan eta bikoitiak erantzunak. Erabateko kaosa dago alde
|
||||
horretan, film batean pertsonaia batek zerbait esaten badu, beste pertsonaia bat
|
||||
ez dago erantzutera behartuta. Ez hori bakarrik, esaldi askok ez dute erantzunik
|
||||
behar edo esaldiak luzeegiak direnean, azpitituluen egileak zenbait lerrotan
|
||||
banatzea erabaki dezake.
|
||||
|
||||
Egoera hori kontuan izanda, horri aurre egiteko zenbait neurri hartu dira:
|
||||
\begin{itemize}
|
||||
\item Zenbait egilek pertsonaia bakarrak esaten dituen esaldi oso luzeak
|
||||
banatzeko, "\ karakterea erabiltzen dute lerroen hasieran eta batzuetan
|
||||
bukaeran ere. Ondorioz, karaktere hori duten lerroak batu behar dira.
|
||||
\item "\ karakterearekin banatuta ez dauden zenbait lerro ere badaude,
|
||||
ordea. Horiek ``,'' (koma) batekin daude zatituta, adieraziz, koma ondorengo
|
||||
lerroa esaldi berberekoa dela. Komekin banatuta dauden lerroak, orduan,
|
||||
baita ere, batu egin behar dira.
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\subsubsection{Galdera eta erantzunen sailkapena}
|
||||
\label{sec:gald-eta-erantz}
|
||||
|
||||
Aurreko garbiketak egin ondoren, azpitituluak sistemak ikasketarako onartzen
|
||||
duen sarrera formatuan jartzea da egin behar dena. Hain zuzen, sistemak lerro
|
||||
bakoitzean bi elementu dituen fitxategi bat jasotzen du, lehenengo elementua
|
||||
galdera bat izanik eta bigarrengoa erantzuna, biak tabulazio batekin bananduta.
|
||||
Honela, lerro bakoitiak galderak bezala erabiltzen dira (lerroen lehen zatia)
|
||||
eta bikoitiak erantzun bezala (lerroen bigarren zatia).
|
||||
|
||||
Sailkapen hau, ordea, ez da guztiz zuzena, izan ere, oso zaila da tribialki
|
||||
lerro bakoitza galdera bat edo erantzuna den jakitea, ezin baitira kontrolatu
|
||||
azpitituluetan dauden salbuespen guztiak. Gainera, errore bat gertatzen denean,
|
||||
hurrengo galdera-erantzun guztiak gaizki sailkatzen dira beste akats bat
|
||||
dagoen arte.
|
||||
|
||||
\section{Emaitzak}
|
||||
\label{sec:emaitzak}
|
||||
Erabilitako sistemak emaitza onak eta txarrak izan ditu. Horien aurkezpena
|
||||
\ref{sec:onak} eta \ref{sec:txarrak} azpi-ataletan egingo da.
|
||||
|
||||
\subsection{Onak}
|
||||
\label{sec:onak}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Kaixo
|
||||
Erantzuna: Kaixo ,
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Zer moduz zaude?
|
||||
Erantzuna: Ondo .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Egun on!
|
||||
Erantzuna: Egun on , jauna .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Gabon
|
||||
Erantzuna: Gabon .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Zorionak!
|
||||
Erantzuna: Eskerrik asko .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Zorte on
|
||||
Erantzuna: Eskerrik asko .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
Baita ere, onak bezala onartu daitezkeen emaitzak daude, baina ez dira oso
|
||||
esanguratsuak elkarrizketei dagokionez.
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Nor zara?
|
||||
Erantzuna: Ez dakit .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Nor da zure aita?
|
||||
Erantzuna: Ez dakit .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
|
||||
\subsection{Txarrak}
|
||||
\label{sec:txarrak}
|
||||
|
||||
\begin{verbatim}
|
||||
Galdera: Arratsalde on!
|
||||
Erantzuna: Egun on , jauna .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Non egin duzu mina?
|
||||
Erantzuna: Ez .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Zer erosiko duzu?
|
||||
Erantzuna: Ez .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Zertan zabiltza?
|
||||
Erantzuna: Ez .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Arratsalde on!
|
||||
Erantzuna: <unk> .
|
||||
\end{verbatim}
|
||||
\vspace{0.07cm}
|
||||
\begin{verbatim}
|
||||
Galdera: Zer egin duzu?
|
||||
Erantzuna: Ez egin duzu? .
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
\section{Analisia}
|
||||
\label{sec:analisia}
|
||||
Ikusi den bezala, nahiko emaitza onak lortzen dira ohiko esaldiei erantzuna
|
||||
emateko. Adibidez, agurrei normalean ondo erantzuten die, hauek izaten dituzten
|
||||
emaitzak, orokorrean, antzekoak izaten dira eta. Berdina gertatzen da zori ona
|
||||
opatzen denean, orokorrean, eskerrak ematen baitira erantzun bezala.
|
||||
|
||||
Ordea, erantzun oso desberdinak izaten dituzten galderei edo nahikoa ohikoak ez
|
||||
diren galderei ez zaie erantzun oso egokiak ematen. Izan ere, galdera bat asko
|
||||
agertzen den arren (adibidez, ``Nor zara?''), erantzunak kasu bakoitzean oso
|
||||
desberdinak direnez, ez da gai emaitza garbi bat emateko eta nahikoa agertzen ez
|
||||
diren galderek ere ezin dute erantzun garbirik jaso ez baitago modurik erantzun
|
||||
egokiak ikasteko.
|
||||
|
||||
\section{Ondorioak}
|
||||
\label{sec:ondorioak}
|
||||
Garbi dago emaitza hauek ez direla onak eta sistema hau, dagoen moduan, ezin
|
||||
dela produkzioan erabili. Itzultzen diren erantzunak sinpleegiak dira askotan
|
||||
eta multzo txiki bateko galdera bat egin ezean emaitza arraroak hasten da
|
||||
ematen. Hori gertatzearen arrazoia, zati batean sistemarekin dago erlazionatuta
|
||||
eta beste batean erabilitako datubasearekin, izan ere, erantzun motzak
|
||||
saihesteko erabilitako metodoa ez da erabili zitekeen hoberena. Gainera,
|
||||
datubasearen izaera kaotikoak arazo gehiegi ematen ditu ikasketa garaian.
|
||||
|
||||
Seguruenik, galera funtzioa aldatu beharko litzateke eta \textit{Perplexity}-an
|
||||
oinarritutako beste bat erabili, dagoeneko badaudelako hori erabiltzen duten
|
||||
ereduak.
|
||||
Horrez gain, gaur egun, mota honetako galdera erantzun sistema bat
|
||||
inplementatzeko ideia hoberena \textit{Transformer} motako eredu bat erabiltzea
|
||||
litzateke, emaitza oso onak ematen ari baitira horrelakoak erabiltzen dituzten
|
||||
sistemek \cite{meena2020}.
|
||||
|
||||
\newpage
|
||||
\bibliography{acl2020}
|
||||
\bibliographystyle{ieeetr}
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue