Setup
(Here, ${LANG_F} is the source language and ${LANG_E} is the target language.
"ja" and "en" are example values; replace them with your own language codes.)
# Language codes: LANG_F is the source language, LANG_E the target language.
LANG_F="ja"
LANG_E="en"

# Corpus files under ../corpus.tok, one file per language side.
# NOTE(review): "dev" is presumably the development/validation split - confirm.
TRAIN_F="../corpus.tok/train.${LANG_F}"
TRAIN_E="../corpus.tok/train.${LANG_E}"
DEV_F="../corpus.tok/dev.${LANG_F}"
DEV_E="../corpus.tok/dev.${LANG_E}"
# Only the source-language side of the test set is referenced here.
TEST="../corpus.tok/test.${LANG_F}"

# Tool locations. ${path} is not defined in this snippet; set it to the
# directory that contains the OpenNMT and Moses checkouts before running.
NMT_PATH="${path}/OpenNMT-0.9.7"
MOSES_SCRIPT="${path}/mosesdecoder-RELEASE-4.0/scripts"
# Exported so child processes see it; quoted so the "?" stays a literal
# character instead of being treated as a glob by stricter shells.
export LUA_PATH="${NMT_PATH}/?.lua"

# Output directories for prepared data and trained models.
DATA_DIR="datadir"
MODEL_DIR="model"

# Numeric settings used by later commands.
# NOTE(review): presumably vocabulary size (SIZE), source/target length
# limits (SL/TL) and the GPU index (GPUID) - confirm against the commands
# that consume them.
SIZE="100000"
SL="150"
TL="150"
GPUID="1"
# Detokenize ${outfile} into ${outfile}.detok by deleting runs of spaces that
# touch a character which is not a Latin letter; spaces between two Latin
# letters are kept. ${outfile} must already be set to the file to process.
#
# -CSD makes Perl treat stdin/stdout (and any files it opens) as UTF-8. It
# replaces "-Mencoding=utf8": the encoding pragma is deprecated since
# Perl 5.18 and refused by Perl 5.26 and later.
#
# The character class originally listed "A-Za-z" twice.
# NOTE(review): the second pair is assumed to have been the full-width Latin
# ranges (U+FF21-U+FF3A, U+FF41-U+FF5A) lost to character normalisation; they
# are restored here as code-point escapes so they cannot be mangled again.
# Confirm against the upstream instructions.
perl -CSD -pe '
  s/([^A-Za-z\x{FF21}-\x{FF3A}\x{FF41}-\x{FF5A}]) +/${1}/g;
  s/ +([^A-Za-z\x{FF21}-\x{FF3A}\x{FF41}-\x{FF5A}])/${1}/g;
' < "${outfile}" > "${outfile}.detok"