From ebf10bf260350ec2602fdf0c103fc843f4c6d998 Mon Sep 17 00:00:00 2001
From: tremor021
Date: Fri, 23 Jan 2026 15:58:18 +0100
Subject: [PATCH] LanguageTool: Add ngrams optional download

---
Notes (everything between the three-dash line and the diff is ignored by
git-am and is not part of the commit):

  * The installer now prompts for one ngram language code (en, de, es, fr,
    nl); pressing ENTER skips the download entirely.
  * For a valid code it scrapes the listing at
    https://languagetool.org/download/ngram-data/ for names of the form
    ngrams-<code>-<digits>.zip and takes the last entry after a version
    sort (sort -uV | tail -n1).
  * The archive is downloaded to /tmp, unpacked into /opt/ngrams and the
    zip is removed afterwards.
  * languageModel=/opt/ngrams is appended to server.properties only when
    ngram_dir was set, i.e. only after a successful download and extract.
    The appended value is a literal path, not an expansion of ngram_dir.
  * An unrecognised code is reported through msg_error. Whether msg_error
    also aborts the install is defined outside this patch.
  * NOTE(review): indentation of the added lines is shown as two spaces per
    nesting level - confirm against the upstream blob (e506fff11).

 install/languagetool-install.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/install/languagetool-install.sh b/install/languagetool-install.sh
index 4ff78e02a..e506fff11 100644
--- a/install/languagetool-install.sh
+++ b/install/languagetool-install.sh
@@ -26,10 +26,39 @@ unzip -q /tmp/LanguageTool-stable.zip -d /opt
 mv /opt/LanguageTool-*/ /opt/LanguageTool/
 download_file "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin" /opt/lid.176.bin
 
+read -r -p "Enter language code (en, de, es, fr, nl) to download ngrams or press ENTER to skip: " lang_code
+ngram_dir=""
+if [[ -n "$lang_code" ]]; then
+  if [[ "$lang_code" =~ ^(en|de|es|fr|nl)$ ]]; then
+    msg_info "Searching for $lang_code ngrams..."
+    filename=$(curl -fsSL https://languagetool.org/download/ngram-data/ | grep -oP "ngrams-${lang_code}-[0-9]+\.zip" | sort -uV | tail -n1)
+
+    if [[ -n "$filename" ]]; then
+      msg_info "Downloading $filename"
+      download_file "https://languagetool.org/download/ngram-data/${filename}" "/tmp/${filename}"
+
+      mkdir -p /opt/ngrams
+      msg_info "Extracting $lang_code ngrams to /opt/ngrams"
+      unzip -q "/tmp/${filename}" -d /opt/ngrams
+      rm "/tmp/${filename}"
+
+      ngram_dir="/opt/ngrams"
+      msg_ok "Installed $lang_code ngrams"
+    else
+      msg_info "No ngram file found for ${lang_code}"
+    fi
+  else
+    msg_error "Invalid language code: $lang_code"
+  fi
+fi
+
 cat <<EOF >/opt/LanguageTool/server.properties
 fasttextModel=/opt/lid.176.bin
 fasttextBinary=/usr/bin/fasttext
 EOF
+if [[ -n "$ngram_dir" ]]; then
+  echo "languageModel=/opt/ngrams" >> /opt/LanguageTool/server.properties
+fi
 echo "${RELEASE}" >~/.languagetool
 msg_ok "Setup LanguageTool"
 