pub / zeitung

News-Aggregator in newspaper style as HTML/PDF/EPUB
git clone src.jayvii.de/pub/zeitung.git
Home | Log | Files | Exports | Refs | README | RSS

commit ab1f205fc93699c740d808d6840ae255f81bd8e9
parent b9d4e55d81649f38ec52d1283c72abe65d1d6289
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date:   Sat, 21 Mar 2026 15:04:36 +0100

feat: add user output for a better overview

Diffstat:
M generate.sh | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/generate.sh b/generate.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash # run perlanet on the config file to fetch all feeds and populate the markdown - +echo "[1/8] Gather RSS sources..." + mkdir -p ./output/ perlanet "./perlanetrc.yaml" # remove all articles before yesterday ----------------------------------------- +echo "[2/8] Remove old articles..." ## remove start/stop markers from all recent articles sed -E \ @@ -17,6 +20,7 @@ perl -pe 'BEGIN{undef $/;} s/<\!--START.*?<\!--STOP.*?-->//smg' \ -i ./output/zeitung.md # ensure the file only includes characters the compile can understand ---------- +echo "[3/8] Simplify character encoding..." sed -E \ -e 's/á/\[`a\]/g' \ @@ -82,6 +86,7 @@ sed -E \ -i ./output/zeitung.md # clean up HTML ---------------------------------------------------------------- +echo "[4/8] Clean up full text articles..." ## remove linebreaks at the start of headline tags perl -pe 's/(<h[1-6][^>]*>)(.*)\n+/$1$2/g' -i ./output/zeitung.md @@ -120,11 +125,9 @@ perl -pe 'BEGIN{undef $/;} s/<iframe[^>]*?>.*?<\/iframe>//smg' \ ## remove any remaining HTML tags (but leave their content) perl -pe 'BEGIN{undef $/;} s/<[^>]+?>//smg' -i ./output/zeitung.md -## remove lines starting with empty space +## remove empty space at the start of each line sed -E -e 's/^\s+//g' -i ./output/zeitung.md -# clean up quirks -------------------------------------------------------------- - ## separated first letter at start of article sed -E -e 's/^([A-Z])\s([a-z])/\1\2/g' -i ./output/zeitung.md @@ -132,14 +135,20 @@ sed -E -e 's/^([A-Z])\s([a-z])/\1\2/g' -i ./output/zeitung.md sed -E -e 's/\*/\\*/g' -e 's/\+/\\+/g' -i ./output/zeitung.md # generate internal linking by creating SHA1 hashes ---------------------------- +echo "[5/8] Generate article IDs..." 
+ perl -MDigest::SHA=sha1_hex \ -pe 's/HASH:(.+?):HASH/sha1_hex$1/ge' \ -i ./output/zeitung.md # generate domains from URLs --------------------------------------------------- +echo "[6/8] Generate custom fields..." + sed -E -e 's/DOMAIN:http(s)*:\/\/([^\/]+).*?:DOMAIN/\2/g' -i ./output/zeitung.md # remove double entries -------------------------------------------------------- +echo "[7/8] Remove double entries..." + grep "{#" ./output/zeitung.md | \ sed -E -e 's/(\-|\;|\\|\/|\ |\!|\"|\#|\$|\&|\(|\)|\||\*|\,|\<|\>|\[|\]|\^|\`|\{|\.)/\\\1/g' | \ while read -r line; do @@ -147,11 +156,23 @@ while read -r line; do done # generate output (html, pdf, epub) -------------------------------------------- +echo "[8/8] Generate Output..." + +echo " -> HTML" pandoc ./output/zeitung.md -t html -f markdown -o ./output/zeitung.html \ --css=./templates/style.css --include-in-header=./templates/scale_fonts.html \ --self-contained --toc --toc-depth=1 + +echo " -> PDF (DIN A4)" pandoc ./output/zeitung.md -t pdf -f markdown -o ./output/zeitung.pdf \ --pdf-engine=xelatex --template eisvogel + +echo " -> PDF (DIN A5)" pandoc ./output/zeitung.md -t pdf -f markdown -o ./output/zeitung_mobile.pdf \ --pdf-engine=xelatex --template eisvogel -V papersize=a5 + +echo " -> EPUB" pandoc ./output/zeitung.md -t epub -f markdown -o ./output/zeitung.epub + +# done +echo "Done. Have fun reading."