commit ab1f205fc93699c740d808d6840ae255f81bd8e9
parent b9d4e55d81649f38ec52d1283c72abe65d1d6289
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date: Sat, 21 Mar 2026 15:04:36 +0100
feat: add user output for a better overview
Diffstat:
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/generate.sh b/generate.sh
@@ -1,10 +1,13 @@
#!/usr/bin/env bash
# run perlanet on the config file to fetch all feeds and populate the markdown -
+echo "[1/8] Gather RSS sources..."
+
mkdir -p ./output/
perlanet "./perlanetrc.yaml"
# remove all articles before yesterday -----------------------------------------
+echo "[2/8] Remove old articles..."
## remove start/stop markers from all recent articles
sed -E \
@@ -17,6 +20,7 @@ perl -pe 'BEGIN{undef $/;} s/<\!--START.*?<\!--STOP.*?-->//smg' \
-i ./output/zeitung.md
# ensure the file only includes characters the compiler can understand ---------
+echo "[3/8] Simplify character encoding..."
sed -E \
-e 's/á/\[`a\]/g' \
@@ -82,6 +86,7 @@ sed -E \
-i ./output/zeitung.md
# clean up HTML ----------------------------------------------------------------
+echo "[4/8] Clean up full text articles..."
## remove linebreaks at the start of headline tags
perl -pe 's/(<h[1-6][^>]*>)(.*)\n+/$1$2/g' -i ./output/zeitung.md
@@ -120,11 +125,9 @@ perl -pe 'BEGIN{undef $/;} s/<iframe[^>]*?>.*?<\/iframe>//smg' \
## remove any remaining HTML tags (but leave their content)
perl -pe 'BEGIN{undef $/;} s/<[^>]+?>//smg' -i ./output/zeitung.md
-## remove lines starting with empty space
+## remove empty space at the start of each line
sed -E -e 's/^\s+//g' -i ./output/zeitung.md
-# clean up quirks --------------------------------------------------------------
-
## separated first letter at start of article
sed -E -e 's/^([A-Z])\s([a-z])/\1\2/g' -i ./output/zeitung.md
@@ -132,14 +135,20 @@ sed -E -e 's/^([A-Z])\s([a-z])/\1\2/g' -i ./output/zeitung.md
sed -E -e 's/\*/\\*/g' -e 's/\+/\\+/g' -i ./output/zeitung.md
# generate internal linking by creating SHA1 hashes ----------------------------
+echo "[5/8] Generate article IDs..."
+
perl -MDigest::SHA=sha1_hex \
-pe 's/HASH:(.+?):HASH/sha1_hex$1/ge' \
-i ./output/zeitung.md
# generate domains from URLs ---------------------------------------------------
+echo "[6/8] Generate custom fields..."
+
sed -E -e 's/DOMAIN:http(s)*:\/\/([^\/]+).*?:DOMAIN/\2/g' -i ./output/zeitung.md
# remove double entries --------------------------------------------------------
+echo "[7/8] Remove double entries..."
+
grep "{#" ./output/zeitung.md | \
sed -E -e 's/(\-|\;|\\|\/|\ |\!|\"|\#|\$|\&|\(|\)|\||\*|\,|\<|\>|\[|\]|\^|\`|\{|\.)/\\\1/g' | \
while read -r line; do
@@ -147,11 +156,23 @@ while read -r line; do
done
# generate output (html, pdf, epub) --------------------------------------------
+echo "[8/8] Generate Output..."
+
+echo " -> HTML"
pandoc ./output/zeitung.md -t html -f markdown -o ./output/zeitung.html \
--css=./templates/style.css --include-in-header=./templates/scale_fonts.html \
--self-contained --toc --toc-depth=1
+
+echo " -> PDF (DIN A4)"
pandoc ./output/zeitung.md -t pdf -f markdown -o ./output/zeitung.pdf \
--pdf-engine=xelatex --template eisvogel
+
+echo " -> PDF (DIN A5)"
pandoc ./output/zeitung.md -t pdf -f markdown -o ./output/zeitung_mobile.pdf \
--pdf-engine=xelatex --template eisvogel -V papersize=a5
+
+echo " -> EPUB"
pandoc ./output/zeitung.md -t epub -f markdown -o ./output/zeitung.epub
+
+# done
+echo "Done. Have fun reading."