run.sh (4244B)
1 #!/usr/bin/env bash
2 # SPDX-License-Identifier: AGPL-3.0-or-later
3 # SPDX-FileCopyrightText: 2024-2025 JayVii <jayvii[AT]posteo[DOT]de>
4
# Select the config file to process.
# - With an argument: "$1" is used as the config file for this run.
# - Without one: this script re-invokes itself once for every regular file
#   found under ./perlanetrc/ (paths in reverse sort order) and then exits
#   with status 0. The exit status of the individual runs is not checked,
#   so one failing config does not stop the remaining ones.
if [[ -z "${1:-}" ]]; then
  # Read the paths line by line so that names containing spaces stay intact.
  mapfile -t conf_files < <(find ./perlanetrc/ -type f | sort -r)
  for conf in "${conf_files[@]}"; do
    "$0" "$conf"
  done
  exit 0
else
  config="$1"
fi
12
# Gather various information from the config file.
# ./scripts/yaml.sh is sourced for its parse_yaml function; whatever
# parse_yaml prints for the config file is eval'ed in this shell.
# Presumably this defines the variables used further down (page_file,
# feed_file, ...) - verify against scripts/yaml.sh.
# NOTE(review): eval executes the parser output as shell code, so only run
# this script on trusted config files.
# The config path is passed as a printf argument (not as part of the format
# string) so that a "%" in the file name is printed literally.
printf 'Gather information from %s...\n' "$config"
source "./scripts/yaml.sh" > /dev/null
eval "$(parse_yaml "$config")"
17
# Generate the site: announce the step, then hand the selected config file
# to perlanet.
printf '%s\n' "Fetching feeds and generating html..."
perlanet "${config}"
21
# Clean up the HTML on the lines between each <!--start--> / <!--end--> pair.
# First a line break is inserted after every closing </p> in the whole file,
# then every line from a start marker line to the matching end marker line is
# rewritten as follows:
# 1. remove an anchor directly in front of <!--end--> (typically "read more...")
# 2. remove everything from <figcaption to </figcaption>
# 3. remove all remaining HTML tags (this also strips the marker comments)
# 4. wrap the line in <p>...</p>
# 5. drop paragraphs that ended up empty
printf "Clean up HTML...\n"
sed -E -e 's/(<\/p>)/\1\n/g' -i "$page_file"

# Line numbers of the marker lines; fixed-string search, one entry per line.
mapfile -t start_lines < <(grep -nF -- '<!--start-->' "$page_file" | cut -d: -f1)
mapfile -t stop_lines < <(grep -nF -- '<!--end-->' "$page_file" | cut -d: -f1)

for ((i = 0; i < ${#start_lines[@]}; i++)); do
  first=${start_lines[i]}
  last=${stop_lines[i]:-}
  # Nothing to rewrite when the end marker is missing or lies before the
  # start marker.
  if [[ -z "$last" ]] || (( last < first )); then
    continue
  fi
  # One in-place pass per marker pair: the address range limits the
  # substitutions to the lines first..last. None of the commands adds or
  # removes lines, so the recorded line numbers stay valid for later pairs.
  sed -E \
    -e "${first},${last}{" \
    -e 's/<a\ [^<]+<\/a><\!--end-->/<\!--end-->/g' \
    -e 's/<figcaption.*<\/figcaption>//g' \
    -e 's/<[^<]+>//g' \
    -e 's/^/<p>/' \
    -e 's/$/<\/p>/' \
    -e 's/<p>\s*\t*(<\!--(start|end)-->)*<\/p>/\1/g' \
    -e '}' \
    -i "$page_file"
done
45 # re-apply <!--start--> / <!--end--> comments
46 # sed -E \
47 # -e "${start_lines[0]}s/^/<\!--start\-\->/g" \
48 # -e "${stop_lines[0]}s/$/<\!--end-->/g" \
49 # -i "$page_file"
50
# insert link to rss/xml file: every <!--XML--> placeholder in the page is
# replaced by the value of $feed_file
printf "Inserting RSS feed file...\n"
# Backslash-escape "\", "/" and "&": they are special in a sed replacement
# (escape character, s-command delimiter, "matched text") and would
# otherwise break or corrupt the substitution.
feed_file_escaped=$(printf '%s' "$feed_file" | sed -e 's|[\\/&]|\\&|g')
sed -E -e "s/<\!--XML-->/${feed_file_escaped}/g" -i "$page_file"
54
# insert link to opml/xml file: every <!--OPML--> placeholder in the page is
# replaced by the value of $opml_file
printf "Inserting OPML feed file...\n"
# Backslash-escape "\", "/" and "&": they are special in a sed replacement
# (escape character, s-command delimiter, "matched text") and would
# otherwise break or corrupt the substitution.
opml_file_escaped=$(printf '%s' "$opml_file" | sed -e 's|[\\/&]|\\&|g')
sed -E -e "s/<\!--OPML-->/${opml_file_escaped}/g" -i "$page_file"
58
# insert link to manifest JSON file: the first <!--MANIFEST--> placeholder on
# each line of the page is replaced by the value of $manifest_file
printf "Inserting manifest file...\n"
# Backslash-escape "\", "/" and "&" so the path is taken literally by the sed
# replacement. As before, manifest_file holds the escaped form afterwards.
manifest_file=$(printf '%s' "$manifest_file" | sed -e 's|[\\/&]|\\&|g')
sed -E -e "s/<\!--MANIFEST-->/$manifest_file/" -i "$page_file"
63
# insert feed domains: each <!--DOMAIN src="https://...">--> placeholder is
# replaced by the part of its src URL captured between "https://" and the
# next "/"
domain_placeholder_expr='s/<\!--DOMAIN\ src=\"https:\/\/([^\/]+)[^\"]*(\"[^>]*>)-->/\1/g'
sed --regexp-extended --expression="$domain_placeholder_expr" --in-place "$page_file"
68
69 # Insert feeds list
70 # printf "Inserting feeds list...\n"
71 # feeds="<ul>"
72 # for i in $(seq 0 1 $((${#feeds__title[@]} - 1))); do
73 # feeds="${feeds}<li><a rel=\"nofollow\" target=\"_blank\" href=\"${feeds__web[$i]}\">${feeds__title[$i]}</a></li>"
74 # done
75 # feeds=`echo "${feeds}</ul>" | sed -e 's/\//\\\\\//g' -ze 's/\n//g'`
76 # sed -E -e "s/<\!--FEEDS-->/$feeds/" -i "$page_file"
77
# Insert update time: the first <!--UPDATED--> placeholder on each line of the
# page is replaced by the current date and time in the locale's format (%c).
printf "Inserting update time...\n"
now=$(date +%c)
# The %c output depends on the locale and may contain "/", "&" or "\";
# escape them so sed inserts the text literally.
now_escaped=$(printf '%s' "$now" | sed -e 's|[\\/&]|\\&|g')
sed -E -e "s/<\!--UPDATED-->/${now_escaped}/" -i "$page_file"
82
# mark posts older than threshold as "inactive"
# Every distinct data-inactive="<date>" value found in the page is parsed with
# date(1). When it lies more than $entries_age seconds in the past, the
# attribute value is replaced by "true".
# NOTE(review): entries_age is not set in this section - presumably it comes
# from the parsed config; confirm.
printf "Mark older posts as inactive...\n"
# Only lines that really carry a quoted value are considered (-n ... p), and
# the values are read line by line so dates containing spaces stay intact.
dates=$(
  sed -n -E -e 's/^.*data-inactive="([^"]+)".*$/\1/p' "$page_file" | sort -u
)
now=$(date +%s)
while IFS= read -r post_date; do
  [[ -n "$post_date" ]] || continue
  # Skip values date(1) cannot parse instead of computing with an empty value.
  secs=$(date +%s --date "$post_date") || continue
  if [ "$((now - secs))" -gt "$entries_age" ]; then
    # The date is matched literally: escape regex metacharacters such as the
    # "+" of a timezone offset, which would otherwise act as a quantifier.
    date_re=$(printf '%s' "$post_date" | sed -e 's#[][\\/.^$*+?(){}|]#\\&#g')
    sed -E -e "s/(data-inactive=\")${date_re}(\".*)/\1true\2/" -i "$page_file"
  fi
done <<< "$dates"
93
# mark doubled posts as non-visible
# For every distinct data-duplicate="<id>" attribute found in the page: when
# it occurs on more than one line, the attribute is rewritten to
# data-duplicate="true" on all of those lines except the last one.
printf "Remove duplicated posts...\n"
post_ids=$(
  sed -n -E -e 's/^.*(data-duplicate="[^"]+").*/\1/p' "$page_file" | sort -u
)
while IFS= read -r pid; do
  [[ -n "$pid" ]] || continue
  # Fixed-string search: ids are typically URLs, and characters such as "?"
  # or "." must not be interpreted as regex operators.
  mapfile -t hits < <(grep -n -F -- "$pid" "$page_file" | cut -d: -f1)
  (( ${#hits[@]} > 1 )) || continue
  # Escape the attribute for use as a literal pattern in sed -E.
  pid_re=$(printf '%s' "$pid" | sed -e 's#[][\\/.^$*+?(){}|]#\\&#g')
  # Collect one substitution per duplicate line (all but the last hit) and
  # apply them in a single in-place pass.
  edits=()
  for line in "${hits[@]:0:${#hits[@]}-1}"; do
    edits+=(-e "${line}s/${pid_re}/data-duplicate=\"true\"/")
  done
  sed -E "${edits[@]}" -i "$page_file"
done <<< "$post_ids"
111
# insert important feed at the top
# Only runs when sub_feed_file is set. If the sub-feed file contains at least
# one "<section" line that is not marked data-inactive="true", every
# <!--SUBFEED--> placeholder in the page is replaced by a blockquote holding
# the sub-feed title and an iframe pointing at the sub-feed file.
if [[ -n "${sub_feed_file:-}" ]]; then
  printf "Insert sub-feed if there are active entries...\n"
  active_sections=$(
    grep -- "<section" "$sub_feed_file" | grep -cv -- 'data-inactive="true"'
  )
  if [ "${active_sections:-0}" -gt 0 ]; then
    # Title and file name are inserted through a sed replacement: escape "\",
    # "/" and "&" so they are taken literally.
    sub_feed_title_escaped=$(printf '%s' "$sub_feed_title" | sed -e 's|[\\/&]|\\&|g')
    sub_feed_file_escaped=$(printf '%s' "$sub_feed_file" | sed -e 's|[\\/&]|\\&|g')
    sub_feed_injection="<blockquote><strong>${sub_feed_title_escaped}<\/strong><iframe class=\"subfeed\" src=\"${sub_feed_file_escaped}\"><\/iframe><\/blockquote>"
    sed -e "s/<\!--SUBFEED-->/$sub_feed_injection/g" -i "$page_file"
  fi
fi