pub / newsplanet

Planet-Style Newsfeed generated with perlanet
git clone https://src.jayvii.de/pub/newsplanet.git
Home | Log | Files | Exports | Refs | README | RSS

run.sh (4244B)


      1 #!/usr/bin/env bash
      2 # SPDX-License-Identifier: AGPL-3.0-or-later
      3 # SPDX-FileCopyrightText: 2024-2025 JayVii <jayvii[AT]posteo[DOT]de>
      4 
      5 # fetch input config file, fall back to "centre"
      6 if [ -z $1 ]; then
      7   for conf in $(find ./perlanetrc/ -type f | sort -r); do ${0} "$conf"; done
      8   exit 0;
      9 else
     10   config="$1"
     11 fi
     12 
     13 # Gather various information from config file
     14 printf "Gather information from ${config}...\n"
     15 source "./scripts/yaml.sh" > /dev/null
     16 eval "$(parse_yaml ${config})"
     17 
     18 # Generate site
     19 printf "Fetching feeds and generating html...\n"
     20 perlanet "$config"
     21 
     22 # Clean up HTML from tags within the <!--start--> / <!--end--> comments
     23 # 1. introduce line-breaks for each existing paragraph
     24 # 2. removes anchor text at the end of the post (typically "read more...")
     25 # 3. removes text within <figcaption></figcaption>
     26 # 4. removes all HTML tags
     27 # 5. creates paragraphs line-wise
     28 # 6. removes empty paragraphs
     29 printf "Clean up HTML...\n"
     30 sed -E -e 's/(<\/p>)/\1\n/g' -i "$page_file"
     31 start_lines=($(grep -n "<\!--start-->" "$page_file" | sed -e 's/:.*//g'))
     32 stop_lines=($(grep -n "<\!--end-->" "$page_file" | sed -e 's/:.*//g'))
     33 for i in $(seq 0 1 $((${#start_lines[@]} - 1))); do
     34   for line in $(seq ${start_lines[$i]} 1 ${stop_lines[$i]}); do
     35     sed -E \
     36       -e "${line}s/<a\ [^<]+<\/a><\!--end-->/<\!--end-->/g" \
     37       -e "${line}s/<figcaption.*<\/figcaption>//g" \
     38       -e "${line}s/<[^<]+>//g" \
     39       -e "${line}s/^/<p>/" \
     40       -e "${line}s/$/<\/p>/" \
     41       -e "${line}s/<p>\s*\t*(<\!--(start|end)-->)*<\/p>/\1/g" \
     42       -i "$page_file"
     43   done
     44 done
     45 # re-apply <!--start--> / <!--end--> comments
     46 # sed -E \
     47 #   -e "${start_lines[0]}s/^/<\!--start\-\->/g" \
     48 #   -e "${stop_lines[0]}s/$/<\!--end-->/g" \
     49 #   -i "$page_file"
     50   
     51 # insert link to rss/xml file
     52 printf "Inserting RSS feed file...\n"
     53 sed -E -e "s/<\!--XML-->/$feed_file/g" -i "$page_file"
     54 
     55 # insert link to opml/xml file
     56 printf "Inserting OPML feed file...\n"
     57 sed -E -e "s/<\!--OPML-->/$opml_file/g" -i "$page_file"
     58 
     59 # insert link to manifest JSON file
     60 printf "Inserting manifest file...\n"
     61 manifest_file=$(echo "$manifest_file" | sed -e 's/\//\\\//g')
     62 sed -E -e "s/<\!--MANIFEST-->/$manifest_file/" -i "$page_file"
     63 
     64 # insert feed domains
     65 sed -E \
     66   -e 's/<\!--DOMAIN\ src=\"https:\/\/([^\/]+)[^\"]*(\"[^>]*>)-->/\1/g' \
     67   -i "$page_file"
     68 
     69 # Insert feeds list
     70 # printf "Inserting feeds list...\n"
     71 # feeds="<ul>"
     72 # for i in $(seq 0 1 $((${#feeds__title[@]} - 1))); do
     73 #   feeds="${feeds}<li><a rel=\"nofollow\" target=\"_blank\" href=\"${feeds__web[$i]}\">${feeds__title[$i]}</a></li>"
     74 # done
     75 # feeds=`echo "${feeds}</ul>" | sed -e 's/\//\\\\\//g' -ze 's/\n//g'`
     76 # sed -E -e "s/<\!--FEEDS-->/$feeds/" -i "$page_file"
     77 
     78 # Insert Update time
     79 printf "Inserting update time...\n"
     80 now=`date +%c`
     81 sed -E -e "s/<\!--UPDATED-->/${now}/" -i "$page_file"
     82 
     83 # mark posts older than threshold as "inactive"
     84 printf "Mark older posts as inactive...\n"
     85 dates=$(grep -e "data-inactive=" $page_file | sed -E -e 's/^.*data-inactive="([^"]+)".*$/\1/g' | uniq)
     86 now=$(date +%s)
     87 for date in $dates; do
     88   secs=$(date +%s --date "$date")
     89   if [ $(($now - $secs)) -gt $entries_age ]; then
     90     sed -E -e "s/(data-inactive=\")$date(\".*)/\1true\2/" -i $page_file
     91   fi
     92 done
     93 
     94 # mark doubled posts as non-visible
     95 printf "Remove duplicated posts...\n"
     96 post_ids=$(
     97   grep "data-duplicate=" $page_file | \
     98   sed -E -e 's/^.*(data-duplicate="[^"]+").*/\1/g' -e 's/(\/|\?)/\\\1/g' | \
     99   uniq
    100 )
    101 for pid in $post_ids; do
    102   # check whether post-id appears multiple times
    103   if [ $(grep "$pid" -c $page_file) -gt 1 ]; then
    104     # if it does, mark all, except for the last one as duplicate
    105     lines=$(grep -n "$pid" $page_file | head -n -1 | sed -e 's/:.*//g')
    106     for line in $lines; do
    107       sed -E -e "${line}s/${pid}/data-duplicate=\"true\"/" -i $page_file
    108     done
    109   fi
    110 done
    111 
    112 # insert important feed at the top
    113 if [ ! -z $sub_feed_file ]; then
    114   printf "Insert sub-feed if there are active entries...\n"
    115   if [ $(grep "<section" $sub_feed_file | grep -cv "data-inactive=\"true\"") -gt 0 ]; then
    116     sub_feed_injection="<blockquote><strong>$sub_feed_title<\/strong><iframe class=\"subfeed\" src=\"$sub_feed_file\"><\/iframe><\/blockquote>"
    117     sed -e "s/<\!--SUBFEED-->/$sub_feed_injection/g" -i $page_file
    118   fi
    119 fi