pub / newsplanet

Planet-Style Newsfeed generated with perlanet
git clone https://src.jayvii.de/pub/newsplanet.git
Home | Log | Files | Exports | Refs | README | RSS

commit ccc1b19aaad3e3eb1c7967e6357b13beb0e426a9
parent dc64999eb312914a47bbbd95ebcc87ed170609e2
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date:   Wed, 19 Mar 2025 20:15:57 +0100

feat: rewrite dates for certain feeds

Diffstat:
M 01_pulse.yaml | 7 +++++--
A fetch_and_rewrite.sh | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A rewrite.txt | 4 ++++
3 files changed, 122 insertions(+), 2 deletions(-)

# diff --git a/01_pulse.yaml b/01_pulse.yaml
# @@ -53,11 +53,14 @@ feeds:
#      url: https://www.nd-aktuell.de/rss/wirtschaft-umwelt.xml
#      web: https://www.nd-aktuell.de/rubrik/wirtschaft/
#    - title: junge Welt (Kapital und Arbeit)
# -    url: https://www.jungewelt.de/feeds/kapital_arbeit.xml
# +    url: file:./rewrite/www.jungewelt.de_feeds_kapital_arbeit.xml
#      web: https://www.jungewelt.de/aktuell/rubrik/kapital_und_arbeit.php
#    - title: junge Welt (Inland)
# -    url: https://www.jungewelt.de/feeds/inland.xml
# +    url: file:./rewrite/www.jungewelt.de_feeds_inland.xml
#      web: https://www.jungewelt.de/aktuell/rubrik/inland.php
# +  - title: junge Welt (Ausland)
# +    url: file:./rewrite/www.jungewelt.de_feeds_ausland.xml
# +    web: https://www.jungewelt.de/aktuell/rubrik/ausland.php
#    - title: Taz (Politik)
#      url: https://taz.de/Politik/!p4615;rss/
#      web: https://taz.de/Politik/!p4615/
#
# diff --git a/fetch_and_rewrite.sh b/fetch_and_rewrite.sh
# (new file; shown below without the leading "+" diff markers so that it can
# be run as-is)

#!/usr/bin/env bash
#
# fetch_and_rewrite.sh - download an RSS feed and stamp items that were not
# present in the previous download with the current time.
#
# Usage: fetch_and_rewrite.sh FEED_URL
#
# Steps:
#   1. FEED_URL is downloaded to "${cache_dir}/<name>", where <name> is the URL
#      without its http(s):// scheme and with every "/" replaced by "_".
#   2. The feed is copied to "${output_dir}/<name>". For every <item> whose
#      <guid> is not listed in "${cache_dir}/<name>.old", the text of the first
#      <pubDate> is replaced by the current time. Everything else is copied
#      byte for byte.
#   3. The GUIDs of the current download become the new "<name>.old".
#
# On the very first run (no "<name>.old" yet) the feed is written unchanged, so
# that a full feed is not flooded with "new" items.
#
# Exit status: 0 on success, 1 on download/processing errors, 2 on usage errors.
# Requires: bash, wget, awk, sed, date.

# No "set -e": every step that can fail is checked explicitly below.
set -u -o pipefail

# Config -----------------------------------------------------------------------

# cache directory (raw download and the GUID list of the previous run)
readonly cache_dir="/tmp/cache/newsplanet/rewrite"

# directory the rewritten feeds are written to, relative to the working dir
# NOTE(review): 01_pulse.yaml (same commit) reads "file:./rewrite/<name>"
# (without trailing "s") while this script writes to "./rewrites/". One of the
# two probably has to change; the original path is kept here - TODO confirm.
readonly output_dir="./rewrites"

# RFC 822 style timestamp as used by <pubDate>; %M is minutes (%m is the month)
readonly pubdate_format='+%a, %d %b %Y %H:%M:%S %z'

# Functions --------------------------------------------------------------------

#######################################
# Derive the cache/output file name from a feed URL.
# Arguments: $1 - feed URL
# Outputs:   URL without "http://" or "https://" and with "/" replaced by "_"
#######################################
feed_file_name() {
  printf '%s\n' "$1" | sed -e 's/https*:\/\///' -e 's/\//_/g'
}

#######################################
# Print a timestamp in <pubDate> format with English day and month names.
# Arguments: $1 - optional time specification passed to "date -d"
#                 (default: now)
# Outputs:   e.g. "Wed, 19 Mar 2025 20:15:57 +0100"
#######################################
rfc822_date() {
  if (( $# > 0 )); then
    LC_ALL=C date -d "$1" "$pubdate_format"
  else
    LC_ALL=C date "$pubdate_format"
  fi
}

#######################################
# Copy an RSS document from stdin to stdout, replacing the <pubDate> text of
# every <item> whose <guid> is not yet known.
#
# The input is split at "<", so every record is "tag>text". Records outside of
# an <item> are printed immediately; the records of an item are buffered until
# its closing tag so the date can still be changed once the GUID is known.
#
# Arguments:
#   $1 - file with known GUIDs, one per line; lines in the older cache format
#        ("guid ... => GUID") are accepted too. Empty string: rewrite nothing.
#   $2 - file that receives the GUIDs found in the input (appended by awk;
#        the caller truncates it beforehand)
#   $3 - replacement text for <pubDate>
# Returns: exit status of awk
#######################################
rewrite_feed() {
  local known_file=$1 guid_out=$2 new_date=$3

  awk -v known_file="$known_file" -v guid_out="$guid_out" \
      -v new_date="$new_date" '
    # Tag name that opens a record, e.g. "guid" for "guid isPermaLink=...>x".
    function tag_name(rec,    gt, tag) {
      gt = index(rec, ">")
      tag = (gt > 0) ? substr(rec, 1, gt - 1) : rec
      sub(/[ \t\r\n].*/, "", tag)
      return tag
    }

    # Trim surrounding whitespace and an optional CDATA wrapper.
    function clean_guid(g) {
      gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", g)
      if (substr(g, 1, 9) == "<![CDATA[" && substr(g, length(g) - 2) == "]]>") {
        g = substr(g, 10, length(g) - 12)
      }
      return g
    }

    # Print the buffered item, with a new date if its GUID is unknown.
    function flush_item(    i, fresh, gt) {
      guid = clean_guid(guid)
      if (guid != "" && guid_out != "") {
        print guid > guid_out
      }
      fresh = (have_known && guid != "" && !(guid in known))
      if (fresh && date_at) {
        gt = index(buf[date_at], ">")
        if (gt > 0) {
          buf[date_at] = substr(buf[date_at], 1, gt) new_date
        }
      }
      for (i = 1; i <= n; i++) {
        # drop markup that was nested inside the old date (e.g. CDATA)
        if (fresh && date_end && i > date_at && i < date_end) {
          continue
        }
        printf "<%s", buf[i]
      }
      in_item = 0
    }

    BEGIN {
      have_known = 0
      if (known_file != "") {
        have_known = 1
        # read line by line, i.e. before RS is changed below
        while ((getline line < known_file) > 0) {
          sub(/^guid[^>]* => /, "", line)   # older cache format
          known[line] = 1
        }
        close(known_file)
      }
      RS = "<"
    }

    # text in front of the first "<" (usually empty)
    NR == 1 {
      printf "%s", $0
      next
    }

    {
      # Records inside a CDATA section are text, not tags.
      name = ""
      if (in_cdata) {
        if (index($0, "]]>") > 0) in_cdata = 0
      } else if (substr($0, 1, 8) == "![CDATA[") {
        if (index($0, "]]>") == 0) in_cdata = 1
      } else {
        name = tag_name($0)
      }

      if (!in_item) {
        if (name != "item") {
          printf "<%s", $0
          next
        }
        in_item = 1; in_guid = 0; n = 0
        guid = ""; date_at = 0; date_end = 0
      }

      buf[++n] = $0
      if (in_guid) {
        if (name == "/guid") in_guid = 0
        else guid = guid "<" $0
      } else if (name == "guid") {
        in_guid = 1
        guid = substr($0, index($0, ">") + 1)
      } else if (name == "pubDate" && !date_at) {
        date_at = n
      } else if (name == "/pubDate" && date_at && !date_end) {
        date_end = n
      } else if (name == "/item") {
        flush_item()
      }
    }

    END {
      # truncated feed: emit the unfinished item unchanged
      if (in_item) {
        for (i = 1; i <= n; i++) printf "<%s", buf[i]
      }
    }
  '
}

#######################################
# Fetch one feed, rewrite the dates of new items and update the cache.
# Globals:   cache_dir, output_dir (read)
# Arguments: $1 - feed URL
# Returns:   0 on success, 1 on error, 2 on missing argument
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'usage: %s FEED_URL\n' "${0##*/}" >&2
    return 2
  fi

  local url=$1 file_name cache_file out_file known_file=""
  file_name=$(feed_file_name "$url") || return 1
  cache_file="${cache_dir}/${file_name}"
  out_file="${output_dir}/${file_name}"

  mkdir -p -- "$cache_dir" "$output_dir" || return 1

  # Download into a scratch file first: "wget -O" truncates its target even
  # when the download fails, and an empty feed must never replace the cache.
  if ! wget --quiet -O "${cache_file}.part" -- "$url" \
      || [[ ! -s "${cache_file}.part" ]]; then
    printf '%s: could not fetch %s\n' "${0##*/}" "$url" >&2
    rm -f -- "${cache_file}.part"
    return 1
  fi
  mv -- "${cache_file}.part" "$cache_file" || return 1

  # Without a previous GUID list nothing is considered new.
  if [[ -f "${cache_file}.old" ]]; then
    known_file="${cache_file}.old"
  fi

  : > "${cache_file}.new" || return 1
  if ! rewrite_feed "$known_file" "${cache_file}.new" "$(rfc822_date)" \
      < "$cache_file" > "${out_file}.tmp"; then
    printf '%s: could not rewrite %s\n' "${0##*/}" "$url" >&2
    rm -f -- "${out_file}.tmp" "${cache_file}.new"
    return 1
  fi

  # Rename into place so a reader never sees a half-written feed.
  mv -- "${out_file}.tmp" "$out_file" || return 1
  mv -- "${cache_file}.new" "${cache_file}.old"
}

# Script -----------------------------------------------------------------------

main "$@"

# diff --git a/rewrite.txt b/rewrite.txt
# @@ -0,0 +1,4 @@
# +https://www.jungewelt.de/feeds/kapital_arbeit.xml
# +https://www.jungewelt.de/feeds/inland.xml
# +https://www.jungewelt.de/feeds/ausland.xml
# +