pub / newsplanet

Planet-Style Newsfeed generated with perlanet
git clone https://src.jayvii.de/pub/newsplanet.git
Home | Log | Files | Exports | Refs | README | RSS

commit 488cc23a3c44c3350729c44f67e810ea8e2c449f
parent 803b8dc06676219ba6cced648e33538bb6520de0
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date:   Fri, 21 Mar 2025 10:34:59 +0100

feat: also transferring dates that have been re-written in the past

Diffstat:
M fetch_and_rewrite.sh | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 88 insertions(+), 42 deletions(-)

diff --git a/fetch_and_rewrite.sh b/fetch_and_rewrite.sh @@ -11,7 +11,7 @@ cache_file="${cache_dir}/${file_name}" mkdir -p ./rewrites/ mkdir -p "${cache_dir}" -# Function --------------------------------------------------------------------- +# Functions -------------------------------------------------------------------- read_xml () { local IFS=\> @@ -23,7 +23,7 @@ write_xml () { -e 's/^(\/.*$)/<\1>/g' \ -e 's/^([^<].*)\s*=>\s*(.*$)/<\1>\2/g' \ -e 's/^([^<]+$)/<\1>/g' \ - -e 's/\s+(\/)>/\1>/g' \ + -e 's/\s+(\/)*\s*>/\1>/g' \ -e '/^[[:space:]]*$/d' | \ tee ${2} > /dev/null } @@ -32,8 +32,59 @@ reverse() { tac <(echo "$@" | tr ' ' '\n') | tr '\n' ' ' } +get_dateline_of_guid() { +# 1: dguid, 2: cache_file.new + + # get line numbers of GUID lines + guid_line=$( + grep -E "guid\s*=>\s*${1}" -n "${2}" | \ + sed -E -e 's/^([0-9]+):.*/\1/g' + ) + + # get <pubDate> lines + date_lines=$( + grep -e "^pubDate" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g' + ) + + # get <item></item> lines + item_line_start=$( + grep -e "^item" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g' + ) + item_line_stop=$( + grep -e "^\/item" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g' + ) + + # get emcompasing item lines + ## assign the last SMALLER line as the start-line of the affected item + for i in $item_line_start; do + if [ $i -lt $guid_line ]; then + item_start=$i + fi + done + ## assign the first GREATER line as the stop-line of the affected item + for i in $(reverse $item_line_stop); do + if [ $i -gt $guid_line ]; then + item_stop=$i + fi + done + + # get pubDate line that needs to be replaced + for i in $date_lines; do + if [ $i -gt $item_start ] && [ $i -lt $item_stop ]; then + date_line=$i + fi + done + + # return line of date-line + echo "$date_line" + +} + # Script ----------------------------------------------------------------------- +# Create User output +echo "Processing ${1}..." 
+ # Fetch given XML Feed wget --quiet "$1" -O "$cache_file" @@ -55,54 +106,48 @@ fi # compare old to new file if [ -f "${cache_file}.old" ]; then - # find new GUID line in new XML file + # find GUID lines in new and old XML files + guids_new=$( + grep -e "^guid =>" "${cache_file}.new" | \ + sed -e 's/^.*=>\s*//g' + ) + guids_old=$( + grep -e "^guid =>" "${cache_file}.old" | \ + sed -e 's/^.*=>\s*//g' + ) + + # find new GUID lines in new XML file dguids=$( diff "${cache_file}.old" "${cache_file}.new" | \ grep -e "^> guid" | \ sed -e 's/^>.*=>\s*//g' ) + # transfer dates of GUIDs in old file to the new file + for guid_old in $guids_old; do + + # if current old GUID is not present in new GUIDs, skip it! + if [ -z $(echo "$guids_new" | grep "$guid_old") ]; then + continue + fi + + # get line number of old and new GUID line + date_line_old=$(get_dateline_of_guid "$guid_old" "${cache_file}.old") + date_line_new=$(get_dateline_of_guid "$guid_old" "${cache_file}.new") + + # replace date in affected line of new file + if [ ! -z $date_line_old ] && [ ! 
-z $date_line_new ]; then + old_date=$(awk "NR==$date_line_old" | sed -e 's/^.*=>\s*//') + sed -e "${date_line_new}s/=>[^\/]*/=> ${old_date}/" -i "${cache_file}.new" + fi + + done + + # generate new dates for newly appearing GUIDs for dguid in $dguids; do - # get line number of new GUID line - guid_line=$( - grep -E "guid\s*=>\s*${dguid}" -n "${cache_file}.new" | \ - sed -E -e 's/^([0-9]+):.*/\1/g' - ) - - # get <pubDate> lines - date_lines=$( - grep -e "^pubDate" -n ${cache_file}.new | sed -E -e 's/^([0-9]+):.*/\1/g' - ) - - # get <item></item> lines - item_line_start=$( - grep -e "^item" -n "${cache_file}.new" | sed -E -e 's/^([0-9]+):.*/\1/g' - ) - item_line_stop=$( - grep -e "^\/item" -n "${cache_file}.new" | sed -E -e 's/^([0-9]+):.*/\1/g' - ) - - # get emcompasing item lines - ## assign the last SMALLER line as the start-line of the affected item - for i in $item_line_start; do - if [ $i -lt $guid_line ]; then - item_start=$i - fi - done - ## assign the first GREATER line as the stop-line of the affected item - for i in $(reverse $item_line_stop); do - if [ $i -gt $guid_line ]; then - item_stop=$i - fi - done - - # get pubDate line that needs to be replaced - for i in $date_lines; do - if [ $i -gt $item_start ] && [ $i -lt $item_stop ]; then - date_line=$i - fi - done + # get line number of new GUID lines + date_line=$(get_dateline_of_guid "$dguid" "${cache_file}.new") # replace date in affected line if [ ! -z $date_line ]; then @@ -113,6 +158,7 @@ if [ -f "${cache_file}.old" ]; then done # write new XML file + echo " Writing into ./rewrites/${file_name}" mkdir -p "./rewrites/" write_xml "${cache_file}.new" "./rewrites/${file_name}"