commit 488cc23a3c44c3350729c44f67e810ea8e2c449f
parent 803b8dc06676219ba6cced648e33538bb6520de0
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date: Fri, 21 Mar 2025 10:34:59 +0100
feat: also transferring dates that have been re-written in the past
Diffstat:
M | fetch_and_rewrite.sh | | | 130 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
1 file changed, 88 insertions(+), 42 deletions(-)
diff --git a/fetch_and_rewrite.sh b/fetch_and_rewrite.sh
@@ -11,7 +11,7 @@ cache_file="${cache_dir}/${file_name}"
mkdir -p ./rewrites/
mkdir -p "${cache_dir}"
-# Function ---------------------------------------------------------------------
+# Functions --------------------------------------------------------------------
read_xml () {
local IFS=\>
@@ -23,7 +23,7 @@ write_xml () {
-e 's/^(\/.*$)/<\1>/g' \
-e 's/^([^<].*)\s*=>\s*(.*$)/<\1>\2/g' \
-e 's/^([^<]+$)/<\1>/g' \
- -e 's/\s+(\/)>/\1>/g' \
+ -e 's/\s+(\/)*\s*>/\1>/g' \
-e '/^[[:space:]]*$/d' | \
tee ${2} > /dev/null
}
@@ -32,8 +32,59 @@ reverse() {
tac <(echo "$@" | tr ' ' '\n') | tr '\n' ' '
}
+get_dateline_of_guid() {
+# Print the line number of the pubDate line inside the item holding a GUID.
+# 1: dguid (GUID value to look up), 2: file to search, e.g. cache_file.new
+# Output: that line number on stdout; an empty line if nothing was found.
+
+    # keep all helper variables local: callers invoke this through $(...),
+    # whose subshell inherits their variables, so a date_line left over in the
+    # caller would otherwise be echoed whenever no pubDate is found here
+    local guid_line date_lines item_line_start item_line_stop
+    local item_start item_stop date_line i
+
+    # get line numbers of GUID lines
+    guid_line=$(
+        grep -E "guid\s*=>\s*${1}" -n "${2}" | \
+            sed -E -e 's/^([0-9]+):.*/\1/g'
+    )
+
+    # get <pubDate>, <item> and </item> lines
+    date_lines=$(grep -e "^pubDate" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g')
+    item_line_start=$(grep -e "^item" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g')
+    item_line_stop=$(grep -e "^\/item" -n "${2}" | sed -E -e 's/^([0-9]+):.*/\1/g')
+
+    # get encompassing item lines
+    ## assign the last SMALLER line as the start-line of the affected item
+    for i in $item_line_start; do
+        if [ "$i" -lt "$guid_line" ]; then
+            item_start=$i
+        fi
+    done
+    ## assign the first GREATER line as the stop-line of the affected item
+    for i in $(reverse $item_line_stop); do
+        if [ "$i" -gt "$guid_line" ]; then
+            item_stop=$i
+        fi
+    done
+
+    # get pubDate line that needs to be replaced
+    for i in $date_lines; do
+        if [ "$i" -gt "$item_start" ] && [ "$i" -lt "$item_stop" ]; then
+            date_line=$i
+        fi
+    done
+
+    # return line of date-line
+    echo "$date_line"
+
+}
+
# Script -----------------------------------------------------------------------
+# Create User output
+echo "Processing ${1}..."
+
# Fetch given XML Feed
wget --quiet "$1" -O "$cache_file"
@@ -55,54 +106,48 @@ fi
# compare old to new file
if [ -f "${cache_file}.old" ]; then
- # find new GUID line in new XML file
+ # find GUID lines in new and old XML files
+ guids_new=$(
+ grep -e "^guid =>" "${cache_file}.new" | \
+ sed -e 's/^.*=>\s*//g'
+ )
+ guids_old=$(
+ grep -e "^guid =>" "${cache_file}.old" | \
+ sed -e 's/^.*=>\s*//g'
+ )
+
+ # find new GUID lines in new XML file
dguids=$(
diff "${cache_file}.old" "${cache_file}.new" | \
grep -e "^> guid" | \
sed -e 's/^>.*=>\s*//g'
)
+    # transfer dates of GUIDs in old file to the new file
+    for guid_old in $guids_old; do
+
+        # skip old GUIDs that no longer appear in the new file; -F matches the
+        # GUID literally (it may contain regex metacharacters such as "." or "[")
+        if ! printf '%s\n' "$guids_new" | grep -qF -- "$guid_old"; then
+            continue
+        fi
+        # get pubDate line numbers of this GUID's item in the old and new file
+        date_line_old=$(get_dateline_of_guid "$guid_old" "${cache_file}.old")
+        date_line_new=$(get_dateline_of_guid "$guid_old" "${cache_file}.new")
+
+        # replace date in affected line of new file; awk must be given the old
+        # file explicitly, otherwise it reads stdin instead of the cached feed
+        if [ -n "$date_line_old" ] && [ -n "$date_line_new" ]; then
+            old_date=$(awk -v n="$date_line_old" 'NR==n' "${cache_file}.old" | sed -e 's/^.*=>\s*//')
+            sed -e "${date_line_new}s/=>[^\/]*/=> ${old_date}/" -i "${cache_file}.new"
+        fi
+    done
+
+ # generate new dates for newly appearing GUIDs
for dguid in $dguids; do
- # get line number of new GUID line
- guid_line=$(
- grep -E "guid\s*=>\s*${dguid}" -n "${cache_file}.new" | \
- sed -E -e 's/^([0-9]+):.*/\1/g'
- )
-
- # get <pubDate> lines
- date_lines=$(
- grep -e "^pubDate" -n ${cache_file}.new | sed -E -e 's/^([0-9]+):.*/\1/g'
- )
-
- # get <item></item> lines
- item_line_start=$(
- grep -e "^item" -n "${cache_file}.new" | sed -E -e 's/^([0-9]+):.*/\1/g'
- )
- item_line_stop=$(
- grep -e "^\/item" -n "${cache_file}.new" | sed -E -e 's/^([0-9]+):.*/\1/g'
- )
-
- # get emcompasing item lines
- ## assign the last SMALLER line as the start-line of the affected item
- for i in $item_line_start; do
- if [ $i -lt $guid_line ]; then
- item_start=$i
- fi
- done
- ## assign the first GREATER line as the stop-line of the affected item
- for i in $(reverse $item_line_stop); do
- if [ $i -gt $guid_line ]; then
- item_stop=$i
- fi
- done
-
- # get pubDate line that needs to be replaced
- for i in $date_lines; do
- if [ $i -gt $item_start ] && [ $i -lt $item_stop ]; then
- date_line=$i
- fi
- done
+ # get line number of the pubDate line belonging to the new GUID
+ date_line=$(get_dateline_of_guid "$dguid" "${cache_file}.new")
# replace date in affected line
if [ ! -z $date_line ]; then
@@ -113,6 +158,7 @@ if [ -f "${cache_file}.old" ]; then
done
# write new XML file
+ echo " Writing into ./rewrites/${file_name}"
mkdir -p "./rewrites/"
write_xml "${cache_file}.new" "./rewrites/${file_name}"