gen_opml.sh (3570B)
1 #!/usr/bin/env bash
2 # SPDX-License-Identifier: AGPL-3.0-or-later
3 # SPDX-FileCopyrightText: 2025 JayVii <jayvii[AT]posteo[DOT]de>
4
5 # Gather input -----------------------------------------------------------------
6
# Fetch the path of the sources TSV from the "-s" flag.
# The path is exported as "sources_file", exactly as before.
while getopts "s:" opt; do
  case "$opt" in
    s)
      export sources_file="$OPTARG"
      ;;
    *)
      # Unknown flags and a missing "-s" argument have already been reported
      # by getopts itself; the check below decides whether the script can
      # continue.
      ;;
  esac
done

# Exit if the source file is not provided. The expansion is quoted so that
# paths containing whitespace do not break the test.
if [[ -z "${sources_file:-}" ]]; then
  echo "Usage: $0 -s /path/to/sources.tsv" >&2
  exit 1
fi

# Exit early if the file cannot be read, instead of carrying on and writing
# empty OPML files.
if [[ ! -r "$sources_file" ]]; then
  echo "Cannot read sources file: ${sources_file}" >&2
  exit 1
fi
21
22
23 # Functions --------------------------------------------------------------------
24
#######################################
# Print one column of a four-column, tab-separated record.
# Columns are separated by one or more tab characters.
# Arguments:
#   $1 - the record; if it holds several lines, only the last one is used
#   $2 - number (1-4) of the column to print
# Outputs:
#   The selected column on stdout. A line that does not consist of exactly
#   four non-empty columns is printed unchanged.
# Returns:
#   2 if $2 is not a column number between 1 and 4, otherwise the exit
#   status of the sed pipeline.
#######################################
parse_tsv_line() {
  # Reject anything but a plain column number before it is spliced into the
  # sed replacement below.
  case "$2" in
    [1-4]) ;;
    *)
      printf 'parse_tsv_line: invalid column "%s" (expected 1-4)\n' "$2" >&2
      return 2
      ;;
  esac

  # A literal tab is used instead of the "\t" escape, which only GNU sed
  # understands inside a regular expression.
  local tab=$'\t'
  local column="([^${tab}]+)"
  local gap="${tab}+"

  # Back-reference (\1 .. \4) that selects the requested column.
  local repl="\\${2}"

  # printf instead of echo, so that a record such as "-n" is not swallowed as
  # an echo option.
  printf '%s\n' "$1" \
    | tail -n 1 \
    | sed -E -e "s/^${column}${gap}${column}${gap}${column}${gap}${column}\$/${repl}/"
}
35
36 # Script -----------------------------------------------------------------------
37
# User output
echo "Gather information from ${sources_file} ..."

# Parse the TSV file into four parallel arrays. Entry i of name, catg, addr
# and rssa belongs to the same line of the file (columns 1 to 4).
name=()
catg=()
addr=()
rssa=()

# "read" keeps its default IFS on purpose: leading and trailing whitespace is
# stripped from each line before it is split into columns. The
# "|| [[ -n ... ]]" part also picks up a last line without trailing newline.
while read -r line || [[ -n "$line" ]]; do

  name+=("$(parse_tsv_line "$line" 1)")
  catg+=("$(parse_tsv_line "$line" 2)")
  addr+=("$(parse_tsv_line "$line" 3)")
  rssa+=("$(parse_tsv_line "$line" 4)")

done < "${sources_file}"

# Create an array of unique categories.
# NOTE(review): iteration starts at index 1, so the first line of the file is
# never used -- presumably it is a header row; confirm against sources.tsv.
# The loop stops at the last existing element instead of reading one past it.
catg_tokens=""
for ((i = 1; i < ${#catg[@]}; i++)); do

  # Collapse runs of whitespace to single spaces. Splitting with "read -a"
  # does this without exposing the value to pathname expansion.
  read -r -a words <<< "${catg[i]}"
  entry="${words[*]}"

  # Spaces become "_" so that a multi-word category stays one token; commas
  # become the token separator.
  entry="${entry// /_}"
  catg_tokens+="${entry//,/ } "

done

# Sort the tokens and keep each category once. Reading line by line avoids
# the word-splitting and globbing of an unquoted command substitution.
catg_list=()
while IFS= read -r token; do
  if [[ -n "$token" ]]; then
    catg_list+=("$token")
  fi
done < <(printf '%s' "$catg_tokens" | tr ' ' '\n' | sort -u)
62
# Define the OPML header: XML declaration, opening <opml> tag and <head>.
# The "\n" sequences are stored as literal backslash-n; they only turn into
# line breaks when the document is printed with printf.
opml_header='<?xml version="1.0" encoding="UTF-8"?>\n'
opml_header+='<opml version="2.0" xmlns="https://linkagg.jayvii.de/">\n'
opml_header+=' <head>\n'
opml_header+=' <title>LinkAgg Newsaggregator</title>\n'
# OPML 2.0 expects RFC 822 date-times, which is what "date -R" prints.
# LC_ALL=C pins English day and month names ("en" is not a valid locale name).
opml_header+=" <dateCreated>$(LC_ALL=C date -R)</dateCreated>\n"
opml_header+=' </head>\n'
70
# Build the OPML output category by category. Every category is written to
# its own file below ./opml/ and is also appended to opml_body_global, which
# the combined export uses afterwards.
mkdir -p ./opml

for cat_key in "${catg_list[@]}"; do

  # catg_list stores multi-word categories with "_" in place of spaces;
  # re-introduce the spaces for the title and the file name.
  cat="${cat_key//_/ }"

  # output for user
  echo "Generating ./opml/${cat}.opml"

  # First part of opml body definition (category outline)
  opml_body=" <outline title=\"$cat\" text=\"$cat\">\n"

  # Loop through each source individually.
  # NOTE(review): iteration starts at index 1, so the first parsed line is
  # never emitted -- presumably it is a header row; confirm against the TSV.
  # NOTE(review): values are written into XML attributes unescaped; a raw "&"
  # or double quote in the TSV yields invalid XML unless the file already
  # holds escaped text -- confirm before adding escaping here.
  for ((i = 1; i < ${#name[@]}; i++)); do

    # Collapse whitespace runs in the category column without globbing.
    read -r -a words <<< "${catg[i]}"
    src_catg="${words[*]}"

    # If the source contains the category, add it to the OPML body. Both
    # sides are compared literally in their "_" form, so regex characters
    # or underscores in a category name cannot break the match.
    if [[ ",${src_catg// /_}," == *",${cat_key},"* ]]; then

      # generate OPML line for the current source
      a="title=\"${name[i]}\""
      b="text=\"${name[i]}\""
      # every category of the source is prefixed with "/"
      c="category=\"/${src_catg//,/,/}\""
      d="htmlUrl=\"${addr[i]}\""
      e="xmlUrl=\"${rssa[i]}\""
      opml_body+=" <outline $a $b $c $d $e type=\"rss\" />\n"

    fi

  done

  # Last part of opml body definition
  opml_body+=" </outline>\n"

  # Copy current category opml body to the global string variable
  opml_body_global+="$opml_body"

  # Put the category name into the title of the per-category header. The
  # header is split around the first "</title>" instead of being piped
  # through sed, so characters such as "/" or "&" in the name are harmless.
  # NOTE(review): the text is appended to the title without a separator,
  # as in the original sed expression -- confirm that this is intended.
  title_end='</title>'
  if [[ "$opml_header" == *"$title_end"* ]]; then
    cat_header="${opml_header%%"$title_end"*}Kategorie: ${cat}${title_end}${opml_header#*"$title_end"}"
  else
    cat_header="$opml_header"
  fi

  # Export current category into its own category file. "%b" expands the
  # stored "\n" sequences while a "%" in names or URLs is printed literally.
  cat_opml="${cat_header} <body>\n${opml_body} </body>\n</opml>"
  printf '%b\n' "$cat_opml" > "./opml/${cat}.opml"

done
115
# Export of global OPML file: the shared header followed by the outlines of
# all categories collected in opml_body_global.
echo "Generating ./opml/linkagg.opml"

# Make sure the output directory exists before writing into it.
mkdir -p ./opml

opml="${opml_header} <body>\n${opml_body_global} </body>\n</opml>"

# "%b" expands the stored "\n" sequences; the document itself is passed as an
# argument rather than as the format string, so a "%" in names or URLs is
# printed literally.
printf '%b\n' "$opml" > "./opml/linkagg.opml"

echo "Done!"