pub / linkagg

OPML sources for the left news aggregator
git clone src.jayvii.de/pub/linkagg.git
Home | Log | Files | Exports | Refs | README | RSS

gen_opml.sh (3570B)


      1 #!/usr/bin/env bash
      2 # SPDX-License-Identifier: AGPL-3.0-or-later
      3 # SPDX-FileCopyrightText: 2025 JayVii <jayvii[AT]posteo[DOT]de>
      4 
      5 # Gather input -----------------------------------------------------------------
      6 
      7 # fetch source file from the "-s " flag
      8 while getopts "s:" opt; do
      9   case $opt in
     10     s)
     11       export sources_file="$OPTARG"
     12       ;;
     13   esac
     14 done
     15 
     16 # exit if sourcefile is not provided
     17 if [ -z $sources_file ]; then
     18   echo "Usage: $0 -s /path/to/sources.tsv" > /dev/stderr
     19   exit 1
     20 fi
     21 
     22 
     23 # Functions --------------------------------------------------------------------
     24 
     25 function parse_tsv_line {
     26 
     27   # escape input as replacement string for regex
     28   local repl="\\${2}"
     29 
     30   # split up single-line string into its four parts and return according part
     31   echo "$1" | \
     32     tail -n 1 | \
     33     sed -E -e "s/^([^\t]+)\t+([^\t]+)\t+([^\t]+)\t+([^\t]+)$/${repl}/"
     34 }
     35 
     36 # Script -----------------------------------------------------------------------
     37 
     38 # User output
     39 echo "Gather information from ${sources_file} ..."
     40 
     41 # parse TSV file and assign to arrays
     42 while read -r line; do
     43 
     44   name_tmp=$(parse_tsv_line "$line" 1)
     45   catg_tmp=$(parse_tsv_line "$line" 2)
     46   addr_tmp=$(parse_tsv_line "$line" 3)
     47   rssa_tmp=$(parse_tsv_line "$line" 4)
     48   name+=("$name_tmp")
     49   catg+=("$catg_tmp")
     50   addr+=("$addr_tmp")
     51   rssa+=("$rssa_tmp")
     52 
     53 done < ${sources_file}
     54 
     55 # create an array of unique categories
     56 ## create space separated single string of categories
     57 for i in $(seq 1 1 ${#catg[@]}); do
     58   catg_list+="$(echo ${catg[$i]} | sed -e 's/\s/_/g' -e 's/,/\ /g') "
     59 done
     60 ## sort and return only unique categories into array
     61 catg_list=($(echo "$catg_list" | tr ' ' '\n' | sort -u | tr '\n' ' '))
     62 
     63 # define OPML header
     64 opml_header="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
     65 opml_header+="<opml version=\"2.0\" xmlns=\"https://linkagg.jayvii.de/\">\n"
     66 opml_header+="  <head>\n"
     67 opml_header+="    <title>LinkAgg Newsaggregator</title>\n"
     68 opml_header+="    <dateCreated>$(LC_ALL=en date)</dateCreated>\n"
     69 opml_header+="  </head>\n"
     70 
     71 # build OPML file category-by-category
     72 for cat in ${catg_list[@]}; do
     73 
     74   # re-introduce spaces into categories again
     75   cat=$(echo "$cat" | tr '_' ' ')
     76 
     77   # output for user
     78   echo "Generating ./opml/${cat}.opml"
     79 
     80   # First part of opml body definition (category outline)
     81   opml_body="    <outline title=\"$cat\" text=\"$cat\">\n"
     82 
     83   # Loop through each source individually
     84   for i in $(seq 1 1 ${#name[@]}); do
     85 
     86     # If source contains the category, add it to the OPML body
     87     if [ $(echo ${catg[$i]} | grep -c -E "(^|,)${cat}($|,)") -gt 0 ]; then
     88 
     89       # generate OPML line for the current source
     90       a="title=\"${name[$i]}\""
     91       b="text=\"${name[$i]}\""
     92       c="category=\"$(echo ${catg[$i]} | sed -E -e 's/(^|,)/\1\//g')\""
     93       d="htmlUrl=\"${addr[$i]}\""
     94       e="xmlUrl=\"${rssa[$i]}\""
     95       opml_body+="      <outline $a $b $c $d $e type=\"rss\" />\n"
     96 
     97     fi
     98 
     99   done
    100 
    101   # Last part of opml body definition
    102   opml_body+="    </outline>\n"
    103 
    104   # Copy current category opml body to the global string variable
    105   opml_body_global+="$opml_body"
    106 
    107   # export current category into its own category file
    108   cat_header=$(
    109     echo "$opml_header" | sed -E -e "s/(<\/title>)/Kategorie: ${k}\1/"
    110   )
    111   cat_opml="${cat_header}  <body>\n${opml_body}  </body>\n</opml>"
    112   printf "${cat_opml}\n" | tee "./opml/${cat}.opml" > /dev/null
    113 
    114 done
    115 
    116 # Export of global OPML file
    117 echo "Generating ./opml/linkagg.opml"
    118 opml="${opml_header}${opml_head}  <body>\n${opml_body_global}  </body>\n</opml>"
    119 printf "${opml}\n" | tee "./opml/linkagg.opml" > /dev/null
    120 
    121 echo "Done!"