pub / yt2html

Fetches YouTube content via RSS and provides a chronological timeline
git clone https://src.jayvii.de/pub/yt2html.git

yt.R (6330B)


#!/usr/bin/env Rscript
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-FileCopyrightText: 2021-2024 JayVii <jayvii[AT]posteo[DOT]de>

# Stop the script after 300 seconds, whether it is done or not
setTimeLimit(elapsed = 300)

# Load Packages ----------------------------------------------------------------
if (!require("tidyRSS")) {
    install.packages("tidyRSS")
    library("tidyRSS")
}
if (!require("textutils")) {
    install.packages("textutils")
    library("textutils")
}

# Load URLs --------------------------------------------------------------------
channels <- as.character(
    read.csv(file = "./url.csv", header = TRUE, sep = ",")$url
)
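# Expected layout of url.csv (a sketch; only the "url" header is required by the
# read.csv() call above, and the channel IDs below are placeholders): one
# YouTube channel feed URL per row, each carrying a "channel_id=" parameter.
#
#   url
#   https://www.youtube.com/feeds/videos.xml?channel_id=UCxxxxxxxxxxxxxxxxxxxxxx
#   https://www.youtube.com/feeds/videos.xml?channel_id=UCyyyyyyyyyyyyyyyyyyyyyy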
# placeholder result object (overwritten below once the feeds have been fetched)
data <- matrix(
    data = NA,
    nrow = 1,
    ncol = 7,
    dimnames = list(
        NULL,
        c("title", "url", "author", "date", "time", "vid", "img")
    )
)

# functions --------------------------------------------------------------------

fetch.yt <- function(channel) {

    # fetch RSS feed
    channel_data <- tidyRSS::tidyfeed(
        channel,
        clean_tags = TRUE,
        list = TRUE,
        parse_dates = FALSE
    )

    # extract the information required for constructing the HTML
    video_dat <- data.frame(
        # channel ID: everything after "channel_id=" in the feed URL
        cid = gsub(
            x = channel,
            pattern = "^.*channel_id=",
            replacement = ""
        ),
        title = channel_data$entries$entry_title,
        url = channel_data$entries$entry_link,
        author = channel_data$meta$feed_title,
        # date: the published timestamp with everything from "T" onwards removed
        date = gsub(
            x = channel_data$entries$entry_published,
            pattern = "T.*$",
            replacement = ""
        ),
        # time: the HH:MM:SS part between "T" and the timezone offset
        time = gsub(
            x = channel_data$entries$entry_published,
            pattern = "^.*T|\\+.*$",
            replacement = ""
        ),
        # video ID: everything after "?v=" in the watch URL
        vid = gsub(
            x = channel_data$entries$entry_link,
            pattern = "^.*\\?v=",
            replacement = ""
        ),
        # thumbnail URL on the ytimg CDN, derived from the video ID
        img = paste0(
            gsub(
                x = channel_data$entries$entry_link,
                pattern = "^.*\\?v=",
                replacement = "https:\\/\\/i4.ytimg.com\\/vi\\/"
            ),
            "/hqdefault.jpg"
        )
    )

    # return video data
    return(video_dat)
}
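# Usage sketch for fetch.yt() (not executed by the script; the channel_id below
# is a placeholder and network access is assumed):
# one_channel <- fetch.yt(
#     "https://www.youtube.com/feeds/videos.xml?channel_id=UCxxxxxxxxxxxxxxxxxxxxxx"
# )
# str(one_channel)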

# fetch data -------------------------------------------------------------------

video_dat <- list()
for (i in seq_along(channels)) {
    cat(paste("Fetching:", as.character(channels[i]), "\n"))
    video_dat[[i]] <- tryCatch(fetch.yt(channels[i]), error = function(e) NULL)
}
data <- do.call(rbind, video_dat)
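# NOTE: channels whose fetch failed are NULL entries in video_dat; rbind()
# silently drops those, so "data" only contains channels that returned data.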

# encode text for ASCII compatibility
for (var in c("author", "title")) {
    data[, var] <- textutils::HTMLencode(data[, var])
}

# edit data --------------------------------------------------------------------

# sorting according to date and time
dates <- as.numeric(
    gsub(x = data[, "date"], pattern = "-", replacement = "")
)
times <- as.numeric(
    gsub(x = data[, "time"], pattern = ":", replacement = "")
)
data <- data[rev(order(dates, times, na.last = FALSE)), ]
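# e.g. "2024-05-01" becomes 20240501 and "18:30:05" becomes 183005; ordering by
# these numbers and reversing puts the newest videos first, while entries
# without a parseable date (NA) end up last.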

# construct per channel HTML ---------------------------------------------------

# unique vectors of the channels that returned some data
channel_id <- unique(data[, "cid"])
channel_name <- unique(data[, "author"])


# initialise entry-per-channel object
entry_pc <- list()

# fill in entry-per-channel object
for (chan in seq_along(channel_id)) {

    # choose entries for the current channel
    entries <- which(data[, "cid"] == channel_id[chan])

    # fill entry-per-channel object with contents from the current channel
    entry_pc[[chan]] <- paste0(
        "<section id=\"entry_", seq_along(entries), "\">",
        "<h2>",
        "<a href=\"", data[entries, "url"], "\">",
        data[entries, "title"],
        "</a>",
        "</h2>",
        "<div class=\"thumbnails\">",
        "<img src=\"", data[entries, "img"], "\"",
        " id=\"thumbnail_", seq_along(entries), "\" loading=\"lazy\"",
        " onclick=embed_yt(\"",
        seq_along(entries), "\",\"", data[entries, "vid"],
        "\")",
        ">",
        "</div>",
        "<p>",
        "<a class=\"button\" href=\"./", data[entries, "cid"], ".html\">",
        data[entries, "author"],
        "</a>",
        " on ", data[entries, "date"], " ", data[entries, "time"],
        "</p>",
        "</section>"
    )

}

names(entry_pc) <- channel_id
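# each element of entry_pc is a character vector holding one "<section>" string
# per video of that channel; it is written out as the channel's own page below,
# and its length doubles as the video count shown in the channel list.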

# construct main HTML ---------------------------------------------------------

# only list the first "n" entries (at most 500, or all rows if there are fewer)
n <- seq_len(min(500, nrow(data)))

entry <- paste0(
    "<section id=\"entry_", n, "\">",
    "<h2>",
    "<a href=\"", data[n, "url"], "\">",
    data[n, "title"],
    "</a>",
    "</h2>",
    "<div class=\"thumbnails\">",
    "<img src=\"", data[n, "img"], "\"",
    " id=\"thumbnail_", n, "\" loading=\"lazy\"",
    " onclick=embed_yt(\"", n, "\",\"", data[n, "vid"], "\")",
    ">",
    "</div>",
    "<p>",
    "<a class=\"button\" href=\"./", data[n, "cid"], ".html\">",
    data[n, "author"],
    "</a>",
    " on ", data[n, "date"], " ", data[n, "time"],
    "</p>",
    "</section>"
)
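# NOTE: embed_yt() is not defined in this script; it is assumed to be a
# JavaScript function supplied by template.html which, given the entry index and
# the video ID, presumably replaces the clicked thumbnail with an embedded player.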

# additional HTML -------------------------------------------------------------

# Load template and fill in page title
template <- paste0(readLines("./template.html", encoding = "UTF-8"), "\n")
template <- sub(x = template, pattern = "%%TITLE%%", replacement = "Video-Feed")

# insert content for top of the page
top <- paste0("<p>Last Updated: ", Sys.time(), "</p><hr>")
top_mainfeed <- paste0(
    "<details>",
    "<summary>Channel List</summary>",
    paste0(
        "<a href=\"", channel_id, ".html\">",
        channel_name,
        " (", sapply(X = entry_pc, FUN = length), ")",
        "</a>",
        collapse = "<br>"
    ),
    "</details><hr>"
)
bottom <- paste0("</body></html>")

# print files -------------------------------------------------------------------

# write the main feed page
html_output <- file("index.html", open = "wt", encoding = "UTF-8")
sink(html_output)
cat(
    template, "\n",
    top, "\n",
    top_mainfeed, "\n",
    entry, "\n",
    bottom, "\n"
)
sink()
close(html_output)

# write one page per channel
for (cid in channel_id) {
    html_output <- file(
        paste0(cid, ".html"), open = "wt", encoding = "UTF-8"
    )
    sink(html_output)
    cat(
        template, "\n",
        top, "\n",
        top_mainfeed, "\n",
        entry_pc[[cid]], "\n",
        bottom, "\n"
    )
    sink()
    close(html_output)
}

# EOF yt.R