pub / kontra

Der linke Newsaggregator.
git clone src.jayvii.de/pub/kontra.git
Home | Log | Files | Exports | Refs | README | RSS

gen_categories.php (2080B)


      1 <!--
      2 SPDX-License-Identifier: AGPL-3.0-or-later
      3 SPDX-FileCopyrightText: 2025 JayVii <jayvii+kontra[AT]posteo[DOT]de>
      4 -->
      5 <?php
      6 
      7 // Load news sources file
      8 $sources = json_decode(
      9     file_get_contents("./news-sources.json"),
     10     true
     11 );
     12 
     13 // extract categories from each news source and add it to a sorted array
     14 $categories = array();
     15 foreach ($sources["sources"] as $source) {
     16     foreach($source["categories"] as $category) {
     17         array_push($categories, $category);
     18     }
     19 }
     20 sort($categories);
     21 
     22 // Count each category value
     23 $categories_count = array_count_values($categories);
     24 
     25 // extract categories definitions
     26 $definitions = array();
     27 foreach ($sources["categories"] as $definition) {
     28     array_push($definitions, $definition["id"]);
     29 }
     30 sort($definitions);
     31 
     32 // check how often each definition exists in the news sources categories
     33 $definitions_count = array();
     34 foreach ($definitions as $definition) {
     35     $search = "/^" . $definition . "$/";
     36     $definitions_count[$definition] = count(preg_grep($search, $categories));
     37 }
     38 
     39 // check how often each news sources categories exists in the definitions
     40 $categories_exist = array();
     41 foreach ($categories as $category) {
     42     $search = "/^" . $category . "$/";
     43     $categories_exist[$category] = count(preg_grep($search, $definitions));
     44 }
     45 
     46 
     47 // Generate User output
     48 echo "####################" . PHP_EOL .
     49     "Category-Occurrence in News Sources:" . PHP_EOL;
     50 print_r($categories_count);
     51 echo "####################" . PHP_EOL .
     52     "Category-Occurrence in Definitions:" . PHP_EOL;
     53 print_r($definitions_count);
     54 
     55 // Warnings
     56 $definitions_zeros = preg_grep("/^0$/", $definitions_count);
     57 if (count($definitions_zeros) > 0) {
     58     echo "####################" . PHP_EOL .
     59         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
     60     print_r($definitions_zeros);
     61 }
     62 $categories_zeros = preg_grep("/^0$/", $categories_exist);
     63 if (count($categories_zeros) > 0) {
     64     echo "####################" . PHP_EOL .
     65         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
     66     print_r($categories_zeros);
     67 }
     68 
     69 ?>