pub / kontra

Der linke Newsaggregator.
git clone src.jayvii.de/pub/kontra.git
Home | Log | Files | Exports | Refs | README | RSS

gen_categories.php (6846B)


      1 <!--
      2 SPDX-License-Identifier: AGPL-3.0-or-later
      3 SPDX-FileCopyrightText: 2025 JayVii <jayvii+kontra[AT]posteo[DOT]de>
      4 -->
      5 <?php
      6 
      7 // Load news sources file
      8 $sources = json_decode(
      9     file_get_contents("./news-sources.json"),
     10     true
     11 );
     12 
     13 // Define function to count regions, locals, languages, accesses, medium, topics
     14 function count_catgroies($type, $sources) {
     15 
     16     // extract categories from each news source and add it to a sorted array
     17     $categories = array();
     18     foreach ($sources["sources"] as $source) {
     19         foreach($source[$type] as $category) {
     20             array_push($categories, $category);
     21         }
     22     }
     23     sort($categories);
     24 
     25     // Count each category value
     26     $categories_count = array_count_values($categories);
     27 
     28     // extract categories definitions
     29     $definitions = array();
     30     foreach ($sources[$type] as $definition) {
     31         array_push($definitions, $definition["id"]);
     32     }
     33     sort($definitions);
     34 
     35     // check how often each definition exists in the news sources categories
     36     $definitions_count = array();
     37     foreach ($definitions as $definition) {
     38         $search = "/^" . $definition . "$/";
     39         $definitions_count[$definition] = count(
     40             preg_grep($search, $categories)
     41         );
     42     }
     43 
     44     // check how often each news sources categories exists in the definitions
     45     $categories_exist = array();
     46     foreach ($categories as $category) {
     47         $search = "/^" . $category . "$/";
     48         $categories_exist[$category] = count(preg_grep($search, $definitions));
     49     }
     50 
     51     return array(
     52         "categories_count" => $categories_count,
     53         "definitions_count" => $definitions_count,
     54         "categories_exist" => $categories_exist
     55     );
     56 
     57 }
     58 
     59 // Generate output
     60 $regions = count_catgroies("regions", $sources);
     61 $languages = count_catgroies("languages", $sources);
     62 $access = count_catgroies("access", $sources);
     63 $medium = count_catgroies("medium", $sources);
     64 $topics = count_catgroies("topics", $sources);
     65 $publisher = count_catgroies("publisher", $sources);
     66 
     67 // Generate User output
     68 echo "####################" . PHP_EOL .
     69     "Regions-Occurrence in News Sources:" . PHP_EOL;
     70 print_r($regions["categories_count"]);
     71 echo "####################" . PHP_EOL .
     72     "Regions-Occurrence in Definitions:" . PHP_EOL;
     73 print_r($regions["definitions_count"]);
     74 echo "####################" . PHP_EOL .
     75     "Language-Occurrence in News Sources:" . PHP_EOL;
     76 print_r($languages["categories_count"]);
     77 echo "####################" . PHP_EOL .
     78     "Language-Occurrence in Definitions:" . PHP_EOL;
     79 print_r($languages["definitions_count"]);
     80 echo "####################" . PHP_EOL .
     81     "Access-Occurrence in News Sources:" . PHP_EOL;
     82 print_r($access["categories_count"]);
     83 echo "####################" . PHP_EOL .
     84     "Access-Occurrence in Definitions:" . PHP_EOL;
     85 print_r($access["definitions_count"]);
     86 echo "####################" . PHP_EOL .
     87     "Medium-Occurrence in News Sources:" . PHP_EOL;
     88 print_r($medium["categories_count"]);
     89 echo "####################" . PHP_EOL .
     90     "Medium-Occurrence in Definitions:" . PHP_EOL;
     91 print_r($medium["definitions_count"]);
     92 echo "####################" . PHP_EOL .
     93     "Topics-Occurrence in News Sources:" . PHP_EOL;
     94 print_r($topics["categories_count"]);
     95 echo "####################" . PHP_EOL .
     96     "Topics-Occurrence in Definitions:" . PHP_EOL;
     97 print_r($topics["definitions_count"]);
     98 echo "####################" . PHP_EOL .
     99     "Publisher-Occurrence in News Sources:" . PHP_EOL;
    100 print_r($publisher["categories_count"]);
    101 echo "####################" . PHP_EOL .
    102     "Publisher-Occurrence in Definitions:" . PHP_EOL;
    103 print_r($publisher["definitions_count"]);
    104 
    105 
    106 // Warnings
    107 $definitions_zeros = preg_grep("/^0$/", $regions["definitions_count"]);
    108 if (count($definitions_zeros) > 0) {
    109     echo "####################" . PHP_EOL .
    110         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    111     print_r($definitions_zeros);
    112 }
    113 $categories_zeros = preg_grep("/^0$/", $regions["categories_exist"]);
    114 if (count($categories_zeros) > 0) {
    115     echo "####################" . PHP_EOL .
    116         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    117     print_r($categories_zeros);
    118 }
    119 $definitions_zeros = preg_grep("/^0$/", $languages["definitions_count"]);
    120 if (count($definitions_zeros) > 0) {
    121     echo "####################" . PHP_EOL .
    122         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    123     print_r($definitions_zeros);
    124 }
    125 $categories_zeros = preg_grep("/^0$/", $languages["categories_exist"]);
    126 if (count($categories_zeros) > 0) {
    127     echo "####################" . PHP_EOL .
    128         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    129     print_r($categories_zeros);
    130 }
    131 $definitions_zeros = preg_grep("/^0$/", $access["definitions_count"]);
    132 if (count($definitions_zeros) > 0) {
    133     echo "####################" . PHP_EOL .
    134         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    135     print_r($definitions_zeros);
    136 }
    137 $categories_zeros = preg_grep("/^0$/", $access["categories_exist"]);
    138 if (count($categories_zeros) > 0) {
    139     echo "####################" . PHP_EOL .
    140         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    141     print_r($categories_zeros);
    142 }
    143 $definitions_zeros = preg_grep("/^0$/", $medium["definitions_count"]);
    144 if (count($definitions_zeros) > 0) {
    145     echo "####################" . PHP_EOL .
    146         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    147     print_r($definitions_zeros);
    148 }
    149 $categories_zeros = preg_grep("/^0$/", $medium["categories_exist"]);
    150 if (count($categories_zeros) > 0) {
    151     echo "####################" . PHP_EOL .
    152         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    153     print_r($categories_zeros);
    154 }
    155 $definitions_zeros = preg_grep("/^0$/", $topics["definitions_count"]);
    156 if (count($definitions_zeros) > 0) {
    157     echo "####################" . PHP_EOL .
    158         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    159     print_r($definitions_zeros);
    160 }
    161 $categories_zeros = preg_grep("/^0$/", $topics["categories_exist"]);
    162 if (count($categories_zeros) > 0) {
    163     echo "####################" . PHP_EOL .
    164         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    165     print_r($categories_zeros);
    166 }
    167 $definitions_zeros = preg_grep("/^0$/", $publisher["definitions_count"]);
    168 if (count($definitions_zeros) > 0) {
    169     echo "####################" . PHP_EOL .
    170         "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
    171     print_r($definitions_zeros);
    172 }
    173 $categories_zeros = preg_grep("/^0$/", $publisher["categories_exist"]);
    174 if (count($categories_zeros) > 0) {
    175     echo "####################" . PHP_EOL .
    176         "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
    177     print_r($categories_zeros);
    178 }
    179 
    180 
    181 ?>