gen_categories.php (2080B)
1 <!--
2 SPDX-License-Identifier: AGPL-3.0-or-later
3 SPDX-FileCopyrightText: 2025 JayVii <jayvii+kontra[AT]posteo[DOT]de>
4 -->
5 <?php
6
// Load the news sources file.
//
// The path is relative to the current working directory, so the script has to
// be started from the directory that contains "news-sources.json".
// file_get_contents() returns false when the file cannot be read and
// json_decode() returns null for invalid JSON. Both cases, as well as a
// document without the "sources" / "categories" lists that the rest of the
// script iterates over, are reported here and abort the run with a non-zero
// exit status instead of surfacing later as foreach() warnings.
$sources_raw = file_get_contents("./news-sources.json");
if ($sources_raw === false) {
    echo "ERROR: Could not read ./news-sources.json" . PHP_EOL;
    exit(1);
}

$sources = json_decode($sources_raw, true);
if (!is_array($sources)) {
    echo "ERROR: ./news-sources.json could not be decoded into an array (" .
        json_last_error_msg() . ")" . PHP_EOL;
    exit(1);
}

foreach (["sources", "categories"] as $required_key) {
    if (!isset($sources[$required_key]) || !is_array($sources[$required_key])) {
        echo "ERROR: ./news-sources.json has no \"" . $required_key .
            "\" list" . PHP_EOL;
        exit(1);
    }
}
12
// Flatten the categories of all news sources into a single list. A category
// that is used by several sources appears once per use.
$categories = [];
foreach ($sources["sources"] as $source) {
    foreach ($source["categories"] as $category) {
        $categories[] = $category;
    }
}
// Sort the list so that the per-category counts below come out in order.
sort($categories);

// Map every category to the number of times it occurs in the news sources.
$categories_count = array_count_values($categories);
24
// Gather the "id" of every entry in the file's "categories" list (the
// category definitions) and sort the resulting list.
$definitions = [];
foreach ($sources["categories"] as $definition) {
    $definitions[] = $definition["id"];
}
sort($definitions);
31
// Count how often each defined category id is used by the news sources.
//
// The ids are looked up by exact value in the per-category totals
// ($categories_count) instead of being spliced into a regular expression:
// an id containing regex metacharacters (".", "+", "/", ...) would otherwise
// match the wrong categories or break the pattern altogether. The lookup also
// avoids scanning the whole category list once per definition.
$definitions_count = [];
foreach ($definitions as $definition) {
    // Definitions that no news source uses are kept with a count of 0 so that
    // they can be reported as unused.
    $definitions_count[$definition] = $categories_count[$definition] ?? 0;
}
38
// Count how often each category used by a news source is declared in the
// definitions.
//
// The definitions are tallied once and each category is then looked up by
// exact value. Building a regular expression from the raw category name is
// avoided because names containing regex metacharacters (".", "+", "/", ...)
// would match the wrong ids or produce an invalid pattern, and because it
// rescanned all definitions for every (possibly repeated) category.
$definitions_seen = array_count_values($definitions);
$categories_exist = [];
foreach ($categories as $category) {
    // Categories without a matching definition are kept with a count of 0 so
    // that they can be reported as undefined.
    $categories_exist[$category] = $definitions_seen[$category] ?? 0;
}
45
46
// Report for the user: first the number of uses per category found in the
// news sources, then the number of uses per defined category id.
echo "####################", PHP_EOL,
    "Category-Occurrence in News Sources:", PHP_EOL;
print_r($categories_count);

echo "####################", PHP_EOL,
    "Category-Occurrence in Definitions:", PHP_EOL;
print_r($definitions_count);
54
// Warnings: list every entry whose count is zero. array_filter() keeps the
// original keys, so the printed arrays still show "name => 0" pairs.
$is_zero = static function ($count) {
    return $count === 0;
};

// Definitions that are not referenced by any news source.
$definitions_zeros = array_filter($definitions_count, $is_zero);
if ($definitions_zeros !== []) {
    echo "####################", PHP_EOL,
        "WARNING: Some definitions are not used in the news sources:", PHP_EOL;
    print_r($definitions_zeros);
}

// Categories used by a news source that have no definition.
$categories_zeros = array_filter($categories_exist, $is_zero);
if ($categories_zeros !== []) {
    echo "####################", PHP_EOL,
        "WARNING: Some categories do not exist in the definitions:", PHP_EOL;
    print_r($categories_zeros);
}
68
69 ?>