gen_categories.php (6846B)
1 <!--
2 SPDX-License-Identifier: AGPL-3.0-or-later
3 SPDX-FileCopyrightText: 2025 JayVii <jayvii+kontra[AT]posteo[DOT]de>
4 -->
5 <?php
6
// Load and decode the news sources file. The path is relative to the current
// working directory. Abort with a clear error when the file cannot be read or
// does not decode to an array, instead of carrying on with an unusable
// $sources value.
$sources_file = "./news-sources.json";

$sources_json = file_get_contents($sources_file);
if ($sources_json === false) {
    throw new RuntimeException("Could not read " . $sources_file);
}

// Decode JSON objects as associative arrays (second argument = true)
$sources = json_decode($sources_json, true);
if (!is_array($sources)) {
    throw new RuntimeException(
        "Could not decode " . $sources_file . " into an array: " .
        json_last_error_msg()
    );
}
12
/**
 * Cross-check one category group (e.g. "regions", "languages", "access",
 * "medium", "topics", "publisher") between the news sources and the
 * definitions list.
 *
 * All comparisons are exact matches on the category / definition id. Ids are
 * never interpreted as regular expressions, so ids containing characters such
 * as "/", "." or "+" are handled correctly.
 *
 * @param string $type    Key of the category group. It is used both as the
 *                        per-source key ($source[$type]) and as the top-level
 *                        key of the definitions list ($sources[$type]).
 * @param array  $sources Decoded news-sources data. $sources["sources"] is the
 *                        list of news sources; $sources[$type] is the list of
 *                        definitions, each carrying an "id" entry.
 *
 * @return array Three maps:
 *               - "categories_count":  category value => number of occurrences
 *                                      across all news sources
 *               - "definitions_count": definition id => number of occurrences
 *                                      of that id in the news sources
 *               - "categories_exist":  category value => number of definitions
 *                                      with that id (0 means undefined)
 */
function count_catgroies($type, $sources): array {

    // Collect the category values of every news source into one sorted list
    $categories = [];
    foreach ($sources["sources"] as $source) {
        // A source that lacks this category group contributes nothing
        foreach (($source[$type] ?? []) as $category) {
            $categories[] = $category;
        }
    }
    sort($categories);

    // Tally how often each category value occurs
    $categories_count = array_count_values($categories);

    // Collect the ids of all definitions into a sorted list
    $definitions = [];
    foreach ($sources[$type] as $definition) {
        $definitions[] = $definition["id"];
    }
    sort($definitions);

    // Tally the definition ids so membership checks are plain key lookups
    $definitions_tally = array_count_values($definitions);

    // How often is each definition used by the news sources?
    $definitions_count = [];
    foreach ($definitions as $definition) {
        $definitions_count[$definition] = $categories_count[$definition] ?? 0;
    }

    // How often does each used category appear among the definitions?
    $categories_exist = [];
    foreach ($categories as $category) {
        $categories_exist[$category] = $definitions_tally[$category] ?? 0;
    }

    return [
        "categories_count" => $categories_count,
        "definitions_count" => $definitions_count,
        "categories_exist" => $categories_exist,
    ];

}
58
// Run the cross-check once for every category group
$regions = count_catgroies("regions", $sources);
$languages = count_catgroies("languages", $sources);
$access = count_catgroies("access", $sources);
$medium = count_catgroies("medium", $sources);
$topics = count_catgroies("topics", $sources);
$publisher = count_catgroies("publisher", $sources);

// User output: every heading is printed after a separator line and followed
// by its count table. For each group, the occurrences in the news sources
// come first, then the occurrences per definition.
$report = [
    "Regions-Occurrence in News Sources:" => $regions["categories_count"],
    "Regions-Occurrence in Definitions:" => $regions["definitions_count"],
    "Language-Occurrence in News Sources:" => $languages["categories_count"],
    "Language-Occurrence in Definitions:" => $languages["definitions_count"],
    "Access-Occurrence in News Sources:" => $access["categories_count"],
    "Access-Occurrence in Definitions:" => $access["definitions_count"],
    "Medium-Occurrence in News Sources:" => $medium["categories_count"],
    "Medium-Occurrence in Definitions:" => $medium["definitions_count"],
    "Topics-Occurrence in News Sources:" => $topics["categories_count"],
    "Topics-Occurrence in Definitions:" => $topics["definitions_count"],
    "Publisher-Occurrence in News Sources:" => $publisher["categories_count"],
    "Publisher-Occurrence in Definitions:" => $publisher["definitions_count"],
];
foreach ($report as $heading => $counts) {
    echo "####################" . PHP_EOL .
        $heading . PHP_EOL;
    print_r($counts);
}
104
105
// Warnings: walk through the category groups in report order. For each group,
// list the definitions whose usage count is 0 and the categories whose
// definition count is 0. Nothing is printed for a group without such entries.
$checked_groups = [$regions, $languages, $access, $medium, $topics, $publisher];
foreach ($checked_groups as $group) {

    // Definitions that no news source refers to
    $definitions_zeros = preg_grep("/^0$/", $group["definitions_count"]);
    if (!empty($definitions_zeros)) {
        echo "####################" . PHP_EOL .
            "WARNING: Some definitions are not used in the news sources:" . PHP_EOL;
        print_r($definitions_zeros);
    }

    // Categories used by news sources that have no definition
    $categories_zeros = preg_grep("/^0$/", $group["categories_exist"]);
    if (!empty($categories_zeros)) {
        echo "####################" . PHP_EOL .
            "WARNING: Some categories do not exist in the definitions:" . PHP_EOL;
        print_r($categories_zeros);
    }

}
179
180
181 ?>