commit 6a743eae53dcf3f1d1ba0a3b1bd8128784673cb8
parent 9a696513594bea00fd174de9001b7989e5b61714
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date: Fri, 6 Jun 2025 13:03:01 +0200
feat: extend testing script
Diffstat:
1 file changed, 47 insertions(+), 5 deletions(-)
diff --git a/tools/gen_categories.php b/tools/gen_categories.php
@@ -10,18 +10,60 @@ $sources = json_decode(
true
);
-// extract categories from each news source and add it to a new array
+// extract the categories of each news source into one array (sorted afterwards)
$categories = array();
foreach ($sources["sources"] as $source) {
foreach($source["categories"] as $category) {
array_push($categories, $category);
}
}
-
-// sort new array by ID
sort($categories);
-// Print unique values and their counts
-print_r(array_count_values($categories));
+// Tally how often each category value occurs across the news sources
+$categories_count = array_count_values($categories);
+
+// Collect the ID of every category definition, then order the IDs
+$definitions = array();
+foreach ($sources["categories"] as $entry) {
+    $definitions[] = $entry["id"];
+}
+sort($definitions);
+
+// count how often each definition ID occurs literally in the news sources' categories
+$definitions_count = array();
+foreach ($definitions as $definition) {
+    $search = "/^" . preg_quote((string) $definition, "/") . "$/"; // escape regex metachars and "/"
+    $definitions_count[$definition] = count(preg_grep($search, $categories));
+}
+
+// count how often each news source category occurs literally in the definitions (0 = undefined)
+$categories_exist = array();
+foreach ($categories as $category) {
+    $search = "/^" . preg_quote((string) $category, "/") . "$/"; // escape regex metachars and "/"
+    $categories_exist[$category] = count(preg_grep($search, $definitions));
+}
+
+
+// Report the counts gathered from the news sources and from the definitions
+echo "####################", PHP_EOL,
+    "Category-Occurrence in News Sources:", PHP_EOL;
+print_r($categories_count);
+echo "####################", PHP_EOL,
+    "Category-Occurrence in Definitions:", PHP_EOL;
+print_r($definitions_count);
+
+// Warn about every entry whose count is exactly zero
+$unused_definitions = preg_grep("/^0$/", $definitions_count);
+$undefined_categories = preg_grep("/^0$/", $categories_exist);
+if (count($unused_definitions) !== 0) {
+    echo "####################", PHP_EOL,
+        "WARNING: Some definitions are not used in the news sources:", PHP_EOL;
+    print_r($unused_definitions);
+}
+if (count($undefined_categories) !== 0) {
+    echo "####################", PHP_EOL,
+        "WARNING: Some categories do not exist in the definitions:", PHP_EOL;
+    print_r($undefined_categories);
+}
?>