pub / kontra

Der linke Newsaggregator.
git clone src.jayvii.de/pub/kontra.git
Home | Log | Files | Exports | Refs | README | RSS

commit 6a743eae53dcf3f1d1ba0a3b1bd8128784673cb8
parent 9a696513594bea00fd174de9001b7989e5b61714
Author: JayVii <jayvii[AT]posteo[DOT]de>
Date:   Fri,  6 Jun 2025 13:03:01 +0200

feat: extend testing script

Diffstat:
M tools/gen_categories.php | 52 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/tools/gen_categories.php b/tools/gen_categories.php @@ -10,18 +10,60 @@ $sources = json_decode( true ); -// extract categories from each news source and add it to a new array +// extract categories from each news source and add it to a sorted array $categories = array(); foreach ($sources["sources"] as $source) { foreach($source["categories"] as $category) { array_push($categories, $category); } } - -// sort new array by ID sort($categories); -// Print unique values and their counts -print_r(array_count_values($categories)); +// Count each category value +$categories_count = array_count_values($categories); + +// extract categories definitions +$definitions = array(); +foreach ($sources["categories"] as $definition) { + array_push($definitions, $definition["id"]); +} +sort($definitions); + +// check how often each definition exists in the news sources categories +$definitions_count = array(); +foreach ($definitions as $definition) { + $search = "/^" . $definition . "$/"; + $definitions_count[$definition] = count(preg_grep($search, $categories)); +} + +// check how often each news sources categories exists in the definitions +$categories_exist = array(); +foreach ($categories as $category) { + $search = "/^" . $category . "$/"; + $categories_exist[$category] = count(preg_grep($search, $definitions)); +} + + +// Generate User output +echo "####################" . PHP_EOL . + "Category-Occurrence in News Sources:" . PHP_EOL; +print_r($categories_count); +echo "####################" . PHP_EOL . + "Category-Occurrence in Definitions:" . PHP_EOL; +print_r($definitions_count); + +// Warnings +$definitions_zeros = preg_grep("/^0$/", $definitions_count); +if (count($definitions_zeros) > 0) { + echo "####################" . PHP_EOL . + "WARNING: Some definitions are not used in the news sources:" . 
PHP_EOL; + print_r($definitions_zeros); +} +$categories_zeros = preg_grep("/^0$/", $categories_exist); +if (count($categories_zeros) > 0) { + echo "####################" . PHP_EOL . + "WARNING: Some categories do not exist in the definitions:" . PHP_EOL; + print_r($categories_zeros); +} ?>