<?php

	mb_internal_encoding('UTF-8');

	// Command-line inputs: argv[1] is the language code that appears in the
	// corpus file name, argv[2] is the path of an INI file that must define
	// MULTI_CORPUS_FOLDER.

	// Reports a fatal error on STDERR and stops with a non-zero exit status,
	// so that a calling shell script can detect the failure.
	$fail = function ($message) {
		fwrite(STDERR, 'ERROR: ' . $message . "\n");
		exit(1);
	};

	// Reads $path and decodes it as JSON into a PHP array. Aborts when the
	// file cannot be read or does not decode to an array (json_decode()
	// returns null on malformed input, which would otherwise break the
	// foreach / array_flip() calls that consume the result).
	$load_json_array = function ($path) use ($fail) {
		if ( !is_readable($path) )
			$fail('Cannot open file: ' . $path);

		$contents = file_get_contents($path);
		if ( $contents === false )
			$fail('Cannot read file: ' . $path);

		$decoded = json_decode($contents, true);
		if ( !is_array($decoded) )
			$fail('Invalid JSON in file: ' . $path);

		return $decoded;
	};

	if ( $argc < 3 )
		$fail('Not enough parameters.');

	$language					= $argv[1];
	$config_file				= $argv[2];
	if ( !file_exists($config_file) )
		$fail('Cannot open file: ' . $config_file);

	// parse_ini_file() returns false on a syntax error; the folder setting is
	// required to build the corpus path below.
	$config						= parse_ini_file($config_file);
	if ( $config === false || !isset($config['MULTI_CORPUS_FOLDER']) )
		$fail('Missing MULTI_CORPUS_FOLDER setting in file: ' . $config_file);

	$multi_corpus_file			= $config['MULTI_CORPUS_FOLDER'] . '/multi_corpus_intersected_' . $language . '.json';
	$multi_corpus				= $load_json_array($multi_corpus_file);

	// Relative path, so this file is looked up in the current working directory.
	// The list is flipped so the ids become array keys for direct key lookups.
	$eurovoc_disapproved_ids	= array_flip($load_json_array('eurovoc_disapproved_ids.json'));

	// Maps the lower-cased first two characters of a code to the dictionary
	// whose counter it feeds. Codes with any other prefix are ignored.
	$dictionary_by_prefix		= array(
		'eu' => 'Eurovoc',
		'ia' => 'IATE',
		'te' => 'Terms',
		'le' => 'Lemmas',
	);

	// $statistics[<dictionary>][<code>] holds the count of <code> summed over
	// every document of the corpus.
	$statistics					= array_fill_keys(array_values($dictionary_by_prefix), array());

	foreach ( $multi_corpus as $document_data ) {

		// A document entry that is not a code => count map has nothing to add.
		if ( !is_array($document_data) )
			continue;

		foreach ( $document_data as $code => $count ) {

			$prefix = mb_strtolower(mb_substr($code, 0, 2));
			if ( !isset($dictionary_by_prefix[$prefix]) )
				continue;

			$dictionary_name = $dictionary_by_prefix[$prefix];

			if ( $dictionary_name === 'Eurovoc' ) {

				// The prefix test above ignores case, so the id must be
				// extracted ignoring case as well; otherwise a differently
				// cased code would slip past the disapproved-id filter.
				$eurovoc_id = str_ireplace('Eurovoc_', '', $code);
				if ( isset($eurovoc_disapproved_ids[$eurovoc_id]) )
					continue;
			}

			if ( !isset($statistics[$dictionary_name][$code]) )
				$statistics[$dictionary_name][$code] = 0;

			$statistics[$dictionary_name][$code] += $count;
		}
	}

	// For every dictionary: order its codes by descending count (keys are
	// preserved), then print the number of distinct codes and the sum of
	// all their counts as tab-separated lines.
	foreach ( $statistics as $dictionary_name => &$code_counts ) {

		arsort($code_counts);

		$unique_codes	= count($code_counts);
		$total_count	= array_sum($code_counts);

		echo "Unique {$dictionary_name}\t{$unique_codes}\n";
		echo "Total {$dictionary_name}\t{$total_count}\n";
	}
	// Drop the reference so a later write to $code_counts cannot alter $statistics.
	unset($code_counts);

?>