<?php

	/*
	 * Builds an extended EuroVoc vocabulary for one language.
	 *
	 * Command line arguments:
	 *   1. language code used in the input/output file names
	 *   2. path to an INI file that defines EUROVOC_FOLDER
	 *
	 * Reads desc_<language>.xml and uf_<language>.xml from EUROVOC_FOLDER,
	 * collects the labels of every descriptor id into $eurovoc_extended and
	 * writes the result to eurovoc_extended_<language>.json in the same folder.
	 */
	mb_internal_encoding('UTF-8');

	// Report a fatal problem on stderr and stop with a non-zero exit status.
	// die('text') prints the text but exits with status 0, which makes a failed
	// run indistinguishable from a successful one for the calling shell.
	$fail = function($message) {
		fwrite(STDERR, 'ERROR: ' . $message . "\n");
		exit(1);
	};

	if ( $argc < 3 )
		$fail('Not enough parameters.');

	$language		= $argv[1];
	$config_file	= $argv[2];

	$config = parse_ini_file($config_file);
	if ( $config === false )
		$fail('Cannot parse configuration file: ' . $config_file);

	// Without this key every path below would silently start at the filesystem root.
	if ( !isset($config['EUROVOC_FOLDER']) )
		$fail('Missing EUROVOC_FOLDER setting in: ' . $config_file);

	$descriptors_file	= $config['EUROVOC_FOLDER'] . '/desc_' . $language . '.xml';
	$used_for_file		= $config['EUROVOC_FOLDER'] . '/uf_' . $language . '.xml';
	$output_file		= $config['EUROVOC_FOLDER'] . '/eurovoc_extended_' . $language . '.json';

	// Filled by load_descriptors() and load_used_for() through "global".
	$eurovoc_extended	= array();

	if ( !file_exists($descriptors_file) )
		$fail('Cannot open filename: ' . $descriptors_file);

	if ( !file_exists($used_for_file) )
		$fail('Cannot open filename: ' . $used_for_file);

	load_descriptors($descriptors_file);
	load_used_for($used_for_file);

	// Entries with the most labels come first; uasort() keeps the descriptor ids as keys.
	uasort($eurovoc_extended, function($a, $b) {
		return count($b) - count($a);
	});

	$json = json_encode($eurovoc_extended, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
	if ( $json === false )
		$fail('Cannot encode vocabulary as JSON (json_last_error code ' . json_last_error() . ').');

	if ( file_put_contents($output_file, $json) === false )
		$fail('Cannot write filename: ' . $output_file);

	/**
	 * Load the label of every descriptor record into the global vocabulary.
	 *
	 * Each usable record yields $eurovoc_extended[<descriptor id>] = array(<label>),
	 * replacing whatever was stored under that id before. Labels consisting only
	 * of upper-case Latin (A-Z) or Cyrillic (А-Я) letters are skipped
	 * (presumably abbreviations - confirm with the vocabulary owner).
	 *
	 * @param string $descriptors_file Path to the descriptors XML file.
	 * @return void
	 */
	function load_descriptors($descriptors_file) {

		global $eurovoc_extended;

		foreach ( get_records($descriptors_file) as $record_xml ) {

			list($descriptor_id, $descriptor) = parse_desc_record($record_xml);

			// A record without an id or a label cannot be indexed; storing it
			// would create a bogus entry under the empty key.
			if ( $descriptor_id === null || $descriptor === null )
				continue;

			// The "u" modifier makes PCRE read the pattern and the subject as
			// UTF-8. Without it the Cyrillic range is interpreted byte by byte
			// and also matches lower-case Cyrillic words.
			if ( preg_match('/^[А-ЯA-Z]+$/u', $descriptor) )
				continue;

			$eurovoc_extended[$descriptor_id] = array($descriptor);
		}
	}

	/**
	 * Append the "used for" synonyms of each record to the global vocabulary.
	 *
	 * For every record of the file, each synonym is added to the list stored in
	 * $eurovoc_extended under the record's descriptor id; the list is created
	 * when the id is not present yet. A synonym is left out when it consists of
	 * letters only and is equal to its own upper-case form.
	 *
	 * @param string $used_for_file Path to the "used for" XML file.
	 * @return void
	 */
	function load_used_for($used_for_file) {

		global $eurovoc_extended;

		foreach ( get_records($used_for_file) as $uf_record ) {

			list($id, $labels) = parse_uf_record($uf_record);

			foreach ( $labels as $label ) {

				// Letters-only text that does not change when upper-cased.
				$is_caps_word = ( mb_strtoupper($label) == $label ) && preg_match('/^\p{L}+$/u', $label);

				if ( !$is_caps_word ) {

					if ( !array_key_exists($id, $eurovoc_extended) )
						$eurovoc_extended[$id] = array();

					$eurovoc_extended[$id][] = $label;
				}
			}
		}
	}

	/**
	 * Extract the descriptor id and label from the XML text of one record.
	 *
	 * The first <DESCRIPTEUR_ID> and the first <LIBELLE> element are used; their
	 * content is returned verbatim (it may span several lines).
	 *
	 * @param string $record_xml Inner XML of a single record.
	 * @return array array(id, label); an element is null when its tag is
	 *               missing or empty.
	 */
	function parse_desc_record($record_xml) {

		// Only read the capture group when the pattern actually matched;
		// otherwise $match[1] does not exist and PHP reports an undefined index.
		$descriptor = null;
		if ( preg_match('/<LIBELLE>(.+?)<\/LIBELLE>/s', $record_xml, $match) )
			$descriptor = $match[1];

		$id = null;
		if ( preg_match('/<DESCRIPTEUR_ID>(.+?)<\/DESCRIPTEUR_ID>/s', $record_xml, $match) )
			$id = $match[1];

		return array($id, $descriptor);
	}

	/**
	 * Extract the descriptor id and all synonyms from the XML text of one record.
	 *
	 * Every <UF_EL> element contributes one synonym, in document order; the
	 * first <DESCRIPTEUR_ID> element supplies the id.
	 *
	 * @param string $record_xml Inner XML of a single record.
	 * @return array array(id, synonyms); id is null when the tag is missing,
	 *               synonyms is an empty array when there is no <UF_EL>.
	 */
	function parse_uf_record($record_xml) {

		preg_match_all('/<UF_EL>(.+?)<\/UF_EL>/s', $record_xml, $synonym_matches);
		$synonyms = isset($synonym_matches[1]) ? $synonym_matches[1] : array();

		// Only read the capture group when the pattern actually matched;
		// otherwise $id_match[1] does not exist and PHP reports an undefined index.
		$descriptor_id = null;
		if ( preg_match('/<DESCRIPTEUR_ID>(.+?)<\/DESCRIPTEUR_ID>/s', $record_xml, $id_match) )
			$descriptor_id = $id_match[1];

		return array($descriptor_id, $synonyms);
	}

	/**
	 * Read a file and return the inner text of each <RECORD>...</RECORD> element.
	 *
	 * The file is loaded completely into memory and scanned with a non-greedy
	 * regular expression, so records are returned in document order and may
	 * span several lines.
	 *
	 * @param string $descriptors_file Path of the XML file to read.
	 * @return array List of strings, one per record, without the enclosing tags.
	 */
	function get_records($descriptors_file) {

		$contents	= file_get_contents($descriptors_file);
		$found		= array();

		// PREG_PATTERN_ORDER (the default) puts all captures of group 1 into $found[1].
		preg_match_all('/<RECORD>(.+?)<\/RECORD>/s', $contents, $found, PREG_PATTERN_ORDER);

		return $found[1];
	}

?>