<?php

	// Usage: php <script> <language> <input_file> <config_file>
	//
	// Reads the XML in <input_file>, collects the terms returned by get_terms()
	// for every <termEntry>, groups them by entry id (with the "IATE-" prefix
	// removed) and writes the result as JSON to
	// <IATE_FOLDER>/iate_simple_<language>.json.
	if ( $argc < 4 )
		die('ERROR: Not enough parameters.' . "\n");

	$language				= $argv[1];
	$input_file				= $argv[2];
	if ( !file_exists($input_file) )
		die('Cannot open filename: ' . $input_file . "\n");

	$config_file			= $argv[3];
	$config					= parse_ini_file($config_file);

	// parse_ini_file() returns false on failure. Carrying on would build the
	// output path from an empty folder name and write to the filesystem root.
	if ( $config === false || !isset($config['IATE_FOLDER']) )
		die('Cannot read configuration (IATE_FOLDER missing?): ' . $config_file . "\n");

	// The duplicate filter is optional: a missing key means "off".
	$filter_duplicates		= isset($config['IATE_FILTER_DUPLICATES']) && $config['IATE_FILTER_DUPLICATES'] == 1;
	$iate_duplicates_ids	= array();

	if ( $filter_duplicates ) {

		// The id list is looked up relative to the current working directory.
		$duplicates_file = 'iate_duplicate_ids.json';
		$duplicate_ids = is_readable($duplicates_file)
			? json_decode(file_get_contents($duplicates_file), true)
			: null;

		if ( !is_array($duplicate_ids) )
			die('Cannot read duplicate id list: ' . $duplicates_file . "\n");

		// Flip so that membership tests are key lookups.
		$iate_duplicates_ids = array_flip($duplicate_ids);
	}

	// $xml must stay a global of this name: get_entries() reads it.
	$xml = file_get_contents($input_file);
	if ( $xml === false )
		die('Cannot open filename: ' . $input_file . "\n");

	list($ids, $entries_xml) = get_entries();

	$iate_simple			= array();

	foreach ( $entries_xml as $i => $entry_xml ) {

		$iate_id = str_replace('IATE-', '', $ids[$i]);

		// Cheap lookup first, so filtered entries are never parsed.
		if ( $filter_duplicates && array_key_exists($iate_id, $iate_duplicates_ids) )
			continue;

		$terms = get_terms($entry_xml);
		if ( count($terms) == 0 )
			continue;

		// The same id may occur in several entries; their terms are accumulated.
		if ( !array_key_exists($iate_id, $iate_simple) )
			$iate_simple[$iate_id] = array();

		foreach ( $terms as $term )
			$iate_simple[$iate_id][] = $term;
	}

	$output_file = $config['IATE_FOLDER'] . '/iate_simple_' . $language . '.json';
	$json = json_encode($iate_simple, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);

	// json_encode() returns false e.g. on invalid UTF-8; do not write an empty file then.
	if ( $json === false || file_put_contents($output_file, $json) === false )
		die('Cannot write output file: ' . $output_file . "\n");

	/**
	 * Splits the global $xml string into its <termEntry> elements.
	 *
	 * Only entries whose opening tag is exactly <termEntry id="..."> are matched.
	 *
	 * @return array Two parallel lists: index 0 holds the id attribute values,
	 *               index 1 holds the inner markup of each entry.
	 */
	function get_entries() {

		global $xml;

		$entry_pattern = '/<termEntry id="(.+?)">(.+?)<\/termEntry>/s';
		preg_match_all($entry_pattern, $xml, $found);

		$entry_ids    = $found[1];
		$entry_bodies = $found[2];

		return array($entry_ids, $entry_bodies);
	}

	/**
	 * Extracts the terms of one entry, leaving out abbreviations.
	 *
	 * Each <term> is paired with the first <termNote type="termType"> found
	 * between it and the next <term> (or the end of the entry). Pairing by
	 * position in two separate match lists would shift every later pairing as
	 * soon as one term has no termType note.
	 * NOTE(review): this assumes the termType note follows its term, as in
	 * TBX <tig> blocks -- confirm against the actual export.
	 *
	 * @param string $entry_xml Inner markup of a single <termEntry>.
	 * @return array List of term strings in document order; terms whose type is
	 *               'abbreviation' are skipped, terms without a type are kept.
	 */
	function get_terms($entry_xml) {

		$terms = array();

		// Group 1: term text. Group 2: everything after it up to the next <term>.
		preg_match_all('/<term>(.+?)<\/term>(.*?)(?=<term>|\z)/s', $entry_xml, $matches, PREG_SET_ORDER);

		foreach ( $matches as $match ) {

			$type = null;
			if ( preg_match('/<termNote type="termType">(.+?)<\/termNote>/s', $match[2], $match_type) )
				$type = $match_type[1];

			if ( $type === 'abbreviation' )
				continue;

			$terms[] = $match[1];
		}

		return $terms;
	}

	/**
	 * Returns the text of the first <descrip type="subjectField"> element of an
	 * entry with all spaces removed.
	 * NOTE(review): the function name suggests this text is a list of EuroVoc
	 * ids; the code itself does not validate the content.
	 *
	 * @param string $entry_xml Inner markup of a single <termEntry>.
	 * @return string The subject field text without spaces, or '' when the entry
	 *                has no subjectField element.
	 */
	function get_eurovoc_ids($entry_xml) {

		// Without this guard a missing element means reading an undefined offset.
		if ( !preg_match('/<descrip type="subjectField">(.+?)<\/descrip>/s', $entry_xml, $match) )
			return '';

		return str_replace(' ', '', $match[1]);
	}

?>