<?php

	// Use UTF-8 for every mb_* string function.
	mb_internal_encoding('UTF-8');

	// The folder to process is the only (mandatory) command-line argument.
	if ( $argc < 2 )
		die('ERROR: Not enough parameters.' . "\n");

	$folder = $argv[1];

	// Both the clusters input and the metadata output live in that folder.
	$clusters_file			= $folder . '/clusters.txt';
	$metadata_file			= $folder . '/metadata.txt';

	// Map of document name => cluster name, filled in by load_clusters().
	$clusters = array();
	load_clusters($clusters_file);

	// The groups file is looked up relative to the current working directory.
	// Stop early with a clear message if it cannot be read or decoded, instead
	// of carrying a null $groups into the rest of the script.
	$groups_file = 'groups_v2.json';
	$groups_json = is_readable($groups_file) ? file_get_contents($groups_file) : false;
	if ( $groups_json === false )
		die('ERROR: Could not read ' . $groups_file . '.' . "\n");

	// Decoded as associative arrays: group id => list of documents.
	$groups = json_decode($groups_json, true);
	if ( !is_array($groups) )
		die('ERROR: ' . $groups_file . ' does not contain a valid JSON object or array.' . "\n");

	// Count, for every group, how many of its documents ended up in each
	// cluster: $distribution[group id][cluster name] => number of documents.
	$distribution = array();
	foreach ( $groups as $group_id => $members ) {

		// Start from whatever has already been counted for this group id.
		$counts = array_key_exists($group_id, $distribution)
			? $distribution[$group_id]
			: array();

		foreach ( $members as $member ) {

			$name = $member['document_name'];

			// Only documents that appear in the cluster map are counted.
			if ( array_key_exists($name, $clusters) ) {

				$assigned = $clusters[$name];
				$counts[$assigned] = array_key_exists($assigned, $counts)
					? $counts[$assigned] + 1
					: 1;
			}
		}

		$distribution[$group_id] = $counts;
	}

	// For every group spread over at least two clusters, record the share of
	// its counted documents that sit in its most populated cluster.
	// NOTE(review): groups found in fewer than two clusters are left out of the
	// average entirely -- confirm that this is intended.
	$means = array();
	foreach ( $distribution as $cluster_counts ) {

		if ( count($cluster_counts) < 2 )
			continue;

		// Largest per-cluster count divided by the group's total count.
		$means[] = max($cluster_counts) / array_sum($cluster_counts);
	}

	// With no eligible group the average below would divide by zero
	// (DivisionByZeroError on PHP 8, NAN on PHP 7), so stop with a clear message.
	if ( count($means) === 0 )
		die('ERROR: No group spans at least two clusters; cannot compute the groups distribution.' . "\n");

	// Average share, expressed as a percentage rounded to two decimals.
	$mean = round(array_sum($means) * 100 / count($means), 2);

	// Read the existing metadata. INI_SCANNER_RAW keeps every value as the
	// literal text found in the file; the default scanner would turn values
	// such as yes/no/null into "1"/"" and they would be written back altered.
	$metadata = array();
	if ( is_file($metadata_file) ) {

		$metadata = parse_ini_file($metadata_file, false, INI_SCANNER_RAW);

		// Refuse to overwrite a file that could not be parsed.
		if ( $metadata === false )
			die('ERROR: Could not parse ' . $metadata_file . '.' . "\n");
	}

	// A value that is already recorded is kept as is.
	if ( !array_key_exists('Groups Distribution', $metadata) )
		$metadata['Groups Distribution'] = $mean . '%';

	// Serialise back to one "key = value" line per entry.
	$content = '';
	foreach ( $metadata as $key => $value )
		$content .= $key . ' = ' . $value . "\n";

	if ( file_put_contents($metadata_file, $content) === false )
		die('ERROR: Could not write ' . $metadata_file . '.' . "\n");

	/**
	 * Reads a clusters file into the global $clusters map.
	 *
	 * Every non-blank line describes one cluster as a whitespace-separated list
	 * of document paths. The cluster is named "cluster_N", where N is the
	 * 1-based line number (blank lines still advance the numbering). Each
	 * document is stored under its file name without directory and extension:
	 * $clusters[document name] = cluster name.
	 *
	 * Terminates the script with an error message if the file cannot be read.
	 *
	 * @param string $clusters_file Path of the clusters file.
	 * @return void
	 */
	function load_clusters($clusters_file) {

		global $clusters;

		$lines = is_readable($clusters_file) ? file($clusters_file) : false;
		if ( $lines === false )
			die('ERROR: Could not read ' . $clusters_file . '.' . "\n");

		foreach ( $lines as $i => $line ) {

			$line = trim($line);

			// Compare against '' explicitly: empty() would also discard a
			// line that consists of the single document name "0".
			if ( $line === '' )
				continue;

			$cluster_name = 'cluster_' . ( $i + 1 );

			// Split on runs of spaces/tabs and drop empty pieces, so repeated
			// separators do not register a document with an empty name. An
			// explicit character class is used so that bytes of multibyte
			// UTF-8 names are never treated as separators.
			$cluster_documents = preg_split('/[ \t]+/', $line, -1, PREG_SPLIT_NO_EMPTY);
			foreach ( $cluster_documents as $document_name ) {

				$document_name = pathinfo($document_name, PATHINFO_FILENAME);
				$clusters[$document_name] = $cluster_name;
			}
		}
	}

?>