<?php

	// Reorders the clusters file given as the first command-line argument so that
	// clusters containing the most distinct languages come first. The file is
	// rewritten in place, one cluster per line, using the document names exactly
	// as load_clusters() returns them.
	//
	// Usage: php <this script> <clusters_file>

	// Make the mb_* string functions used by this script operate on UTF-8.
	mb_internal_encoding('UTF-8');

	// Report errors on STDERR and exit with a non-zero status; die('message')
	// would print to standard output and exit with status 0.
	if ( $argc < 2 ) {
		fwrite(STDERR, 'ERROR: Not enough parameters.' . "\n");
		exit(1);
	}

	$clusters_file = $argv[1];

	// The input file is overwritten at the end, so refuse to continue when it
	// cannot be read: otherwise an empty result would be written to that path.
	if ( !is_file($clusters_file) || !is_readable($clusters_file) ) {
		fwrite(STDERR, 'ERROR: Cannot read clusters file: ' . $clusters_file . "\n");
		exit(1);
	}

	$clusters = load_clusters($clusters_file);
	$cluster_language_counts = array();
	$cluster_positions = array();

	foreach ( $clusters as $cluster_name => $cluster ) {
		$cluster_language_counts[$cluster_name] = count_languages($cluster);
		// Remember the input order so that ties can be broken deterministically.
		$cluster_positions[$cluster_name] = count($cluster_positions);
	}

	// Sort by language count, highest first. Clusters with the same count keep
	// their relative input order; arsort() only guarantees that since PHP 8.0.
	uksort(
		$cluster_language_counts,
		function ($a, $b) use ($cluster_language_counts, $cluster_positions) {
			if ( $cluster_language_counts[$a] !== $cluster_language_counts[$b] )
				return $cluster_language_counts[$b] - $cluster_language_counts[$a];

			return $cluster_positions[$a] - $cluster_positions[$b];
		}
	);

	$content = '';
	foreach ( $cluster_language_counts as $cluster_name => $count )
		$content .= implode(' ', $clusters[$cluster_name]) . "\n";

	if ( file_put_contents($clusters_file, $content) === false ) {
		fwrite(STDERR, 'ERROR: Cannot write clusters file: ' . $clusters_file . "\n");
		exit(1);
	}

	/**
	 * Counts how many different languages occur among the documents of a cluster.
	 *
	 * @param array $cluster Document names belonging to one cluster.
	 * @return int Number of distinct values returned by get_document_language()
	 *             for the given document names.
	 */
	function count_languages($cluster) {

		// Use each language as an array key, so repeated languages collapse
		// into a single entry without a separate de-duplication step.
		$seen_languages = array();

		foreach ( $cluster as $name )
			$seen_languages[get_document_language($name)] = true;

		return count($seen_languages);
	}

	/**
	 * Derives the language identifier of a document from its name.
	 *
	 * The identifier is the first two characters of the name, except for a
	 * fixed set of prefixes that are rewritten to 'hu' or 'sl'.
	 * NOTE(review): presumably those prefixes belong to documents whose names
	 * do not start with their language code - confirm against the corpus.
	 *
	 * @param string $document_name Document name.
	 * @return string The two-character prefix, or its replacement from the table.
	 */
	function get_document_language($document_name) {

		// Prefixes that do not stand for themselves, mapped to the value to report.
		static $prefix_overrides = array(
			'ha' => 'hu',
			'rn' => 'hu',
			'ut' => 'hu',
			'tr' => 'hu',
			'in' => 'hu',
			'ko' => 'hu',
			've' => 'hu',
			'pa' => 'hu',
			'ta' => 'hu',
			'al' => 'hu',
			'ny' => 'hu',
			'x0' => 'sl',
			'x1' => 'sl',
			'x2' => 'sl',
		);

		$prefix = mb_substr($document_name, 0, 2);

		return isset($prefix_overrides[$prefix]) ? $prefix_overrides[$prefix] : $prefix;
	}

	/**
	 * Loads clusters from a text file that holds one cluster per line, with the
	 * document names of a cluster separated by spaces or tabs.
	 *
	 * Each document name is reduced with pathinfo(..., PATHINFO_FILENAME), i.e.
	 * the directory part and the last extension are removed. Blank lines are
	 * skipped.
	 *
	 * @param string $clusters_file Path of the clusters file.
	 * @return array Map of 'cluster_<line number>' (1-based line of the file)
	 *               to the list of document names found on that line.
	 * @throws RuntimeException When the file cannot be read.
	 */
	function load_clusters($clusters_file) {

		$lines = file($clusters_file);

		// file() returns false on failure; fail explicitly instead of iterating
		// over a non-array and reporting "no clusters".
		if ( $lines === false )
			throw new RuntimeException('Unable to read clusters file: ' . $clusters_file);

		$clusters = array();

		foreach ( $lines as $i => $line ) {

			$line = trim($line);

			// Compare with '' explicitly: empty('0') is true, which would drop
			// a cluster whose only document is named "0".
			if ( $line === '' )
				continue;

			// Split on runs of separators so that repeated spaces do not
			// produce empty document names.
			$document_names = preg_split('/[ \t]+/', $line, -1, PREG_SPLIT_NO_EMPTY);

			foreach ( $document_names as $j => $document_name )
				$document_names[$j] = pathinfo($document_name, PATHINFO_FILENAME);

			// Name the cluster after its line number, counting blank lines too.
			$clusters['cluster_' . ( $i + 1 )] = $document_names;
		}

		return $clusters;
	}

?>