#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: Yaroslav_Rozdobudko $
#$Revision: 10538 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.11.0/scripts/cif_polymer_multiplicity $
#$Date: 2025-03-06 19:55:02 +0200 (Thu, 06 Mar 2025) $
#------------------------------------------------------------------------------
#*
#* Parse CIF file and print out multiplicity and dimensionality per each
#* molecule.
#* Requires TOPOCIF data items for representation of labeled quotient graphs:
#* _topol_net.id, _topol_atom.atom_label, _topol_atom.node_id,
#* _topol_link.node_id_1, _topol_link.node_id_2, _topol_link.translation_2_x,
#* _topol_link.translation_2_y, _topol_link.translation_2_z.
#*
#* USAGE:
#*    $0 [options] input.cif [input2.cif ...]
#**
use strict;
use warnings;

use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::UserMessage qw( error );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ToolsVersion qw( get_version_string );
use COD::Graph::CycleBasis;
use COD::Algebra::GaussJordanBigRat qw( gj_elimination_non_zero_elements );

use List::MoreUtils qw( any );

sub generate_combinations {
    my @data = @_;
    my @combinations;
    for my $i (0 .. $#data) {
        for my $j ($i+1 .. $#data) {
            for my $k ($j+1 .. $#data) {
                push @combinations, [$data[$i], $data[$j], $data[$k]];
            }
        }
    }
    return @combinations;
}

sub find_min_det {
    my @matrices = @_;
    my $min_det = undef;
    my $best_matrix;

    my $tolerance = 1e-10;

    for my $sub_matrix_ref (@matrices) {
        my $det = abs(calculate_determinant($sub_matrix_ref));

        if ($det > $tolerance) {
            if (!defined $min_det || $det < $min_det) {
                $min_det = $det;
                $best_matrix = $sub_matrix_ref;
            }
        }
    }
    unless($min_det) {
        my $matrix_ref = $matrices[0];
        my $det = abs(calculate_determinant($matrix_ref));
        $min_det = $det;
        $best_matrix = $matrix_ref;
    }

    return ($best_matrix, $min_det);
}

##
# Calculates a determinant of a square matrix.
#
# @input $m
#       Reference to a square matrix of orders 1, 2 or 3.
# @return
#       Determinant of the square matrix.
##
sub calculate_determinant {
    my ($m) = @_;
    my $n = scalar(@{$m});

    if ($n == 1) {
        return $m->[0][0];
    }
    if ($n == 2) {
        return $m->[0][0] * $m->[1][1] -
               $m->[0][1] * $m->[1][0];
    }
    if ($n == 3) {
        return
            $m->[0][0] * ($m->[1][1] * $m->[2][2] - $m->[1][2] * $m->[2][1]) -
            $m->[0][1] * ($m->[1][0] * $m->[2][2] - $m->[1][2] * $m->[2][0]) +
            $m->[0][2] * ($m->[1][0] * $m->[2][1] - $m->[1][1] * $m->[2][0]);
    }
}

sub filter_non_zero_columns {
    my @matrix = @_;
    my $num_cols = scalar @{$matrix[0]};

    my @active_cols;
    for my $col (0..$num_cols-1) {
        if (any { $_->[$col] != 0 } @matrix) {
            push @active_cols, $col;
        }
    }

    my @filtered_matrix;
    for my $row (@matrix) {
        push @filtered_matrix, [@{$row}[@active_cols]];
    }
    return @filtered_matrix;
}

sub generate_square_combinations {
    my @matrix = @_;
    my $num_rows = scalar @matrix;
    my $num_cols = scalar @{$matrix[0]};
    my @combinations;

    if ($num_cols == 1) {
        for my $i (0 .. $num_rows - 1) {
            push @combinations, [$matrix[$i]];
        }
    } elsif ($num_cols == 2) {
        for my $i (0 .. $num_rows - 2) {
            for my $j ($i + 1 .. $num_rows - 1) {
                push @combinations, [$matrix[$i], $matrix[$j]];
            }
        }
    } elsif ($num_cols == 3) {
        for my $i (0 .. $num_rows - 3) {
            for my $j ($i + 1 .. $num_rows - 2) {
                for my $k ($j + 1 .. $num_rows - 1) {
                    push @combinations, [$matrix[$i], $matrix[$j], $matrix[$k]];
                }
            }
        }
    }
    return @combinations;
}

#==============================================================================
# Find machine epsilon.
# @param   void
# @retval  scalar
sub get_machine_epsilon
{
    my $epsilon = 1.00;
    while ( $epsilon + 1.00 > 1.00 ) {
        $epsilon /= 2;
    }
    return $epsilon;
}

my $debug;
my $use_parser = 'c';

my $machine_epsilon = get_machine_epsilon();

@ARGV = getOptions(

#* OPTIONS:
#*
#*   --debug
#*                     Print some human-readable debug output.
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**

    '--debug'    => sub { $debug = 1 },
    '--use-perl-parser' => sub{ $use_parser = 'perl' },
    '--use-c-parser'    => sub{ $use_parser = 'c' },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit },
);

@ARGV = ( '-' ) unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );

    if( $err_count > 0 ) {
        print STDERR $_ foreach ( @{$messages} );
        error( {
            'program'  => $0,
            'filename' => $filename,
            'message'  =>
                "$err_count error(s) encountered while parsing the file"
        } );
        next;
    }

    canonicalize_all_names( $data, $options );

    my @multiplicities;

    foreach my $block (@{$data}) {
        next unless exists $block->{values}{'_topol_net.id'};
        my $molecule_id = $block->{name};
        my @topol_atom_labels = @{ $block->{values}{'_topol_atom.atom_label'} };
        my @topol_atom_node_ids = @{ $block->{values}{'_topol_atom.node_id'} };
        my %node_id_label;
        my $graph = COD::Graph::CycleBasis->new();
        for my $i ( 0..$#topol_atom_labels ) {
            my $atom_label = $topol_atom_labels[$i];
            my $atom_node_id = $topol_atom_node_ids[$i];
            $node_id_label{$atom_node_id} = $atom_label;
            $graph->add_vertex($atom_label);
        }
        my @link_node_id_1 = @{$block->{values}{'_topol_link.node_id_1'}};
        my @link_node_id_2 = @{$block->{values}{'_topol_link.node_id_2'}};
        my @link_translation_x =
                             @{$block->{values}{'_topol_link.translation_2_x'}};
        my @link_translation_y =
                             @{$block->{values}{'_topol_link.translation_2_y'}};
        my @link_translation_z =
                             @{$block->{values}{'_topol_link.translation_2_z'}};

        for my $i (0..$#link_node_id_1) {
            my $node_id_1 = $link_node_id_1[$i];
            my $node_id_2 = $link_node_id_2[$i];
            my $translation_x = $link_translation_x[$i];
            my $translation_y = $link_translation_y[$i];
            my $translation_z = $link_translation_z[$i];
            $graph->add_edge_with_label(
                $node_id_label{$node_id_1},
                $node_id_label{$node_id_2},
                "$translation_x,$translation_y,$translation_z"
            );
        }

        my ($cycles_ref, $labels_collection_ref) = $graph->get_cycle_basis();
        my @cycles = @{$cycles_ref};
        next unless @cycles;
        my @labels_collection = @{$labels_collection_ref};
        my @matrix_ref;

        for my $c_i (0 .. $#cycles) {
            my $cycle = $cycles[$c_i];
            my $labels = $labels_collection[$c_i];

            my @sum_label = (0, 0, 0);
            foreach my $edge (@{$cycle}) {
                my ($source, $target, $id) = @{$edge};
                my $label_str = $labels->{$source}{$target}{$id};
                my @label_parts = split /\s*,\s*/, $label_str;
                @label_parts = map { int($_) } @label_parts;

                $sum_label[0] += $label_parts[0];
                $sum_label[1] += $label_parts[1];
                $sum_label[2] += $label_parts[2];
            }

            push(@matrix_ref, \@sum_label);
        }

        if($debug) {
            print "Cycles:\n";
            foreach my $cycle (@cycles) {
                print '[',
                    join(', ', map { ref($_) eq 'ARRAY' ? '(' .
                            join(', ', @{$_}). ')' : $_ } @{$cycle}), "]\n";
            }
            print "Matrix:\n";
            foreach my $row (@matrix_ref) {
                print join(', ', @{$row}), "\n";
            }
        }

        my $reduced_row_echelon_matrix = gj_elimination_non_zero_elements(
                                            \@matrix_ref,
                                            8 * $machine_epsilon
                                         );
        my $rank = scalar( @{$reduced_row_echelon_matrix} );
        my @filtered_matrix = filter_non_zero_columns(@matrix_ref);
        my @square_combinations =
                            generate_square_combinations(@filtered_matrix);

        my ($best_matrix, $min_det) = find_min_det(@square_combinations);
        if ($min_det) {
            push(@multiplicities, [$molecule_id, $min_det, $rank]);
        }
    }
    if(@multiplicities) {
        print "molecule_id\tmultiplicity\tdimensionality\n";
        foreach my $m (@multiplicities) {
            print "@$m[0]\t@$m[1]\t@$m[2]\n";
        }
    }
}
