#!/usr/bin/perl

# ------------------------------------------------------------------
# update_links.pl
# ------------------------------------------------------------------
# Author : Sam Tregar
# Copyright : (c) MKDoc Holdings Ltd, 2005
#
# This file is part of MKDoc. 
# 
# MKDoc is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# MKDoc is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with MKDoc; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# ------------------------------------------------------------------

# This script updates links into the new format which differentiates
# between internal and external links.  Internal links are matched to
# the documents they point to and stored by ID.  The script will
# output a line for each external, internal and invalid internal link
# it finds.
#
# It would be a good idea to make a backup of your database before
# running this script as it has not been extensively tested.
 
use strict;
use warnings;
use MKDoc;
use MKDoc::Util::LinkParser;

# make sure SITE_DIR is set since MKDoc->init needs it
die "SITE_DIR isn't set.  Please source mksetenv.sh from an installed MKDoc site ".
  "and try again.\n"
  unless $ENV{SITE_DIR};

# initialize MKDoc, needed for database connection
MKDoc->init;

# setup global accessors ($parser is also used by process_links() and
# process_headlines() further down in this file)
my $dbh    = lib::sql::DBH->get();
my $doc_t  = flo::Standard::table('Document');
my $parser = MKDoc::Util::LinkParser->new();

# Collect the ID of every document up front.  selectcol_arrayref()
# returns undef on failure when RaiseError is off; abort with the
# database error in that case instead of letting the problem surface
# later as a confusing (or silent) no-op.
my $document_ids = $dbh->selectcol_arrayref('SELECT ID FROM Document')
  or die "Unable to fetch document IDs: "
       . ($dbh->errstr || 'unknown database error') . "\n";

# step through documents one at a time
foreach my $document_id (@$document_ids) {
    my $doc = $doc_t->get($document_id);

    # Guard against a lookup that yields nothing (presumably possible if
    # the row vanished after the ID list was read -- TODO confirm); report
    # it and carry on with the remaining documents.
    unless ($doc) {
        warn "!!! Unable to load Document $document_id, skipping.\n";
        next;
    }

    # setup an editor so we can make changes if necessary
    my $editor = flo::Editor->_new();
    $editor->parse_xml($doc->{Body}, $document_id);

    # each process_* sub returns the number of components it modified
    my $dirty = 0;
    my @components = $editor->access();
    $dirty += process_links($document_id, \@components);
    $dirty += process_headlines($document_id, \@components);

    # if changes were made they need to get updated in the DB.  (It's
    # hard to believe there isn't a better way to do this, but I'm
    # pretty sure there isn't.)
    if ($dirty) {
        print "*** Updating Document $document_id with $dirty new links.\n";
        $doc->{Body} = $editor->generate_xml();
        $doc->save();
    }
}

# Resolve the from_path of each headlines component to a document.
#
# Arguments:
#   $document_id - ID of the document being processed; only used in the
#                  progress messages
#   $components  - array ref of the document's editor components
#
# Every flo::editor::Headlines component that has a from_path but no
# from_document_id yet is run through the file-level $parser.  When the
# path is both internal and valid, from_document_id and from_path are
# rewritten from the parser; anything else (external or invalid) is
# reported as invalid and left untouched.  One line per examined
# headline is printed to STDOUT.
#
# Returns the number of components that were modified.
sub process_headlines {
    my ($document_id, $components) = @_;

    my @headlines = grep { $_->isa('flo::editor::Headlines') } @{$components};
    my $changed   = 0;

    for my $headline (@headlines) {
        # already converted on an earlier run
        next if $headline->{from_document_id};

        # nothing to resolve without a path
        my $url = $headline->{from_path} or next;

        $parser->parse($url);

        if ($parser->is_internal and $parser->is_valid) {
            # usable internal path: record the target and normalised path
            $headline->{from_document_id} = $parser->document_id;
            $headline->{from_path}        = $parser->as_path;
            ++$changed;
            print "+++ Valid Headline Link ($document_id): $url\n";
        }
        else {
            # external and invalid paths are both reported, not changed
            print "!!! Invalid Headline Link ($document_id): $url\n";
        }
    }

    return $changed;
}

# Convert plain link components that point inside the site into
# internal links.
#
# Arguments:
#   $document_id - ID of the document being processed; only used in the
#                  progress messages
#   $components  - array ref of the document's editor components
#
# Every flo::editor::Link component that has a url but no internal_link
# yet is run through the file-level $parser and classified:
#   external          - reported, left untouched
#   invalid internal  - reported, left untouched
#   valid internal    - internal_link is set from $parser->freeze() and
#                       url is replaced by $parser->as_string
# One line per examined link is printed to STDOUT.
#
# Returns the number of components that were modified.
sub process_links {
    my ($document_id, $components) = @_;

    my @candidates = grep { $_->isa('flo::editor::Link') } @{$components};
    my $changed    = 0;

    for my $candidate (@candidates) {
        # already converted on an earlier run
        next if $candidate->{internal_link};

        # nothing to evaluate without a url
        my $url = $candidate->{url} or next;

        $parser->parse($url);

        if (!$parser->is_internal) {
            print "=== External Link ($document_id): $url\n";
        }
        elsif (!$parser->is_valid) {
            print "!!! Invalid Internal Link ($document_id): $url\n";
        }
        else {
            # valid internal link: store the frozen parser state and the
            # parser's own rendering of the url
            $candidate->{internal_link} = $parser->freeze();
            $candidate->{url}           = $parser->as_string;
            ++$changed;
            print "+++ Valid Internal Link ($document_id): $url\n";
        }
    }

    return $changed;
}
