#!/usr/bin/perl
# $Id: scabies_parse.pl,v 1.1.1.1 2008-10-15 18:55:57 jo Exp $
# Licensed under the GPL. Copyright 2008 Joachim Zobel.
# scabies_parse.pl <infile> <outdir>
# Converts osm files into several textfiles suitable for 
# LOAD DATA INFILE / mysqlimport
#

use strict;
use warnings;

#use XML::LibXML::SAX;
use XML::Parser;
use FileHandle;
use Data::Dumper;

use Tables::NODES;
use Tables::NODE_TAGS;
use Tables::WAY_NODES;
use Tables::WAYS;
use Tables::WAY_TAGS;
use Tables::RELATIONS;
use Tables::RELATION_TAGS;
use Tables::RELATION_MEMBERS;

# Format of the generated table files: fields are separated by tabs,
# records by newlines, and an undefined value is written as \N.
use constant FLD_SEP => "\t";
use constant REC_SEP => "\n";
use constant NULL => '\\N';

die "Usage:
scabies_parse.pl <infile> <outdir>
" unless scalar(@ARGV) == 2;

# The output directory is read by ensure_open_file when it builds the
# name of a table file.
my $outdir = $ARGV[1];
my $infile = $ARGV[0];
#my $infile = "../europe.osm";

# Maps an XML element name to the table its rows are written to.
# Elements that are not listed here produce no output of their own.
my %table_names = (
  node  => 'nodes',
  # tags are special, their table depends on the enclosing element:
  #  tag   => 'node_tags',
  #  tag   => 'way_tags',
  #  tag   => 'relation_tags',
  nd    => 'way_nodes',
  way   => 'ways',
  relation => 'relations',
  member => 'relation_members'
);

# Direct method call instead of the indirect object syntax
# "new XML::Parser(...)", which perl can misparse.
my $parser = XML::Parser->new(Handlers => {Init  => \&init,
                                           Final => \&final,
                                           Start => \&start_element,
                                           End   => \&end_element});

$parser->parsefile($infile);


#
# The XML::Parser event handlers. Almost all work is done in the
# end tag handler.
#
sub init
{
    # Init handler: resets the per-parse state that the other handlers
    # keep on the object they are called with.
    my $self = shift;

    # current - attributes of the elements that are open right now,
    #           keyed by element name.
    # files   - one open file per table, keyed by table name.
    $self->{$_} = {} for qw(current files);

    # The path of open elements, innermost first: the element being
    # processed is at [0], its parent at [1].
    $self->{xpath} = [];
}

#
# Final handler: closes the file handles of all table files.
# Buffered data is written out on close, so a failing close means that
# a table file is incomplete. Every handle is closed before the failures
# are reported.
# Dies if at least one file could not be closed.
#
sub final
{
  my $self = shift;

  my @failed;
  foreach my $table (sort(keys(%{$self->{files}}))) {
    $self->{files}{$table}->close()
      or push(@failed, "$table ($!)");
  }
  die "Could not close table file(s): ".join(', ', @failed)
    if @failed;

  return;
}

# Flattens an attribute structure into a plain name => value hash.
# $rtag - ref. to a hash whose values are hash refs with the entries
#         'Name' and 'Value'. The keys of $rtag itself are not used
#         in the result.
# Returns a ref. to the new hash.
sub extract_attributes($)
{
  my ($rtag)  = @_;

  my %value_of;
  foreach my $key (keys(%{$rtag})) {
    $value_of{$rtag->{$key}{Name}} = $rtag->{$key}{Value};
  }

  return \%value_of;
}

# Placeholder for handling that is generic for each tag.
# Currently does nothing: the arguments are ignored and nothing
# is returned.
sub tag_handler
{
  return;
}

#
# Start handler: only records the element, the output is written
# by end_element.
# $self - the object carrying the state set up by init
# $name - the element name
# %attr - the attributes of the element as name/value pairs
#
sub start_element {
  my ($self, $name, %attr) = @_;

  # The new element becomes the head of the path ...
  unshift(@{$self->{xpath}}, $name);
  # ... and its attributes are kept until the matching end tag.
  $self->{current}{$name}  = \%attr;
}


# Makes sure that the output file of a table is open.
# $self  - the object carrying the state set up by init
# $table - the table name; the file is "$outdir/$table.txt"
# On the first call for a table the file is opened for writing
# (replacing existing content) and the handle is stored in
# $self->{files}{$table}. Later calls for the same table do nothing.
# Dies if the file cannot be opened or switched to utf8 output.
sub ensure_open_file
{
  my ($self, $table) = @_;
  return if exists($self->{files}{$table});

  my $fname = "$outdir/$table.txt";
  my $fh = FileHandle->new();
  # The mode is passed separately from the name, so that characters in
  # the name (which starts with the user supplied $outdir) can never be
  # taken as part of the open mode.
  $fh->open($fname, '>')
    or die "Could not open $fname for writing:$!";
  binmode($fh, ':utf8')
    or die "Could not set utf8 output for $fname:$!";
  $self->{files}{$table} = $fh;

  return;
}

# Returns the field list of a table.
# $table - the table name, e.g. 'nodes'
# The list is produced by the sub ROW of the package named after the
# upper-cased table name (Tables::NODES::ROW for 'nodes'). ROW is called
# with the table name as its only argument.
sub get_map_and_fields($)
{
  my ($table) = @_;

  # The sub is looked up by name, the package depends on the table.
  my $row_sub = 'Tables::'.uc($table).'::ROW';

  my @fields;
  {
    # Calling a sub through its name needs symbolic references.
    no strict qw(refs);
    @fields = &{$row_sub}($table);
  }
  return @fields;
}

# Special handling for 'tag' elements.
# $self    - the object carrying the state set up by init
# (2nd)    - a table name, accepted for the caller's convenience
#            but not used
# $current - the attributes of the open elements by element name
# We need to distinguish between way tags, node tags and relation
# tags. The table is named after the parent element, and the id of
# the parent is copied into the attributes of the tag.
# Returns the table name ("<parent>_tags").
sub tag_tag_specials
{
  my ($self, undef, $current) = @_;

  # [0] is the tag itself, [1] is the element it belongs to.
  my $parent = $self->{xpath}[1];
  my $tags_table = $parent."_tags";
  $current->{tag}{id} = $current->{$parent}{id};

  return $tags_table;
}

# Prepares a field value for the table file.
# $val - the raw value, may be undefined
# Returns NULL for an undefined value. Otherwise returns a copy of
# $val in which every tab, newline and backslash is preceded by a
# backslash, so that data cannot be mistaken for FLD_SEP or REC_SEP.
sub escape_for_load($)
{
  my ($val) = @_;
  return  NULL unless defined($val);
  # A capture group is used instead of $&, which slows down every
  # regex match of the program on perl versions before 5.20. The
  # pattern interpolates nothing, so /o is not needed.
  $val =~ s/([\t\n\\])/\\$1/g;
  return $val;
}

# End handler: writes the row for the element that is being closed.
# $self - the object carrying the state set up by init
# $name - the element name
# The table is taken from %table_names, for 'tag' elements it is
# chosen by tag_tag_specials. Elements without a table write nothing.
# Afterwards the element is removed from the path and its attributes
# are dropped.
# Dies if the row cannot be written.
sub end_element {
  my ($self, $name) = @_;

  my $table = $table_names{$name};
  my $current = $self->{current};

  # Child elements get the id of the element they belong to.
  if ($name eq 'tag') {
    $table = tag_tag_specials($self, $table, $current);
  }
  elsif ($name eq 'nd') {
    $current->{nd}{id} = $current->{way}{id};
  }
  elsif ($name eq 'member') {
    $current->{member}{id} = $current->{relation}{id};
  }

  if (defined($table)) {
    ensure_open_file($self, $table);

    # The fields of the table are read from the attributes of the
    # element, in the order given by the table description.
    my @row = map { escape_for_load($current->{$name}{$_}) }
                  get_map_and_fields($table);

    # A failing write must not go unnoticed, it would leave an
    # incomplete table file behind.
    my $fh = $self->{files}{$table};
    print {$fh} join(FLD_SEP, @row).REC_SEP
      or die "Could not write to the file of table $table:$!";
  }

  # Cleanup
  shift(@{$self->{xpath}});
  delete $self->{current}{$name};
}


1;



