#!/usr/bin/perl -w

use constant SX_ELEM => 0;
use constant SX_ATTR => 1;
use constant SX_CONT => 2;

my %decode_types = (
  none => sub {undef},
  'int' => sub {int($_[0])},
  long => sub {int($_[0])},
  float => sub {$_[0]},
  text => sub {$_[0]},
  array => sub {my($n)=@_; [map decode($_),@_]},
  hash => sub {my($n)=@_; [map {$_->[SX_ATTR]{name}, decode($_)} @_]},
);
for (keys %decode_types) {$decode_types{uc substr($_,0,1)} = $decode_types{$_}}
my %encode_types = (
);
for (keys %encode_types) {$encode_types{uc substr($_,0,1)} = $encode_types{$_}}


use XML::Parser;
use Data::Dumper;

my $data = <<'EOT';
<mp><data type='array'><data type='N'></data></data></mp>
EOT

my $sx = xml2sx($data);
#print Dumper($sx);
my $struct = decode($sx);
print Dumper($struct);
my $data2 = sx2xml(encode($struct));
print $data2, "\n";

sub decode {
  my ($node) = @_;
  #local $Data::Dumper::Indent = 0;
  #print "Decoding ", Dumper($node), "\n";
  if ($node->[SX_ELEM] eq 'mp') {
    return decode($node->[SX_CONT]);
  } elsif ($node->[SX_ELEM] eq 'data') {
    return $decode_types{$node->[SX_ATTR]{'type'}}->(@$node[SX_CONT..$#$node]);
  } else {
    die "Unknown element: $node->[SX_ELEM]";
  }
}

sub encode {
  my ($node, $hkey) = @_;
  my @h = defined $hkey ? (name=>$hkey) : ();
  if (not defined $node) {
    return [data=>{type=>'N',@h}];
  } elsif (not ref $node) {
    return [data=>{type=>'T',@h}, $node];
  } elsif (ref $node eq 'ARRAY') {
    return ['data',{type=>'array',@h}, map encode($_), @$node];
  } elsif (ref $node eq 'HASH') {
    return ['data',{type=>'hash',@h}, map encode($node->{$_}, $_), keys %$node];
  } else {
    return ['data',{type=>'other-perl',@h}, $node];
  }
}



### XML/SX: Constants, conversion and processing ##############################

sub sx2xml {
  @_ > 1 and return join '', map sx2xml($_), @_;
  my ($thing) = @_;

  if (ref $thing ne 'ARRAY') {
    return "<!--undef-->" if not defined $thing;
    $thing = "$thing" if ref $thing;

    $thing =~ s/&/&amp;/g;
    $thing =~ s/</&lt;/g;
    $thing =~ s/>/&gt;/g;
    return $thing;
  
  } else {
    my $attrs = $thing->[SX_ATTR] or die "sx2xml: missing attribute hash";
    return "<$thing->[SX_ELEM]"
         . join('', map {
             my $v = $attrs->{$_};
             $v =~ s/&/&amp;/g;
             $v =~ s/'/&apos;/g;
             " $_='$v'";
           } keys %$attrs)
         . ($#$thing >= SX_CONT
             ? '>' . join('', map sx2xml($_), @{$thing}[SX_CONT..$#$thing])
                   . "</$thing->[SX_ELEM]>"
             : '/>'
           )
         ;
  }
}


  # xml2sx processing.
  # NOTE: This code is NOT thread-safe. It uses lexicals outside of
  # the scope of the xml2sx subroutine. If this needs to be made thread-
  # safe, just move the creation of the XML::Parser object inside 
  # xml2sx.
  
  use XML::Parser;

  my ($sxout, @estack, $epreserve);

  my $p;
BEGIN {$p = XML::Parser->new(
    Handlers => {
      Init => sub {
        my ($exp) = @_;
        $sxout = undef;
        $epreserve = 0;
      },
      Final => sub {
        my ($exp) = @_;
        @estack = ();
      },
      Start => sub {
        my ($exp, $elem, %attr) = @_;
        my $sx = [$elem, {%attr}];
        if (@estack) {
          push @{$estack[-1]}, $sx;
        } else {
          # check disabled because expat checks this for us
          # !$sxout or die "xpe[second root element encountered]"; 
          $sxout = $sx;
        }
        push @estack, $sx;
        $epreserve++ if ($sx->[SX_ATTR]{'xml:space'} || '') eq 'preserve';
      },
      End => sub {
        my ($exp, $elem) = @_;
        my $sx = pop @estack;
        # check disabled because expat checks this for us
        # $sx->[0] eq $elem or die "xpe[end tag '$elem' doesn't match start tag '$sx->[0]']";
        
        # combine adjacent text nodes
        for (my $i = SX_CONT; $i < $#$sx; $i++) {
          if (!ref $sx->[$i] and !ref $sx->[$i+1]) {
            splice @$sx, $i, 2, join('', @$sx[$i..$i+1]);
            $i--;
          }
        }
        if (!$epreserve) {
          for (@$sx) {
            next if ref;
            s/[\cM\cJ\cI ]+/ /g;
          }
          if ($#$sx >= SX_CONT) {
            $sx->[SX_CONT] =~ s/^ +// unless ref $sx->[SX_CONT];
            $sx->[-1]      =~ s/ +$// unless ref $sx->[-1];
          }
        }
        $epreserve-- if ($sx->[SX_ATTR]{'xml:space'} || '') eq 'preserve';
      },
      Char => sub {
        my ($exp, $text) = @_;
        # check disabled because expat checks this for us
        # @estack or die "xpe[text encountered ouside of any element]";
        my $node = $estack[-1];
        push @$node, $text;
      },
    },
    ProtocolEncoding => 'US-ASCII',
  );}
  
  sub xml2sx {
    $p->parse($_[0]);
    
    # save a little (or a lot) of memory
    my $sx = $sxout;
    $sxout = undef;
    return $sx;
  }
  
  sub xml2sx_file {
    $p->parsefile(rfile($_[0]));
    my $sx = $sxout;
    $sxout = undef;
    return $sx;
  }

1;
