Table of Contents
概要
- XMLファイルを比較し、要素/属性が一致する場合は「=:」、不一致の場合は行頭に「!」を付けて表示します。
- 他方に項目が無かった場合は「~」が表示されます。
- XMLの要素も属性も共に配列ではなくハッシュとして取り扱います。
- 設定を外部ファイルから読み込むようにしました。(2012/05/20)
- Settings.yml では、PARAM要素を、NAME属性をキー、VALUE属性を値とするハッシュに変換します。
- 3つ以上のXMLファイルも比較できるようにしました。(2012/05/20)
- 初版(2012/05/18)
ソース
compareXML.pl
# XMLファイルを比較
use strict;
use warnings;
use utf8;
use Encode;
use XML::Simple;
use YAML::Syck;
use Getopt::Long;
# Main script: parse the command line, load the optional YAML settings,
# read every XML file and write the comparison report.

# Ask YAML::Syck to flag loaded strings that contain valid UTF-8 as Unicode.
$YAML::Syck::ImplicitUnicode = 1;

# Encoding used for the console streams, command-line arguments and file names.
my $charsetConsole = 'CP932';
# Encoding of the generated report file.
my $charsetFile = 'UTF-8';
binmode( STDIN, ":encoding($charsetConsole)" );
binmode( STDOUT, ":encoding($charsetConsole)" );
binmode( STDERR, ":encoding($charsetConsole)" );

my $usage = "usage: compareXML [-config <yml>] <xml> <xml> [<xml> ...]\n";
my $configFile = "";
# Built-in defaults; top-level keys are replaced by those of the -config file.
my $config = {
    'Output' => "diff.txt",
    'Indent' => "\t",
    'XMLOpt' => {},
};
my @FileNames = ();    # input files, in command-line order
my $root = {};         # parsed XML tree of each input, keyed by file name

# Arguments arrive as console-encoded bytes; work with character strings.
@ARGV = map{ decode( $charsetConsole, $_ ); } @ARGV;
my $result = GetOptions(
    "config=s" => \$configFile,
);
# Stop on a rejected option instead of silently carrying on without it.
if ( !$result || @ARGV < 2 ){
    die( $usage );
}

if ( $configFile ){
    my $config2 = YAML::Syck::LoadFile( encode( $charsetConsole, $configFile ) )
        or die( "$configFile: $!\n" );
    # The merge below dereferences the loaded document as a hash.
    ref( $config2 ) eq 'HASH'
        or die( "$configFile: top-level YAML node must be a mapping\n" );
    # Shallow merge: a key present in the file replaces the whole default value.
    $config = { %{$config}, %{$config2} };
}
my $fileOut = $config->{'Output'};
my $IndentBase = $config->{'Indent'};
my $xs = XML::Simple->new( %{$config->{'XMLOpt'}} );

foreach my $file ( @ARGV ){
    # File-system calls get the console-encoded byte string.
    my $path = encode( $charsetConsole, $file );
    -f $path
        or die( "Not exist: $file\n" );
    push( @FileNames, $file );
    $root->{$file} = $xs->XMLin( $path );
}

open( my $fhout, ">:encoding($charsetFile)", encode( $charsetConsole, $fileOut ) )
    or die( "$fileOut: $!" );
compareNodes( $root, 0 );
# Buffered write errors only surface when the handle is closed.
close( $fhout )
    or die( "$fileOut: $!\n" );
## @function compareNodes( %$href, $indent )
# Walk the nodes of all input files in parallel and write the comparison
# to $fhout.
# @param href   hash ref: file name => node of that file at the current level
# @param indent nesting depth, used as the repeat count for $IndentBase
sub compareNodes
{
    my( $href, $indent ) = @_;
    my $padKey   = $IndentBase x $indent;
    my $padValue = $IndentBase x ( $indent + 1 );

    # Union of the keys of every file whose node is a hash at this level.
    my @hashFiles = grep { ref( $href->{$_} ) eq 'HASH' } @FileNames;
    my %unionKeys = ();
    foreach my $fn ( @hashFiles ){
        @unionKeys{ keys( %{$href->{$fn}} ) } = ();
    }

    foreach my $name ( sort( keys( %unionKeys ) ) ){
        printf $fhout ( "%s%s:\n", $padKey, $name );

        # Collect each file's value for this key; '~' stands for "missing".
        my %valueOf = ();
        my $needsDescent = 0;    # set once any file holds a hash here
        foreach my $fn ( @FileNames ){
            my $node = $href->{$fn};
            my $value = '~';
            if ( ref( $node ) eq 'HASH' ){
                $value = $node->{$name} // '~';
            }
            $needsDescent = 1 if ref( $value ) eq 'HASH';
            $valueOf{$fn} = $value;
        }

        # At least one side is a hash: compare one level deeper.
        if ( $needsDescent ){
            compareNodes( \%valueOf, $indent + 1 );
            next;
        }

        # Leaf values that all agree are printed once, after "=:".
        if ( eqAll( \%valueOf ) ){
            printf $fhout (
                "%s=:%s%s\n",
                $padValue, $IndentBase, $valueOf{$FileNames[0]}
            );
            next;
        }

        # Leaf values that differ: one line per file, flagged with "!".
        foreach my $fn ( @FileNames ){
            printf $fhout (
                "!%s%s:%s%s\n",
                $padValue, $fn, $IndentBase, $valueOf{$fn}
            );
        }
    }
}
## @function eqAll( %$refHash )
# Compare all values of a hash with each other (string comparison).
# An empty or single-entry hash counts as equal. Undefined values are
# equal only to other undefined values.
# @param refHash reference to the hash whose values are compared
# @retval 1 all values are equal
# @retval 0 at least one value differs
sub eqAll
{
    my( $refHash ) = @_;
    my @Values = values( %{$refHash} );
    # Nothing to compare against: avoids looking up an undefined key.
    return 1 if @Values < 2;

    my $first = shift( @Values );
    foreach my $value ( @Values ){
        if ( !defined( $first ) || !defined( $value ) ){
            # Compare definedness only, so "ne" never sees undef.
            next if !defined( $first ) && !defined( $value );
            return 0;
        }
        return 0 if $first ne $value;
    }
    return 1;
}
# EOF
入力
Settings.yml
Output: diff.txt
Indent: "\t"
XMLOpt:
  ForceArray:
    - PARAM
  KeyAttr:
    PARAM: NAME
    OPERATION: NAME
  ContentKey:
    "-VALUE"
Item1.xml
<SETTINGS NAME="Item1" VERSION="8" >
<DOCUMENT DOCUMENT_ID="79" NAME="Doc02.indd" SRC="//localhost/Data/2/2/" TYPE="INDD" />
<OUTPUT>
<PARAM NAME="START_RECORD" VALUE="1" />
<PARAM NAME="END_RECORD" VALUE="100" />
<PARAM NAME="TYPE" VALUE="PDF" />
<PARAM NAME="CENTER_PAGE" VALUE="0" />
</OUTPUT>
<ASSET_LIST Resolve="1">
<ASSET ID="73" NAME="Assets1" PRIORITY="1" TYPE="LOCAL" >
<PARAM NAME="BaseID" VALUE="1" />
</ASSET>
</ASSET_LIST>
<DATA_SOURCES>
<DATA_SOURCE ID="13" >
<PARAM NAME="FILENAME" VALUE="db1.csv" />
<PARAM NAME="TYPE" VALUE="TEXT" />
</DATA_SOURCE>
</DATA_SOURCES>
<FONT_LIST>
<FONT ID="20" NAME="Arial" OPTION="OpenType" />
</FONT_LIST>
<POST_PRODUCTION />
</SETTINGS>
Item2.xml
<SETTINGS NAME="Item2" VERSION="8" >
<DOCUMENT DOCUMENT_ID="78" NAME="Doc01.indd" SRC="//localhost/Data/2/1/" TYPE="INDD" />
<ASSET_LIST Resolve="1">
<ASSET ID="73" NAME="Assets1" PRIORITY="1" TYPE="LOCAL" >
<PARAM NAME="BaseID" VALUE="1" />
</ASSET>
</ASSET_LIST>
<JOB ID="330" TYPE="PRINT">
<PARAM NAME="HOST_NAME" VALUE="localhost" />
<JOB_CONTEXT JobID="330" JobName="Doc01" JobType="1" />
</JOB>
<OUTPUT MEDIA="1" OUTPUT_FILE_NAME="Doc01" TYPE="PDF">
<PARAM NAME="TYPE" VALUE="PDF" />
<PARAM NAME="START_RECORD" VALUE="80" />
<PARAM NAME="END_RECORD" VALUE="100" />
<PARAM NAME="FONTS_POLICY" VALUE="1" />
<PARAM NAME="OVERFLOW_POLICY" VALUE="0" />
</OUTPUT>
<FONT_LIST>
<FONT ID="20" NAME="Arial" />
</FONT_LIST>
<POST_PRODUCTION JOBID="330">
<OPERATION NAME="IMPOSITION">
<PARAM NAME="NUPX" VALUE="1" />
<PARAM NAME="NUPY" VALUE="2" />
</OPERATION>
<OPERATION NAME="DISTILLER">
<PARAM NAME="DSTL_SETTINGS" VALUE="HighQuality" />
</OPERATION>
<OPERATION NAME="COPY">
<PARAM NAME="DEST_PATH" VALUE="//localhost/Output/2/1/" />
</OPERATION>
</POST_PRODUCTION>
<TRACK_INFO />
</SETTINGS>
出力
ASSET_LIST:
ASSET:
ID:
=: 73
NAME:
=: Assets1
PARAM:
BaseID:
=: 1
PRIORITY:
=: 1
TYPE:
=: LOCAL
Resolve:
=: 1
DATA_SOURCES:
DATA_SOURCE:
ID:
! Item1.xml: 13
! Item2.xml: ~
PARAM:
FILENAME:
! Item1.xml: db1.csv
! Item2.xml: ~
TYPE:
! Item1.xml: TEXT
! Item2.xml: ~
DOCUMENT:
DOCUMENT_ID:
! Item1.xml: 79
! Item2.xml: 78
NAME:
! Item1.xml: Doc02.indd
! Item2.xml: Doc01.indd
SRC:
! Item1.xml: //localhost/Data/2/2/
! Item2.xml: //localhost/Data/2/1/
TYPE:
=: INDD
FONT_LIST:
FONT:
ID:
=: 20
NAME:
=: Arial
OPTION:
! Item1.xml: OpenType
! Item2.xml: ~
JOB:
ID:
! Item1.xml: ~
! Item2.xml: 330
JOB_CONTEXT:
JobID:
! Item1.xml: ~
! Item2.xml: 330
JobName:
! Item1.xml: ~
! Item2.xml: Doc01
JobType:
! Item1.xml: ~
! Item2.xml: 1
PARAM:
HOST_NAME:
! Item1.xml: ~
! Item2.xml: localhost
TYPE:
! Item1.xml: ~
! Item2.xml: PRINT
NAME:
! Item1.xml: Item1
! Item2.xml: Item2
OUTPUT:
MEDIA:
! Item1.xml: ~
! Item2.xml: 1
OUTPUT_FILE_NAME:
! Item1.xml: ~
! Item2.xml: Doc01
PARAM:
CENTER_PAGE:
! Item1.xml: 0
! Item2.xml: ~
END_RECORD:
=: 100
FONTS_POLICY:
! Item1.xml: ~
! Item2.xml: 1
OVERFLOW_POLICY:
! Item1.xml: ~
! Item2.xml: 0
START_RECORD:
! Item1.xml: 1
! Item2.xml: 80
TYPE:
=: PDF
TYPE:
! Item1.xml: ~
! Item2.xml: PDF
POST_PRODUCTION:
JOBID:
! Item1.xml: ~
! Item2.xml: 330
OPERATION:
COPY:
PARAM:
DEST_PATH:
! Item1.xml: ~
! Item2.xml: //localhost/Output/2/1/
DISTILLER:
PARAM:
DSTL_SETTINGS:
! Item1.xml: ~
! Item2.xml: HighQuality
IMPOSITION:
PARAM:
NUPX:
! Item1.xml: ~
! Item2.xml: 1
NUPY:
! Item1.xml: ~
! Item2.xml: 2
TRACK_INFO:
VERSION:
=: 8
Link
Perl/XML-Simple
XML Notepad XmlDiff機能あり
- diffxml XML Diff and Patch Utilities