#!/usr/local/bin/perl
# Convert a string to hexadecimal notation in several encodings.
# Save this script as UTF-8 without BOM (UTF8N) with LF line endings.
use strict;
use warnings;
use utf8;
use Encode qw/encode decode/;
#binmode( STDOUT, ":encoding(utf8)" );

# Sample text: ASCII letters followed by hiragana.
my $message = 'ABCXYZあいうえおわゐゑをん';
my $output  = 'output.txt';

# The report is written as UTF-8 through a lexical handle, so the handle
# is scoped to this script instead of being a package global.
open( my $out, ">:encoding(utf8)", $output ) or die( "can't open '$output'.\n" );
printf $out ( "str : %s\n", $message );
printf $out ( "UCS2: %s\n", str_to_ucs2( $message ) );
printf $out ( "UTF8: %s\n", str_to_utf8( $message ) );
printf $out ( "UTF8: %s\n", str_to_byte( $message, "utf8" ) );
printf $out ( "SJIS: %s\n", str_to_byte( $message, "shift_jis" ) );
printf $out ( "euc : %s\n", str_to_byte( $message, "euc-jp" ) );
# Buffered write errors only surface at close, so check it.
close( $out ) or die( "can't close '$output'.\n" );
exit();
#### Subroutine ####
# Treats the input as a wide-character (decoded) string.
# Returns the code point of every character as lowercase hex, zero-padded to
# at least four digits and separated by single spaces.
sub str_to_ucs2
{
my( $sIn ) = @_;
# One formatted group per character; join() supplies the separators, so
# there is no trailing blank to remove afterwards.
my @groups = map { sprintf( "%04x", unpack( "U", $_ ) ) } split( //, $sIn );
return join( ' ', @groups );
}
# Treats the input as a wide-character (decoded) string.
# Returns the UTF-8 byte sequence of each character as lowercase hex, one
# space-separated group per character (e.g. "41 e38182").
sub str_to_utf8
{
my( $sIn ) = @_;
# Encode each character explicitly before unpacking. Since Perl 5.10
# unpack("H") operates on characters, not on the internal UTF-8 bytes, so
# unpacking a wide character directly yields only its low 8 bits.
my @groups = map { unpack( "H*", encode( "utf8", $_ ) ) } split( //, $sIn );
return join( ' ', @groups );
}
# Treats the input as a multi-byte string: the text is first encoded with
# $encoding, then every resulting byte is shown as two lowercase hex digits,
# separated by single spaces.
sub str_to_byte
{
my( $sIn, $encoding ) = @_;
my $bytes = encode( $encoding, $sIn );
# "(H2)*" unpacks one two-digit group per byte.
return join( ' ', unpack( "(H2)*", $bytes ) );
}
# EOF
str : ABCXYZあいうえおわゐゑをん
UCS2: 0041 0042 0043 0058 0059 005a 3042 3044 3046 3048 304a 308f 3090 3091 3092 3093
UTF8: 41 42 43 58 59 5a e38182 e38184 e38186 e38188 e3818a e3828f e38290 e38291 e38292 e38293
UTF8: 41 42 43 58 59 5a e3 81 82 e3 81 84 e3 81 86 e3 81 88 e3 81 8a e3 82 8f e3 82 90 e3 82 91 e3 82 92 e3 82 93
SJIS: 41 42 43 58 59 5a 82 a0 82 a2 82 a4 82 a6 82 a8 82 ed 82 ee 82 ef 82 f0 82 f1
euc : 41 42 43 58 59 5a a4 a2 a4 a4 a4 a6 a4 a8 a4 aa a4 ef a4 f0 a4 f1 a4 f2 a4 f3
# UTF-8 <-> hexadecimal / Base64 conversion
# utf8-hex.pl
# Save this script as UTF-8 without BOM (UTF8N) with LF line endings.
use strict;
use warnings;
use utf8;
use Encode;
use MIME::Base64;

# Sample text and its expected hex / Base64 forms.
my $utf8str = "ABCあいうえお";
my $hexstr = "414243E38182E38184E38186E38188E3818A";
my $b64str = "QUJD44GC44GE44GG44GI44GK";

# Lexical handle instead of the package-global bareword OUT.
open( my $out, ">:utf8", "output.txt" ) or die( "can't open 'output.txt'.\n" );
printf $out ( "utf8:\t'%s'\n", $utf8str );
printf $out ( "->hex:\t'%s'\n", Utf8ToHex( $utf8str ) );
printf $out ( "hex:\t'%s'\n", $hexstr );
printf $out ( "->utf8:\t'%s'\n", HexToUtf8( $hexstr ) );
printf $out ( "utf8:\t'%s'\n", $utf8str );
printf $out ( "->B64:\t'%s'\n", Utf8ToB64( $utf8str ) );
printf $out ( "B64:\t'%s'\n", $b64str );
printf $out ( "->utf8:\t'%s'\n", B64ToUtf8( $b64str ) );
# Buffered write errors only surface at close, so check it.
close( $out ) or die( "can't close 'output.txt'.\n" );
exit();
# Encode a character string as UTF-8 and return its bytes as one lowercase
# hex string.
sub Utf8ToHex
{
my $src = shift;
my $bytes = encode( "utf8", $src );
return unpack( "H*", $bytes );
}
# Turn a hex string into bytes and decode those bytes as UTF-8, returning a
# character string.
sub HexToUtf8
{
my $src = shift;
my $bytes = pack( "H*", $src );
return decode( "utf8", $bytes );
}
# Encode a character string as UTF-8 and return the Base64 form of those
# bytes. The empty second argument to encode_base64 suppresses line breaks.
sub Utf8ToB64
{
my $src = shift;
my $bytes = encode( "utf8", $src );
return encode_base64( $bytes, "" );
}
# Decode a Base64 string into bytes and decode those bytes as UTF-8,
# returning a character string.
sub B64ToUtf8
{
my $src = shift;
my $bytes = decode_base64( $src );
return decode( "utf8", $bytes );
}
# EOF
utf8: 'ABCあいうえお'
->hex: '414243e38182e38184e38186e38188e3818a'
hex: '414243E38182E38184E38186E38188E3818A'
->utf8: 'ABCあいうえお'
utf8: 'ABCあいうえお'
->B64: 'QUJD44GC44GE44GG44GI44GK'
B64: 'QUJD44GC44GE44GG44GI44GK'
->utf8: 'ABCあいうえお'
#!/usr/local/bin/perl
# bin2hex.pl
# Convert a binary file to hexadecimal text, written to "<binfile>.hex".
use strict;
use warnings;
use utf8;

my $ext = "hex";
my $fin = $ARGV[0];
# Test definedness/length rather than truth so a file named "0" is accepted.
( defined( $fin ) && length( $fin ) ) or die( "usage: $0 <binfile>\n" );
open( my $fhin, "<:raw", $fin ) or die( "$fin: $!\n" );
open( my $fhout, ">:utf8", "$fin.$ext" ) or die( "$fin.$ext: $!\n" );

# Binary data has no meaningful line structure: reading it "line by line"
# would load the whole file at once when it contains no 0x0A byte. Read
# fixed-size chunks instead; the concatenated hex output is the same.
my $chunk_size = 65536;
while( 1 ){
    my $nread = read( $fhin, my $buf, $chunk_size );
    defined( $nread ) or die( "$fin: $!\n" );
    last if $nread == 0;
    print $fhout unpack( "H*", $buf );
}
close( $fhin );
# Buffered write errors only surface at close, so check it.
close( $fhout ) or die( "$fin.$ext: $!\n" );
# EOF
#!/usr/local/bin/perl
# hex2bin.pl
# Convert hexadecimal text to a binary file, written to "<hexfile>.bin".
use strict;
use warnings;
use utf8;

my $ext = "bin";
my $fin = $ARGV[0];
# Test definedness/length rather than truth so a file named "0" is accepted.
( defined( $fin ) && length( $fin ) ) or die( "usage: $0 <hexfile>\n" );
open( my $fhin, "<:utf8", $fin ) or die( "$fin: $!\n" );
open( my $fhout, ">:raw", "$fin.$ext" ) or die( "$fin.$ext: $!\n" );

# A hex digit left over when a line holds an odd number of digits; it is
# prepended to the next line so byte boundaries stay aligned across lines.
my $pending = '';
while( my $line = <$fhin> ){
    # Line terminators and other whitespace are not hex digits; passing them
    # to pack("H*") would turn them into garbage nibbles.
    $line =~ s/\s+//g;
    if( $line =~ /([^0-9A-Fa-f])/ ){
        die( sprintf( "%s: line %d: invalid hex digit '%s'\n", $fin, $., $1 ) );
    }
    $line = $pending . $line;
    $pending = ( length( $line ) % 2 ) ? chop( $line ) : '';
    print $fhout pack( "H*", $line );
}
if( length( $pending ) ){
    # pack("H*") pads a lone digit with a zero low nibble, as it did before.
    warn( "$fin: odd number of hex digits; last digit padded with 0\n" );
    print $fhout pack( "H*", $pending );
}
close( $fhin );
# Buffered write errors only surface at close, so check it.
close( $fhout ) or die( "$fin.$ext: $!\n" );
# EOF
#!/usr/bin/perl
# decodeUni.pl
# Replace hexadecimal numeric character references (&#xXXXX;) in a text file
# with the characters they denote.
use strict;
use warnings;
use utf8;
use Encode;

my $fileNameIn = 'Text1.txt';
my $fileNameOut = 'Text2.txt';
my $charsetConsole = 'CP932';
my $charsetFile = 'UTF-8';

# The three standard streams all use the console character set.
for my $stream ( \*STDIN, \*STDOUT, \*STDERR ){
    binmode( $stream, ":encoding($charsetConsole)" );
}

# File names are handed to open() encoded in the console character set.
my $pathIn = encode( $charsetConsole, $fileNameIn );
open( my $fin, "<:encoding($charsetFile)", $pathIn )
    or die( "$fileNameIn: $!\n" );
my $body = join( "", <$fin> );
close( $fin );

# Each reference becomes the character with that hexadecimal code point.
$body =~ s/&#x([0-9a-f]+);/chr( hex( $1 ) )/igmeo;

my $pathOut = encode( $charsetConsole, $fileNameOut );
open( my $fout, ">:encoding($charsetFile)", $pathOut )
    or die( "$fileNameOut: $!\n" );
print $fout $body;
close( $fout );
# EOF
#!/usr/bin/perl
# encodeUni.pl
# Convert characters of a UTF-8 file that cannot be represented in
# Shift_JIS (CP932) into numeric character references (&#xXXXX;).
use strict;
use warnings;
use utf8;
use Encode;

my $fileNameIn = 'Text2.txt';
my $fileNameOut = 'Text3.txt';
my $charsetConsole = 'CP932';
my $charsetFile = 'UTF-8';
binmode( STDIN, ":encoding($charsetConsole)" );
binmode( STDOUT, ":encoding($charsetConsole)" );
binmode( STDERR, ":encoding($charsetConsole)" );

# File names are handed to open() encoded in the console character set.
open( my $fin, "<:encoding($charsetFile)", encode( $charsetConsole, $fileNameIn ) )
    or die( "$fileNameIn: $!\n" );
my $body = join( "", <$fin> );
close( $fin );

# Pass every character above U+00FF through encodeUnlessSJIS. The match must
# be case-sensitive: under /i a negated class also excludes characters whose
# case fold lies inside the range (e.g. U+212A KELVIN SIGN folds to "k"), so
# those would skip the Shift_JIS check entirely.
# NOTE(review): characters U+0080-U+00FF are deliberately left to the second
# substitution only - confirm that is the intended scope.
$body =~ s/([^\x00-\xff])/encodeUnlessSJIS( $1 )/ge;

# Replace everything outside the POSIX "print" class by a reference.
# NOTE(review): that class excludes line terminators, so newlines are
# rewritten as &#xA; too - confirm this is intended.
$body =~ s/([^[:print:]])/sprintf( "&#x%X;", ord( $1 ) )/ge;

open( my $fout, ">:encoding($charsetFile)", encode( $charsetConsole, $fileNameOut ) )
    or die( "$fileNameOut: $!\n" );
print $fout $body;
# Buffered write errors only surface at close, so check it.
close( $fout ) or die( "$fileNameOut: $!\n" );
# Returns true when the given character survives encoding to CP932.
# Encode substitutes "?" for characters it cannot map, so a result of "?"
# means "not representable" - unless the input was a literal "?" to begin with.
sub issjis
{
    my $char = shift;
    return 1 if $char eq '?';
    return encode( 'CP932', $char ) ne '?';
}
# Returns the character unchanged when issjis() accepts it; otherwise returns
# its numeric character reference in upper-case hex (&#xXXXX;).
sub encodeUnlessSJIS
{
    my $char = shift;
    if( issjis( $char ) ){
        return $char;
    }
    return sprintf( "&#x%X;", ord( $char ) );
}
# EOF
漢字のファイル
ちゃんと変換されていますか?
Is this file converted correctly?
高髙
崎﨑
吉𠮷
叱𠮟
剥剝
填塡
頬頰
	タブ制御コード
漢字のファイル
ちゃんと変換されていますか?
Is this file converted correctly?
高髙
崎﨑
吉𠮷
叱𠮟
剥剝
填塡
頬頰
タブ制御コード
# convSJIStoUTF16.pl
# Processes the input line by line and converts the character encoding
# from Shift_JIS (CP932) to UTF-16LE with a BOM.
use strict;
use warnings;
use utf8;
use Encode;

my $fileNameIn = "Data_SJIS.txt";
my $fileNameOut = "Data_UTF16.txt";
binmode( STDIN, ":encoding(CP932)" );
binmode( STDOUT, ":encoding(CP932)" );
binmode( STDERR, ":encoding(CP932)" );
$| = 1;

# File names are handed to open() encoded as CP932.
open( my $fin, "<:encoding(CP932)", encode( 'CP932', $fileNameIn ) )
    or die( "$fileNameIn: $!\n" );
my @body = <$fin>;
close( $fin );

# Strip the line terminator (LF or CR LF) from each line individually.
# Deriving it from the first line's trailing whitespace and storing it in the
# global $/ fails on an empty file and breaks chomp for every other line
# when the first line happens to end in blanks.
s/\r?\n\z// for @body;

open( my $fout, ">:raw", encode( 'CP932', $fileNameOut ) )
    or die( "$fileNameOut: $!\n" );
print $fout pack( 'H*', 'fffe' ); # write the BOM as raw bytes
binmode( $fout, ":encoding(UTF16LE)" ); # everything after the BOM is encoded

my $max = scalar( @body );
my $count = 0;
for my $line ( @body ){
    # Progress indicator, rewritten in place on STDERR.
    printf STDERR ( "%d/%d\r", ++$count, $max );
    # $line =~ s///; # per-line processing goes here
    print $fout $line . "\n";
}
# Buffered write errors only surface at close, so check it.
close( $fout ) or die( "$fileNameOut: $!\n" );
# EOF
# Fragment, not a complete script: $fout is not declared in these two lines.
# NOTE(review): looks like the UTF-8 (with BOM) counterpart of the
# BOM/binmode pair used in convSJIStoUTF16.pl - confirm before reuse.
print $fout pack( 'H*', 'efbbbf' ); # writes the three bytes EF BB BF
binmode( $fout, ":utf8" ); # later output goes through the :utf8 layer
#!/usr/bin/perl
# Print the UTF-32 code of every character in each command-line argument,
# as hex with leading zeros removed, one "char: code" line per character.
use strict;
use warnings;
use utf8;
use Encode;
# NOTE(review): presumably this makes @ARGV and print Unicode-aware on
# Windows - confirm against the module's documentation.
use Win32::Unicode::Native;

for my $arg ( @ARGV ){
    for my $ch ( split( //, $arg ) ){
        # Big-endian UTF-32 bytes as hex, then drop the zero padding.
        ( my $code = unpack( 'H*', encode( 'UTF32BE', $ch ) ) ) =~ s/^0*//;
        print "$ch: $code\n";
    }
}