概要

  • Shift_JIS 範囲内の文字が、Unicode のどの文字クラス(.NET 正規表現の名前付きブロック \p{Is…})に当たるのかを調べ、そのクラス名をリストアップする。

ソース

  • ListupShiftJISClass.zip
  • Program.cs
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    
    namespace ListupShiftJISClass
    {
        class Program
        {
            /// <summary>
            /// Character class (named block) names of the .NET Framework regex engine.
            /// http://msdn.microsoft.com/ja-jp/library/20bw873z
            /// </summary>
            /// <remarks>
            /// Main prefixes each entry with <c>\p{Is</c> and appends <c>}</c> to build one regex per name.
            /// getClassName walks this array in order and stops at the first match, so the order of
            /// the entries decides which name is reported.
            /// NOTE(review): some entries look like aliases of the same block (e.g. Greek / GreekandCoptic,
            /// PrivateUse / PrivateUseArea); if so, the later alias can never be reported — confirm.
            /// </remarks>
            static string[] ClassNames ={
    			"BasicLatin", 
    			"Latin-1Supplement", 
    			"LatinExtended-A", 
    			"LatinExtended-B", 
    			"IPAExtensions", 
    			"SpacingModifierLetters", 
    			"CombiningDiacriticalMarks", 
    			"Greek", 
    			"GreekandCoptic", 
    			"Cyrillic", 
    			"CyrillicSupplement", 
    			"Armenian", 
    			"Hebrew", 
    			"Arabic", 
    			"Syriac", 
    			"Thaana", 
    			"Devanagari", 
    			"Bengali", 
    			"Gurmukhi", 
    			"Gujarati", 
    			"Oriya", 
    			"Tamil", 
    			"Telugu", 
    			"Kannada", 
    			"Malayalam", 
    			"Sinhala", 
    			"Thai", 
    			"Lao", 
    			"Tibetan", 
    			"Myanmar", 
    			"Georgian", 
    			"HangulJamo", 
    			"Ethiopic", 
    			"Cherokee", 
    			"UnifiedCanadianAboriginalSyllabics", 
    			"Ogham", 
    			"Runic", 
    			"Tagalog", 
    			"Hanunoo", 
    			"Buhid", 
    			"Tagbanwa", 
    			"Khmer", 
    			"Mongolian", 
    			"Limbu", 
    			"TaiLe", 
    			"KhmerSymbols", 
    			"PhoneticExtensions", 
    			"LatinExtendedAdditional", 
    			"GreekExtended", 
    			"GeneralPunctuation", 
    			"SuperscriptsandSubscripts", 
    			"CurrencySymbols", 
    			"CombiningDiacriticalMarksforSymbols", 
    			"CombiningMarksforSymbols", 
    			"LetterlikeSymbols", 
    			"NumberForms", 
    			"Arrows", 
    			"MathematicalOperators", 
    			"MiscellaneousTechnical", 
    			"ControlPictures", 
    			"OpticalCharacterRecognition", 
    			"EnclosedAlphanumerics", 
    			"BoxDrawing", 
    			"BlockElements", 
    			"GeometricShapes", 
    			"MiscellaneousSymbols", 
    			"Dingbats", 
    			"MiscellaneousMathematicalSymbols-A", 
    			"SupplementalArrows-A", 
    			"BraillePatterns", 
    			"SupplementalArrows-B", 
    			"MiscellaneousMathematicalSymbols-B", 
    			"SupplementalMathematicalOperators", 
    			"MiscellaneousSymbolsandArrows", 
    			"CJKRadicalsSupplement", 
    			"KangxiRadicals", 
    			"IdeographicDescriptionCharacters", 
    			"CJKSymbolsandPunctuation", 
    			"Hiragana", 
    			"Katakana", 
    			"Bopomofo", 
    			"HangulCompatibilityJamo", 
    			"Kanbun", 
    			"BopomofoExtended", 
    			"KatakanaPhoneticExtensions", 
    			"EnclosedCJKLettersandMonths", 
    			"CJKCompatibility", 
    			"CJKUnifiedIdeographsExtensionA", 
    			"YijingHexagramSymbols", 
    			"CJKUnifiedIdeographs", 
    			"YiSyllables", 
    			"YiRadicals", 
    			"HangulSyllables", 
    			"HighSurrogates", 
    			"HighPrivateUseSurrogates", 
    			"LowSurrogates", 
    			"PrivateUse",
    			"PrivateUseArea",
    			"CJKCompatibilityIdeographs", 
    			"AlphabeticPresentationForms", 
    			"ArabicPresentationForms-A", 
    			"VariationSelectors", 
    			"CombiningHalfMarks", 
    			"CJKCompatibilityForms", 
    			"SmallFormVariants", 
    			"ArabicPresentationForms-B", 
    			"HalfwidthandFullwidthForms", 
    			"Specials", 
            };
            /// <summary>
            /// Regex instances keyed by class name. Main fills this map (one entry per element of
            /// ClassNames) before getClassName reads from it.
            /// </summary>
            static Dictionary<string, Regex> regCharClasses = new Dictionary<string, Regex>();
    
            /// <summary>
            /// Builds the per-class regexes, scans every UTF-16 code unit from 0x0000 to 0xFFFF,
            /// and prints the name of each class that contains at least one character accepted
            /// by IsShiftJIS.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                // One regex per known class name, built up front so lookups can reuse them.
                foreach (string blockName in ClassNames)
                {
                    regCharClasses[blockName] = new Regex(@"\p{Is" + blockName + "}");
                }

                // Class name -> every accepted character of that class, appended in code order.
                Dictionary<string, string> charsByClass = new Dictionary<string, string>();
                for (int codeUnit = 0x00; codeUnit <= 0xffff; ++codeUnit)
                {
                    string ch = ((char)codeUnit).ToString();
                    if (!IsShiftJIS(ch))
                    {
                        continue;
                    }

                    string key = getClassName(ch);
                    string collected;
                    if (charsByClass.TryGetValue(key, out collected))
                    {
                        charsByClass[key] = collected + ch;
                    }
                    else
                    {
                        charsByClass[key] = ch;
                    }
                }

                // Only the class names are printed; the collected characters are not written out.
                foreach (KeyValuePair<string, string> entry in charsByClass)
                {
                    Console.WriteLine("{0}", entry.Key);
                }
            }
    
            /// <summary>
            /// Checks whether a string consists only of characters within the Shift_JIS range
            /// and returns the result.
            /// </summary>
            /// <remarks>
            /// The string is encoded with the "shift_jis" encoding and decoded back; it is accepted
            /// only when the decoded text equals the input.
            /// http://acha-ya.cocolog-nifty.com/blog/2010/12/unicode-ef79.html
            /// NOTE(review): the set of characters accepted is whatever the runtime's "shift_jis"
            /// encoding round-trips, which may be wider than strict JIS X 0208 — confirm if that matters.
            /// </remarks>
            /// <param name="checkString">The string to check.</param>
            /// <returns>
            /// <list type="bullet">
            /// <item>true: every character is within the Shift_JIS range</item>
            /// <item>false: the string contains a character outside the Shift_JIS range</item>
            /// </list>
            /// </returns>
            static bool IsShiftJIS(string checkString)
            {
                Encoding sjis = Encoding.GetEncoding("shift_jis");
                string roundTripped = sjis.GetString(sjis.GetBytes(checkString));
                return string.Equals(checkString, roundTripped);
            }
    
            /// <summary>
            /// Finds the first entry of ClassNames whose regex in regCharClasses matches the given text.
            /// </summary>
            /// <param name="c">Text to classify; Main passes a one-character string.</param>
            /// <returns>The first matching class name, or "-" when no entry matches.</returns>
            static string getClassName(string c)
            {
                // FirstOrDefault yields null when nothing matches; "-" is the "not found" marker.
                string matched = ClassNames.FirstOrDefault(name => regCharClasses[name].IsMatch(c));
                return matched ?? "-";
            }
        }
    }

出力

BasicLatin
Latin-1Supplement
Greek
Cyrillic
GeneralPunctuation
LetterlikeSymbols
NumberForms
Arrows
MathematicalOperators
MiscellaneousTechnical
EnclosedAlphanumerics
BoxDrawing
GeometricShapes
MiscellaneousSymbols
CJKSymbolsandPunctuation
Hiragana
Katakana
EnclosedCJKLettersandMonths
CJKCompatibility
CJKUnifiedIdeographs
PrivateUse
CJKCompatibilityIdeographs
HalfwidthandFullwidthForms

リンク