# English ASCII encoding module for CharString
# 2003- Hisashi MORITA

class DocDiff
module CharString
  # US-ASCII encoding definition for CharString.
  #
  # Provides POSIX-style character class tables as plain strings (they are
  # interpolated into regexp character classes below), the source text of the
  # word-splitting pattern, and Japanese-specific counters that always
  # report zero.  The module registers itself with CharString when loaded.
  module ASCII

    # Name of the encoding described by this module.
    # Note: within this module's lexical scope the bare constant +Encoding+
    # resolves to this string, not to Ruby's core ::Encoding class.
    Encoding = "US-ASCII"

    # Control characters: 0x00-0x1f and 0x7f (DEL).
    CNTRL =     "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
                "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
                "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
                "\x1e\x1f\x7f"
    # Whitespace: TAB, LF, VT, FF, CR and the space character.
    SPACE =     "\x09\x0a\x0b\x0c\x0d\x20"
    # Horizontal blanks: TAB and the space character.
    BLANK =     "\x09\x20"
    # Decimal digits 0-9.
    DIGIT =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
    # Letters A-Z followed by a-z.
    ALPHA =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
                "\x79\x7a"
    # Digits 0-9 followed by letters A-Z and a-z.
    ALNUM =     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
                "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
                "\x55\x56\x57\x58\x59\x5a\x61\x62\x63\x64" \
                "\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e" \
                "\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78" \
                "\x79\x7a"
    # Punctuation: 0x21-0x2f, 0x3a-0x40, 0x5b-0x60 and 0x7b-0x7e.
    # Stored regexp-quoted so the value can be dropped into a character
    # class without the warning "character class has `[' without escape".
    # The quoting is done while defining the constant (instead of mutating
    # the literal afterwards) so it also works when string literals are
    # frozen.
    PUNCT =     Regexp.quote(
                "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
                "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
                "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
                "\x7d\x7e")
    # Lowercase letters a-z.
    LOWER =     "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
                "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
                "\x75\x76\x77\x78\x79\x7a"
    # Uppercase letters A-Z.
    UPPER =     "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
                "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
                "\x55\x56\x57\x58\x59\x5a"
    # Printable characters: 0x20 (space) through 0x7e.
    # Stored regexp-quoted, for the same reason as PUNCT.
    PRINT =     Regexp.quote(
                "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29" \
                "\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33" \
                "\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d" \
                "\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47" \
                "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51" \
                "\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b" \
                "\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65" \
                "\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
                "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79" \
                "\x7a\x7b\x7c\x7d\x7e")
    # Visible (non-space printable) characters: 0x21 through 0x7e.
    # Stored regexp-quoted, for the same reason as PUNCT.
    GRAPH =     Regexp.quote(
                "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
                "\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34" \
                "\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e" \
                "\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48" \
                "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52" \
                "\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c" \
                "\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66" \
                "\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" \
                "\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a" \
                "\x7b\x7c\x7d\x7e")
    # Hexadecimal digits: 0-9, A-F, a-f.
    XDIGIT =    "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39" \
                "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
                "\x65\x66"

    # ASCII has no Japanese characters, so these tables are empty.
    JA_BLANK =  "" # kludge...
    JA_GRAPH =  "" # kludge...

    # Regexp source (a String, not a Regexp) with three alternatives tried
    # in order:
    #   1. a run of GRAPH characters optionally followed by one BLANK,
    #   2. a run of SPACE characters,
    #   3. a non-greedy fallback matching at least one of anything else.
    WORD_REGEXP_SRC = ["(?:[#{GRAPH}]+[#{BLANK}]?)",
                       "|(?:[#{SPACE}]+)",
                       "|(?:.+?)"].join

    # override default method, as ASCII has no Japanese in it
    # Always returns 0.
    def count_ja_graph_char
      0
    end

    # override default method, as ASCII has no Japanese in it
    # Always returns 0.
    def count_ja_blank_char
      0
    end

    # override default method, as ASCII has no Japanese in it
    # Always returns 0.
    def count_ja_word
      0
    end

    # override default method, as ASCII has no Japanese in it
    # Always returns 0.
    def count_ja_valid_word
      0
    end

    # Hand this module to CharString's registry (register_encoding is
    # defined outside this file).
    CharString.register_encoding(self)

  end  # module ASCII
end  # module CharString
end  # class DocDiff
