lib/jcode.rb


DEFINITIONS

This source file includes the following functions.


   1  # jcode.rb - Ruby code to handle Japanese (EUC/SJIS/UTF-8) strings
   2  
   # Emit a warning on STDERR when running verbosely with $KCODE set to "NONE".
   3  if $VERBOSE && $KCODE == "NONE"
   4    STDERR.puts "Warning: $KCODE is NONE."
   5  end
   6  
   # Remember the caller's $VERBOSE setting in $vsave and switch verbosity off
   # for the rest of this file; it is restored from $vsave after the class
   # body. Presumably this silences method-redefinition warnings.
   7  $vsave, $VERBOSE = $VERBOSE, false
   8  class String
   # NOTE(review): $VERBOSE was set to false immediately before the class
   # body, so as written this notice is never printed.
   9    printf STDERR, "feel free for some warnings:\n" if $VERBOSE
  10  
  # Rewrites a tr-style character specification so that it can be
  # interpolated between the brackets of a Regexp character class.
  #
  # * A backslash followed by ], [, - or \ is kept exactly as written.
  # * A backslash followed by any other character is dropped, leaving only
  #   that character.
  # * A bare ], [ or \ gets a backslash put in front of it.
  #
  # str:: the character specification (String)
  # Returns a new String; str itself is not modified.
  #
  # The brackets inside this method's own character classes are escaped
  # explicitly. Written bare, their meaning depends on whether the regexp
  # engine treats "[" inside a class as a literal or as the start of a nested
  # class; the escaped form reads the same either way.
  def _regex_quote(str)
    str.gsub(/(\\[\]\[\-\\])|\\(.)|([\]\[\\])/) do
      $1 || $2 || '\\' + $3
    end
  end
  private :_regex_quote
  17  
  # Regexp source text describing one multibyte character, per encoding.
  # PATTERN_SJIS: a byte in 0x81-0x9f or 0xe0-0xef followed by a byte in
  #               0x40-0x7e or 0x80-0xfc.
  # PATTERN_EUC:  two bytes, each in 0xa1-0xfe.
  # PATTERN_UTF8: a 0xc0-0xdf byte plus one 0x80-0xbf byte, or a 0xe0-0xef
  #               byte plus two 0x80-0xbf bytes (longer sequences are not
  #               covered by this pattern).
  18    PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
  19    PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
  20    PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
  21  
  # Compiled, unanchored forms of the patterns above, used by mbchar?.
  # NOTE(review): the 'n' option presumably requests byte-wise matching
  # independent of $KCODE -- confirm against the targeted Ruby version.
  22    RE_SJIS = Regexp.new(PATTERN_SJIS, 'n')
  23    RE_EUC = Regexp.new(PATTERN_EUC, 'n')
  24    RE_UTF8 = Regexp.new(PATTERN_UTF8, 'n')
  25  
  # Step tables consulted by succ!. The outer key is the lower-cased first
  # letter of $KCODE ('s', 'e' or 'u'); each inner table maps a one-byte
  # String to the amount to add to that byte. Bytes without an entry use the
  # default step of 1.
  SUCC = {}

  # 's': bytes 0x00-0x3f step to 0x40, 0x7e steps to 0x80, and 0xfd-0xff step
  # to 0x100.
  SUCC['s'] = Hash.new(1)
  0.upto(0x3f) { |byte| SUCC['s'][byte.chr] = 0x40 - byte }
  SUCC['s']["\x7e"] = 0x80 - 0x7e
  SUCC['s']["\xfd"] = 0x100 - 0xfd
  SUCC['s']["\xfe"] = 0x100 - 0xfe
  SUCC['s']["\xff"] = 0x100 - 0xff

  # 'e': bytes 0x00-0xa0 step to 0xa1, and 0xfe steps by 2 (to 0x100).
  SUCC['e'] = Hash.new(1)
  0.upto(0xa0) { |byte| SUCC['e'][byte.chr] = 0xa1 - byte }
  SUCC['e']["\xfe"] = 2

  # 'u': bytes 0x00-0x7f step to 0x80, and 0xbf steps to 0x100.
  SUCC['u'] = Hash.new(1)
  0.upto(0x7f) { |byte| SUCC['u'][byte.chr] = 0x80 - byte }
  SUCC['u']["\xbf"] = 0x100 - 0xbf
  45  
  # Reports whether the string contains a multibyte character for the
  # encoding selected by the first character of $KCODE.
  #
  # Returns the result of matching against RE_SJIS, RE_EUC or RE_UTF8 (the
  # match offset, or nil when nothing matches); returns nil for any other
  # $KCODE.
  def mbchar?
    pattern =
      case $KCODE[0]
      when ?s, ?S then RE_SJIS
      when ?e, ?E then RE_EUC
      when ?u, ?U then RE_UTF8
      end
    pattern && self =~ pattern
  end
  58  
  # Picks the regexp that succ! uses to test whether the string ends in a
  # character of the current encoding: the encoding's multibyte pattern
  # followed by "$" for SJIS, EUC and UTF-8 (chosen by the first character of
  # $KCODE), or /.$/ for anything else.
  def end_regexp
    case $KCODE[0]
    when ?s, ?S then /#{PATTERN_SJIS}$/o
    when ?e, ?E then /#{PATTERN_EUC}$/o
    when ?u, ?U then /#{PATTERN_UTF8}$/o
    else /.$/o
    end
  end
  71  
  # Keep the existing succ! reachable under a private name before it is
  # redefined below; the new succ! falls back to it.
  72    alias original_succ! succ!
  73    private :original_succ!
  74  
  # Likewise for succ. NOTE(review): no caller of original_succ is visible in
  # this file.
  75    alias original_succ succ
  76    private :original_succ
  77  
  # Multibyte-aware replacement for succ!.
  #
  # When $KCODE selects an encoding that has a step table in SUCC and the
  # string ends in a character matching end_regexp, the last byte is advanced
  # (carrying into the byte before it whenever the last byte wraps to 0) until
  # the string ends in a matching character again. In every other case the
  # original succ! is used.
  #
  # Returns self on the multibyte path, otherwise whatever the original
  # succ! returns.
  def succ!
    # There is no table for $KCODE "NONE" (or any unknown value); indexing
    # nil used to raise here, so fall back to the original method instead.
    succ_table = SUCC[$KCODE[0, 1].downcase]
    reg = end_regexp
    return original_succ! unless succ_table && self =~ reg

    begin
      # The tables are keyed by one-byte Strings while self[-1] is a byte
      # value, so convert with chr; otherwise every lookup falls through to
      # the default step of 1 and the table is never used.
      self[-1] += succ_table[self[-1].chr]
      self[-2] += 1 if self[-1] == 0
    end while self !~ reg
    self
  end
  91  
  # Non-destructive succ: applies succ! to a copy and returns the copy,
  # leaving the receiver untouched.
  def succ
    copy = dup
    copy.succ! || copy
  end
  95  
  96    private
  97  
  # Expands a tr-style character specification into an Array of characters.
  #
  # "x-y" (either end may be backslash-escaped) becomes every character from
  # x up to y; any other character, escaped or not, stands for itself. A
  # range whose two ends differ in length contributes nothing.
  #
  # str:: the specification to expand
  def _expand_ch(str)
    chars = []
    str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do |esc_lo, raw_lo, esc_hi, raw_hi, esc_one, raw_one|
      single = esc_one || raw_one
      if single
        chars << single
        next
      end

      lo = esc_lo || raw_lo
      hi = esc_hi || raw_hi
      next if lo.length != hi.length

      if lo.length == 1
        lo[0].upto(hi[0]) { |c| chars << c.chr }
      else
        lo.upto(hi) { |c| chars << c }
      end
    end
    chars
  end
 116  
 # Builds the translation table used by tr! and tr_s!: each character of the
 # expanded +from+ specification maps to the character at the same position
 # in the expanded +to+ specification. Once +to+ runs out, its last character
 # is used for the remaining entries.
 #
 # Returns a Hash from source character to replacement character.
 def expand_ch_hash(from, to)
   sources = _expand_ch(from)
   targets = _expand_ch(to)
   table = {}
   sources.each_with_index do |ch, idx|
     table[ch] = targets[idx] || targets[-1]
   end
   table
 end
 124  
 # Process-wide memo tables, filled lazily and never cleared in this file.
 # HashCache holds translation tables built by expand_ch_hash (used by tr!
 # and tr_s!); the other three hold compiled Regexps keyed by the character
 # specification (TrPatternCache for tr!, DeletePatternCache for delete!,
 # SqueezePatternCache for squeeze! and tr_s!).
 125    HashCache = {}
 126    TrPatternCache = {}
 127    DeletePatternCache = {}
 128    SqueezePatternCache = {}
 129  
 130    public
 131  
 # Multibyte-aware, in-place character translation.
 #
 # from:: tr-style specification of the characters to replace; a leading "^"
 #        negates it
 # to::   tr-style specification of the replacements; when empty, the call
 #        is delegated to delete!(from)
 #
 # With a negated +from+, every matching character is replaced by the last
 # character of +to+. Otherwise each character is replaced through the table
 # built by expand_ch_hash.
 #
 # Returns the result of gsub!: self when something changed, nil otherwise.
 def tr!(from, to)
   return self.delete!(from) if to.length == 0

   pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
   if from[0] == ?^
     # \z with /m takes the real last character even when +to+ ends with a
     # newline; /.$/ found no match there and the [0] call then raised.
     last = /.\z/m.match(to)[0]
     self.gsub!(pattern, last)
   else
     # Key the cache on the [from, to] pair. A concatenated string key is
     # ambiguous: ("a", "1-0b") and ("a1-0", "b") produced the same key and
     # so shared one table. Copies are stored so that later mutation of the
     # caller's strings cannot disturb the key.
     h = HashCache[[from, to]] ||
         (HashCache[[from.dup, to.dup]] = expand_ch_hash(from, to))
     self.gsub!(pattern) do |c| h[c] end
   end
 end
 144  
 # Non-destructive tr: runs tr! on a copy and returns the copy whether or
 # not anything was replaced.
 def tr(from, to)
   copy = dup
   copy.tr!(from, to) || copy
 end
 148  
 # Removes, in place, every run of characters matched by the tr-style
 # specification +del+. The compiled pattern is memoised per specification in
 # DeletePatternCache.
 #
 # Returns the result of gsub!: self when something was removed, nil
 # otherwise.
 def delete!(del)
   pattern = (DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/)
   gsub!(pattern, '')
 end
 152  
 # Non-destructive delete: runs delete! on a copy and returns the copy
 # whether or not anything was removed.
 def delete(del)
   copy = dup
   copy.delete!(del) || copy
 end
 156  
 # Collapses, in place, each run of a repeated character into a single
 # occurrence.
 #
 # del:: optional tr-style specification limiting which characters are
 #       squeezed; without it every character (newline included) is eligible.
 #       Patterns built for a specification are memoised in
 #       SqueezePatternCache.
 #
 # Returns the result of gsub!: self when something changed, nil otherwise.
 def squeeze!(del=nil)
   pattern = /(.|\n)\1+/
   pattern = (SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/) if del
   gsub!(pattern, '\1')
 end
 166  
 167    def squeeze(del=nil)
 168      (str = self.dup).squeeze!(del) or str
 169    end
 170  
 # Multibyte-aware, in-place translate-and-squeeze.
 #
 # from:: tr-style specification of the characters to replace; a leading "^"
 #        negates it
 # to::   tr-style specification of the replacements; when empty, the call
 #        is delegated to delete!(from)
 #
 # Every run of one repeated character from +from+ (a run may be a single
 # character) is replaced by one replacement character: the last character of
 # +to+ when +from+ is negated, otherwise the entry from the table built by
 # expand_ch_hash.
 #
 # Returns the result of gsub!: self when something changed, nil otherwise.
 def tr_s!(from, to)
   return self.delete!(from) if to.length == 0

   # \1* rather than \1+: with \1+ only runs of two or more matched, so a
   # character that occurred once was never translated. The pattern is cached
   # under its own [:tr_s, from] key because squeeze! stores a different
   # (\1+) pattern in the same table under the plain specification string.
   pattern = SqueezePatternCache[[:tr_s, from]] ||
             (SqueezePatternCache[[:tr_s, from.dup]] = /([#{_regex_quote(from)}])\1*/)
   if from[0] == ?^
     # \z with /m takes the real last character even when +to+ ends with a
     # newline; /.$/ found no match there and the [0] call then raised.
     last = /.\z/m.match(to)[0]
     self.gsub!(pattern, last)
   else
     # Key the cache on the [from, to] pair; a concatenated string key is
     # ambiguous (("a", "1-0b") and ("a1-0", "b") produced the same key).
     h = HashCache[[from, to]] ||
         (HashCache[[from.dup, to.dup]] = expand_ch_hash(from, to))
     self.gsub!(pattern) do h[$1] end
   end
 end
 183  
 # Non-destructive tr_s: runs tr_s! on a copy and returns the copy whether
 # or not anything was replaced.
 def tr_s(from, to)
   copy = dup
   copy.tr_s!(from, to) || copy
 end
 187  
 # Removes the final character in place; a trailing "\n" or "\r\n" is
 # removed as one unit.
 #
 # Returns self when something was removed, nil when the string was empty.
 # The pattern is anchored at the very end of the string, so at most one
 # match can occur and sub! is sufficient.
 def chop!
   sub!(/(?:.|\r?\n)\z/, '')
 end
 191  
 # Non-destructive chop: runs chop! on a copy and returns the copy, which is
 # unchanged when there was nothing to remove.
 def chop
   copy = dup
   copy.chop! || copy
 end
 195  
 # Length of the string after every character matched by /[^\Wa-zA-Z_\d]/
 # has been replaced by a single space.
 # NOTE(review): presumably, under a multibyte $KCODE, that class matches
 # exactly the multibyte characters, making this a character count -- confirm
 # against the targeted Ruby version.
 def jlength
   collapsed = gsub(/[^\Wa-zA-Z_\d]/, ' ')
   collapsed.length
 end
 alias jsize jlength
 200  
 # Counts the characters of the string that belong to the tr-style
 # specification +str+: everything outside the specification is deleted and
 # the jlength of the remainder is returned.
 def jcount(str)
   kept = delete("^#{str}")
   kept.jlength
 end
 204  
 # Iterates over the characters of the string (newlines included).
 #
 # With a block, yields each character in turn and returns what scan
 # returns. Without a block, returns an Array of the characters.
 def each_char
   return scan(/./m) unless block_given?

   scan(/./m) { |ch| yield ch }
 end
 214  
 215  end
 # Restore the verbosity level that was saved in $vsave at the top of the file.
 216  $VERBOSE = $vsave