string.c


DEFINITIONS

This source file includes the following functions.
  1. rb_str_s_alloc
  2. str_new
  3. rb_str_new
  4. rb_str_new2
  5. rb_tainted_str_new
  6. rb_tainted_str_new2
  7. str_new3
  8. rb_str_new3
  9. rb_str_new4
  10. rb_str_new5
  11. rb_str_buf_new
  12. rb_str_buf_new2
  13. rb_str_to_str
  14. rb_str_shared_replace
  15. rb_str_associate
  16. rb_str_associated
  17. rb_obj_as_string
  18. rb_str_dup
  19. rb_str_init
  20. rb_str_length
  21. rb_str_empty
  22. rb_str_plus
  23. rb_str_times
  24. rb_str_format
  25. str_independent
  26. str_make_independent
  27. rb_str_modify
  28. rb_string_value
  29. rb_string_value_ptr
  30. rb_str_substr
  31. rb_str_freeze
  32. rb_str_dup_frozen
  33. rb_str_resize
  34. rb_str_buf_cat
  35. rb_str_buf_cat2
  36. rb_str_cat
  37. rb_str_cat2
  38. rb_str_buf_append
  39. rb_str_append
  40. rb_str_concat
  41. rb_str_hash
  42. rb_str_hash_m
  43. rb_str_cmp
  44. rb_str_equal
  45. rb_str_eql
  46. rb_str_cmp_m
  47. rb_str_casecmp
  48. rb_str_index
  49. rb_str_index_m
  50. rb_str_rindex
  51. rb_str_rindex_m
  52. rb_str_match
  53. rb_str_match2
  54. rb_str_match_m
  55. succ_char
  56. rb_str_succ
  57. rb_str_succ_bang
  58. rb_str_upto
  59. rb_str_upto_m
  60. rb_str_subpat
  61. rb_str_aref
  62. rb_str_aref_m
  63. rb_str_update
  64. rb_str_subpat_set
  65. rb_str_aset
  66. rb_str_aset_m
  67. rb_str_insert
  68. rb_str_slice_bang
  69. get_pat
  70. rb_str_sub_bang
  71. rb_str_sub
  72. str_gsub
  73. rb_str_gsub_bang
  74. rb_str_gsub
  75. rb_str_replace
  76. uscore_get
  77. rb_f_sub_bang
  78. rb_f_sub
  79. rb_f_gsub_bang
  80. rb_f_gsub
  81. rb_str_reverse_bang
  82. rb_str_reverse
  83. rb_str_include
  84. rb_str_to_i
  85. rb_str_to_f
  86. rb_str_to_s
  87. rb_str_inspect
  88. rb_str_dump
  89. rb_str_upcase_bang
  90. rb_str_upcase
  91. rb_str_downcase_bang
  92. rb_str_downcase
  93. rb_str_capitalize_bang
  94. rb_str_capitalize
  95. rb_str_swapcase_bang
  96. rb_str_swapcase
  97. trnext
  98. tr_trans
  99. rb_str_tr_bang
  100. rb_str_tr
  101. tr_setup_table
  102. rb_str_delete_bang
  103. rb_str_delete
  104. rb_str_squeeze_bang
  105. rb_str_squeeze
  106. rb_str_tr_s_bang
  107. rb_str_tr_s
  108. rb_str_count
  109. rb_str_split_m
  110. rb_str_split
  111. rb_f_split
  112. rb_str_each_line
  113. rb_str_each_byte
  114. rb_str_chop_bang
  115. rb_str_chop
  116. rb_f_chop_bang
  117. rb_f_chop
  118. rb_str_chomp_bang
  119. rb_str_chomp
  120. rb_f_chomp_bang
  121. rb_f_chomp
  122. rb_str_lstrip_bang
  123. rb_str_lstrip
  124. rb_str_rstrip_bang
  125. rb_str_rstrip
  126. rb_str_strip_bang
  127. rb_str_strip
  128. scan_once
  129. rb_str_scan
  130. rb_f_scan
  131. rb_str_hex
  132. rb_str_oct
  133. rb_str_crypt
  134. rb_str_intern
  135. rb_str_sum
  136. rb_str_ljust
  137. rb_str_rjust
  138. rb_str_center
  139. rb_str_setter
  140. Init_String


   1  /**********************************************************************
   2  
   3    string.c -
   4  
   5    $Author: knu $
   6    $Date: 2002/09/11 04:05:36 $
   7    created at: Mon Aug  9 17:12:58 JST 1993
   8  
   9    Copyright (C) 1993-2002 Yukihiro Matsumoto
  10    Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
  11    Copyright (C) 2000  Information-technology Promotion Agency, Japan
  12  
  13  **********************************************************************/
  14  
  15  #include "ruby.h"
  16  #include "re.h"
  17  
/* Accessors for entry `no` of the beg[] / end[] arrays reached through a
   pointer named `regs`, which must be in scope wherever these are used
   (presumably regexp match registers -- confirm against re.h). */
#define BEG(no) regs->beg[no]
#define END(no) regs->end[no]
  20  
  21  #include <math.h>
  22  #include <ctype.h>
  23  
  24  #ifdef HAVE_UNISTD_H
  25  #include <unistd.h>
  26  #endif
  27  
  28  VALUE rb_cString;
  29  
/* Flag bit set by rb_str_associate(): marks a string whose aux.shared
   slot holds an associated object rather than a capacity. */
#define STR_ASSOC   FL_USER3
  31  
/* Reallocate the buffer of `str` to `capacity` bytes plus one extra byte
   and record `capacity` in aux.capa.  Does not touch len.  Note that both
   arguments are evaluated more than once. */
#define RESIZE_CAPA(str,capacity) do {\
    REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
    RSTRING(str)->aux.capa = (capacity);\
} while (0)
  36  
  37  VALUE rb_fs;
  38  
  39  static VALUE
  40  rb_str_s_alloc(klass)
  41      VALUE klass;
  42  {
  43      NEWOBJ(str, struct RString);
  44      OBJSETUP(str, klass, T_STRING);
  45  
  46      str->ptr = 0;
  47      str->len = 0;
  48      str->aux.capa = 0;
  49  
  50      return (VALUE)str;
  51  }
  52  
  53  static VALUE
  54  str_new(klass, ptr, len)
  55      VALUE klass;
  56      const char *ptr;
  57      long len;
  58  {
  59      VALUE str;
  60  
  61      if (len < 0) {
  62          rb_raise(rb_eArgError, "negative string size (or size too big)");
  63      }
  64  
  65      str = rb_obj_alloc(klass);
  66      RSTRING(str)->len = len;
  67      RSTRING(str)->aux.capa = len;
  68      RSTRING(str)->ptr = ALLOC_N(char,len+1);
  69      if (ptr) {
  70          memcpy(RSTRING(str)->ptr, ptr, len);
  71      }
  72      else {
  73          MEMZERO(RSTRING(str)->ptr, char, len);
  74      }
  75      RSTRING(str)->ptr[len] = '\0';
  76      return str;
  77  }
  78  
  79  VALUE
  80  rb_str_new(ptr, len)
  81      const char *ptr;
  82      long len;
  83  {
  84      return str_new(rb_cString, ptr, len);
  85  }
  86  
  87  VALUE
  88  rb_str_new2(ptr)
  89      const char *ptr;
  90  {
  91      if (!ptr) {
  92          rb_raise(rb_eArgError, "NULL pointer given");
  93      }
  94      return rb_str_new(ptr, strlen(ptr));
  95  }
  96  
  97  VALUE
  98  rb_tainted_str_new(ptr, len)
  99      const char *ptr;
 100      long len;
 101  {
 102      VALUE str = rb_str_new(ptr, len);
 103  
 104      OBJ_TAINT(str);
 105      return str;
 106  }
 107  
 108  VALUE
 109  rb_tainted_str_new2(ptr)
 110      const char *ptr;
 111  {
 112      VALUE str = rb_str_new2(ptr);
 113  
 114      OBJ_TAINT(str);
 115      return str;
 116  }
 117  
 118  static VALUE
 119  str_new3(klass, str)
 120      VALUE klass, str;
 121  {
 122      VALUE str2 = rb_obj_alloc(klass);
 123  
 124      RSTRING(str2)->len = RSTRING(str)->len;
 125      RSTRING(str2)->ptr = RSTRING(str)->ptr;
 126      RSTRING(str2)->aux.shared = str;
 127      FL_SET(str2, ELTS_SHARED);
 128      OBJ_INFECT(str2, str);
 129  
 130      return str2;
 131  }
 132  
 133  VALUE
 134  rb_str_new3(str)
 135      VALUE str;
 136  {
 137      return str_new3(rb_obj_class(str), str);
 138  }
 139  
 140  VALUE
 141  rb_str_new4(orig)
 142      VALUE orig;
 143  {
 144      VALUE klass, str;
 145  
 146      klass = rb_obj_class(orig);
 147      if (FL_TEST(orig, ELTS_SHARED)) {
 148          str = str_new3(klass, RSTRING(orig)->aux.shared);
 149      }
 150      else if (FL_TEST(orig, STR_ASSOC)) {
 151          str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len);
 152      }
 153      else {
 154          str = rb_obj_alloc(klass);
 155  
 156          RSTRING(str)->len = RSTRING(orig)->len;
 157          RSTRING(str)->ptr = RSTRING(orig)->ptr;
 158          RSTRING(orig)->aux.shared = str;
 159          FL_SET(orig, ELTS_SHARED);
 160      }
 161      OBJ_INFECT(str, orig);
 162      OBJ_FREEZE(str);
 163      return str;
 164  }
 165  
 166  VALUE
 167  rb_str_new5(obj, ptr, len)
 168      VALUE obj;
 169      const char *ptr;
 170      long len;
 171  {
 172      return str_new(rb_obj_class(obj), ptr, len);
 173  }
 174  
 175  #define STR_BUF_MIN_SIZE 128
 176  
 177  VALUE
 178  rb_str_buf_new(capa)
 179      long capa;
 180  {
 181      VALUE str = rb_obj_alloc(rb_cString);
 182  
 183      if (capa < STR_BUF_MIN_SIZE) {
 184          capa = STR_BUF_MIN_SIZE;
 185      }
 186      RSTRING(str)->ptr = 0;
 187      RSTRING(str)->len = 0;
 188      RSTRING(str)->aux.capa = capa;
 189      RSTRING(str)->ptr = ALLOC_N(char, capa+1);
 190      RSTRING(str)->ptr[0] = '\0';
 191  
 192      return str;
 193  }
 194  
 195  VALUE
 196  rb_str_buf_new2(ptr)
 197      const char *ptr;
 198  {
 199      VALUE str;
 200      long len = strlen(ptr);
 201  
 202      str = rb_str_buf_new(len);
 203      rb_str_buf_cat(str, ptr, len);
 204  
 205      return str;
 206  }
 207  
 208  VALUE
 209  rb_str_to_str(str)
 210      VALUE str;
 211  {
 212      return rb_convert_type(str, T_STRING, "String", "to_str");
 213  }
 214  
 215  static void
 216  rb_str_shared_replace(str, str2)
 217      VALUE str, str2;
 218  {
 219      if (str == str2) return;
 220      if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
 221      if (NIL_P(str2)) {
 222          RSTRING(str)->ptr = 0;
 223          RSTRING(str)->len = 0;
 224          RSTRING(str)->aux.capa = 0;
 225          return;
 226      }
 227      RSTRING(str)->ptr = RSTRING(str2)->ptr;
 228      RSTRING(str)->len = RSTRING(str2)->len;
 229      if (FL_TEST(str2, ELTS_SHARED|STR_ASSOC)) {
 230          FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
 231          RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
 232      }
 233      else {
 234          RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa;
 235      }
 236      RSTRING(str2)->ptr = 0;     /* abandon str2 */
 237      RSTRING(str2)->len = 0;
 238      RSTRING(str2)->aux.capa = 0;
 239      FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
 240      if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
 241  }
 242  
 243  void
 244  rb_str_associate(str, add)
 245      VALUE str, add;
 246  {
 247      if (FL_TEST(str, STR_ASSOC)) {
 248          /* already associated */
 249          rb_ary_concat(RSTRING(str)->aux.shared, add);
 250      }
 251      else {
 252          if (FL_TEST(str, ELTS_SHARED)) {
 253              rb_str_modify(str);
 254          }
 255          else if (RSTRING(str)->aux.shared) {
 256              /* str_buf */
 257              if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
 258                  RESIZE_CAPA(str, RSTRING(str)->len);
 259              }
 260          }
 261          RSTRING(str)->aux.shared = add;
 262          FL_UNSET(str, ELTS_SHARED);
 263          FL_SET(str, STR_ASSOC);
 264      }
 265  }
 266  
 267  VALUE
 268  rb_str_associated(str)
 269      VALUE str;
 270  {
 271      if (FL_TEST(str, STR_ASSOC)) {
 272          return RSTRING(str)->aux.shared;
 273      }
 274      return Qfalse;
 275  }
 276  
 277  static ID id_to_s;
 278  
 279  VALUE
 280  rb_obj_as_string(obj)
 281      VALUE obj;
 282  {
 283      VALUE str;
 284  
 285      if (TYPE(obj) == T_STRING) {
 286          return obj;
 287      }
 288      str = rb_funcall(obj, id_to_s, 0);
 289      if (TYPE(str) != T_STRING)
 290          return rb_any_to_s(obj);
 291      if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
 292      return str;
 293  }
 294  
 295  static VALUE rb_str_replace _((VALUE, VALUE));
 296  
 297  VALUE
 298  rb_str_dup(str)
 299      VALUE str;
 300  {
 301      VALUE dup = rb_str_s_alloc(rb_cString);
 302      rb_str_replace(dup, str);
 303      return dup;
 304  }
 305  
 306  static VALUE
 307  rb_str_init(argc, argv, str)
 308      int argc;
 309      VALUE *argv;
 310      VALUE str;
 311  {
 312      VALUE orig;
 313  
 314      if (rb_scan_args(argc, argv, "01", &orig) == 1)
 315          rb_str_replace(str, orig);
 316      return str;
 317  }
 318  
 319  static VALUE
 320  rb_str_length(str)
 321      VALUE str;
 322  {
 323      return LONG2NUM(RSTRING(str)->len);
 324  }
 325  
 326  static VALUE
 327  rb_str_empty(str)
 328      VALUE str;
 329  {
 330      if (RSTRING(str)->len == 0)
 331          return Qtrue;
 332      return Qfalse;
 333  }
 334  
 335  VALUE
 336  rb_str_plus(str1, str2)
 337      VALUE str1, str2;
 338  {
 339      VALUE str3;
 340  
 341      StringValue(str2);
 342      str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len);
 343      memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len);
 344      memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len,
 345             RSTRING(str2)->ptr, RSTRING(str2)->len);
 346      RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0';
 347  
 348      if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
 349          OBJ_TAINT(str3);
 350      return str3;
 351  }
 352  
 353  VALUE
 354  rb_str_times(str, times)
 355      VALUE str;
 356      VALUE times;
 357  {
 358      VALUE str2;
 359      long i, len;
 360  
 361      len = NUM2LONG(times);
 362      if (len == 0) return rb_str_new5(str,0,0);
 363      if (len < 0) {
 364          rb_raise(rb_eArgError, "negative argument");
 365      }
 366      if (LONG_MAX/len <  RSTRING(str)->len) {
 367          rb_raise(rb_eArgError, "argument too big");
 368      }
 369  
 370      str2 = rb_str_new5(str,0, RSTRING(str)->len*len);
 371      for (i=0; i<len; i++) {
 372          memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len),
 373                 RSTRING(str)->ptr, RSTRING(str)->len);
 374      }
 375      RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0';
 376  
 377      OBJ_INFECT(str2, str);
 378  
 379      return str2;
 380  }
 381  
 382  static VALUE
 383  rb_str_format(str, arg)
 384      VALUE str, arg;
 385  {
 386      VALUE *argv;
 387  
 388      if (TYPE(arg) == T_ARRAY) {
 389          argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1);
 390          argv[0] = str;
 391          MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len);
 392          return rb_f_sprintf(RARRAY(arg)->len+1, argv);
 393      }
 394      
 395      argv = ALLOCA_N(VALUE, 2);
 396      argv[0] = str;
 397      argv[1] = arg;
 398      return rb_f_sprintf(2, argv);
 399  }
 400  
 401  static int
 402  str_independent(str)
 403      VALUE str;
 404  {
 405      if (OBJ_FROZEN(str)) rb_error_frozen("string");
 406      if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
 407          rb_raise(rb_eSecurityError, "Insecure: can't modify string");
 408      if (!FL_TEST(str, ELTS_SHARED)) return 1;
 409      return 0;
 410  }
 411  
 412  static void
 413  str_make_independent(str)
 414      VALUE str;
 415  {
 416      char *ptr;
 417  
 418      ptr = ALLOC_N(char, RSTRING(str)->len+1);
 419      if (RSTRING(str)->ptr) {
 420          memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
 421      }
 422      ptr[RSTRING(str)->len] = 0;
 423      RSTRING(str)->ptr = ptr;
 424      RSTRING(str)->aux.capa = RSTRING(str)->len;
 425      FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
 426  }
 427  
 428  void
 429  rb_str_modify(str)
 430      VALUE str;
 431  {
 432      if (!str_independent(str))
 433          str_make_independent(str);
 434  }
 435  
 436  VALUE
 437  rb_string_value(ptr)
 438      volatile VALUE *ptr;
 439  {
 440      return *ptr = rb_str_to_str(*ptr);
 441  }
 442  
 443  char *
 444  rb_string_value_ptr(ptr)
 445      volatile VALUE *ptr;
 446  {
 447      VALUE s = *ptr;
 448      if (TYPE(s) != T_STRING) {
 449          s = rb_str_to_str(s);
 450          *ptr = s;
 451      }
 452      if (!RSTRING(s)->ptr) {
 453          str_make_independent(s);
 454      }
 455      return RSTRING(s)->ptr;
 456  }
 457  
 458  VALUE
 459  rb_str_substr(str, beg, len)
 460      VALUE str;
 461      long beg, len;
 462  {
 463      VALUE str2;
 464  
 465      if (len < 0) return Qnil;
 466      if (beg > RSTRING(str)->len) return Qnil;
 467      if (beg < 0) {
 468          beg += RSTRING(str)->len;
 469          if (beg < 0) return Qnil;
 470      }
 471      if (beg + len > RSTRING(str)->len) {
 472          len = RSTRING(str)->len - beg;
 473      }
 474      if (len < 0) {
 475          len = 0;
 476      }
 477      if (len == 0) return rb_str_new5(str,0,0);
 478  
 479      str2 = rb_str_new5(str,RSTRING(str)->ptr+beg, len);
 480      OBJ_INFECT(str2, str);
 481  
 482      return str2;
 483  }
 484  
 485  VALUE
 486  rb_str_freeze(str)
 487      VALUE str;
 488  {
 489      return rb_obj_freeze(str);
 490  }
 491  
 492  VALUE
 493  rb_str_dup_frozen(str)
 494      VALUE str;
 495  {
 496      if (FL_TEST(str, ELTS_SHARED)) {
 497          OBJ_FREEZE(RSTRING(str)->aux.shared);
 498          return RSTRING(str)->aux.shared;
 499      }
 500      if (OBJ_FROZEN(str)) return str;
 501      str = rb_str_dup(str);
 502      OBJ_FREEZE(str);
 503      return str;
 504  }
 505  
 506  VALUE
 507  rb_str_resize(str, len)
 508      VALUE str;
 509      long len;
 510  {
 511      if (len < 0) {
 512          rb_raise(rb_eArgError, "negative string size (or size too big)");
 513      }
 514          
 515      if (len != RSTRING(str)->len) {
 516          rb_str_modify(str);
 517  
 518          if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
 519              RESIZE_CAPA(str, len);
 520          }
 521          RSTRING(str)->len = len;
 522          RSTRING(str)->ptr[len] = '\0';  /* sentinel */
 523      }
 524      return str;
 525  }
 526  
 527  VALUE
 528  rb_str_buf_cat(str, ptr, len)
 529      VALUE str;
 530      const char *ptr;
 531      long len;
 532  {
 533      long capa, total;
 534  
 535      if (FL_TEST(str, ELTS_SHARED)) {
 536          rb_str_modify(str);
 537      }
 538      capa = RSTRING(str)->aux.capa;
 539      total = RSTRING(str)->len+len;
 540      if (capa <= total) {
 541          while (total > capa) {
 542              capa = (capa + 1) * 2;
 543          }
 544          RESIZE_CAPA(str, capa);
 545      }
 546      memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
 547      RSTRING(str)->len = total;
 548      RSTRING(str)->ptr[total] = '\0'; /* sentinel */
 549  
 550      return str;
 551  }
 552  
 553  VALUE
 554  rb_str_buf_cat2(str, ptr)
 555      VALUE str;
 556      const char *ptr;
 557  {
 558      return rb_str_buf_cat(str, ptr, strlen(ptr));
 559  }
 560  
 561  VALUE
 562  rb_str_cat(str, ptr, len)
 563      VALUE str;
 564      const char *ptr;
 565      long len;
 566  {
 567      rb_str_modify(str);
 568      if (len > 0) {
 569          if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
 570              return rb_str_buf_cat(str, ptr, len);
 571          }
 572          RESIZE_CAPA(str, RSTRING(str)->len + len);
 573          if (ptr) {
 574              memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
 575          }
 576          else {
 577              MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len);
 578          }
 579          RSTRING(str)->len += len;
 580          RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
 581      }
 582  
 583      return str;
 584  }
 585  
 586  VALUE
 587  rb_str_cat2(str, ptr)
 588      VALUE str;
 589      const char *ptr;
 590  {
 591      return rb_str_cat(str, ptr, strlen(ptr));
 592  }
 593  
 594  VALUE
 595  rb_str_buf_append(str, str2)
 596      VALUE str, str2;
 597  {
 598      long capa, len;
 599  
 600      if (FL_TEST(str, ELTS_SHARED)) {
 601          rb_str_modify(str);
 602      }
 603      capa = RSTRING(str)->aux.capa;
 604  
 605      len = RSTRING(str)->len+RSTRING(str2)->len;
 606      if (capa <= len) {
 607          while (len > capa) {
 608              capa = (capa + 1) * 2;
 609          }
 610          RESIZE_CAPA(str, capa);
 611      }
 612      memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
 613             RSTRING(str2)->ptr, RSTRING(str2)->len);
 614      RSTRING(str)->len += RSTRING(str2)->len;
 615      RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
 616  
 617      return str;
 618  }
 619  
 620  VALUE
 621  rb_str_append(str, str2)
 622      VALUE str, str2;
 623  {
 624      long len;
 625  
 626      StringValue(str2);
 627      rb_str_modify(str);
 628      if (RSTRING(str2)->len > 0) {
 629          len = RSTRING(str)->len+RSTRING(str2)->len;
 630          if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
 631              rb_str_buf_append(str, str2);
 632              OBJ_INFECT(str, str2);
 633              return str;
 634          }
 635          RESIZE_CAPA(str, len);
 636          memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
 637                 RSTRING(str2)->ptr, RSTRING(str2)->len);
 638          RSTRING(str)->len += RSTRING(str2)->len;
 639          RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
 640      }
 641      OBJ_INFECT(str, str2);
 642  
 643      return str;
 644  }
 645  
 646  VALUE
 647  rb_str_concat(str1, str2)
 648      VALUE str1, str2;
 649  {
 650      if (FIXNUM_P(str2)) {
 651          int i = FIX2INT(str2);
 652          if (0 <= i && i <= 0xff) { /* byte */
 653              char c = i;
 654              return rb_str_cat(str1, &c, 1);
 655          }
 656      }
 657      str1 = rb_str_append(str1, str2);
 658  
 659      return str1;
 660  }
 661  
 662  int
 663  rb_str_hash(str)
 664      VALUE str;
 665  {
 666      register long len = RSTRING(str)->len;
 667      register char *p = RSTRING(str)->ptr;
 668      register int key = 0;
 669  
 670  #ifdef HASH_ELFHASH
 671      register unsigned int g;
 672  
 673      while (len--) {
 674          key = (key << 4) + *p++;
 675          if (g = key & 0xF0000000)
 676              key ^= g >> 24;
 677          key &= ~g;
 678      }
 679  #elif HASH_PERL
 680      while (len--) {
 681          key = key*33 + *p++;
 682      }
 683      key = key + (key>>5);
 684  #else
 685      while (len--) {
 686          key = key*65599 + *p;
 687          p++;
 688      }
 689      key = key + (key>>5);
 690  #endif
 691      return key;
 692  }
 693  
 694  static VALUE
 695  rb_str_hash_m(str)
 696      VALUE str;
 697  {
 698      int key = rb_str_hash(str);
 699      return INT2FIX(key);
 700  }
 701  
 702  #define lesser(a,b) (((a)>(b))?(b):(a))
 703  
 704  int
 705  rb_str_cmp(str1, str2)
 706      VALUE str1, str2;
 707  {
 708      long len;
 709      int retval;
 710  
 711      len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
 712      retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
 713      if (retval == 0) {
 714          if (RSTRING(str1)->len == RSTRING(str2)->len) return 0;
 715          if (RSTRING(str1)->len > RSTRING(str2)->len) return 1;
 716          return -1;
 717      }
 718      if (retval > 0) return 1;
 719      return -1;
 720  }
 721  
 722  static VALUE
 723  rb_str_equal(str1, str2)
 724      VALUE str1, str2;
 725  {
 726      if (str1 == str2) return Qtrue;
 727      if (TYPE(str2) != T_STRING) {
 728          str2 = rb_check_convert_type(str2, T_STRING, "String", "to_str");
 729          if (NIL_P(str2)) return Qfalse;
 730      }
 731  
 732      if (RSTRING(str1)->len == RSTRING(str2)->len
 733          && rb_str_cmp(str1, str2) == 0) {
 734          return Qtrue;
 735      }
 736      return Qfalse;
 737  }
 738  
 739  static VALUE
 740  rb_str_eql(str1, str2)
 741      VALUE str1, str2;
 742  {
 743      if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len)
 744          return Qfalse;
 745  
 746      if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr,
 747                 lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0)
 748          return Qtrue;
 749  
 750      return Qfalse;
 751  }
 752  
 753  static VALUE
 754  rb_str_cmp_m(str1, str2)
 755      VALUE str1, str2;
 756  {
 757      int result;
 758  
 759      StringValue(str2);
 760      result = rb_str_cmp(str1, str2);
 761      return INT2FIX(result);
 762  }
 763  
 764  static VALUE
 765  rb_str_casecmp(str1, str2)
 766      VALUE str1, str2;
 767  {
 768      long len;
 769      int retval;
 770  
 771      StringValue(str2);
 772      len = lesser(RSTRING(str1)->len, RSTRING(str2)->len);
 773      retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len);
 774      if (retval == 0) {
 775          if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0);
 776          if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1);
 777          return INT2FIX(-1);
 778      }
 779      if (retval == 0) return INT2FIX(0);
 780      if (retval > 0) return INT2FIX(1);
 781      return INT2FIX(-1);
 782  }
 783  
 784  static long
 785  rb_str_index(str, sub, offset)
 786      VALUE str, sub;
 787      long offset;
 788  {
 789      char *s, *e, *p;
 790      long len;
 791  
 792      if (offset < 0) {
 793          offset += RSTRING(str)->len;
 794          if (offset < 0) return -1;
 795      }
 796      if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1;
 797      s = RSTRING(str)->ptr+offset;
 798      p = RSTRING(sub)->ptr;
 799      len = RSTRING(sub)->len;
 800      if (len == 0) return offset;
 801      e = RSTRING(str)->ptr + RSTRING(str)->len - len + 1;
 802      while (s < e) {
 803          if (rb_memcmp(s, p, len) == 0) {
 804              return (s-(RSTRING(str)->ptr));
 805          }
 806          s++;
 807      }
 808      return -1;
 809  }
 810  
 811  static VALUE
 812  rb_str_index_m(argc, argv, str)
 813      int argc;
 814      VALUE *argv;
 815      VALUE str;
 816  {
 817      VALUE sub;
 818      VALUE initpos;
 819      long pos;
 820  
 821      if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
 822          pos = NUM2LONG(initpos);
 823      }
 824      else {
 825          pos = 0;
 826      }
 827      if (pos < 0) {
 828          pos += RSTRING(str)->len;
 829          if (pos < 0) {
 830              if (TYPE(sub) == T_REGEXP) {
 831                  rb_backref_set(Qnil);
 832              }
 833              return Qnil;
 834          }
 835      }
 836  
 837      switch (TYPE(sub)) {
 838        case T_REGEXP:
 839          pos = rb_reg_adjust_startpos(sub, str, pos, 0);
 840          pos = rb_reg_search(sub, str, pos, 0);
 841          break;
 842  
 843        case T_STRING:
 844          pos = rb_str_index(str, sub, pos);
 845          break;
 846  
 847        case T_FIXNUM:
 848        {
 849            int c = FIX2INT(sub);
 850            long len = RSTRING(str)->len;
 851            char *p = RSTRING(str)->ptr;
 852  
 853            for (;pos<len;pos++) {
 854                if (p[pos] == c) return LONG2NUM(pos);
 855            }
 856            return Qnil;
 857        }
 858  
 859        default:
 860          rb_raise(rb_eTypeError, "type mismatch: %s given",
 861                   rb_class2name(CLASS_OF(sub)));
 862      }
 863  
 864      if (pos == -1) return Qnil;
 865      return LONG2NUM(pos);
 866  }
 867  
 868  static long
 869  rb_str_rindex(str, sub, pos)
 870      VALUE str, sub;
 871      long pos;
 872  {
 873      long len = RSTRING(sub)->len;
 874      char *s, *sbeg, *t;
 875  
 876      /* substring longer than string */
 877      if (RSTRING(str)->len < len) return -1;
 878      if (RSTRING(str)->len - pos < len) {
 879          pos = RSTRING(str)->len - len;
 880      }
 881      sbeg = RSTRING(str)->ptr;
 882      s = RSTRING(str)->ptr + pos;
 883      t = RSTRING(sub)->ptr;
 884      if (len) {
 885          while (sbeg <= s) {
 886              if (rb_memcmp(s, t, len) == 0) {
 887                  return s - RSTRING(str)->ptr;
 888              }
 889              s--;
 890          }
 891          return -1;
 892      }
 893      else {
 894          return pos;
 895      }
 896  }
 897  
 898  static VALUE
 899  rb_str_rindex_m(argc, argv, str)
 900      int argc;
 901      VALUE *argv;
 902      VALUE str;
 903  {
 904      VALUE sub;
 905      VALUE position;
 906      long pos;
 907  
 908      if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) {
 909          pos = NUM2LONG(position);
 910          if (pos < 0) {
 911              pos += RSTRING(str)->len;
 912              if (pos < 0) {
 913                  if (TYPE(sub) == T_REGEXP) {
 914                      rb_backref_set(Qnil);
 915                  }
 916                  return Qnil;
 917              }
 918          }
 919          if (pos > RSTRING(str)->len) pos = RSTRING(str)->len;
 920      }
 921      else {
 922          pos = RSTRING(str)->len;
 923      }
 924  
 925      switch (TYPE(sub)) {
 926        case T_REGEXP:
 927          if (RREGEXP(sub)->len) {
 928              pos = rb_reg_adjust_startpos(sub, str, pos, 1);
 929              pos = rb_reg_search(sub, str, pos, 1);
 930          }
 931          if (pos >= 0) return LONG2NUM(pos);
 932          break;
 933  
 934        case T_STRING:
 935          pos = rb_str_rindex(str, sub, pos);
 936          if (pos >= 0) return LONG2NUM(pos);
 937          break;
 938  
 939        case T_FIXNUM:
 940        {
 941            int c = FIX2INT(sub);
 942            char *p = RSTRING(str)->ptr + pos;
 943            char *pbeg = RSTRING(str)->ptr;
 944  
 945            while (pbeg <= p) {
 946                if (*p == c) return LONG2NUM(p - RSTRING(str)->ptr);
 947                p--;
 948            }
 949            return Qnil;
 950        }
 951  
 952        default:
 953          rb_raise(rb_eTypeError, "type mismatch: %s given",
 954                   rb_class2name(CLASS_OF(sub)));
 955      }
 956      return Qnil;
 957  }
 958  
 959  static VALUE
 960  rb_str_match(x, y)
 961      VALUE x, y;
 962  {
 963      long start;
 964  
 965      switch (TYPE(y)) {
 966        case T_REGEXP:
 967          return rb_reg_match(y, x);
 968  
 969        case T_STRING:
 970          start = rb_str_index(x, y, 0);
 971          if (start == -1) {
 972              return Qnil;
 973          }
 974          return LONG2NUM(start);
 975  
 976        default:
 977          return rb_funcall(y, rb_intern("=~"), 1, x);
 978      }
 979  }
 980  
 981  static VALUE
 982  rb_str_match2(str)
 983      VALUE str;
 984  {
 985      StringValue(str);
 986      return rb_reg_match2(rb_reg_regcomp(rb_reg_quote(str)));
 987  }
 988  
 989  static VALUE get_pat _((VALUE, int));
 990  
 991  static VALUE
 992  rb_str_match_m(str, re)
 993      VALUE str, re;
 994  {
 995      return rb_funcall(get_pat(re, 0), rb_intern("match"), 1, str);
 996  }
 997  
 998  static char
 999  succ_char(s)
1000      char *s;
1001  {
1002      char c = *s;
1003  
1004      /* numerics */
1005      if ('0' <= c && c < '9') (*s)++;
1006      else if (c == '9') {
1007          *s = '0';
1008          return '1';
1009      }
1010      /* small alphabets */
1011      else if ('a' <= c && c < 'z') (*s)++;
1012      else if (c == 'z') {
1013          return *s = 'a';
1014      }
1015      /* capital alphabets */
1016      else if ('A' <= c && c < 'Z') (*s)++;
1017      else if (c == 'Z') {
1018          return *s = 'A';
1019      }
1020      return 0;
1021  }
1022  
1023  static VALUE
1024  rb_str_succ(orig)
1025      VALUE orig;
1026  {
1027      VALUE str;
1028      char *sbeg, *s;
1029      int c = -1;
1030      long n = 0;
1031  
1032      str = rb_str_new5(orig,RSTRING(orig)->ptr, RSTRING(orig)->len);
1033      OBJ_INFECT(str, orig);
1034      if (RSTRING(str)->len == 0) return str;
1035  
1036      sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
1037  
1038      while (sbeg <= s) {
1039          if (ISALNUM(*s)) {
1040              if ((c = succ_char(s)) == 0) break;
1041              n = s - sbeg;
1042          }
1043          s--;
1044      }
1045      if (c == -1) {              /* str contains no alnum */
1046          sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1;
1047          c = '\001';
1048          while (sbeg <= s) {
1049             if ((*s += 1) != 0) break;
1050             s--;
1051          }
1052      }
1053      if (s < sbeg) {
1054          RESIZE_CAPA(str, RSTRING(str)->len + 1);
1055          s = RSTRING(str)->ptr + n;
1056          memmove(s+1, s, RSTRING(str)->len - n);
1057          *s = c;
1058          RSTRING(str)->len += 1;
1059          RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1060      }
1061  
1062      return str;
1063  }
1064  
1065  static VALUE
1066  rb_str_succ_bang(str)
1067      VALUE str;
1068  {
1069      rb_str_modify(str);
1070      rb_str_shared_replace(str, rb_str_succ(str));
1071  
1072      return str;
1073  }
1074  
1075  VALUE
1076  rb_str_upto(beg, end, excl)
1077      VALUE beg, end;
1078      int excl;
1079  {
1080      VALUE current;
1081      ID succ = rb_intern("succ");
1082  
1083      StringValue(end);
1084      current = beg;
1085      while (rb_str_cmp(current, end) <= 0) {
1086          rb_yield(current);
1087          if (!excl && rb_str_equal(current, end)) break;
1088          current = rb_funcall(current, succ, 0, 0);
1089          if (excl && rb_str_equal(current, end)) break;
1090          if (RSTRING(current)->len > RSTRING(end)->len)
1091              break;
1092      }
1093  
1094      return beg;
1095  }
1096  
1097  static VALUE
1098  rb_str_upto_m(beg, end)
1099      VALUE beg, end;
1100  {
1101      return rb_str_upto(beg, end, 0);
1102  }
1103  
1104  static VALUE
1105  rb_str_subpat(str, re, nth)
1106      VALUE str, re;
1107      int nth;
1108  {
1109      if (rb_reg_search(re, str, 0, 0) >= 0) {
1110          return rb_reg_nth_match(nth, rb_backref_get());
1111      }
1112      return Qnil;
1113  }
1114      
1115  static VALUE
1116  rb_str_aref(str, indx)
1117      VALUE str;
1118      VALUE indx;
1119  {
1120      long idx;
1121  
1122      switch (TYPE(indx)) {
1123        case T_FIXNUM:
1124          idx = FIX2LONG(indx);
1125  
1126        num_index:
1127          if (idx < 0) {
1128              idx = RSTRING(str)->len + idx;
1129          }
1130          if (idx < 0 || RSTRING(str)->len <= idx) {
1131              return Qnil;
1132          }
1133          return INT2FIX(RSTRING(str)->ptr[idx] & 0xff);
1134  
1135        case T_REGEXP:
1136          return rb_str_subpat(str, indx, 0);
1137  
1138        case T_STRING:
1139          if (rb_str_index(str, indx, 0) != -1)
1140              return rb_str_dup(indx);
1141          return Qnil;
1142  
1143        default:
1144          /* check if indx is Range */
1145          {
1146              long beg, len;
1147              switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) {
1148                case Qfalse:
1149                  break;
1150                case Qnil:
1151                  return Qnil;
1152                default:
1153                  return rb_str_substr(str, beg, len);
1154              }
1155          }
1156          idx = NUM2LONG(indx);
1157          goto num_index;
1158      }
1159      return Qnil;                /* not reached */
1160  }
1161  
1162  static VALUE
1163  rb_str_aref_m(argc, argv, str)
1164      int argc;
1165      VALUE *argv;
1166      VALUE str;
1167  {
1168      if (argc == 2) {
1169          if (TYPE(argv[0]) == T_REGEXP) {
1170              return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
1171          }
1172          return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
1173      }
1174      if (argc != 1) {
1175          rb_raise(rb_eArgError, "wrong number of arguments(%d for 1)", argc);
1176      }
1177      return rb_str_aref(str, argv[0]);
1178  }
1179  
1180  void
1181  rb_str_update(str, beg, len, val)
1182      VALUE str;
1183      long beg, len;
1184      VALUE val;
1185  {
1186      if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
1187      if (RSTRING(str)->len < beg) {
1188        out_of_range:
1189          rb_raise(rb_eIndexError, "index %ld out of string", beg);
1190      }
1191      if (beg < 0) {
1192          if (-beg > RSTRING(str)->len) {
1193              goto out_of_range;
1194          }
1195          beg += RSTRING(str)->len;
1196      }
1197      if (RSTRING(str)->len < beg + len) {
1198          len = RSTRING(str)->len - beg;
1199      }
1200  
1201      StringValue(val);
1202      if (len < RSTRING(val)->len) {
1203          /* expand string */
1204          RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(val)->len - len);
1205      }
1206  
1207      if (RSTRING(val)->len != len) {
1208          memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len,
1209                  RSTRING(str)->ptr + beg + len,
1210                  RSTRING(str)->len - (beg + len));
1211      }
1212      if (RSTRING(str)->len < beg && len < 0) {
1213          MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len);
1214      }
1215      if (RSTRING(val)->len > 0) {
1216          memmove(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len);
1217      }
1218      RSTRING(str)->len += RSTRING(val)->len - len;
1219      RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1220      OBJ_INFECT(str, val);
1221  }
1222  
1223  static void
1224  rb_str_subpat_set(str, re, nth, val)
1225      VALUE str, re;
1226      int nth;
1227      VALUE val;
1228  {
1229      VALUE match;
1230      long start, end, len;
1231  
1232      if (rb_reg_search(re, str, 0, 0) < 0) {
1233          rb_raise(rb_eIndexError, "regexp not matched");
1234      }
1235      match = rb_backref_get();
1236      if (nth >= RMATCH(match)->regs->num_regs) {
1237        out_of_range:
1238          rb_raise(rb_eIndexError, "index %d out of regexp", nth);
1239      }
1240      if (nth < 0) {
1241          if (-nth >= RMATCH(match)->regs->num_regs) {
1242              goto out_of_range;
1243          }
1244          nth += RMATCH(match)->regs->num_regs;
1245      }
1246  
1247      start = RMATCH(match)->BEG(nth);
1248      if (start == -1) {
1249          rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
1250      }
1251      end = RMATCH(match)->END(nth);
1252      len = end - start;
1253      rb_str_modify(str);
1254      rb_str_update(str, start, len, val);
1255  }
1256  
1257  static VALUE
1258  rb_str_aset(str, indx, val)
1259      VALUE str;
1260      VALUE indx, val;
1261  {
1262      long idx, beg;
1263  
1264      switch (TYPE(indx)) {
1265        case T_FIXNUM:
1266        num_index:
1267          idx = NUM2LONG(indx);
1268          if (RSTRING(str)->len <= idx) {
1269            out_of_range:
1270              rb_raise(rb_eIndexError, "index %ld out of string", idx);
1271          }
1272          if (idx < 0) {
1273              if (-idx > RSTRING(str)->len)
1274                  goto out_of_range;
1275              idx += RSTRING(str)->len;
1276          }
1277          if (FIXNUM_P(val)) {
1278              if (RSTRING(str)->len == idx) {
1279                  RSTRING(str)->len += 1;
1280                  RESIZE_CAPA(str, RSTRING(str)->len);
1281              }
1282              RSTRING(str)->ptr[idx] = NUM2INT(val) & 0xff;
1283          }
1284          else {
1285              rb_str_update(str, idx, 1, val);
1286          }
1287          return val;
1288  
1289        case T_REGEXP:
1290          rb_str_subpat_set(str, indx, 0, val);
1291          return val;
1292  
1293        case T_STRING:
1294          beg = rb_str_index(str, indx, 0);
1295          if (beg < 0) {
1296              rb_raise(rb_eIndexError, "string not matched");
1297          }
1298          rb_str_update(str, beg, RSTRING(indx)->len, val);
1299          return val;
1300  
1301        default:
1302          /* check if indx is Range */
1303          {
1304              long beg, len;
1305              if (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 2)) {
1306                  rb_str_update(str, beg, len, val);
1307                  return val;
1308              }
1309          }
1310          idx = NUM2LONG(indx);
1311          goto num_index;
1312      }
1313  }
1314  
1315  static VALUE
1316  rb_str_aset_m(argc, argv, str)
1317      int argc;
1318      VALUE *argv;
1319      VALUE str;
1320  {
1321      rb_str_modify(str);
1322      if (argc == 3) {
1323          if (TYPE(argv[0]) == T_REGEXP) {
1324              rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
1325          }
1326          else {
1327              rb_str_update(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
1328          }
1329          return argv[2];
1330      }
1331      if (argc != 2) {
1332          rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1333      }
1334      return rb_str_aset(str, argv[0], argv[1]);
1335  }
1336  
1337  static VALUE
1338  rb_str_insert(str, idx, str2)
1339      VALUE str, idx, str2;
1340  {
1341      long pos = NUM2LONG(idx);
1342  
1343      rb_str_modify(str);
1344      if (pos == -1) {
1345          pos = RSTRING(str)->len;
1346      }
1347      else if (pos < 0) {
1348          pos++;
1349      }
1350      rb_str_update(str, pos, 0, str2);
1351      return str;
1352  }
1353  
1354  static VALUE
1355  rb_str_slice_bang(argc, argv, str)
1356      int argc;
1357      VALUE *argv;
1358      VALUE str;
1359  {
1360      VALUE result;
1361      VALUE buf[3];
1362      int i;
1363  
1364      if (argc < 1 || 2 < argc) {
1365          rb_raise(rb_eArgError, "wrong number of arguments(%d for 1)", argc);
1366      }
1367      for (i=0; i<argc; i++) {
1368          buf[i] = argv[i];
1369      }
1370      buf[i] = rb_str_new(0,0);
1371      result = rb_str_aref_m(argc, buf, str);
1372      if (!NIL_P(result)) {
1373          rb_str_aset_m(argc+1, buf, str);
1374      }
1375      return result;
1376  }
1377  
1378  static VALUE
1379  get_pat(pat, quote)
1380      VALUE pat;
1381      int quote;
1382  {
1383      VALUE val;
1384  
1385      switch (TYPE(pat)) {
1386        case T_REGEXP:
1387          return pat;
1388  
1389        case T_STRING:
1390          break;
1391  
1392        default:
1393          val = rb_check_convert_type(pat, T_STRING, "String", "to_str");
1394          if (NIL_P(val)) {
1395              Check_Type(pat, T_REGEXP);
1396          }
1397          pat = val;
1398      }
1399  
1400      if (quote) {
1401          val = rb_reg_quote(pat);
1402  #if RUBY_VERSION_CODE < 180
1403          if (val != pat && rb_str_cmp(val, pat) != 0) {
1404              rb_warn("string pattern instead of regexp; metacharacters no longer effective");
1405          }
1406  #endif
1407          pat = val;
1408      }
1409  
1410      return rb_reg_regcomp(pat);
1411  }
1412  
1413  static VALUE
1414  rb_str_sub_bang(argc, argv, str)
1415      int argc;
1416      VALUE *argv;
1417      VALUE str;
1418  {
1419      VALUE pat, repl, match;
1420      struct re_registers *regs;
1421      int iter = 0;
1422      int tainted = 0;
1423      long plen;
1424  
1425      if (argc == 1 && rb_block_given_p()) {
1426          iter = 1;
1427      }
1428      else if (argc == 2) {
1429          repl = argv[1];
1430          StringValue(repl);
1431          if (OBJ_TAINTED(repl)) tainted = 1;
1432      }
1433      else {
1434          rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1435      }
1436  
1437      pat = get_pat(argv[0], 1);
1438      if (rb_reg_search(pat, str, 0, 0) >= 0) {
1439          rb_str_modify(str);
1440          match = rb_backref_get();
1441          regs = RMATCH(match)->regs;
1442  
1443          if (iter) {
1444              rb_match_busy(match);
1445              repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
1446              rb_backref_set(match);
1447          }
1448          else {
1449              repl = rb_reg_regsub(repl, str, regs);
1450          }
1451          if (OBJ_TAINTED(repl)) tainted = 1;
1452          plen = END(0) - BEG(0);
1453          if (RSTRING(repl)->len > plen) {
1454              RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(repl)->len - plen);
1455          }
1456          if (RSTRING(repl)->len != plen) {
1457              memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len,
1458                      RSTRING(str)->ptr + BEG(0) + plen,
1459                      RSTRING(str)->len - BEG(0) - plen);
1460          }
1461          memcpy(RSTRING(str)->ptr + BEG(0),
1462                 RSTRING(repl)->ptr, RSTRING(repl)->len);
1463          RSTRING(str)->len += RSTRING(repl)->len - plen;
1464          RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
1465          if (tainted) OBJ_TAINT(str);
1466  
1467          return str;
1468      }
1469      return Qnil;
1470  }
1471  
1472  static VALUE
1473  rb_str_sub(argc, argv, str)
1474      int argc;
1475      VALUE *argv;
1476      VALUE str;
1477  {
1478      str = rb_str_dup(str);
1479      rb_str_sub_bang(argc, argv, str);
1480      return str;
1481  }
1482  
1483  static VALUE
1484  str_gsub(argc, argv, str, bang)
1485      int argc;
1486      VALUE *argv;
1487      VALUE str;
1488      int bang;
1489  {
1490      VALUE pat, val, repl, match;
1491      struct re_registers *regs;
1492      long beg, n;
1493      long offset, blen, len;
1494      int iter = 0;
1495      char *buf, *bp, *cp;
1496      int tainted = 0;
1497  
1498      if (argc == 1 && rb_block_given_p()) {
1499          iter = 1;
1500      }
1501      else if (argc == 2) {
1502          repl = argv[1];
1503          StringValue(repl);
1504          if (OBJ_TAINTED(repl)) tainted = 1;
1505      }
1506      else {
1507          rb_raise(rb_eArgError, "wrong number of arguments(%d for 2)", argc);
1508      }
1509  
1510      pat = get_pat(argv[0], 1);
1511      offset=0; n=0; 
1512      beg = rb_reg_search(pat, str, 0, 0);
1513      if (beg < 0) {
1514          if (bang) return Qnil;  /* no match, no substitution */
1515          return rb_str_dup(str);
1516      }
1517  
1518      blen = RSTRING(str)->len + 30; /* len + margin */
1519      buf = ALLOC_N(char, blen);
1520      bp = buf;
1521      cp = RSTRING(str)->ptr;
1522  
1523      while (beg >= 0) {
1524          n++;
1525          match = rb_backref_get();
1526          regs = RMATCH(match)->regs;
1527          if (iter) {
1528              rb_match_busy(match);
1529              val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
1530              rb_backref_set(match);
1531          }
1532          else {
1533              val = rb_reg_regsub(repl, str, regs);
1534          }
1535          if (OBJ_TAINTED(val)) tainted = 1;
1536          len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3;
1537          if (blen < len) {
1538              while (blen < len) blen *= 2;
1539              len = bp - buf;
1540              REALLOC_N(buf, char, blen);
1541              bp = buf + len;
1542          }
1543          len = beg - offset;     /* copy pre-match substr */
1544          memcpy(bp, cp, len);
1545          bp += len;
1546          memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len);
1547          bp += RSTRING(val)->len;
1548          if (BEG(0) == END(0)) {
1549              /*
1550               * Always consume at least one character of the input string
1551               * in order to prevent infinite loops.
1552               */
1553              len = mbclen2(RSTRING(str)->ptr[END(0)], pat);
1554              if (RSTRING(str)->len > END(0)) {
1555                  memcpy(bp, RSTRING(str)->ptr+END(0), len);
1556                  bp += len;
1557              }
1558              offset = END(0) + len;
1559          }
1560          else {
1561              offset = END(0);
1562          }
1563          cp = RSTRING(str)->ptr + offset;
1564          if (offset > RSTRING(str)->len) break;
1565          beg = rb_reg_search(pat, str, offset, 0);
1566      }
1567      if (RSTRING(str)->len > offset) {
1568          len = bp - buf;
1569          if (blen - len < RSTRING(str)->len - offset + 1) {
1570              REALLOC_N(buf, char, len + RSTRING(str)->len - offset + 1);
1571              bp = buf + len;
1572          }
1573          memcpy(bp, cp, RSTRING(str)->len - offset);
1574          bp += RSTRING(str)->len - offset;
1575      }
1576      rb_backref_set(match);
1577      if (bang) {
1578          if (str_independent(str)) {
1579              free(RSTRING(str)->ptr);
1580          }
1581          FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
1582      }
1583      else {
1584          VALUE dup = rb_obj_alloc(rb_obj_class(str));
1585  
1586          OBJ_INFECT(dup, str);
1587          str = dup;
1588      }
1589      RSTRING(str)->ptr = buf;
1590      RSTRING(str)->len = len = bp - buf;
1591      RSTRING(str)->ptr[len] = '\0';
1592      RSTRING(str)->aux.capa = len;
1593  
1594      if (tainted) OBJ_TAINT(str);
1595      return str;
1596  }
1597  
1598  static VALUE
1599  rb_str_gsub_bang(argc, argv, str)
1600      int argc;
1601      VALUE *argv;
1602      VALUE str;
1603  {
1604      return str_gsub(argc, argv, str, 1);
1605  }
1606  
1607  static VALUE
1608  rb_str_gsub(argc, argv, str)
1609      int argc;
1610      VALUE *argv;
1611      VALUE str;
1612  {
1613      return str_gsub(argc, argv, str, 0);
1614  }
1615  
1616  static VALUE
1617  rb_str_replace(str, str2)
1618      VALUE str, str2;
1619  {
1620      if (str == str2) return str;
1621  
1622      StringValue(str2);
1623      if (FL_TEST(str2, ELTS_SHARED)) {
1624          if (str_independent(str)) {
1625              free(RSTRING(str)->ptr);
1626          }
1627          RSTRING(str)->len = RSTRING(str2)->len;
1628          RSTRING(str)->ptr = RSTRING(str2)->ptr;
1629          FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
1630          RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
1631      }
1632      else {
1633          rb_str_modify(str);
1634          rb_str_resize(str, RSTRING(str2)->len);
1635          memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
1636          if (FL_TEST(str2, STR_ASSOC)) {
1637              FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
1638              RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
1639          }
1640      }
1641  
1642      OBJ_INFECT(str, str2);
1643      return str;
1644  }
1645  
1646  static VALUE
1647  uscore_get()
1648  {
1649      VALUE line;
1650  
1651      line = rb_lastline_get();
1652      if (TYPE(line) != T_STRING) {
1653          rb_raise(rb_eTypeError, "$_ value need to be String (%s given)",
1654                   NIL_P(line) ? "nil" : rb_class2name(CLASS_OF(line)));
1655      }
1656      return line;
1657  }
1658  
1659  static VALUE
1660  rb_f_sub_bang(argc, argv)
1661      int argc;
1662      VALUE *argv;
1663  {
1664      return rb_str_sub_bang(argc, argv, uscore_get());
1665  }
1666  
1667  static VALUE
1668  rb_f_sub(argc, argv)
1669      int argc;
1670      VALUE *argv;
1671  {
1672      VALUE str = rb_str_dup(uscore_get());
1673  
1674      if (NIL_P(rb_str_sub_bang(argc, argv, str)))
1675          return str;
1676      rb_lastline_set(str);
1677      return str;
1678  }
1679  
1680  static VALUE
1681  rb_f_gsub_bang(argc, argv)
1682      int argc;
1683      VALUE *argv;
1684  {
1685      return rb_str_gsub_bang(argc, argv, uscore_get());
1686  }
1687  
1688  static VALUE
1689  rb_f_gsub(argc, argv)
1690      int argc;
1691      VALUE *argv;
1692  {
1693      VALUE str = rb_str_dup(uscore_get());
1694  
1695      if (NIL_P(rb_str_gsub_bang(argc, argv, str)))
1696          return str;
1697      rb_lastline_set(str);
1698      return str;
1699  }
1700  
1701  static VALUE
1702  rb_str_reverse_bang(str)
1703      VALUE str;
1704  {
1705      char *s, *e;
1706      char c;
1707  
1708      rb_str_modify(str);
1709      s = RSTRING(str)->ptr;
1710      e = s + RSTRING(str)->len - 1;
1711      while (s < e) {
1712          c = *s;
1713          *s++ = *e;
1714          *e-- = c;
1715      }
1716  
1717      return str;
1718  }
1719  
1720  static VALUE
1721  rb_str_reverse(str)
1722      VALUE str;
1723  {
1724      VALUE obj;
1725      char *s, *e, *p;
1726  
1727      if (RSTRING(str)->len <= 1) return rb_str_dup(str);
1728  
1729      obj = rb_str_new5(str, 0, RSTRING(str)->len);
1730      s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1;
1731      p = RSTRING(obj)->ptr;
1732  
1733      while (e >= s) {
1734          *p++ = *e--;
1735      }
1736      OBJ_INFECT(obj, str);
1737  
1738      return obj;
1739  }
1740  
1741  static VALUE
1742  rb_str_include(str, arg)
1743      VALUE str, arg;
1744  {
1745      long i;
1746  
1747      if (FIXNUM_P(arg)) {
1748          int c = FIX2INT(arg);
1749          long len = RSTRING(str)->len;
1750          char *p = RSTRING(str)->ptr;
1751  
1752          for (i=0; i<len; i++) {
1753              if (p[i] == c) {
1754                  return Qtrue;
1755              }
1756          }
1757          return Qfalse;
1758      }
1759  
1760      StringValue(arg);
1761      i = rb_str_index(str, arg, 0);
1762  
1763      if (i == -1) return Qfalse;
1764      return Qtrue;
1765  }
1766  
1767  static VALUE
1768  rb_str_to_i(argc, argv, str)
1769      int argc;
1770      VALUE *argv;
1771      VALUE str;
1772  {
1773      VALUE b;
1774      int base;
1775  
1776      rb_scan_args(argc, argv, "01", &b);
1777      if (argc == 0) base = 10;
1778      else base = NUM2INT(b);
1779  
1780      switch (base) {
1781        case 0: case 2: case 8: case 10: case 16:
1782          break;
1783        default:
1784          rb_raise(rb_eArgError, "illegal radix %d", base);
1785      }
1786      return rb_str_to_inum(str, base, Qfalse);
1787  }
1788  
1789  static VALUE
1790  rb_str_to_f(str)
1791      VALUE str;
1792  {
1793      return rb_float_new(rb_str_to_dbl(str, Qfalse));
1794  }
1795  
1796  static VALUE
1797  rb_str_to_s(str)
1798      VALUE str;
1799  {
1800      return str;
1801  }
1802  
1803  VALUE
1804  rb_str_inspect(str)
1805      VALUE str;
1806  {
1807      char *p, *pend;
1808      VALUE result = rb_str_buf_new2("\"");
1809      char s[5];
1810  
1811      p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1812      while (p < pend) {
1813          char c = *p++;
1814          if (ismbchar(c) && p < pend) {
1815              int len = mbclen(c);
1816              rb_str_buf_cat(result, p - 1, len);
1817              p += len - 1;
1818          }
1819          else if (c == '"'|| c == '\\') {
1820              s[0] = '\\'; s[1] = c;
1821              rb_str_buf_cat(result, s, 2);
1822          }
1823          else if (ISPRINT(c)) {
1824              s[0] = c;
1825              rb_str_buf_cat(result, s, 1);
1826          }
1827          else if (c == '\n') {
1828              s[0] = '\\'; s[1] = 'n';
1829              rb_str_buf_cat(result, s, 2);
1830          }
1831          else if (c == '\r') {
1832              s[0] = '\\'; s[1] = 'r';
1833              rb_str_buf_cat(result, s, 2);
1834          }
1835          else if (c == '\t') {
1836              s[0] = '\\'; s[1] = 't';
1837              rb_str_buf_cat(result, s, 2);
1838          }
1839          else if (c == '\f') {
1840              s[0] = '\\'; s[1] = 'f';
1841              rb_str_buf_cat(result, s, 2);
1842          }
1843          else if (c == '\013') {
1844              s[0] = '\\'; s[1] = 'v';
1845              rb_str_buf_cat(result, s, 2);
1846          }
1847          else if (c == '\007') {
1848              s[0] = '\\'; s[1] = 'a';
1849              rb_str_buf_cat(result, s, 2);
1850          }
1851          else if (c == 033) {
1852              s[0] = '\\'; s[1] = 'e';
1853              rb_str_buf_cat(result, s, 2);
1854          }
1855          else {
1856              sprintf(s, "\\%03o", c & 0377);
1857              rb_str_buf_cat2(result, s);
1858          }
1859      }
1860      rb_str_buf_cat2(result, "\"");
1861  
1862      OBJ_INFECT(result, str);
1863      return result;
1864  }
1865  
1866  static VALUE
1867  rb_str_dump(str)
1868      VALUE str;
1869  {
1870      long len;
1871      char *p, *pend;
1872      char *q, *qend;
1873      VALUE result;
1874  
1875      len = 2;                    /* "" */
1876      p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1877      while (p < pend) {
1878          char c = *p++;
1879          switch (c) {
1880            case '"':  case '\\':
1881            case '\n': case '\r':
1882            case '\t': case '\f': case '#':
1883            case '\013': case '\007': case '\033': 
1884              len += 2;
1885              break;
1886  
1887            default:
1888              if (ISPRINT(c)) {
1889                  len++;
1890              }
1891              else {
1892                  len += 4;               /* \nnn */
1893              }
1894              break;
1895          }
1896      }
1897  
1898      result = rb_str_new5(str, 0, len);
1899      p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
1900      q = RSTRING(result)->ptr; qend = q + len;
1901  
1902      *q++ = '"';
1903      while (p < pend) {
1904          char c = *p++;
1905  
1906          if (c == '"' || c == '\\') {
1907              *q++ = '\\';
1908              *q++ = c;
1909          }
1910          else if (c == '#') {
1911              *q++ = '\\';
1912              *q++ = '#';
1913          }
1914          else if (ISPRINT(c)) {
1915              *q++ = c;
1916          }
1917          else if (c == '\n') {
1918              *q++ = '\\';
1919              *q++ = 'n';
1920          }
1921          else if (c == '\r') {
1922              *q++ = '\\';
1923              *q++ = 'r';
1924          }
1925          else if (c == '\t') {
1926              *q++ = '\\';
1927              *q++ = 't';
1928          }
1929          else if (c == '\f') {
1930              *q++ = '\\';
1931              *q++ = 'f';
1932          }
1933          else if (c == '\013') {
1934              *q++ = '\\';
1935              *q++ = 'v';
1936          }
1937          else if (c == '\007') {
1938              *q++ = '\\';
1939              *q++ = 'a';
1940          }
1941          else if (c == '\033') {
1942              *q++ = '\\';
1943              *q++ = 'e';
1944          }
1945          else {
1946              *q++ = '\\';
1947              sprintf(q, "%03o", c&0xff);
1948              q += 3;
1949          }
1950      }
1951      *q++ = '"';
1952  
1953      OBJ_INFECT(result, str);
1954      return result;
1955  }
1956  
1957  static VALUE
1958  rb_str_upcase_bang(str)
1959      VALUE str;
1960  {
1961      char *s, *send;
1962      int modify = 0;
1963  
1964      rb_str_modify(str);
1965      s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
1966      while (s < send) {
1967          if (ismbchar(*s)) {
1968              s+=mbclen(*s) - 1;
1969          }
1970          else if (ISLOWER(*s)) {
1971              *s = toupper(*s);
1972              modify = 1;
1973          }
1974          s++;
1975      }
1976  
1977      if (modify) return str;
1978      return Qnil;
1979  }
1980  
1981  static VALUE
1982  rb_str_upcase(str)
1983      VALUE str;
1984  {
1985      str = rb_str_dup(str);
1986      rb_str_upcase_bang(str);
1987      return str;
1988  }
1989  
1990  static VALUE
1991  rb_str_downcase_bang(str)
1992      VALUE str;
1993  {
1994      char *s, *send;
1995      int modify = 0;
1996  
1997      rb_str_modify(str);
1998      s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
1999      while (s < send) {
2000          if (ismbchar(*s)) {
2001              s+=mbclen(*s) - 1;
2002          }
2003          else if (ISUPPER(*s)) {
2004              *s = tolower(*s);
2005              modify = 1;
2006          }
2007          s++;
2008      }
2009  
2010      if (modify) return str;
2011      return Qnil;
2012  }
2013  
2014  static VALUE
2015  rb_str_downcase(str)
2016      VALUE str;
2017  {
2018      str = rb_str_dup(str);
2019      rb_str_downcase_bang(str);
2020      return str;
2021  }
2022  
2023  static VALUE
2024  rb_str_capitalize_bang(str)
2025      VALUE str;
2026  {
2027      char *s, *send;
2028      int modify = 0;
2029  
2030      rb_str_modify(str);
2031      s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2032      if (ISLOWER(*s)) {
2033          *s = toupper(*s);
2034          modify = 1;
2035      }
2036      while (++s < send) {
2037          if (ismbchar(*s)) {
2038              s+=mbclen(*s) - 1;
2039          }
2040          else if (ISUPPER(*s)) {
2041              *s = tolower(*s);
2042              modify = 1;
2043          }
2044      }
2045      if (modify) return str;
2046      return Qnil;
2047  }
2048  
2049  static VALUE
2050  rb_str_capitalize(str)
2051      VALUE str;
2052  {
2053      str = rb_str_dup(str);
2054      rb_str_capitalize_bang(str);
2055      return str;
2056  }
2057  
2058  static VALUE
2059  rb_str_swapcase_bang(str)
2060      VALUE str;
2061  {
2062      char *s, *send;
2063      int modify = 0;
2064  
2065      rb_str_modify(str);
2066      s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2067      while (s < send) {
2068          if (ismbchar(*s)) {
2069              s+=mbclen(*s) - 1;
2070          }
2071          else if (ISUPPER(*s)) {
2072              *s = tolower(*s);
2073              modify = 1;
2074          }
2075          else if (ISLOWER(*s)) {
2076              *s = toupper(*s);
2077              modify = 1;
2078          }
2079          s++;
2080      }
2081  
2082      if (modify) return str;
2083      return Qnil;
2084  }
2085  
2086  static VALUE
2087  rb_str_swapcase(str)
2088      VALUE str;
2089  {
2090      str = rb_str_dup(str);
2091      rb_str_swapcase_bang(str);
2092      return str;
2093  }
2094  
typedef unsigned char *USTR;

/* Cursor over a tr-style character specification such as "a-z_". */
struct tr {
    int gen;                    /* non-zero while a range is being expanded */
    int now;                    /* character most recently produced */
    int max;                    /* last character of the current range */
    char *p;                    /* next unread byte of the specification */
    char *pend;                 /* one past the end of the specification */
};

/*
 * Produce the next character described by t, or -1 once the
 * specification is exhausted.
 *
 * A backslash that is not the last byte makes the following byte literal.
 * "x-y" expands to every character from x to y inclusive; a '-' in the
 * last position is an ordinary character.  A range whose start is greater
 * than its end produces nothing at all.
 */
static int
trnext(t)
    struct tr *t;
{
    for (;;) {
        if (t->gen) {
            /* in the middle of a range */
            if (++t->now < t->max) {
                return t->now;
            }
            t->gen = 0;
            return t->max;
        }

        if (t->p == t->pend) return -1;

        if (t->p < t->pend - 1 && *t->p == '\\') {
            t->p++;             /* skip the escape, take the next byte */
        }
        t->now = *(USTR)t->p++;

        if (t->p < t->pend - 1 && *t->p == '-') {
            t->p++;
            if (t->p < t->pend) {
                if (t->now > *(USTR)t->p) {
                    /* reversed range: drop it entirely */
                    t->p++;
                    continue;
                }
                t->gen = 1;
                t->max = *(USTR)t->p++;
            }
        }
        return t->now;
    }
}
2135  
2136  static VALUE rb_str_delete_bang _((int,VALUE*,VALUE));
2137  
2138  static VALUE
2139  tr_trans(str, src, repl, sflag)
2140      VALUE str, src, repl;
2141      int sflag;
2142  {
2143      struct tr trsrc, trrepl;
2144      int cflag = 0;
2145      int trans[256];
2146      int i, c, modify = 0;
2147      char *s, *send;
2148  
2149      rb_str_modify(str);
2150      StringValue(src);
2151      StringValue(repl);
2152      if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil;
2153      trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len;
2154      if (RSTRING(src)->len >= 2 && RSTRING(src)->ptr[0] == '^') {
2155          cflag++;
2156          trsrc.p++;
2157      }
2158      if (RSTRING(repl)->len == 0) {
2159          return rb_str_delete_bang(1, &src, str);
2160      }
2161      trrepl.p = RSTRING(repl)->ptr;
2162      trrepl.pend = trrepl.p + RSTRING(repl)->len;
2163      trsrc.gen = trrepl.gen = 0;
2164      trsrc.now = trrepl.now = 0;
2165      trsrc.max = trrepl.max = 0;
2166  
2167      if (cflag) {
2168          for (i=0; i<256; i++) {
2169              trans[i] = 1;
2170          }
2171          while ((c = trnext(&trsrc)) >= 0) {
2172              trans[c & 0xff] = -1;
2173          }
2174          while ((c = trnext(&trrepl)) >= 0)
2175              /* retrieve last replacer */;
2176          for (i=0; i<256; i++) {
2177              if (trans[i] >= 0) {
2178                  trans[i] = trrepl.now;
2179              }
2180          }
2181      }
2182      else {
2183          int r;
2184  
2185          for (i=0; i<256; i++) {
2186              trans[i] = -1;
2187          }
2188          while ((c = trnext(&trsrc)) >= 0) {
2189              r = trnext(&trrepl);
2190              if (r == -1) r = trrepl.now;
2191              trans[c & 0xff] = r;
2192          }
2193      }
2194  
2195      s = RSTRING(str)->ptr; send = s + RSTRING(str)->len;
2196      if (sflag) {
2197          char *t = s;
2198          int c0, last = -1;
2199  
2200          while (s < send) {
2201              c0 = *s++;
2202              if ((c = trans[c0 & 0xff]) >= 0) {
2203                  if (last == c) continue;
2204                  last = c;
2205                  *t++ = c & 0xff;
2206                  modify = 1;
2207              }
2208              else {
2209                  last = -1;
2210                  *t++ = c0;
2211              }
2212          }
2213          if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) {
2214              RSTRING(str)->len = (t - RSTRING(str)->ptr);
2215              modify = 1;
2216              *t = '\0';
2217          }
2218      }
2219      else {
2220          while (s < send) {
2221              if ((c = trans[*s & 0xff]) >= 0) {
2222                  *s = c & 0xff;
2223                  modify = 1;
2224              }
2225              s++;
2226          }
2227      }
2228  
2229      if (modify) return str;
2230      return Qnil;
2231  }
2232  
/*
 * Implementation of String#tr! (registered in Init_String).
 * Forwards to tr_trans() with a final argument of 0 and returns its
 * result unchanged: str when tr_trans() changed it, Qnil otherwise.
 * NOTE(review): the final argument is presumably tr_trans()'s squeeze
 * flag (rb_str_tr_s_bang passes 1) -- confirm against its parameter list.
 */
2233  static VALUE
2234  rb_str_tr_bang(str, src, repl)
2235      VALUE str, src, repl;
2236  {
2237      return tr_trans(str, src, repl, 0);
2238  }
2239  
/*
 * Implementation of String#tr (registered in Init_String).
 * Works on a duplicate made with rb_str_dup(), so the receiver is left
 * alone, and always returns that duplicate whether or not tr_trans()
 * changed it.
 */
2240  static VALUE
2241  rb_str_tr(str, src, repl)
2242      VALUE str, src, repl;
2243  {
2244      str = rb_str_dup(str);
2245      tr_trans(str, src, repl, 0);
2246      return str;
2247  }
2248  
/*
 * Fold the character set described by str into table[].
 *
 * str is expanded one character at a time with trnext().  A leading '^'
 * (when str is longer than one byte) negates the set.  When init is
 * non-zero table[] is first filled with 1; in every case each entry is
 * then AND-ed with membership in this set, so that successive calls with
 * init == 0 intersect their sets.
 */
2249  static void
2250  tr_setup_table(str, table, init)
2251      VALUE str;
2252      char table[256];
2253      int init;
2254  {
2255      struct tr spec;
2256      char member[256];
2257      int negate, ch, n;
2258  
2259      spec.p = RSTRING(str)->ptr;
2260      spec.pend = spec.p + RSTRING(str)->len;
2261      spec.gen = spec.now = spec.max = 0;
2262  
          /* a lone "^" is an ordinary character, not a negation marker */
2263      negate = (RSTRING(str)->len > 1 && RSTRING(str)->ptr[0] == '^');
2264      if (negate) spec.p++;
2265  
          /* unlisted bytes are "out" normally and "in" when negated */
2266      for (n = 0; n < 256; n++) {
2267          member[n] = negate;
2268      }
2269      while ((ch = trnext(&spec)) >= 0) {
2270          member[ch & 0xff] = !negate;
2271      }
2272  
2273      for (n = 0; n < 256; n++) {
2274          if (init) table[n] = 1;
2275          table[n] = table[n] && member[n];
2276      }
2277  }
2282  
/*
 * Implementation of String#delete! (registered in Init_String; also
 * called from tr_trans() when the replacement string is empty).
 *
 * Every argument is a character-set string handed to tr_setup_table();
 * the sets are intersected.  Bytes of str that belong to the resulting
 * set are removed in place.
 *
 * Raises ArgumentError when called without arguments.
 * Returns str when at least one byte was removed, Qnil otherwise
 * (including when str is empty).
 */
2283  static VALUE
2284  rb_str_delete_bang(argc, argv, str)
2285      int argc;
2286      VALUE *argv;
2287      VALUE str;
2288  {
2289      char doomed[256];
2290      char *rd, *wr, *limit;
2291      int n;
2292  
2293      if (argc < 1) {
2294          rb_raise(rb_eArgError, "wrong number of arguments");
2295      }
2296      for (n = 0; n < argc; n++) {
2297          VALUE set = argv[n];
2298  
2299          StringValue(set);
              /* only the first set initialises the table */
2300          tr_setup_table(set, doomed, n == 0);
2301      }
2302  
2303      rb_str_modify(str);
2304      rd = wr = RSTRING(str)->ptr;
2305      if (!rd || RSTRING(str)->len == 0) return Qnil;
2306      limit = rd + RSTRING(str)->len;
2307      for (; rd < limit; rd++) {
2308          if (!doomed[*rd & 0xff]) *wr++ = *rd;
2309      }
2310      *wr = '\0';
2311      RSTRING(str)->len = wr - RSTRING(str)->ptr;
2312  
          /* the write cursor falls short of the end only if a byte was dropped */
2313      return (wr < limit) ? str : Qnil;
2314  }
2323  
/*
 * Implementation of String#delete (registered in Init_String).
 * Runs rb_str_delete_bang() on a duplicate of str and returns the
 * duplicate, whether or not anything was removed.
 */
2324  static VALUE
2325  rb_str_delete(argc, argv, str)
2326      int argc;
2327      VALUE *argv;
2328      VALUE str;
2329  {
2330      str = rb_str_dup(str);
2331      rb_str_delete_bang(argc, argv, str);
2332      return str;
2333  }
2334  
/*
 * Implementation of String#squeeze! (registered in Init_String).
 *
 * Collapses runs of one repeated byte into a single byte, in place.
 * With no arguments every byte value is eligible; otherwise each
 * argument is a character-set string for tr_setup_table() and only bytes
 * in the intersection of those sets are collapsed.
 *
 * Returns str when it became shorter, Qnil otherwise (including when
 * str is empty).
 */
2335  static VALUE
2336  rb_str_squeeze_bang(argc, argv, str)
2337      int argc;
2338      VALUE *argv;
2339      VALUE str;
2340  {
2341      char collapsible[256];
2342      char *rd, *wr, *limit;
2343      int prev = -1;
2344      int ch, n;
2345      long kept;
2346  
2347      if (argc == 0) {
2348          for (n = 0; n < 256; n++) collapsible[n] = 1;
2349      }
2350      for (n = 0; n < argc; n++) {
2351          VALUE set = argv[n];
2352  
2353          StringValue(set);
2354          tr_setup_table(set, collapsible, n == 0);
2355      }
2356  
2357      rb_str_modify(str);
2358      rd = wr = RSTRING(str)->ptr;
2359      if (!rd || RSTRING(str)->len == 0) return Qnil;
2360      limit = rd + RSTRING(str)->len;
2361      while (rd < limit) {
2362          ch = *rd++ & 0xff;
              /* drop a byte only if it repeats its predecessor and is eligible */
2363          if (ch == prev && collapsible[ch]) continue;
2364          *wr++ = prev = ch;
2365      }
2366      *wr = '\0';
2367  
2368      kept = wr - RSTRING(str)->ptr;
2369      if (kept == RSTRING(str)->len) return Qnil;
2370      RSTRING(str)->len = kept;
2371      return str;
2372  }
2382  
/*
 * Implementation of String#squeeze (registered in Init_String).
 * Runs rb_str_squeeze_bang() on a duplicate of str and returns the
 * duplicate, whether or not it was shortened.
 */
2383  static VALUE
2384  rb_str_squeeze(argc, argv, str)
2385      int argc;
2386      VALUE *argv;
2387      VALUE str;
2388  {
2389      str = rb_str_dup(str);
2390      rb_str_squeeze_bang(argc, argv, str);
2391      return str;
2392  }
2393  
/*
 * Implementation of String#tr_s! (registered in Init_String).
 * Forwards to tr_trans() with a final argument of 1 and returns its
 * result unchanged: str when tr_trans() changed it, Qnil otherwise.
 * NOTE(review): the final argument is presumably the flag that makes
 * tr_trans() squeeze runs of the same translated character -- confirm
 * against its parameter list.
 */
2394  static VALUE
2395  rb_str_tr_s_bang(str, src, repl)
2396      VALUE str, src, repl;
2397  {
2398      return tr_trans(str, src, repl, 1);
2399  }
2400  
/*
 * Implementation of String#tr_s (registered in Init_String).
 * Applies tr_trans() with a final argument of 1 to a duplicate of str
 * and returns the duplicate, whether or not it was changed.
 */
2401  static VALUE
2402  rb_str_tr_s(str, src, repl)
2403      VALUE str, src, repl;
2404  {
2405      str = rb_str_dup(str);
2406      tr_trans(str, src, repl, 1);
2407      return str;
2408  }
2409  
/*
 * Implementation of String#count (registered in Init_String).
 *
 * Every argument is a character-set string handed to tr_setup_table();
 * the sets are intersected.  Returns the number of bytes of str that
 * belong to the resulting set, as an Integer.
 *
 * Raises ArgumentError when called without arguments.
 *
 * An empty receiver yields 0 rather than nil, so the result is always
 * numeric.  The tally is kept in a long so that it cannot overflow
 * before the string length (also a long) does.
 */
2410  static VALUE
2411  rb_str_count(argc, argv, str)
2412      int argc;
2413      VALUE *argv;
2414      VALUE str;
2415  {
2416      char table[256];
2417      char *s, *send;
2418      long count = 0;
2419      int i;
2420  
2421      if (argc < 1) {
2422          rb_raise(rb_eArgError, "wrong number of arguments");
2423      }
2424      for (i=0; i<argc; i++) {
2425          VALUE set = argv[i];
2426  
2427          StringValue(set);
              /* only the first set initialises the table */
2428          tr_setup_table(set, table, i == 0);
2429      }
2430  
2431      s = RSTRING(str)->ptr;
          /* nothing to scan: the count is zero, not "no answer" */
2432      if (!s || RSTRING(str)->len == 0) return INT2FIX(0);
2433      send = s + RSTRING(str)->len;
2434      while (s < send) {
2435          if (table[*s++ & 0xff]) {
2436              count++;
2437          }
2438      }
2439      return INT2NUM(count);
2440  }
2443  
/*
 * Implementation of String#split (registered in Init_String); also
 * reached through rb_str_split() and rb_f_split().
 *
 * Takes up to two optional arguments: a separator pattern and a limit.
 *
 *   - No pattern (or nil): rb_fs is used when it is set; otherwise the
 *     string is split on whitespace ("awk" mode).  A one-character
 *     String " " also selects awk mode; any other one-character String
 *     is quoted and compiled as a regexp; everything else goes through
 *     get_pat().
 *   - Limit 1: returns a one-element array holding str itself.
 *   - Limit <= 0: treated as "no limit".  With 0 (or no limit argument)
 *     trailing empty strings are popped off the result; with a negative
 *     limit they are kept.
 *
 * In regexp mode the text of every capture group that took part in a
 * match is pushed after the field that precedes that match.
 *
 * Returns a new Array.
 */
2444  static VALUE
2445  rb_str_split_m(argc, argv, str)
2446      int argc;
2447      VALUE *argv;
2448      VALUE str;
2449  {
2450      VALUE spat;
2451      VALUE limit;
2452      int awk_split = Qfalse;
2453      long beg, end, i = 0;
2454      int lim = 0;
2455      VALUE result, tmp;
2456  
2457      if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
2458          lim = NUM2INT(limit);
2459          if (lim <= 0) limit = Qnil;
2460          else if (lim == 1) return rb_ary_new3(1, str);
              /* i counts fields; the trailing remainder is one of them */
2461          i = 1;
2462      }
2463  
2464      if (NIL_P(spat)) {
2465          if (!NIL_P(rb_fs)) {
2466              spat = rb_fs;
2467              goto fs_set;
2468          }
2469          awk_split = Qtrue;
2470      }
2471      else {
2472        fs_set:
2473          if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) {
2474              if (RSTRING(spat)->ptr[0] == ' ') {
2475                  awk_split = Qtrue;
2476              }
2477              else {
2478                  spat = rb_reg_regcomp(rb_reg_quote(spat));
2479              }
2480          }
2481          else {
2482              spat = get_pat(spat, 1);
2483          }
2484      }
2485  
2486      result = rb_ary_new();
2487      beg = 0;
2488      if (awk_split) {
2489          char *ptr = RSTRING(str)->ptr;
2490          long len = RSTRING(str)->len;
2491          char *eptr = ptr + len;
              /* skip != 0 while we are between fields (consuming whitespace) */
2492          int skip = 1;
2493  
2494          for (end = beg = 0; ptr<eptr; ptr++) {
2495              if (skip) {
2496                  if (ISSPACE(*ptr)) {
2497                      beg++;
2498                  }
2499                  else {
2500                      end = beg+1;
2501                      skip = 0;
2502                  }
2503              }
2504              else {
2505                  if (ISSPACE(*ptr)) {
2506                      rb_ary_push(result, rb_str_substr(str, beg, end-beg));
2507                      skip = 1;
2508                      beg = end + 1;
2509                      if (!NIL_P(limit) && lim <= ++i) break;
2510                  }
2511                  else {
2512                      end++;
2513                  }
2514              }
2515          }
2516      }
2517      else {
2518          long start = beg;
2519          long idx;
              /* set after an empty match so the next one emits a single character */
2520          int last_null = 0;
              /* NOTE(review): BEG()/END() appear to read this local by name */
2521          struct re_registers *regs;
2522  
2523          while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
2524              regs = RMATCH(rb_backref_get())->regs;
2525              if (start == end && BEG(0) == END(0)) {
2526                  if (last_null == 1) {
2527                      rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat)));
2528                      beg = start;
2529                  }
2530                  else {
2531                      start += mbclen2(RSTRING(str)->ptr[start],spat);
2532                      last_null = 1;
2533                      continue;
2534                  }
2535              }
2536              else {
2537                  rb_ary_push(result, rb_str_substr(str, beg, end-beg));
2538                  beg = start = END(0);
2539              }
2540              last_null = 0;
2541  
                  /* append the capture groups that participated in this match */
2542              for (idx=1; idx < regs->num_regs; idx++) {
2543                  if (BEG(idx) == -1) continue;
2544                  if (BEG(idx) == END(idx))
2545                      tmp = rb_str_new5(str, 0, 0);
2546                  else
2547                      tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx));
2548                  rb_ary_push(result, tmp);
2549              }
2550              if (!NIL_P(limit) && lim <= ++i) break;
2551          }
2552      }
          /* push whatever follows the last separator */
2553      if (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0) {
2554          if (RSTRING(str)->len == beg)
2555              tmp = rb_str_new5(str, 0, 0);
2556          else
2557              tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg);
2558          rb_ary_push(result, tmp);
2559      }
          /* limit 0 or absent: strip trailing empty fields */
2560      if (NIL_P(limit) && lim == 0) {
2561          while (RARRAY(result)->len > 0 &&
2562                 RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0)
2563              rb_ary_pop(result);
2564      }
2565  
2566      return result;
2567  }
2568  
/*
 * C-level convenience entry point for splitting.
 * Coerces str with StringValue(), wraps the C string sep0 in a new Ruby
 * String and calls rb_str_split_m() with that single argument, so the
 * separator rules of rb_str_split_m() apply.  Returns the Array it
 * produces.
 */
2569  VALUE
2570  rb_str_split(str, sep0)
2571      VALUE str;
2572      const char *sep0;
2573  {
2574      VALUE sep;
2575  
2576      StringValue(str);
2577      sep = rb_str_new2(sep0);
2578      return rb_str_split_m(1, &sep, str);
2579  }
2580  
/*
 * Implementation of the global function "split" (registered in
 * Init_String).  Applies rb_str_split_m() to the value returned by
 * uscore_get().
 * NOTE(review): uscore_get() presumably fetches $_ -- it is defined
 * outside this part of the file.
 */
2581  static VALUE
2582  rb_f_split(argc, argv)
2583      int argc;
2584      VALUE *argv;
2585  {
2586      return rb_str_split_m(argc, argv, uscore_get());
2587  }
2588  
/*
 * Implementation of String#each_line and String#each (registered in
 * Init_String).  Yields successive pieces of str, each one ending with
 * the record separator; a final piece without a separator is yielded
 * too.
 *
 * The optional argument is the separator and defaults to rb_rs.
 *   - nil separator: str itself is yielded once.
 *   - empty separator: "paragraph" handling -- a piece ends after a run
 *     of two or more '\n' characters.
 *
 * Raises ArgumentError ("string modified") when, after a yield, str's
 * buffer pointer or length differs from the values captured on entry.
 * Returns str.
 */
2589  static VALUE
2590  rb_str_each_line(argc, argv, str)
2591      int argc;
2592      VALUE *argv;
2593      VALUE str;
2594  {
2595      VALUE rs;
2596      int newline;
2597      char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s;
          /* ptr/len snapshot the buffer so modification by the block is detected */
2598      char *ptr = p;
2599      long len = RSTRING(str)->len, rslen;
2600      VALUE line;
2601  
2602      if (rb_scan_args(argc, argv, "01", &rs) == 0) {
2603          rs = rb_rs;
2604      }
2605  
2606      if (NIL_P(rs)) {
2607          rb_yield(str);
2608          return str;
2609      }
2610      StringValue(rs);
2611      rslen = RSTRING(rs)->len;
          /* newline is the last byte of the separator, used as a cheap first test */
2612      if (rslen == 0) {
2613          newline = '\n';
2614      }
2615      else {
2616          newline = RSTRING(rs)->ptr[rslen-1];
2617      }
2618  
          /* s marks the start of the current piece; p scans ahead of it */
2619      for (s = p, p += rslen; p < pend; p++) {
2620          if (rslen == 0 && *p == '\n') {
                  /* paragraph mode: a single '\n' does not end the piece */
2621              if (*++p != '\n') continue;
2622              while (*p == '\n') p++;
2623          }
2624          if (p[-1] == newline &&
2625              (rslen <= 1 ||
2626               rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) {
2627              line = rb_str_new5(str, s, p - s);
2628              OBJ_INFECT(line, str);
2629              rb_yield(line);
2630              if (RSTRING(str)->ptr != ptr || RSTRING(str)->len != len)
2631                  rb_raise(rb_eArgError, "string modified");
2632              s = p;
2633          }
2634      }
2635  
          /* yield the unterminated tail, if any */
2636      if (s != pend) {
2637          if (p > pend) p = pend;
2638          line = rb_str_new5(str, s, p - s);
2639          OBJ_INFECT(line, str);
2640          rb_yield(line);
2641      }
2642  
2643      return str;
2644  }
2645  
/*
 * Implementation of String#each_byte (registered in Init_String).
 * Yields every byte of str as a Fixnum in the range 0..255.  The length
 * is re-read on each iteration, so a block that resizes str changes how
 * many bytes are visited.  Returns str.
 */
2646  static VALUE
2647  rb_str_each_byte(str)
2648      VALUE str;
2649  {
2650      long pos = 0;
2651      while (pos < RSTRING(str)->len) {
2652          int byte = RSTRING(str)->ptr[pos] & 0xff;
2653          rb_yield(INT2FIX(byte));
2654          pos++;
2655      }
2656      return str;
2657  }
2657  
/*
 * Implementation of String#chop! (registered in Init_String).
 * Removes the last byte of str in place; when that byte is '\n' and it
 * is preceded by '\r', both are removed.
 * Returns str, or Qnil when str was already empty.
 */
2658  static VALUE
2659  rb_str_chop_bang(str)
2660      VALUE str;
2661  {
2662      long last;
2663  
2664      if (RSTRING(str)->len <= 0) return Qnil;
2665  
2666      rb_str_modify(str);
2667      last = --RSTRING(str)->len;
          /* last now indexes the byte that was just cut off */
2668      if (last > 0 && RSTRING(str)->ptr[last] == '\n' &&
2669          RSTRING(str)->ptr[last-1] == '\r') {
2670          RSTRING(str)->len = --last;
2671      }
2672      RSTRING(str)->ptr[last] = '\0';
2673      return str;
2674  }
2676  
/*
 * Implementation of String#chop (registered in Init_String).
 * Runs rb_str_chop_bang() on a duplicate of str and returns the
 * duplicate, even when there was nothing to remove.
 */
2677  static VALUE
2678  rb_str_chop(str)
2679      VALUE str;
2680  {
2681      str = rb_str_dup(str);
2682      rb_str_chop_bang(str);
2683      return str;
2684  }
2685  
/*
 * Implementation of the global function "chop!" (registered in
 * Init_String).  The str parameter is not used: rb_str_chop_bang() is
 * applied to the value returned by uscore_get(), and its result is
 * returned.
 * NOTE(review): uscore_get() presumably fetches $_ -- it is defined
 * outside this part of the file.
 */
2686  static VALUE
2687  rb_f_chop_bang(str)
2688      VALUE str;
2689  {
2690      return rb_str_chop_bang(uscore_get());
2691  }
2692  
/*
 * Implementation of the global function "chop" (registered in
 * Init_String).  When the string returned by uscore_get() is non-empty,
 * a chopped duplicate is stored with rb_lastline_set() and returned;
 * an empty string is returned as is.
 */
2693  static VALUE
2694  rb_f_chop()
2695  {
2696      VALUE line = uscore_get();
2697  
2698      if (RSTRING(line)->len <= 0) return line;
2699  
2700      line = rb_str_dup(line);
2701      rb_str_chop_bang(line);
2702      rb_lastline_set(line);
2703      return line;
2704  }
2705  
/*
 * Implementation of String#chomp! (registered in Init_String).
 * Removes a trailing record separator from str in place.
 *
 *   - No argument: the separator is rb_rs.  When rb_rs is rb_default_rs
 *     the "smart" path removes one trailing "\n", "\r\n" or "\r".
 *   - nil separator: nothing is removed.
 *   - Empty separator: every trailing '\n' is removed, together with a
 *     '\r' directly in front of it.
 *   - Separator exactly "\n": handled by the smart path as well.
 *   - Anything else: removed once when str ends with it.
 *
 * Returns str when it was shortened, Qnil otherwise.
 *
 * NOTE(review): p and len are captured before rb_str_modify() is called;
 * if rb_str_modify() swaps in a new buffer, p still refers to the
 * earlier one.  It is only read from, never written through.
 */
2706  static VALUE
2707  rb_str_chomp_bang(argc, argv, str)
2708      int argc;
2709      VALUE *argv;
2710      VALUE str;
2711  {
2712      VALUE rs;
2713      int newline;
2714      char *p = RSTRING(str)->ptr;
2715      long len = RSTRING(str)->len, rslen;
2716  
2717      if (rb_scan_args(argc, argv, "01", &rs) == 0) {
2718          if (len == 0) return Qnil;
2719          rs = rb_rs;
2720          if (rs == rb_default_rs) {
                /* also entered by goto from below when the separator is "\n" */
2721            smart_chomp:
2722              rb_str_modify(str);
2723              if (RSTRING(str)->ptr[len-1] == '\n') {
2724                  RSTRING(str)->len--;
2725                  if (RSTRING(str)->len > 0 &&
2726                      RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') {
2727                      RSTRING(str)->len--;
2728                  }
2729              }
2730              else if (RSTRING(str)->ptr[len-1] == '\r') {
2731                  RSTRING(str)->len--;
2732              }
2733              else {
2734                  return Qnil;
2735              }
2736              RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2737              return str;
2738          }
2739      }
2740      if (NIL_P(rs)) return Qnil;
2741      if (len == 0) return Qnil;
2742  
2743      StringValue(rs);
2744      rb_str_modify(str);
2745      rslen = RSTRING(rs)->len;
2746      if (rslen == 0) {
              /* empty separator: strip every trailing newline */
2747          while (len>0 && p[len-1] == '\n') {
2748              len--;
2749              if (len>0 && p[len-1] == '\r')
2750                  len--;
2751          }
2752          if (len < RSTRING(str)->len) {
2753              rb_str_modify(str);
2754              RSTRING(str)->len = len;
2755              RSTRING(str)->ptr[len] = '\0';
2756              return str;
2757          }
2758          return Qnil;
2759      }
2760      if (rslen > len) return Qnil;
          /* last byte of the separator: cheap test before the full compare */
2761      newline = RSTRING(rs)->ptr[rslen-1];
2762      if (rslen == 1 && newline == '\n')
2763          goto smart_chomp;
2764  
2765      if (p[len-1] == newline &&
2766          (rslen <= 1 ||
2767           rb_memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) {
2768          rb_str_modify(str);
2769          RSTRING(str)->len -= rslen;
2770          RSTRING(str)->ptr[RSTRING(str)->len] = '\0';
2771          return str;
2772      }
2773      return Qnil;
2774  }
2775  
/*
 * Implementation of String#chomp (registered in Init_String).
 * Runs rb_str_chomp_bang() with the same arguments on a duplicate of
 * str and returns the duplicate, whether or not anything was removed.
 */
2776  static VALUE
2777  rb_str_chomp(argc, argv, str)
2778      int argc;
2779      VALUE *argv;
2780      VALUE str;
2781  {
2782      str = rb_str_dup(str);
2783      rb_str_chomp_bang(argc, argv, str);
2784      return str;
2785  }
2786  
/*
 * Implementation of the global function "chomp!" (registered in
 * Init_String).  Applies rb_str_chomp_bang() to the value returned by
 * uscore_get() and returns its result.
 * NOTE(review): uscore_get() presumably fetches $_ -- it is defined
 * outside this part of the file.
 */
2787  static VALUE
2788  rb_f_chomp_bang(argc, argv)
2789      int argc;
2790      VALUE *argv;
2791  {
2792      return rb_str_chomp_bang(argc, argv, uscore_get());
2793  }
2794  
/*
 * Implementation of the global function "chomp" (registered in
 * Init_String).  Chomps a duplicate of the string returned by
 * uscore_get().  When the duplicate was shortened it is stored with
 * rb_lastline_set() and returned; otherwise the original string is
 * returned and nothing is stored.
 */
2795  static VALUE
2796  rb_f_chomp(argc, argv)
2797      int argc;
2798      VALUE *argv;
2799  {
2800      VALUE line = uscore_get();
2801      VALUE copy = rb_str_dup(line);
2802  
2803      if (NIL_P(rb_str_chomp_bang(argc, argv, copy))) return line;
2804  
2805      rb_lastline_set(copy);
2806      return copy;
2807  }
2808  
/*
 * Implementation of String#lstrip! (registered in Init_String).
 * Removes leading bytes for which ISSPACE() is true by moving the rest
 * of the buffer to the front.
 * Returns str when something was removed, Qnil otherwise (including
 * when str is empty).
 */
2809  static VALUE
2810  rb_str_lstrip_bang(str)
2811      VALUE str;
2812  {
2813      char *head, *cur, *tail;
2814  
2815      rb_str_modify(str);
2816      head = cur = RSTRING(str)->ptr;
2817      if (!head || RSTRING(str)->len == 0) return Qnil;
2818      tail = head + RSTRING(str)->len;
2819  
2820      for (; cur < tail; cur++) {
2821          if (!ISSPACE(*cur)) break;
2822      }
2823      if (cur == head) return Qnil;
2824  
2825      RSTRING(str)->len = tail - cur;
2826      memmove(head, cur, RSTRING(str)->len);
2827      head[RSTRING(str)->len] = '\0';
2828      return str;
2829  }
2830  
/*
 * Implementation of String#lstrip (registered in Init_String).
 * Runs rb_str_lstrip_bang() on a duplicate of str and returns the
 * duplicate, whether or not anything was removed.
 */
2831  static VALUE
2832  rb_str_lstrip(str)
2833      VALUE str;
2834  {
2835      str = rb_str_dup(str);
2836      rb_str_lstrip_bang(str);
2837      return str;
2838  }
2839  
/*
 * Implementation of String#rstrip! (registered in Init_String).
 * Removes trailing bytes for which ISSPACE() is true by shortening the
 * string and re-terminating it.
 * Returns str when something was removed, Qnil otherwise (including
 * when str is empty).
 */
2840  static VALUE
2841  rb_str_rstrip_bang(str)
2842      VALUE str;
2843  {
2844      char *head;
2845      long kept;
2846  
2847      rb_str_modify(str);
2848      head = RSTRING(str)->ptr;
2849      if (!head || RSTRING(str)->len == 0) return Qnil;
2850  
2851      kept = RSTRING(str)->len;
2852      while (kept > 0 && ISSPACE(head[kept-1])) kept--;
2853      if (kept == RSTRING(str)->len) return Qnil;
2854  
2855      RSTRING(str)->len = kept;
2856      head[kept] = '\0';
2857      return str;
2858  }
2861  
/*
 * Implementation of String#rstrip (registered in Init_String).
 * Runs rb_str_rstrip_bang() on a duplicate of str and returns the
 * duplicate, whether or not anything was removed.
 */
2862  static VALUE
2863  rb_str_rstrip(str)
2864      VALUE str;
2865  {
2866      str = rb_str_dup(str);
2867      rb_str_rstrip_bang(str);
2868      return str;
2869  }
2870  
/*
 * Implementation of String#strip! (registered in Init_String).
 * Strips the left side and then the right side of str in place; both
 * passes always run.
 * Returns str when either pass removed something, Qnil otherwise.
 */
2871  static VALUE
2872  rb_str_strip_bang(str)
2873      VALUE str;
2874  {
2875      int changed = 0;
2876  
2877      if (!NIL_P(rb_str_lstrip_bang(str))) changed = 1;
2878      if (!NIL_P(rb_str_rstrip_bang(str))) changed = 1;
2879      return changed ? str : Qnil;
2880  }
2881  
/*
 * Implementation of String#strip (registered in Init_String).
 * Runs rb_str_strip_bang() on a duplicate of str and returns the
 * duplicate, whether or not anything was removed.
 */
2882  static VALUE
2883  rb_str_strip(str)
2884      VALUE str;
2885  {
2886      str = rb_str_dup(str);
2887      rb_str_strip_bang(str);
2888      return str;
2889  }
2890  
/*
 * One step of a scan: searches str for pat starting at offset *start.
 *
 * On a match, *start is advanced to the end of the match (plus the
 * length of one character, as reported by mbclen2(), when the match is
 * empty) and the function returns
 *   - the text of the whole match when the pattern has no groups
 *     (regs->num_regs == 1), or
 *   - a new Array holding the text of groups 1..num_regs-1.
 * Returns Qnil when rb_reg_search() finds nothing.
 */
2891  static VALUE
2892  scan_once(str, pat, start)
2893      VALUE str, pat;
2894      long *start;
2895  {
2896      VALUE result, match;
          /* NOTE(review): BEG()/END() appear to read this local by name */
2897      struct re_registers *regs;
2898      long i;
2899  
2900      if (rb_reg_search(pat, str, *start, 0) >= 0) {
2901          match = rb_backref_get();
2902          regs = RMATCH(match)->regs;
2903          if (BEG(0) == END(0)) {
2904              /*
2905               * Always consume at least one character of the input string
2906               */
2907              *start = END(0)+mbclen2(RSTRING(str)->ptr[END(0)],pat);
2908          }
2909          else {
2910              *start = END(0);
2911          }
2912          if (regs->num_regs == 1) {
2913              return rb_reg_nth_match(0, match);
2914          }
2915          result = rb_ary_new2(regs->num_regs);
2916          for (i=1; i < regs->num_regs; i++) {
2917              rb_ary_push(result, rb_reg_nth_match(i, match));
2918          }
2919  
2920          return result;
2921      }
2922      return Qnil;
2923  }
2924  
/*
 * Implementation of String#scan (registered in Init_String).
 *
 * Repeatedly applies scan_once() to str with the pattern obtained from
 * get_pat(pat, 1).
 *   - Without a block: collects every result in a new Array and returns
 *     it.
 *   - With a block: marks each match busy, yields the result, then
 *     restores the back-reference the block may have replaced; returns
 *     str.
 * In both cases the back-reference is finally set to the last successful
 * match, or to nil when nothing matched.
 */
2925  static VALUE
2926  rb_str_scan(str, pat)
2927      VALUE str, pat;
2928  {
2929      VALUE found;
2930      VALUE last = Qnil;
2931      VALUE collected = Qnil;
2932      long pos = 0;
2933      int yielding;
2934  
2935      pat = get_pat(pat, 1);
2936      yielding = rb_block_given_p();
2937      if (!yielding) collected = rb_ary_new();
2938      for (;;) {
2939          found = scan_once(str, pat, &pos);
2940          if (NIL_P(found)) break;
2941          last = rb_backref_get();
2942          if (yielding) {
2943              rb_match_busy(last);
2944              rb_yield(found);
2945              rb_backref_set(last);  /* restore $~ value */
2946          }
2947          else {
2948              rb_ary_push(collected, found);
2949          }
2950      }
2951      rb_backref_set(last);
2952      return yielding ? str : collected;
2953  }
2954  
/*
 * Implementation of the global function "scan" (registered in
 * Init_String).  self is not used: rb_str_scan() is applied to the value
 * returned by uscore_get().
 * NOTE(review): uscore_get() presumably fetches $_ -- it is defined
 * outside this part of the file.
 */
2955  static VALUE
2956  rb_f_scan(self, pat)
2957      VALUE self, pat;
2958  {
2959      return rb_str_scan(uscore_get(), pat);
2960  }
2961  
/*
 * Implementation of String#hex (registered in Init_String).
 * Converts str with rb_str_to_inum() using base 16 and Qfalse as the
 * third argument.
 * NOTE(review): the third argument is presumably a "raise on malformed
 * input" switch -- confirm against rb_str_to_inum().
 */
2962  static VALUE
2963  rb_str_hex(str)
2964      VALUE str;
2965  {
2966      return rb_str_to_inum(str, 16, Qfalse);
2967  }
2968  
/*
 * Implementation of String#oct (registered in Init_String).
 * Converts str with rb_str_to_inum() using base -8 and Qfalse as the
 * third argument.
 * NOTE(review): the negative base presumably means "octal by default,
 * but honour an explicit radix prefix" -- confirm against
 * rb_str_to_inum().
 */
2969  static VALUE
2970  rb_str_oct(str)
2971      VALUE str;
2972  {
2973      return rb_str_to_inum(str, -8, Qfalse);
2974  }
2975  
/*
 * Implementation of String#crypt (registered in Init_String).
 * Passes str (or "" when str has no buffer) and salt to the C library's
 * crypt() and wraps the result in a new String, which is marked with
 * OBJ_INFECT() from both str and salt.
 * Raises ArgumentError when salt is shorter than two bytes.
 */
2976  static VALUE
2977  rb_str_crypt(str, salt)
2978      VALUE str, salt;
2979  {
2980      extern char *crypt();
2981      char *key;
2982      VALUE hashed;
2983  
2984      StringValue(salt);
2985      if (RSTRING(salt)->len < 2)
2986          rb_raise(rb_eArgError, "salt too short(need >=2 bytes)");
2987  
2988      key = RSTRING(str)->ptr ? RSTRING(str)->ptr : "";
2989      hashed = rb_str_new2(crypt(key, RSTRING(salt)->ptr));
2990      OBJ_INFECT(hashed, str);
2991      OBJ_INFECT(hashed, salt);
2992      return hashed;
2993  }
2995  
/*
 * Implementation of String#intern (registered in Init_String).
 * Returns the Symbol for the contents of str.
 * Raises ArgumentError when str is empty, or when its C-string length
 * differs from its recorded length (that is, it contains a NUL byte).
 */
2996  static VALUE
2997  rb_str_intern(str)
2998      VALUE str;
2999  {
3000      char *name = RSTRING(str)->ptr;
3001      ID sym;
3002  
3003      if (!name || RSTRING(str)->len == 0)
3004          rb_raise(rb_eArgError, "interning empty string");
3005      if (strlen(name) != RSTRING(str)->len)
3006          rb_raise(rb_eArgError, "string contains `\\0'");
3007      sym = rb_intern(name);
3008      return ID2SYM(sym);
3009  }
3010  
/*
 * Implementation of String#sum (registered in Init_String).
 *
 * Adds up the bytes of str and masks the total with 2**bits - 1, where
 * bits is the optional argument (default 16).  A bits value of 0 leaves
 * the total unmasked (the mask becomes all ones).
 *
 * Bytes are read as unsigned char, so values 0x80..0xff contribute
 * 128..255 whether or not plain char is signed on the platform.
 *
 * When bits is at least the width of a long the arithmetic is done with
 * Ruby Integer methods; this also keeps the native shift below strictly
 * narrower than its operand type.  NOTE(review): because that comparison
 * is made against an unsigned sizeof expression, a negative bits value
 * is expected to take the Integer path as well.
 */
3011  static VALUE
3012  rb_str_sum(argc, argv, str)
3013      int argc;
3014      VALUE *argv;
3015      VALUE str;
3016  {
3017      VALUE vbits;
3018      int bits;
3019      char *p, *pend;
3020  
3021      if (rb_scan_args(argc, argv, "01", &vbits) == 0) {
3022          bits = 16;
3023      }
3024      else bits = NUM2INT(vbits);
3025  
3026      p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len;
3027      if (bits >= sizeof(long)*CHAR_BIT) {
3028          VALUE res = INT2FIX(0);
3029          VALUE mod;
3030  
3031          mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
3032          mod = rb_funcall(mod, '-', 1, INT2FIX(1));
3033  
3034          while (p < pend) {
3035              res = rb_funcall(res, '+', 1, INT2FIX((unsigned char)*p));
3036              p++;
3037          }
3038          res = rb_funcall(res, '&', 1, mod);
3039          return res;
3040      }
3041      else {
3042          unsigned long res = 0;
              /* bits < width of long here, so the shift is well defined */
3043          unsigned long mod = (1UL<<bits)-1;
3044  
3045          if (mod == 0) {
3046              mod = -1;
3047          }
3048          while (p < pend) {
3049              res += (unsigned char)*p;
3050              p++;
3051          }
3052          res &= mod;
              /* unsigned conversion: a large total must not turn negative */
3053          return rb_uint2inum(res);
3054      }
3055  }
3056  
/*
 * Implementation of String#ljust (registered in Init_String).
 * Returns a new string of length w holding str followed by spaces.
 * When w is negative or not larger than str's length, a plain duplicate
 * of str is returned instead.
 */
3057  static VALUE
3058  rb_str_ljust(str, w)
3059      VALUE str;
3060      VALUE w;
3061  {
3062      long width = NUM2LONG(w);
3063      long len = RSTRING(str)->len;
3064      VALUE padded;
3065      long n;
3066  
3067      if (width < 0 || len >= width) return rb_str_dup(str);
3068      padded = rb_str_new5(str, 0, width);
3069      memcpy(RSTRING(padded)->ptr, RSTRING(str)->ptr, len);
3070      for (n = len; n < width; n++) {
3071          RSTRING(padded)->ptr[n] = ' ';
3072      }
3073      OBJ_INFECT(padded, str);
3074      return padded;
3075  }
3076  
/*
 * Implementation of String#rjust (registered in Init_String).
 * Returns a new string of length w holding spaces followed by str.
 * When w is negative or not larger than str's length, a plain duplicate
 * of str is returned instead.
 */
3077  static VALUE
3078  rb_str_rjust(str, w)
3079      VALUE str;
3080      VALUE w;
3081  {
3082      long width = NUM2LONG(w);
3083      long len = RSTRING(str)->len;
3084      VALUE padded;
3085      long fill, n;
3086  
3087      if (width < 0 || len >= width) return rb_str_dup(str);
3088      padded = rb_str_new5(str, 0, width);
3089      fill = width - len;
3090      for (n = 0; n < fill; n++) RSTRING(padded)->ptr[n] = ' ';
3091      memcpy(RSTRING(padded)->ptr + fill, RSTRING(str)->ptr, len);
3092      OBJ_INFECT(padded, str);
3093      return padded;
3094  }
3096  
/*
 * Implementation of String#center (registered in Init_String).
 * Returns a new string of length w with str in the middle and spaces on
 * both sides; when the padding is odd the extra space goes on the
 * right.  When w is negative or not larger than str's length, a plain
 * duplicate of str is returned instead.
 */
3097  static VALUE
3098  rb_str_center(str, w)
3099      VALUE str;
3100      VALUE w;
3101  {
3102      long width = NUM2LONG(w);
3103      long len = RSTRING(str)->len;
3104      VALUE padded;
3105      char *out;
3106      long left, n;
3107  
3108      if (width < 0 || len >= width) return rb_str_dup(str);
3109      padded = rb_str_new5(str, 0, width);
3110      out = RSTRING(padded)->ptr;
3111      left = (width - len)/2;
3112  
3113      for (n = 0; n < left; n++) out[n] = ' ';
3114      memcpy(out + left, RSTRING(str)->ptr, len);
3115      for (n = left + len; n < width; n++) out[n] = ' ';
3116  
3117      OBJ_INFECT(padded, str);
3118      return padded;
3119  }
3122  
/*
 * Setter hook for global variables that may only hold a String or nil
 * (installed for "$;" and "$-F" in Init_String).
 * Stores val in *var; raises TypeError, naming the variable through
 * rb_id2name(id), when val is neither nil nor a T_STRING.
 */
3123  void
3124  rb_str_setter(val, id, var)
3125      VALUE val;
3126      ID id;
3127      VALUE *var;
3128  {
3129      int acceptable = NIL_P(val) || TYPE(val) == T_STRING;
3130      if (!acceptable)
3131          rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
3132      *var = val;
3133  }
3134  
/*
 * Defines the String class and wires every method implemented in this
 * file to its Ruby-visible name.
 *
 * The last argument of each rb_define_* call is the argument count the
 * C function is registered with; the registrations using -1 all point
 * at functions that take (argc, argv, ...).
 *
 * Besides the instance methods, this also registers the global
 * functions sub, gsub, sub!, gsub!, chop, chop!, chomp, chomp!, split
 * and scan, caches the ID of "to_s" in id_to_s, and installs rb_fs as
 * the storage behind "$;" and "$-F" with rb_str_setter as setter.
 */
3135  void
3136  Init_String()
3137  {
3138      rb_cString  = rb_define_class("String", rb_cObject);
3139      rb_include_module(rb_cString, rb_mComparable);
3140      rb_include_module(rb_cString, rb_mEnumerable);
3141      rb_define_singleton_method(rb_cString, "allocate", rb_str_s_alloc, 0);
3142      rb_define_method(rb_cString, "initialize", rb_str_init, -1);
          /* "become" and "replace" (below) share one implementation */
3143      rb_define_method(rb_cString, "become", rb_str_replace, 1); 
3144      rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
3145      rb_define_method(rb_cString, "==", rb_str_equal, 1);
3146      rb_define_method(rb_cString, "===", rb_str_equal, 1);
3147      rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
3148      rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
3149      rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
3150      rb_define_method(rb_cString, "+", rb_str_plus, 1);
3151      rb_define_method(rb_cString, "*", rb_str_times, 1);
3152      rb_define_method(rb_cString, "%", rb_str_format, 1);
3153      rb_define_method(rb_cString, "[]", rb_str_aref_m, -1);
3154      rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1);
3155      rb_define_method(rb_cString, "insert", rb_str_insert, 2);
3156      rb_define_method(rb_cString, "length", rb_str_length, 0);
3157      rb_define_method(rb_cString, "size", rb_str_length, 0);
3158      rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
3159      rb_define_method(rb_cString, "=~", rb_str_match, 1);
3160      rb_define_method(rb_cString, "~", rb_str_match2, 0);
3161      rb_define_method(rb_cString, "match", rb_str_match_m, 1);
3162      rb_define_method(rb_cString, "succ", rb_str_succ, 0);
3163      rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
3164      rb_define_method(rb_cString, "next", rb_str_succ, 0);
3165      rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
3166      rb_define_method(rb_cString, "upto", rb_str_upto_m, 1);
3167      rb_define_method(rb_cString, "index", rb_str_index_m, -1);
3168      rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
3169      rb_define_method(rb_cString, "replace", rb_str_replace, 1);
3170  
          /* conversions */
3171      rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
3172      rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
3173      rb_define_method(rb_cString, "to_s", rb_str_to_s, 0);
3174      rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
3175      rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
3176      rb_define_method(rb_cString, "dump", rb_str_dump, 0);
3177  
          /* case conversion: copying forms, then in-place forms */
3178      rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
3179      rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
3180      rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
3181      rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
3182  
3183      rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0);
3184      rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0);
3185      rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0);
3186      rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0);
3187  
3188      rb_define_method(rb_cString, "hex", rb_str_hex, 0);
3189      rb_define_method(rb_cString, "oct", rb_str_oct, 0);
3190      rb_define_method(rb_cString, "split", rb_str_split_m, -1);
3191      rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
3192      rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
3193      rb_define_method(rb_cString, "concat", rb_str_concat, 1);
3194      rb_define_method(rb_cString, "<<", rb_str_concat, 1);
3195      rb_define_method(rb_cString, "crypt", rb_str_crypt, 1);
3196      rb_define_method(rb_cString, "intern", rb_str_intern, 0);
3197  
3198      rb_define_method(rb_cString, "include?", rb_str_include, 1);
3199  
3200      rb_define_method(rb_cString, "scan", rb_str_scan, 1);
3201  
3202      rb_define_method(rb_cString, "ljust", rb_str_ljust, 1);
3203      rb_define_method(rb_cString, "rjust", rb_str_rjust, 1);
3204      rb_define_method(rb_cString, "center", rb_str_center, 1);
3205  
          /* substitution and trimming: copying forms, then in-place forms */
3206      rb_define_method(rb_cString, "sub", rb_str_sub, -1);
3207      rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
3208      rb_define_method(rb_cString, "chop", rb_str_chop, 0);
3209      rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
3210      rb_define_method(rb_cString, "strip", rb_str_strip, 0);
3211      rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
3212      rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
3213  
3214      rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1);
3215      rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
3216      rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
3217      rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
3218      rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
3219      rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
3220      rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
3221  
          /* character-set operations: copying forms, then in-place forms */
3222      rb_define_method(rb_cString, "tr", rb_str_tr, 2);
3223      rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2);
3224      rb_define_method(rb_cString, "delete", rb_str_delete, -1);
3225      rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
3226      rb_define_method(rb_cString, "count", rb_str_count, -1);
3227  
3228      rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2);
3229      rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2);
3230      rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1);
3231      rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1);
3232  
          /* iteration; "each" is the same function as "each_line" */
3233      rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
3234      rb_define_method(rb_cString, "each", rb_str_each_line, -1);
3235      rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
3236  
3237      rb_define_method(rb_cString, "sum", rb_str_sum, -1);
3238  
          /* global functions; the rb_f_* versions visible in this file call uscore_get() */
3239      rb_define_global_function("sub", rb_f_sub, -1);
3240      rb_define_global_function("gsub", rb_f_gsub, -1);
3241  
3242      rb_define_global_function("sub!", rb_f_sub_bang, -1);
3243      rb_define_global_function("gsub!", rb_f_gsub_bang, -1);
3244  
3245      rb_define_global_function("chop", rb_f_chop, 0);
3246      rb_define_global_function("chop!", rb_f_chop_bang, 0);
3247  
3248      rb_define_global_function("chomp", rb_f_chomp, -1);
3249      rb_define_global_function("chomp!", rb_f_chomp_bang, -1);
3250  
3251      rb_define_global_function("split", rb_f_split, -1);
3252      rb_define_global_function("scan", rb_f_scan, 1);
3253  
3254      rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
3255      rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1);
3256  
3257      id_to_s = rb_intern("to_s");
3258  
          /* default field separator consulted by rb_str_split_m(); starts out nil */
3259      rb_fs = Qnil;
3260      rb_define_hooked_variable("$;", &rb_fs, 0, rb_str_setter);
3261      rb_define_hooked_variable("$-F", &rb_fs, 0, rb_str_setter);
3262  }