Quick (and dirty) Patch for Ruby Enterprise Edition 2011.03 to Prevent Hash Collision Attacks

As you may have heard, this week on December 28, 2011, a group of security experts released information about a nasty problem in almost all languages and platforms related to hash function collisions and possibility of using those for DoS attack on web applications.

Ruby core team released new 1.8.7-p357 version with the problem fixed. JRuby development team came out with the new 1.6.5.1 release. Unfortunately 2 days after the release there is still no information from Ruby Enterprise Edition team on what to do with all the ree deployments.

So, since there is no patch for Ruby Enterprise Edition 2011.03 to prevent an attack, I’ve quickly ported ruby core patch to be used with the latest ree release. Here it is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
From 4f69a748ab820c6a8bd204f94d13d970847f575c Mon Sep 17 00:00:00 2001
From: RPM Builder <rpmbuild@livingsocial.com>
Date: Thu, 29 Dec 2011 19:48:52 +0000
Subject: [PATCH] Backport randomized hash patch from upstream

---
 source/inits.c                  |    4 ++
 source/random.c                 |   74 ++++++++++++++++++++++++++++++---------
 source/st.c                     |   14 +++++++-
 source/string.c                 |    7 +++-
 source/test/ruby/test_string.rb |   13 +++++++
 source/version.c                |    2 +-
 6 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/source/inits.c b/source/inits.c
index 947bbbe..a0e061f 100644
--- a/source/inits.c
+++ b/source/inits.c
@@ -38,6 +38,7 @@ void Init_Precision _((void));
 void Init_sym _((void));
 void Init_process _((void));
 void Init_Random _((void));
+void Init_RandomSeed _((void));
 void Init_Range _((void));
 void Init_Regexp _((void));
 void Init_signal _((void));
@@ -46,10 +47,13 @@ void Init_Struct _((void));
 void Init_Time _((void));
 void Init_var_tables _((void));
 void Init_version _((void));
+void Init_st _((void));
 
 void
 rb_call_inits()
 {
+    Init_RandomSeed();
+    Init_st();
     Init_sym();
     Init_var_tables();
     Init_Object();
diff --git a/source/random.c b/source/random.c
index 258b0b2..790eda8 100644
--- a/source/random.c
+++ b/source/random.c
@@ -189,6 +189,7 @@ rb_genrand_real(void)
 #include <fcntl.h>
 #endif
 
+static int seed_initialized = 0;
 static VALUE saved_seed = INT2FIX(0);
 
 static VALUE
@@ -250,27 +251,22 @@ rand_init(vseed)
     return old;
 }
 
-static VALUE
-random_seed()
+#define DEFAULT_SEED_LEN (4 * sizeof(long))
+
+static void
+fill_random_seed(ptr)
+    char *ptr;
 {
     static int n = 0;
+    unsigned long *seed;
     struct timeval tv;
     int fd;
     struct stat statbuf;
+    char *buf = (char*)ptr;
 
-    int seed_len;
-    BDIGIT *digits;
-    unsigned long *seed;
-    NEWOBJ(big, struct RBignum);
-    OBJSETUP(big, rb_cBignum, T_BIGNUM);
-
-    seed_len = 4 * sizeof(long);
-    big->sign = 1;
-    big->len = seed_len / SIZEOF_BDIGITS + 1;
-    digits = big->digits = ALLOC_N(BDIGIT, big->len);
-    seed = (unsigned long *)big->digits;
+    seed = (unsigned long *)buf;
 
-    memset(digits, 0, big->len * SIZEOF_BDIGITS);
+    memset(buf, 0, DEFAULT_SEED_LEN);
 
 #ifdef S_ISCHR
     if ((fd = open("/dev/urandom", O_RDONLY
@@ -285,7 +281,7 @@ random_seed()
 #endif
             )) >= 0) {
         if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) {
-            read(fd, seed, seed_len);
+            read(fd, seed, DEFAULT_SEED_LEN);
         }
         close(fd);
     }
@@ -296,13 +292,37 @@ random_seed()
     seed[1] ^= tv.tv_sec;
     seed[2] ^= getpid() ^ (n++ << 16);
     seed[3] ^= (unsigned long)&seed;
+}
+
+static VALUE
+make_seed_value(char *ptr)
+{
+    BDIGIT *digits;
+    NEWOBJ(big, struct RBignum);
+    OBJSETUP(big, rb_cBignum, T_BIGNUM);
+
+    RBIGNUM_SET_SIGN(big, 1);
+
+    digits = ALLOC_N(char, DEFAULT_SEED_LEN);
+    RBIGNUM(big)->digits = digits;
+    RBIGNUM(big)->len = DEFAULT_SEED_LEN / SIZEOF_BDIGITS;
+
+    MEMCPY(digits, ptr, char, DEFAULT_SEED_LEN);
 
     /* set leading-zero-guard if need. */
-    digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0;
+    digits[RBIGNUM_LEN(big)-1] = digits[RBIGNUM_LEN(big)-2] <= 1 ? 1 : 0;
 
     return rb_big_norm((VALUE)big);
 }
 
+static VALUE
+random_seed(void)
+{
+    char buf[DEFAULT_SEED_LEN];
+    fill_random_seed(buf);
+    return make_seed_value(buf);
+}
+
 /*
  *  call-seq:
  *     srand(number=0)    => old_seed
@@ -443,6 +463,9 @@ rb_f_rand(argc, argv, obj)
     long val, max;
 
     rb_scan_args(argc, argv, "01", &vmax);
+    if (!seed_initialized) {
+       rand_init(random_seed());
+    }
     switch (TYPE(vmax)) {
       case T_FLOAT:
    if (RFLOAT(vmax)->value <= LONG_MAX && RFLOAT(vmax)->value >= LONG_MIN) {
@@ -490,10 +513,27 @@ rb_f_rand(argc, argv, obj)
     return LONG2NUM(val);
 }
 
+static char initial_seed[DEFAULT_SEED_LEN];
+
+void
+Init_RandomSeed(void)
+{
+    fill_random_seed(initial_seed);
+    init_by_array((unsigned long*)initial_seed, DEFAULT_SEED_LEN/sizeof(unsigned long));
+    seed_initialized = 1;
+}
+
+static void
+Init_RandomSeed2(void)
+{
+    saved_seed = make_seed_value(initial_seed);
+    memset(initial_seed, 0, DEFAULT_SEED_LEN);
+}
+
 void
 Init_Random()
 {
-    rand_init(random_seed());
+    Init_RandomSeed2();
     rb_define_global_function("srand", rb_f_srand, -1);
     rb_define_global_function("rand", rb_f_rand, -1);
     rb_global_variable(&saved_seed);
diff --git a/source/st.c b/source/st.c
index c16c310..21e157a 100644
--- a/source/st.c
+++ b/source/st.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #endif
 #include <string.h>
+#include <limits.h>
 #include "st.h"
 
 typedef struct st_table_entry st_table_entry;
@@ -521,6 +522,8 @@ st_foreach(table, func, arg)
     return 0;
 }
 
+static unsigned long hash_seed = 0;
+
 static int
 strhash(string)
     register const char *string;
@@ -550,10 +553,11 @@ strhash(string)
 
     return val + (val << 15);
 #else
-    register int val = 0;
+    register unsigned long val = hash_seed;
 
     while ((c = *string++) != '\0') {
    val = val*997 + c;
+   val = (val << 13) | (val >> (sizeof(st_data_t) * CHAR_BIT - 13));
     }
 
     return val + (val>>5);
@@ -573,3 +577,11 @@ numhash(n)
 {
     return n;
 }
+
+extern unsigned long rb_genrand_int32(void);
+
+void
+Init_st(void)
+{
+    hash_seed = rb_genrand_int32();
+}
diff --git a/source/string.c b/source/string.c
index c6b2301..94a0281 100644
--- a/source/string.c
+++ b/source/string.c
@@ -875,13 +875,15 @@ rb_str_concat(str1, str2)
     return str1;
 }
 
+static unsigned long hash_seed;
+
 int
 rb_str_hash(str)
     VALUE str;
 {
     register long len = RSTRING(str)->len;
     register char *p = RSTRING(str)->ptr;
-    register int key = 0;
+    register unsigned long key = hash_seed;
 
 #if defined(HASH_ELFHASH)
     register unsigned int g;
@@ -905,6 +907,7 @@ rb_str_hash(str)
     while (len--) {
    key = key*65599 + *p;
    p++;
+   key = (key << 13) | (key >> ((sizeof(unsigned long) * CHAR_BIT) - 13));
     }
     key = key + (key>>5);
 #endif
@@ -5062,4 +5065,6 @@ Init_String()
     rb_fs = Qnil;
     rb_define_variable("$;", &rb_fs);
     rb_define_variable("$-F", &rb_fs);
+
+    hash_seed = rb_genrand_int32();
 }
diff --git a/source/test/ruby/test_string.rb b/source/test/ruby/test_string.rb
index 5f2c54f..4d97182 100644
--- a/source/test/ruby/test_string.rb
+++ b/source/test/ruby/test_string.rb
@@ -1,4 +1,5 @@
 require 'test/unit'
+require File.expand_path('envutil', File.dirname(__FILE__))
 
 class TestString < Test::Unit::TestCase
   def check_sum(str, bits=16)
@@ -29,4 +30,16 @@ class TestString < Test::Unit::TestCase
   ensure
     $KCODE = original_kcode
   end
+
+  def test_hash_random
+    str = 'abc'
+    a = [str.hash.to_s]
+    cmd = sprintf("%s -e 'print %s.hash'", EnvUtil.rubybin, str.dump)
+    3.times {
+      IO.popen(cmd, "rb") {|o|
+        a << o.read
+      }
+    }
+    assert_not_equal([str.hash.to_s], a.uniq)
+  end
 end
diff --git a/source/version.c b/source/version.c
index 8b41cc9..5781cab 100644
--- a/source/version.c
+++ b/source/version.c
@@ -46,7 +46,7 @@ Init_version()
     rb_define_global_const("RUBY_PATCHLEVEL", INT2FIX(RUBY_PATCHLEVEL));
 
     snprintf(description, sizeof(description),
-             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s",
+             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s (with hash random)",
              RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_RELEASE_STR,
              RUBY_RELEASE_NUM, RUBY_PLATFORM,
              STACK_WIPE_SITES, REE_VERSION);
--
1.7.6.4

You can view it or download it from github. Disclaimer: This is provided as is, no guarantees are provided, etc.

For more information about the issue, here is a video of the official presentation:



Update #1: Updated the patch to apply cleanly on REE (w/o a need to switch directory to source).
Update #2: The patch is currently deployed in production at Scribd and is being tested and deployed on LivingSocial servers.
Update #3: Phusion released 2011.12 release of REE now, so this patch is not needed anymore.