Skip to content

Commit a8ea1ad

Browse files
committed
first pass cleaning up Exception messaging before saving to Redis
thanks to @brianmario for the code to do the UTF8 sanitization.
1 parent 598b470 commit a8ea1ad

File tree

5 files changed

+125
-1
lines changed

5 files changed

+125
-1
lines changed

lib/resque.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
require 'resque/worker'
1414
require 'resque/plugin'
1515

16+
require 'resque/vendor/utf8_util'
17+
1618
module Resque
1719
include Helpers
1820
extend self

lib/resque/failure/redis.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def save
88
:failed_at => Time.now.strftime("%Y/%m/%d %H:%M:%S %Z"),
99
:payload => payload,
1010
:exception => exception.class.to_s,
11-
:error => exception.to_s,
11+
:error => UTF8Util.clean(exception.to_s),
1212
:backtrace => filter_backtrace(Array(exception.backtrace)),
1313
:worker => worker.to_s,
1414
:queue => queue

lib/resque/vendor/utf8_util.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
module UTF8Util
  # Use '?' instead of the Unicode replacement character: that character is
  # 3 bytes long in UTF-8 and can grow the string noticeably when many
  # replacements are made.
  #
  # Frozen so the constant, which every implementation reads, cannot be
  # mutated by accident.
  REPLACEMENT_CHAR = "?".freeze

  # Replace invalid UTF-8 character sequences with REPLACEMENT_CHAR.
  #
  # This definition is only a placeholder. The Ruby-version-specific file
  # required after this module is expected to redefine it.
  #
  # str - The String to clean in place.
  #
  # Returns str as valid UTF-8.
  # Raises NotImplementedError until an implementation has been loaded.
  def self.clean!(str)
    raise NotImplementedError
  end

  # Replace invalid UTF-8 character sequences with REPLACEMENT_CHAR.
  #
  # str - The String to clean. It is duplicated first, so the caller's
  #       object is not modified by this method.
  #
  # Returns a copy of str as valid UTF-8.
  def self.clean(str)
    clean!(str.dup)
  end
end
21+
22+
if RUBY_VERSION <= '1.9'
23+
require 'resque/vendor/utf8_util/utf8_util_18'
24+
else
25+
require 'resque/vendor/utf8_util/utf8_util_19'
26+
end
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
require 'strscan'
2+
3+
module UTF8Util
  # Matches any single byte with the high bit set, i.e. any byte that is part
  # of a multi-byte UTF-8 sequence. The /n flag makes the pattern explicitly
  # byte-oriented.
  HIGH_BIT_RANGE = /[\x80-\xff]/n

  # Check if this String is valid UTF-8.
  #
  # str - The String whose bytes are inspected.
  #
  # Returns true or false.
  def self.valid?(str)
    sc = StringScanner.new(str)

    while sc.skip_until(HIGH_BIT_RANGE)
      # skip_until leaves the pointer just past the match; step back onto it.
      sc.pos -= 1
      return false unless sequence_length(sc)
    end

    true
  end

  # Replace invalid UTF-8 character sequences with a replacement character.
  # Every byte that does not belong to a well-formed sequence is overwritten
  # individually, so the byte length of str never changes.
  #
  # str - The String to clean in place.
  #
  # Returns self as valid UTF-8.
  def self.clean!(str)
    sc = StringScanner.new(str)

    while sc.skip_until(HIGH_BIT_RANGE)
      pos = sc.pos = sc.pos - 1
      str[pos] = REPLACEMENT_CHAR unless sequence_length(sc)
    end

    str
  end

  # Validate the UTF-8 sequence at the current scanner position.
  #
  # On success the scanner is left just past the sequence. On failure it is
  # left one byte past the starting position, so the caller can deal with the
  # offending byte and keep scanning. A sequence cut short by the end of the
  # string is reported as invalid.
  #
  # scanner - StringScanner instance so we can advance the pointer as we verify.
  #
  # Returns the length in bytes of this UTF-8 sequence, false if invalid.
  def self.sequence_length(scanner)
    start = scanner.pos
    leader = next_byte(scanner)
    return false unless leader

    # The leading byte's high bits announce the total sequence length.
    length =
      if (leader >> 5) == 0x6
        2
      elsif (leader >> 4) == 0x0e
        3
      elsif (leader >> 3) == 0x1e
        4
      end

    if length
      (length - 1).times do
        next if check_next_sequence(scanner)

        # Bad or missing continuation byte: resume right after the leader.
        scanner.pos = start + 1
        return false
      end

      return length
    end

    scanner.pos = start + 1
    false
  end

  # Read another byte off the scanner, moving the scan position forward one
  # place when a byte is available.
  #
  # NOTE: a bare `private` does not apply to `def self.` methods, so this
  # method has always been publicly callable and is kept that way.
  #
  # scanner - StringScanner positioned at the byte to check.
  #
  # Returns true if the byte is a UTF-8 continuation byte (10xxxxxx), false
  # otherwise or when the end of the string has been reached.
  def self.check_next_sequence(scanner)
    byte = next_byte(scanner)
    byte ? (byte >> 6) == 0x2 : false
  end

  # Read one byte off the scanner as an Integer.
  #
  # scanner - StringScanner to read from.
  #
  # Returns the byte value (0-255), or nil at the end of the string.
  def self.next_byte(scanner)
    char = scanner.get_byte
    char && char.unpack('C').first
  end
  private_class_method :next_byte
end
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
module UTF8Util
  # Replace invalid UTF-8 character sequences with REPLACEMENT_CHAR.
  #
  # The bytes of str are interpreted as UTF-8. Well-formed characters,
  # including multi-byte ones, are kept as they are; each byte that is not
  # part of a well-formed character is replaced individually. Transcoding
  # from "binary" is deliberately avoided: every byte above 0x7f is undefined
  # in that conversion, so it would replace valid non-ASCII text as well.
  #
  # str - The String to clean in place.
  #
  # Returns self as valid UTF-8.
  def self.clean!(str)
    str.force_encoding("UTF-8")
    return str if str.valid_encoding?

    cleaned = str.each_char.map do |char|
      char.valid_encoding? ? char : REPLACEMENT_CHAR
    end

    # replace keeps the caller's object; re-tag it in case the joined result
    # picked up a different ASCII-compatible encoding.
    str.replace(cleaned.join).force_encoding("UTF-8")
  end
end

0 commit comments

Comments
 (0)