Skip to content

Commit 8b7415d

Browse files
committed
Refactors scrape, adds output
AchievementScraper#scrape now simply delegates to get_response, scrape_recent, scrape_progress, and scrape_showcase. AchievementScraper#get_response handles the HTML retrieval on its own, which makes the individual sub-scrapes easier to test.
1 parent 6f0aa69 commit 8b7415d

File tree

2 files changed

+94
-16
lines changed

2 files changed

+94
-16
lines changed

lib/bnet_scraper/starcraft2/achievement_scraper.rb

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module BnetScraper
22
module Starcraft2
33
class AchievementScraper
4-
attr_reader :region, :bnet_id, :account, :bnet_index, :recent, :progress, :showcase
4+
attr_reader :region, :bnet_id, :account, :bnet_index, :recent, :progress, :showcase, :response
55
def initialize options = {}
66
if options[:url]
77
extracted_data = options[:url].match(/http:\/\/(.+)\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.+)\/achievements\//)
@@ -45,26 +45,33 @@ def region_info
4545
end
4646

4747
def scrape
48-
response = Nokogiri::HTML(open(achievement_url))
49-
@showcase = response.css("#showcase-module .progress-tile").map do |achievement|
50-
hsh = { title: achievement.css('.tooltip-title').inner_text.strip }
51-
hsh[:description] = achievement.css('div').inner_text.gsub(hsh[:title], '').strip
52-
hsh
53-
end
54-
48+
get_response
49+
scrape_recent
50+
scrape_progress
51+
scrape_showcase
52+
end
53+
54+
def get_response
55+
@response = Nokogiri::HTML(open(achievement_url))
56+
end
57+
58+
def scrape_recent
5559
@recent = []
5660
6.times do |num|
5761
achievement = {}
5862
div = response.css("#achv-recent-#{num}")
5963
if div
60-
achievement[:title] = div.css("div").inner_text.strip
64+
achievement[:title] = div.css("div > div").inner_text.strip
6165
achievement[:description] = div.inner_text.gsub(achievement[:title], '').strip
62-
achievement[:earned] = response.css("#recent-achievements a:nth(#{num}) span:nth(1)").inner_text
66+
achievement[:earned] = response.css("#recent-achievements span")[(num*3)+1].inner_text
6367

6468
@recent << achievement
6569
end
6670
end
71+
@recent
72+
end
6773

74+
def scrape_progress
6875
progress_ach = response.css("#progress-module .achievements-progress:nth(2) span")
6976
@progress = {
7077
liberty_campaign: progress_ach[0].inner_text,
@@ -74,6 +81,23 @@ def scrape
7481
quick_match: progress_ach[4].inner_text,
7582
}
7683
end
84+
85+
def scrape_showcase
86+
@showcase = response.css("#showcase-module .progress-tile").map do |achievement|
87+
hsh = { title: achievement.css('.tooltip-title').inner_text.strip }
88+
hsh[:description] = achievement.css('div').inner_text.gsub(hsh[:title], '').strip
89+
hsh
90+
end
91+
@showcase
92+
end
93+
94+
def output
95+
{
96+
recent: @recent,
97+
progress: @progress,
98+
showcase: @showcase
99+
}
100+
end
77101
end
78102
end
79103
end

spec/starcraft2/achievement_scraper_spec.rb

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
require 'spec_helper'
22

33
describe BnetScraper::Starcraft2::AchievementScraper do
4+
let(:url) { 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/achievements/' }
5+
subject { BnetScraper::Starcraft2::AchievementScraper.new(url: url) }
6+
47
describe '#initialize' do
58
context 'with url parameter passed' do
6-
subject { BnetScraper::Starcraft2::AchievementScraper.new(url: 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/achievements/') }
7-
89
it 'should extract bnet_id from the URL' do
910
subject.bnet_id.should == '2377239'
1011
end
@@ -51,20 +52,61 @@
5152
end
5253
end
5354

55+
describe '#get_response' do
56+
it 'should get the HTML response to be scraped' do
57+
subject.response.should be_nil
58+
subject.get_response
59+
subject.response.should_not be_nil
60+
end
61+
end
62+
5463
describe '#scrape' do
55-
subject { BnetScraper::Starcraft2::AchievementScraper.new(url: 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/achievements/') }
56-
before :each do
64+
it 'should call get_response and trigger scraper methods' do
65+
subject.should_receive(:get_response)
66+
subject.should_receive(:scrape_progress)
67+
subject.should_receive(:scrape_recent)
68+
subject.should_receive(:scrape_showcase)
5769
subject.scrape
5870
end
71+
end
72+
73+
describe '#scrape_showcase' do
74+
before :each do
75+
subject.get_response
76+
subject.scrape_showcase
77+
end
5978

6079
it 'should set the showcase' do
6180
subject.showcase.should have(5).achievements
6281
end
82+
end
6383

64-
it 'should set the recently earned' do
65-
subject.recent.should have(6).achievements
84+
describe '#scrape_recent' do
85+
before :each do
86+
subject.get_response
87+
subject.scrape_recent
6688
end
6789

90+
it 'should have the title of the achievement' do
91+
subject.recent[0][:title].should == 'Blink of an Eye'
92+
end
93+
94+
it 'should have the description of the achievement' do
95+
# this is a cop-out because the string contains UTF-8. Please fix this. - Cad
96+
subject.recent[0][:description].should be_a String
97+
end
98+
99+
it 'should have the date the achievement was earned' do
100+
subject.recent[0][:earned].should == '3/5/2012'
101+
end
102+
end
103+
104+
describe '#scrape_progress' do
105+
before :each do
106+
subject.get_response
107+
subject.scrape_progress
108+
end
109+
68110
it 'should set the liberty campaign progress' do
69111
subject.progress[:liberty_campaign].should == '1580'
70112
end
@@ -85,4 +127,16 @@
85127
subject.progress[:quick_match].should == '170'
86128
end
87129
end
130+
131+
describe '#output' do
132+
it 'should return the scraped data when scrape has been called' do
133+
subject.scrape
134+
expected = {
135+
recent: subject.recent,
136+
showcase: subject.showcase,
137+
progress: subject.progress
138+
}
139+
subject.output.should == expected
140+
end
141+
end
88142
end

0 commit comments

Comments
 (0)