Skip to content

Commit f755577

Browse files
committed
Creates BaseScraper, refactors scrapers
All Starcraft2 scrapers now inherit from Starcraft2::BaseScraper. This encapsulates both URL-based and key-based object creation in one place, leaving the subclasses to focus on scraping rather than on account preparation.
1 parent 8b7415d commit f755577

File tree

10 files changed

+158
-234
lines changed

10 files changed

+158
-234
lines changed

lib/bnet_scraper/starcraft2.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
require 'bnet_scraper/starcraft2/base_scraper'
12
require 'bnet_scraper/starcraft2/profile_scraper'
23
require 'bnet_scraper/starcraft2/league_scraper'
34
require 'bnet_scraper/starcraft2/achievement_scraper'
@@ -34,7 +35,7 @@ def self.full_profile_scrape bnet_id, account, region = 'na'
3435

3536
parsed_leagues = []
3637
profile_output[:leagues].each do |league|
37-
league_scraper = LeagueScraper.new league[:href]
38+
league_scraper = LeagueScraper.new url: league[:href]
3839
parsed_leagues << league_scraper.scrape
3940
end
4041
profile_output[:leagues] = parsed_leagues

lib/bnet_scraper/starcraft2/achievement_scraper.rb

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,7 @@
11
module BnetScraper
22
module Starcraft2
3-
class AchievementScraper
4-
attr_reader :region, :bnet_id, :account, :bnet_index, :recent, :progress, :showcase, :response
5-
def initialize options = {}
6-
if options[:url]
7-
extracted_data = options[:url].match(/http:\/\/(.+)\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.+)\/achievements\//)
8-
@region = REGIONS.key({ domain: extracted_data[1], dir: extracted_data[2] })
9-
@bnet_id = extracted_data[3]
10-
@bnet_index = extracted_data[4]
11-
@account = extracted_data[5]
12-
elsif options[:bnet_id] && options[:account]
13-
@account = options[:account]
14-
@bnet_id = options[:bnet_id]
15-
@region = options[:region] || 'na'
16-
17-
if options[:bnet_index]
18-
@bnet_index = options[:bnet_index]
19-
else
20-
set_bnet_index
21-
end
22-
end
23-
end
24-
25-
# set_bnet_index
26-
#
27-
# Because profile URLs have to have a specific bnet_index that is seemingly incalculable,
28-
# we must ping both variants to determine the correct bnet_index. We then store that value.
29-
def set_bnet_index
30-
[1,2].each do |idx|
31-
res = Net::HTTP.get_response URI achievement_url idx
32-
if res.is_a? Net::HTTPSuccess
33-
@bnet_index = idx
34-
return
35-
end
36-
end
37-
end
38-
39-
def achievement_url bnet_index = @bnet_index
40-
"http://#{region_info[:domain]}/sc2/#{region_info[:dir]}/profile/#{bnet_id}/#{bnet_index}/#{account}/achievements/"
41-
end
42-
43-
def region_info
44-
REGIONS[region]
45-
end
3+
class AchievementScraper < BaseScraper
4+
attr_reader :recent, :progress, :showcase, :response
465

476
def scrape
487
get_response
@@ -52,7 +11,7 @@ def scrape
5211
end
5312

5413
def get_response
55-
@response = Nokogiri::HTML(open(achievement_url))
14+
@response = Nokogiri::HTML(open(profile_url+"achievements/"))
5615
end
5716

5817
def scrape_recent
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
module BnetScraper
2+
module Starcraft2
3+
class BaseScraper
4+
attr_reader :bnet_id, :account, :region, :bnet_index, :url
5+
6+
def initialize options = {}
7+
if options[:url]
8+
extracted_data = options[:url].match(/http:\/\/(.+)\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.[^\/]+)\//)
9+
@region = REGIONS.key({ domain: extracted_data[1], dir: extracted_data[2] })
10+
@bnet_id = extracted_data[3]
11+
@bnet_index = extracted_data[4]
12+
@account = extracted_data[5]
13+
@url = options[:url]
14+
elsif options[:bnet_id] && options[:account]
15+
@bnet_id = options[:bnet_id]
16+
@account = options[:account]
17+
@region = options[:region] || 'na'
18+
if options[:bnet_index]
19+
@bnet_index = options[:bnet_index]
20+
else
21+
set_bnet_index
22+
end
23+
end
24+
end
25+
26+
# set_bnet_index
27+
#
28+
# Because profile URLs have to have a specific bnet_index that is seemingly incalculable,
29+
# we must ping both variants to determine the correct bnet_index. We then store that value.
30+
def set_bnet_index
31+
[1,2].each do |idx|
32+
res = Net::HTTP.get_response URI profile_url idx
33+
if res.is_a? Net::HTTPSuccess
34+
@bnet_index = idx
35+
return
36+
end
37+
end
38+
end
39+
40+
def profile_url bnet_index = @bnet_index
41+
"http://#{region_info[:domain]}/sc2/#{region_info[:dir]}/profile/#{bnet_id}/#{bnet_index}/#{account}/"
42+
end
43+
44+
def region_info
45+
REGIONS[region]
46+
end
47+
end
48+
end
49+
end

lib/bnet_scraper/starcraft2/league_scraper.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,17 @@ module Starcraft2
99
#
1010
# @param [String] url - The league URL on battle.net
1111
# @return [Hash] league_data - Hash of data extracted
12-
class LeagueScraper
13-
attr_reader :url, :bnet_id, :bnet_index, :account, :league_id, :lang,
14-
:season, :size, :random, :name, :division
12+
class LeagueScraper < BaseScraper
13+
attr_reader :league_id, :season, :size, :random, :name, :division
1514

16-
def initialize(url)
17-
@url, @lang, @bnet_id, @bnet_index, @account, @league_id = url.match(/http:\/\/.+\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.+)\/ladder\/(.+)(#current-rank)?/).to_a
15+
def initialize options = {}
16+
super(options)
17+
18+
if options[:url]
19+
@league_id = options[:url].match(/http:\/\/.+\/sc2\/.+\/profile\/.+\/\d{1}\/.+\/ladder\/(.+)(#current-rank)?/).to_a[1]
20+
else
21+
@league_id = options[:league_id]
22+
end
1823
end
1924

2025
def scrape

lib/bnet_scraper/starcraft2/profile_scraper.rb

Lines changed: 1 addition & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,44 +19,7 @@ module Starcraft2
1919
# Using this URL we can extract the critical information. However, sometimes we do not have
2020
# the URL and have to make do with a bnet_id and account. This is the bare minimum needed,
2121
# unless the account is in a region other than 'na'. In such cases, region also needs to be passed.
22-
class ProfileScraper
23-
attr_reader :bnet_id, :account, :region, :agent, :bnet_index
24-
25-
# @param options - Hash of options to parse.
26-
# @return profile_data - The hash of profile data scraped, including array of leagues to scrape
27-
def initialize options = {}
28-
if options[:url]
29-
extracted_data = options[:url].match(/http:\/\/(.+)\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.+)\//)
30-
@region = REGIONS.key({ domain: extracted_data[1], dir: extracted_data[2] })
31-
@bnet_id = extracted_data[3]
32-
@bnet_index = extracted_data[4]
33-
@account = extracted_data[5]
34-
elsif options[:bnet_id] && options[:account]
35-
@bnet_id = options[:bnet_id]
36-
@account = options[:account]
37-
@region = options[:region] || 'na'
38-
if options[:bnet_index]
39-
@bnet_index = options[:bnet_index]
40-
else
41-
set_bnet_index
42-
end
43-
end
44-
end
45-
46-
# set_bnet_index
47-
#
48-
# Because profile URLs have to have a specific bnet_index that is seemingly incalculable,
49-
# we must ping both variants to determine the correct bnet_index. We then store that value.
50-
def set_bnet_index
51-
[1,2].each do |idx|
52-
res = Net::HTTP.get_response URI profile_url idx
53-
if res.is_a? Net::HTTPSuccess
54-
@bnet_index = idx
55-
return
56-
end
57-
end
58-
end
59-
22+
class ProfileScraper < BaseScraper
6023
def scrape
6124
get_profile_data
6225
get_league_list
@@ -94,14 +57,6 @@ def output
9457
leagues: @leagues
9558
}
9659
end
97-
98-
def profile_url bnet_index = @bnet_index
99-
"http://#{region_info[:domain]}/sc2/#{region_info[:dir]}/profile/#{bnet_id}/#{bnet_index}/#{account}/"
100-
end
101-
102-
def region_info
103-
REGIONS[region]
104-
end
10560
end
10661
end
10762
end

spec/starcraft2/achievement_scraper_spec.rb

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,9 @@
44
let(:url) { 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/achievements/' }
55
subject { BnetScraper::Starcraft2::AchievementScraper.new(url: url) }
66

7-
describe '#initialize' do
8-
context 'with url parameter passed' do
9-
it 'should extract bnet_id from the URL' do
10-
subject.bnet_id.should == '2377239'
11-
end
12-
13-
it 'should extract account from the URL' do
14-
subject.account.should == 'Demon'
15-
end
16-
17-
it 'should extract the bnet_index from the URL' do
18-
subject.bnet_index.should == '1'
19-
end
20-
21-
it 'should extract the region from the URL' do
22-
subject.region.should == 'na'
23-
end
24-
end
25-
26-
context 'when bnet_id and account parameters are passed' do
27-
subject { BnetScraper::Starcraft2::AchievementScraper.new(bnet_id: '2377239', account: 'Demon') }
28-
it 'should set the bnet_id and account parameters' do
29-
subject.bnet_id.should == '2377239'
30-
subject.account.should == 'Demon'
31-
end
32-
33-
it 'should default the region to na' do
34-
subject.region.should == 'na'
35-
end
36-
37-
it 'should assign region if passed' do
38-
BnetScraper::Starcraft2::AchievementScraper.any_instance.should_receive(:set_bnet_index)
39-
scraper = BnetScraper::Starcraft2::AchievementScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea')
40-
scraper.region.should == 'fea'
41-
end
42-
43-
it 'should not call set_bnet_index if bnet_index is passed' do
44-
BnetScraper::Starcraft2::AchievementScraper.any_instance.should_not_receive(:set_bnet_index)
45-
scraper = BnetScraper::Starcraft2::AchievementScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea', bnet_index: '1')
46-
end
47-
48-
it 'should call set_bnet_index_if bnet_index is not passed' do
49-
BnetScraper::Starcraft2::AchievementScraper.any_instance.should_receive(:set_bnet_index)
50-
scraper = BnetScraper::Starcraft2::AchievementScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea')
51-
end
52-
end
7+
it_behaves_like 'an SC2 Scraper' do
8+
let(:scraper_class) { BnetScraper::Starcraft2::AchievementScraper }
9+
let(:subject) { scraper_class.new(url: url) }
5310
end
5411

5512
describe '#get_response' do
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
require 'spec_helper'
2+
3+
describe BnetScraper::Starcraft2::BaseScraper do
4+
it_behaves_like 'an SC2 Scraper' do
5+
let(:scraper_class) { BnetScraper::Starcraft2::BaseScraper }
6+
let(:subject) { scraper_class.new(url: 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/') }
7+
end
8+
end

spec/starcraft2/league_scraper_spec.rb

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,17 @@
22

33
describe BnetScraper::Starcraft2::LeagueScraper do
44
let(:url) { "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/12345" }
5-
subject { BnetScraper::Starcraft2::LeagueScraper.new(url) }
5+
subject { BnetScraper::Starcraft2::LeagueScraper.new(url: url) }
66

7-
describe '#initialize' do
8-
it 'should take a league URL parameter' do
9-
subject.url.should == url
10-
end
11-
12-
it 'should dissect the bnet_id from the URL' do
13-
subject.bnet_id.should == '2377239'
14-
end
15-
16-
it 'should dissect the account from the URL' do
17-
subject.account.should == 'Demon'
18-
end
7+
it_behaves_like 'an SC2 Scraper' do
8+
let(:scraper_class) { BnetScraper::Starcraft2::LeagueScraper }
9+
let(:subject) { scraper_class.new(url: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/12345") }
10+
end
1911

12+
describe '#initialize' do
2013
it 'should dissect the league_id from the URL' do
2114
subject.league_id.should == '12345'
2215
end
23-
24-
it 'should dissect the bnet_index from the URL' do
25-
subject.bnet_index.should == '1'
26-
end
27-
28-
it 'should dissect the lang from the URL' do
29-
subject.lang.should == 'en'
30-
end
3116
end
3217

3318
describe '#scrape' do

spec/starcraft2/profile_scraper_spec.rb

Lines changed: 4 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,12 @@
11
require 'spec_helper'
22

33
describe BnetScraper::Starcraft2::ProfileScraper do
4-
subject { BnetScraper::Starcraft2::ProfileScraper.new(bnet_id: '2377239', account: 'Demon') }
5-
6-
describe '#initialize' do
7-
context 'when bnet_id and account parameters are passed' do
8-
it 'should set the bnet_id and account parameters' do
9-
subject.bnet_id.should == '2377239'
10-
subject.account.should == 'Demon'
11-
end
12-
13-
it 'should default the region to na' do
14-
subject.region.should == 'na'
15-
end
16-
17-
it 'should assign region if passed' do
18-
BnetScraper::Starcraft2::ProfileScraper.any_instance.should_receive(:set_bnet_index)
19-
scraper = BnetScraper::Starcraft2::ProfileScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea')
20-
scraper.region.should == 'fea'
21-
end
22-
23-
it 'should not call set_bnet_index if bnet_index is passed' do
24-
BnetScraper::Starcraft2::ProfileScraper.any_instance.should_not_receive(:set_bnet_index)
25-
scraper = BnetScraper::Starcraft2::ProfileScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea', bnet_index: '1')
26-
end
27-
28-
it 'should call set_bnet_index_if bnet_index is not passed' do
29-
BnetScraper::Starcraft2::ProfileScraper.any_instance.should_receive(:set_bnet_index)
30-
scraper = BnetScraper::Starcraft2::ProfileScraper.new(bnet_id: '2377239', account: 'Demon', region: 'fea')
31-
end
32-
end
33-
34-
context 'when url is passed' do
35-
subject { BnetScraper::Starcraft2::ProfileScraper.new(url: 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/') }
36-
it 'should extract the bnet_id from the url' do
37-
subject.bnet_id.should == '2377239'
38-
end
39-
40-
it 'should extract the account from the url' do
41-
subject.account.should == 'Demon'
42-
end
43-
44-
it 'should extract the region from the url' do
45-
subject.region.should == 'na'
46-
end
47-
48-
it 'should extract the bnet_index from the url' do
49-
subject.bnet_index.should == '1'
50-
end
51-
end
52-
end
53-
54-
describe '#region_info' do
55-
it 'should return information based on the set region' do
56-
subject.region_info.should == { domain: 'us.battle.net', dir: 'en' }
57-
end
58-
end
59-
60-
describe '#set_bnet_index' do
61-
it 'should return the valid integer needed for a proper URL parse from bnet' do
62-
subject.set_bnet_index
63-
subject.bnet_index.should == 1
64-
end
65-
end
66-
67-
describe '#profile_url' do
68-
it 'should return a string URL for bnet' do
69-
subject.profile_url.should == 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/'
70-
end
71-
72-
it 'should optionally take a bnet_index to use instead of saved bnet_index' do
73-
subject.profile_url(2).should == 'http://us.battle.net/sc2/en/profile/2377239/2/Demon/'
74-
end
4+
it_behaves_like 'an SC2 Scraper' do
5+
let(:scraper_class) { BnetScraper::Starcraft2::BaseScraper }
6+
let(:subject) { scraper_class.new(url: 'http://us.battle.net/sc2/en/profile/2377239/1/Demon/') }
757
end
768

9+
subject { BnetScraper::Starcraft2::ProfileScraper.new(bnet_id: '2377239', account: 'Demon') }
7710

7811
describe '#get_profile_data' do
7912
it 'should set the race, wins, and achievements attributes' do

0 commit comments

Comments
 (0)