Skip to content

Commit b64c1a4

Browse files
committed
Moves scrapers to separate files, adds league list
1 parent 5d562a2 commit b64c1a4

File tree

5 files changed

+205
-93
lines changed

5 files changed

+205
-93
lines changed

lib/bnet_scraper/starcraft2.rb

Lines changed: 3 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
require 'bnet_scraper/starcraft2/profile_scraper'
2+
require 'bnet_scraper/starcraft2/league_scraper'
3+
14
module BnetScraper
25
module Starcraft2
36
REGIONS = {
@@ -7,90 +10,5 @@ module Starcraft2
710
'sea' => { domain: 'sea.battle.net', dir: 'en' },
811
'fea' => { domain: 'tw.battle.net', dir: 'zh' }
912
}
10-
11-
class ProfileScraper
12-
attr_reader :bnet_id, :account, :region, :agent, :bnet_index
13-
14-
def initialize bnet_id, account, region = 'na'
15-
@bnet_id = bnet_id
16-
@account = account
17-
@region = region
18-
@agent = Mechanize.new
19-
set_bnet_index
20-
end
21-
22-
def set_bnet_index
23-
[1,2].each do |idx|
24-
res = Net::HTTP.get_response URI profile_url idx
25-
if res.is_a? Net::HTTPSuccess
26-
@bnet_index = idx
27-
return
28-
end
29-
end
30-
end
31-
32-
def scrape
33-
@response = @agent.get(profile_url)
34-
35-
@race = @response.search("#season-snapshot .module-footer a").first().inner_html()
36-
@wins = @response.search("#career-stats h2").inner_html()
37-
@achievements = @response.search("#profile-header h3").inner_html()
38-
39-
parse_response
40-
end
41-
42-
def parse_response
43-
{
44-
bnet_id: @bnet_id,
45-
account: @account,
46-
bnet_index: @bnet_index,
47-
race: @race,
48-
wins: @wins,
49-
achievements: @achievements
50-
}
51-
end
52-
53-
def profile_url bnet_index = @bnet_index
54-
"http://#{region_info[:domain]}/sc2/#{region_info[:dir]}/profile/#{bnet_id}/#{bnet_index}/#{account}/"
55-
end
56-
57-
def region_info
58-
REGIONS[region]
59-
end
60-
end
61-
62-
class LeagueScraper
63-
attr_reader :url, :bnet_id, :bnet_index, :account, :league_id, :lang,
64-
:season, :size, :random, :name, :division
65-
66-
def initialize(url)
67-
@url, @lang, @bnet_id, @bnet_index, @account, @league_id = url.match(/http:\/\/.+\/sc2\/(.+)\/profile\/(.+)\/(\d{1})\/(.+)\/ladder\/(.+)(#current-rank)?/).to_a
68-
@agent = Mechanize.new
69-
end
70-
71-
def scrape
72-
@response = @agent.get(@url)
73-
value = @response.search(".data-title .data-label h3").inner_text().strip
74-
header_regex = /Season (\d{1}) - \s+(\dv\d)( Random)? (\w+)\s+Division (.+)/
75-
header_values = value.match(header_regex).to_a
76-
header_values.shift()
77-
@season, @size, @random, @division, @name = header_values
78-
79-
@random = !@random.nil?
80-
parse_response
81-
end
82-
83-
def parse_response
84-
{
85-
season: @season,
86-
size: @size,
87-
name: @name,
88-
division: @division,
89-
random: @random,
90-
bnet_id: @bnet_id,
91-
account: @account
92-
}
93-
end
94-
end
9513
end
9614
end
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
module BnetScraper
  module Starcraft2
    # Scrapes a single StarCraft II league (ladder) page.
    #
    # The league URL handed to the constructor is split into its path
    # components; #scrape then fetches the page and parses the header text
    # found under ".data-title .data-label h3".
    class LeagueScraper
      # Captures, in order: language dir, bnet id, bnet index, account and
      # league id. The league id capture stops at '#' so that a trailing
      # fragment such as "#current-rank" is not swallowed into it; the
      # fragment itself is still part of the overall match (and thus of #url).
      URL_PATTERN = %r{http://.+/sc2/(.+)/profile/(.+)/(\d)/(.+)/ladder/([^#\n]+)(?:#.*)?}

      # Captures, in order: season number (one or more digits), team size
      # ("1v1", "2v2", ...), the optional " Random" marker, the word before
      # "Division" and the text after it.
      HEADER_PATTERN = /Season (\d+) - \s+(\dv\d)( Random)? (\w+)\s+Division (.+)/

      attr_reader :url, :bnet_id, :bnet_index, :account, :league_id, :lang,
                  :season, :size, :random, :name, :division

      # @param url [String] full league URL, e.g.
      #   "http://<domain>/sc2/<lang>/profile/<id>/<index>/<account>/ladder/<league>"
      #   optionally followed by a "#..." fragment.
      #
      # When the URL does not match URL_PATTERN every URL-derived attribute
      # (including #url) is left nil.
      def initialize(url)
        @url, @lang, @bnet_id, @bnet_index, @account, @league_id = url.match(URL_PATTERN).to_a
        @agent = Mechanize.new
      end

      # Fetches the league page, parses its header and returns the result of
      # #parse_response.
      #
      # When the header does not match HEADER_PATTERN the parsed fields are
      # nil and +random+ is false.
      #
      # @return [Hash]
      def scrape
        @response = @agent.get(@url)
        header = @response.search(".data-title .data-label h3").inner_text.strip

        # First element of the match array is the whole match; discard it.
        _whole, @season, @size, random_marker, @division, @name = header.match(HEADER_PATTERN).to_a

        # The marker is only captured for random-team leagues.
        @random = !random_marker.nil?
        parse_response
      end

      # @return [Hash] the scraped league attributes plus the bnet id and
      #   account taken from the URL.
      def parse_response
        {
          season: @season,
          size: @size,
          name: @name,
          division: @division,
          random: @random,
          bnet_id: @bnet_id,
          account: @account
        }
      end
    end
  end
end
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
module BnetScraper
  module Starcraft2
    # Scrapes a StarCraft II profile page and the list of leagues linked from
    # the profile's "ladder/leagues" page.
    #
    # Relies on the REGIONS table defined in BnetScraper::Starcraft2 to map a
    # region code to a domain and language directory.
    class ProfileScraper
      attr_reader :bnet_id, :account, :region, :agent, :bnet_index

      # @param bnet_id [String] numeric profile id as it appears in the URL
      # @param account [String] account name as it appears in the URL
      # @param region [String] key into REGIONS (defaults to 'na')
      #
      # Performs HTTP requests immediately (see #set_bnet_index).
      def initialize(bnet_id, account, region = 'na')
        @bnet_id = bnet_id
        @account = account
        @region = region
        @agent = Mechanize.new
        set_bnet_index
      end

      # Probes the profile URL with index 1 and then 2 and stores the first
      # index that answers with an HTTP success response. When neither does,
      # @bnet_index is left untouched.
      def set_bnet_index
        [1, 2].each do |idx|
          res = Net::HTTP.get_response(URI(profile_url(idx)))
          next unless res.is_a?(Net::HTTPSuccess)

          @bnet_index = idx
          return
        end
      end

      # Fetches profile data and the league list, then returns #output.
      #
      # @return [Hash]
      def scrape
        get_profile_data
        get_league_list
        output
      end

      # Fetches the profile page and stores race, wins and achievements as
      # the raw inner HTML of the matching elements.
      def get_profile_data
        response = @agent.get(profile_url)

        @race = response.search("#season-snapshot .module-footer a").first.inner_html
        @wins = response.search("#career-stats h2").inner_html
        @achievements = response.search("#profile-header h3").inner_html
      end

      # Fetches the league list page and stores one hash per league link
      # (anchors whose href contains "#current-rank").
      #
      # Each entry has:
      #   :name - stripped link text
      #   :id   - the link's href with "#current-rank" removed
      #   :href - absolute league URL
      #
      # @return [Array<Hash>]
      def get_league_list
        # The page lives at ".../ladder/leagues" and its links are relative
        # ("<id>#current-rank"), so they must be joined against the "ladder/"
        # directory rather than appended to the page's own URL.
        ladder_url = "#{profile_url}ladder/"
        response = @agent.get("#{ladder_url}leagues")

        @leagues = response.search("a[href*='#current-rank']").map do |league|
          link = league.attr('href')
          {
            name: league.inner_html.strip,
            id: link.sub('#current-rank', ''),
            href: "#{ladder_url}#{link}"
          }
        end
      end

      # @return [Hash] everything collected so far; values not yet scraped
      #   are nil.
      def output
        {
          bnet_id: @bnet_id,
          account: @account,
          bnet_index: @bnet_index,
          race: @race,
          wins: @wins,
          achievements: @achievements,
          leagues: @leagues
        }
      end

      # Builds the profile URL (always with a trailing slash).
      #
      # @param bnet_index [Integer] index segment; defaults to the one found
      #   by #set_bnet_index
      # @return [String]
      def profile_url(bnet_index = @bnet_index)
        "http://#{region_info[:domain]}/sc2/#{region_info[:dir]}/profile/#{bnet_id}/#{bnet_index}/#{account}/"
      end

      # @return [Hash, nil] the REGIONS entry for this scraper's region
      def region_info
        REGIONS[region]
      end
    end
  end
end

spec/starcraft2/profile_scraper_spec.rb

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,39 +43,122 @@
4343
end
4444
end
4545

46-
describe '#scrape' do
46+
47+
describe '#get_profile_data' do
4748
it 'should set the race, wins, and achievements attributes' do
4849
subject.instance_variable_get(:@race).should be_nil
4950
subject.instance_variable_get(:@achievements).should be_nil
5051
subject.instance_variable_get(:@wins).should be_nil
5152

52-
subject.scrape
53+
subject.get_profile_data
5354

5455
subject.instance_variable_get(:@race).should == 'Protoss'
5556
subject.instance_variable_get(:@achievements).should == '3630'
5657
subject.instance_variable_get(:@wins).should == '684'
5758
end
59+
end
60+
61+
# Instance-method example group; named with the leading '#' like the sibling
# '#get_profile_data', '#scrape' and '#output' groups.
describe '#get_league_list' do
  it 'should set an array of leagues' do
    # Nothing is stored until the league list has been fetched.
    subject.instance_variable_get(:@leagues).should be_nil
    subject.get_league_list

    subject.instance_variable_get(:@leagues).should have(12).leagues
  end
end
69+
70+
describe '#scrape' do
71+
it 'should call get_profile_data' do
72+
subject.should_receive(:get_profile_data)
73+
subject.scrape
74+
end
75+
it 'should call get_league_list' do
76+
subject.should_receive(:get_league_list)
77+
subject.scrape
78+
end
5879

59-
it 'should call parse_response' do
60-
subject.should_receive(:parse_response)
80+
it 'should call output' do
81+
subject.should_receive(:output)
6182
subject.scrape
6283
end
6384
end
6485

65-
describe '#parse_response' do
86+
describe '#output' do
6687
it 'should extract profile data from the response' do
6788
expected = {
6889
bnet_id: '2377239',
6990
account: 'Demon',
7091
bnet_index: 1,
7192
race: 'Protoss',
7293
wins: '684',
73-
achievements: '3630'
94+
achievements: '3630',
95+
leagues: [
96+
{
97+
name: "1v1 Platinum Rank 95",
98+
id: "96905",
99+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues96905#current-rank"
100+
},
101+
{
102+
name: "2v2 Random Platinum ...",
103+
id: "96716",
104+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues96716#current-rank"
105+
},
106+
{
107+
name: "2v2 Diamond Rank 45",
108+
id: "98162",
109+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues98162#current-rank"
110+
},
111+
{
112+
name: "2v2 Silver Rank 8",
113+
id: "97369",
114+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues97369#current-rank"
115+
},
116+
{
117+
name: "3v3 Random Gold Rank...",
118+
id: "96828",
119+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues96828#current-rank"
120+
},
121+
{
122+
name: "3v3 Diamond Rank 56",
123+
id: "97985",
124+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues97985#current-rank"
125+
},
126+
{
127+
name: "3v3 Silver Rank 5",
128+
id: "98523",
129+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues98523#current-rank"
130+
},
131+
{
132+
name: "3v3 Platinum Rank 88",
133+
id: "96863",
134+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues96863#current-rank"
135+
},
136+
{
137+
name: "3v3 Gold Rank 75",
138+
id: "97250",
139+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues97250#current-rank"
140+
},
141+
{
142+
name: "4v4 Random Platinum ...",
143+
id: "96830",
144+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues96830#current-rank"
145+
},
146+
{
147+
name: "4v4 Gold Rank 38",
148+
id: "98336",
149+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues98336#current-rank"
150+
},
151+
{
152+
name: "4v4 Diamond Rank 54",
153+
id: "98936",
154+
href: "http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues98936#current-rank"
155+
}
156+
]
74157
}
75158

76-
subject.parse_response.should == { bnet_id: '2377239', account: 'Demon', bnet_index: 1, race: nil, wins: nil, achievements: nil }
159+
subject.output.should == { bnet_id: '2377239', account: 'Demon', bnet_index: 1, race: nil, wins: nil, achievements: nil, leagues: nil }
77160
subject.scrape
78-
subject.parse_response.should == expected
161+
subject.output.should == expected
79162
end
80163
end
81164
end

spec/support/load_fakeweb.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
require 'fakeweb'
22

33
# Canned responses for the specs: each URL below is answered with the
# contents of the fixture file stored alongside this script.
fixture_for = {
  'http://us.battle.net/sc2/en/profile/2377239/1/Demon/' => '/profile.html',
  'http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/12345' => '/league.html',
  'http://us.battle.net/sc2/en/profile/2377239/1/Demon/ladder/leagues' => '/leagues.html'
}

# Read every fixture up front so a missing file fails before FakeWeb is touched.
pages = fixture_for.map do |uri, fixture|
  [uri, File.read(File.dirname(__FILE__) + fixture)]
end

# Any request that is not registered below raises instead of hitting the network.
FakeWeb.allow_net_connect = false

pages.each do |uri, html|
  FakeWeb.register_uri :get, uri, body: html, status: 200, content_type: 'text/html'
end

0 commit comments

Comments
 (0)