Skip to content

Commit 6b1a95d

Browse files
author
Jack Li
committed
abstract logic of ticket scraping into classes
1 parent 7aab68d commit 6b1a95d

File tree

6 files changed

+102
-68
lines changed

6 files changed

+102
-68
lines changed

ticketscraping/constants.py

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from uuid import uuid4
2-
from urllib.parse import quote_plus
32

43
ANTIBOT_JS_CODE_URL = "https://epsf.ticketmaster.com/eps-d"
54
TOKEN_INTERROGATION_URL = "https://epsf.ticketmaster.com/eps-d?d=www.ticketmaster.com"
@@ -17,41 +16,28 @@ def get_top_picks_header(): return {
1716
"tmps-correlation-id": str(uuid4())
1817
}
1918

20-
def get_top_picks_query_params_str(qty=2, priceInterval=(
21-
0, 100)):
22-
def get_top_picks_query_params(qty, priceInterval): return {
23-
'show': 'places maxQuantity sections',
24-
'mode': 'primary:ppsectionrow resale:ga_areas platinum:all',
25-
'qty': qty,
26-
'q': f"and(not(\'accessible\'),any(listprices,$and(gte(@,{priceInterval[0]}),lte(@,{priceInterval[1]}))))",
27-
'includeStandard': 'true',
28-
'includeResale': 'true',
29-
'includePlatinumInventoryType': 'false',
30-
'embed': ['area', 'offer', 'description'],
31-
'apikey': 'b462oi7fic6pehcdkzony5bxhe',
32-
'apisecret': 'pquzpfrfz7zd2ylvtz3w5dtyse',
33-
'limit': 100,
34-
'offset': 0,
35-
'sort': '-quality',
36-
}
37-
def encodeURI(s):
38-
return quote_plus(s, safe="():=,$@'")
39-
def formulateURI(params):
40-
items = []
41-
for p in params:
42-
if isinstance(params[p], list):
43-
for i in params[p]:
44-
items.append(encodeURI(f"{p}={i}"))
45-
else:
46-
items.append(encodeURI(f"{p}={params[p]}"))
47-
return '?' + '&'.join(items)
48-
return formulateURI(get_top_picks_query_params(qty, priceInterval))
19+
def get_top_picks_query_params(qty, priceInterval, limit=100, offset=0, sort='-quality'):
    """Build the query parameters for a top-picks request.

    Args:
        qty: Value sent as the ``qty`` parameter (requested seat quantity).
        priceInterval: Indexable pair ``(low, high)``; the two values are
            inserted as the ``gte`` / ``lte`` bounds of the list-price filter.
        limit: Value sent as ``limit``. Defaults to 100, the value that was
            previously hard-coded.
        offset: Value sent as ``offset``. Defaults to 0, as before.
        sort: Value sent as ``sort``. Defaults to ``'-quality'``, as before.

    Returns:
        dict: Parameter names mapped to their values. ``embed`` is a list,
        so it carries several values under one key.
    """
    price_filter = (
        "and(not('accessible'),any(listprices,"
        f"$and(gte(@,{priceInterval[0]}),lte(@,{priceInterval[1]}))))"
    )
    return {
        'show': 'places maxQuantity sections',
        'mode': 'primary:ppsectionrow resale:ga_areas platinum:all',
        'qty': qty,
        'q': price_filter,
        'includeStandard': 'true',
        'includeResale': 'true',
        'includePlatinumInventoryType': 'false',
        'embed': ['area', 'offer', 'description'],
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration instead.
        'apikey': 'b462oi7fic6pehcdkzony5bxhe',
        'apisecret': 'pquzpfrfz7zd2ylvtz3w5dtyse',
        'limit': limit,
        'offset': offset,
        'sort': sort,
    }
4934

5035
FN_MATCHING_REGEX = r"\(function\(\){.*}\)\(\)"
5136
TOKEN_RENEW_SEC_OFFSET = 3
52-
TOKEN_RENEW_PRIORITY = 3
53-
TICKET_SCRAPING_PRIORITY = 1
37+
TOKEN_RENEW_PRIORITY = 1
38+
TICKET_SCRAPING_PRIORITY = 3
5439
TICKET_SCRAPING_INTERVAL = 60
40+
TICKET_SCRAPING_TOKEN_AWAIT_MAX_INTERVAL = 5
5541

5642
INJECTOR_LOCATION = "js/injector.js"
5743
INJECTOR_HEADER_LOCATION = "js/injector-header.js"

ticketscraping/scraping.py

Lines changed: 68 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,36 +8,77 @@
88
from threading import Semaphore
99
from .prepare_reese84token import getReese84Token
1010

11-
s = sched.scheduler(time.time, time.sleep)
12-
token_semaphore = Semaphore(1)
13-
reese84_token = {}
11+
class Reese84TokenUpdating():
    """Fetch a reese84 token and re-fetch it shortly before it must be renewed.

    ``token_semaphore`` carries one permit while ``reese84_token`` is usable
    and none while there is no token yet or while it is being replaced.
    """

    def __init__(self):
        self.is_running = False
        self.reese84_token = {}
        # Starts with no permit: nothing is usable until the first fetch.
        self.token_semaphore = Semaphore(0)
        self.scheduler = sched.scheduler(time.time, time.sleep)

    def _schedule_renewal(self):
        """Queue the next renewal ``TOKEN_RENEW_SEC_OFFSET`` seconds early."""
        delay = self.reese84_token['renewInSec'] - constants.TOKEN_RENEW_SEC_OFFSET
        self.scheduler.enter(
            delay, constants.TOKEN_RENEW_PRIORITY, self.renew_reese84_token)

    def initialize_reese84_token(self):
        """
        Fetch the first token and publish it.

        This method should not be called directly.
        """
        self.reese84_token = getReese84Token()
        self.token_semaphore.release()  # produce a new token
        self._schedule_renewal()

    def renew_reese84_token(self):
        """
        Replace the current token with a freshly fetched one.

        This method should not be called directly.
        """
        # Blocks until the permit is free, then keeps it for the duration of
        # the fetch so the old token is not reported as available meanwhile.
        self.token_semaphore.acquire()  # invalidate a token
        self.reese84_token = getReese84Token()
        self.token_semaphore.release()
        self._schedule_renewal()

    def start(self):
        """Fetch the initial token, then keep renewing it in the background.

        Calling ``start`` again after the first call does nothing.
        """
        # if the scheduler is already started - do nothing
        if self.is_running:
            return
        self.is_running = True
        self.initialize_reese84_token()
        # scheduler.run(blocking=False) only executes events that are already
        # due and then returns, so the renewal queued above (due in the
        # future) would never fire. Run the blocking loop on a daemon thread
        # instead; start() still returns right after the initial fetch.
        from threading import Thread
        Thread(target=self.scheduler.run,
               name="reese84-token-renewal", daemon=True).start()
1443

1544

16-
def obtainReese84Token(sch):
17-
global reese84_token, token_semaphore
18-
token_semaphore.acquire()
19-
reese84_token = getReese84Token()
20-
token_semaphore.release()
21-
print(reese84_token)
22-
sch.enter(reese84_token['renewInSec'] -
23-
constants.TOKEN_RENEW_SEC_OFFSET, constants.TOKEN_RENEW_PRIORITY, obtainReese84Token, (sch,))
2445

46+
class TicketScraping():
    """Request the top picks for one event repeatedly and print each response.

    The reese84 token is read from the supplied ``Reese84TokenUpdating``
    instance and sent as the ``reese84`` cookie.
    """

    def __init__(self, token_generator: Reese84TokenUpdating, event_id=constants.EVENT_ID, num_seats=2, price_range=(0, 200)):
        """
        Args:
            token_generator: Source of the current token and of the
                semaphore that signals whether it is usable.
            event_id: Event whose top picks are requested.
            num_seats: Passed as the quantity of the query.
            price_range: ``(low, high)`` pair passed as the price interval.
        """
        self.event_id = event_id
        self.num_seats = num_seats
        self.price_range = price_range
        self.token_gen = token_generator
        self.scheduler = sched.scheduler(time.time, time.sleep)
        self.is_running = False

    def _schedule_next(self, delay):
        """Queue another ``ticket_scraping`` run ``delay`` seconds from now."""
        self.scheduler.enter(
            delay, constants.TICKET_SCRAPING_PRIORITY, self.ticket_scraping)

    def ticket_scraping(self):
        """Run one request, or retry shortly if no token is available."""
        token_permit = self.token_gen.token_semaphore
        # Take the permit without blocking rather than peeking at the private
        # ``_value`` counter; holding it also keeps a renewal from swapping
        # the token while the request is in flight.
        if not token_permit.acquire(blocking=False):
            # retry after a delay
            self._schedule_next(constants.TICKET_SCRAPING_TOKEN_AWAIT_MAX_INTERVAL)
            return
        try:
            top_picks_url = constants.get_top_picks_url(self.event_id)
            top_picks_q_params = constants.get_top_picks_query_params(
                self.num_seats, self.price_range)
            top_picks_header = constants.get_top_picks_header()
            res = requests.get(top_picks_url, headers=top_picks_header, params=top_picks_q_params,
                               cookies=dict(reese84=self.token_gen.reese84_token['token']))
        finally:
            # Always hand the permit back, even when the request raises.
            token_permit.release()
        print(res.json())
        self._schedule_next(constants.TICKET_SCRAPING_INTERVAL)

    def start(self):
        """Run the first scrape now, then keep scraping in the background.

        Calling ``start`` again after the first call does nothing.
        """
        # if the scheduler is already started - do nothing
        if self.is_running:
            return
        self.is_running = True
        self.ticket_scraping()
        # scheduler.run(blocking=False) only executes events that are already
        # due, so the follow-up queued by ticket_scraping() would never fire.
        # Run the blocking loop on a daemon thread instead.
        from threading import Thread
        Thread(target=self.scheduler.run,
               name="ticket-scraping", daemon=True).start()
2578

26-
def ticket_scraping(sch):
27-
global reese84_token, token_semaphore
28-
token_semaphore.acquire()
29-
top_picks_url = constants.get_top_picks_url(constants.EVENT_ID)
30-
top_picks_q_params_str = constants.get_top_picks_query_params_str(2, (0, 200))
31-
top_picks_url = top_picks_url + top_picks_q_params_str
32-
top_picks_header = constants.get_top_picks_header()
33-
res = requests.get(top_picks_url, headers=top_picks_header,
34-
cookies=dict(reese84=reese84_token['token']))
35-
token_semaphore.release()
36-
print(res.json())
37-
sch.enter(constants.TICKET_SCRAPING_INTERVAL,
38-
constants.TICKET_SCRAPING_PRIORITY, ticket_scraping, (sch,))
3979

4080
def start():
41-
obtainReese84Token(s)
42-
ticket_scraping(s)
43-
s.run()
81+
reese_token_gen = Reese84TokenUpdating()
82+
ticket_scraping = TicketScraping(reese_token_gen)
83+
reese_token_gen.start()
84+
ticket_scraping.start()

tmtracker/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
default_app_config = 'tmtracker.app.MyAppConfig'

tmtracker/app.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from django.apps import AppConfig
2+
from utils import get_db_handle
3+
from ticketscraping.scraping import start
4+
from datetime import datetime
5+
6+
class MyAppConfig(AppConfig):
    """App configuration for ``tmtracker`` that starts ticket scraping."""

    name = "tmtracker"
    verbose_name = "start tmtracker"

    def ready(self):
        """Log the start time, open the ``tickets`` database, start scraping."""
        started_at = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
        print(f"server started at {started_at}")
        get_db_handle('tickets')
        print("=== database connection is established ===")
        start()

tmtracker/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
'django.contrib.contenttypes',
3737
'django.contrib.messages',
3838
'django.contrib.staticfiles',
39+
"tmtracker"
3940
]
4041

4142
MIDDLEWARE = [

tmtracker/urls.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,6 @@
1515
"""
1616
from django.contrib import admin
1717
from django.urls import path
18-
from ticketscraping.scraping import start
19-
from utils import get_db_handle
20-
from datetime import datetime
21-
import requests
22-
23-
get_db_handle('tickets')
24-
print("=== database connection is established ===")
25-
print(f"server started at {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
26-
start()
2718

2819
urlpatterns = [
2920
path('admin/', admin.site.urls),

0 commit comments

Comments
 (0)