Skip to content

Commit 7aab68d

Browse files
author
Jack Li
committed
Automate top-picks API fetching
1 parent 1655f7a commit 7aab68d

File tree

5 files changed

+149
-55
lines changed

5 files changed

+149
-55
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
/**/__pycache__
22
/**/js/node_modules
33
/**/js/antibot-simulation.js
4-
4+
/**/tmp

ticketscraping/constants.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,57 @@
1+
from uuid import uuid4
2+
from urllib.parse import quote_plus
3+
14
ANTIBOT_JS_CODE_URL = "https://epsf.ticketmaster.com/eps-d"
5+
TOKEN_INTERROGATION_URL = "https://epsf.ticketmaster.com/eps-d?d=www.ticketmaster.com"
6+
7+
8+
def get_top_picks_url(eventId):
    """Return the quick-picks (top-picks) API endpoint for one event.

    Args:
        eventId: Event identifier, e.g. ``"0C005B5587A017CF"``.

    Returns:
        The absolute URL of the event's ``quickpicks`` resource, without a
        query string.
    """
    # Local import: the module header only imports ``quote_plus``.
    from urllib.parse import quote

    # Percent-encode the id so characters such as "/" or "?" cannot change
    # the path; plain alphanumeric ids pass through unchanged.
    event_segment = quote(str(eventId), safe="")
    return f"https://offeradapter.ticketmaster.com/api/ismds/event/{event_segment}/quickpicks"
10+
11+
12+
EVENT_ID = "0C005B5587A017CF"
13+
# Shared origin/referer headers; get_top_picks_header() extends them.
BASIC_REQ_HEADER = {
    "origin": "https://www.ticketmaster.com",
    "referer": "https://www.ticketmaster.com/",
}


def get_top_picks_header():
    """Build the request headers for a top-picks API call.

    Returns:
        A new dict holding every entry of ``BASIC_REQ_HEADER`` plus a
        ``tmps-correlation-id`` set to a freshly generated UUID4 string.
        ``BASIC_REQ_HEADER`` itself is not modified.
    """
    return {
        **BASIC_REQ_HEADER,
        "tmps-correlation-id": str(uuid4()),
    }
19+
20+
def get_top_picks_query_params_str(qty=2, priceInterval=(0, 100)):
    """Build the URL query string for a top-picks request.

    Args:
        qty: Value sent as the ``qty`` parameter.
        priceInterval: Sequence whose first two items are interpolated into
            the ``q`` filter as ``gte(@,<first>)`` and ``lte(@,<second>)``
            on ``listprices``.

    Returns:
        The encoded query string, including the leading ``?``.
    """
    # Local import: the module header only imports ``quote_plus``.
    from urllib.parse import urlencode

    low, high = priceInterval[0], priceInterval[1]
    params = {
        'show': 'places maxQuantity sections',
        'mode': 'primary:ppsectionrow resale:ga_areas platinum:all',
        'qty': qty,
        'q': f"and(not('accessible'),any(listprices,$and(gte(@,{low}),lte(@,{high}))))",
        'includeStandard': 'true',
        'includeResale': 'true',
        'includePlatinumInventoryType': 'false',
        'embed': ['area', 'offer', 'description'],
        'apikey': 'b462oi7fic6pehcdkzony5bxhe',
        'apisecret': 'pquzpfrfz7zd2ylvtz3w5dtyse',
        'limit': 100,
        'offset': 0,
        'sort': '-quality',
    }
    # urlencode quotes with quote_plus by default. doseq=True expands the
    # ``embed`` list into repeated ``embed=...`` pairs, and the safe set keeps
    # the filter-expression punctuation unescaped.
    return '?' + urlencode(params, doseq=True, safe="():=,$@'")
249

350
FN_MATCHING_REGEX = r"\(function\(\){.*}\)\(\)"
51+
TOKEN_RENEW_SEC_OFFSET = 3
52+
TOKEN_RENEW_PRIORITY = 3
53+
TICKET_SCRAPING_PRIORITY = 1
54+
TICKET_SCRAPING_INTERVAL = 60
455

556
INJECTOR_LOCATION = "js/injector.js"
657
INJECTOR_HEADER_LOCATION = "js/injector-header.js"

ticketscraping/prepare_reese84token.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import os
2+
import re
3+
import json
4+
import requests
5+
import subprocess
6+
from . import constants
7+
8+
9+
def getReese84Token(timeout=30):
    """Generate a reese84 payload locally and exchange it for a token.

    Steps: download the anti-bot script, cut out the self-invoking function
    matched by ``constants.FN_MATCHING_REGEX``, place it between the injector
    header and injector scripts, run the result with the local ``node``
    binary, and POST the JSON it prints to
    ``constants.TOKEN_INTERROGATION_URL``.

    Args:
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error.

    Returns:
        The decoded JSON body of the token endpoint's response, or ``None``
        when the downloaded script does not contain the expected function.

    Raises:
        subprocess.CalledProcessError: If ``node`` exits with a non-zero
            status.
        ValueError: If ``node`` or the token endpoint returns non-JSON data.
        requests.RequestException: On network failure or timeout.
    """
    def readFileContentToString(filename):
        # The context manager closes the handle even if read() raises.
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()

    package_dir = os.path.dirname(__file__)

    # fetch the javascript that generates the reese84
    antibot_js_code_full = requests.get(
        constants.ANTIBOT_JS_CODE_URL, timeout=timeout).text

    # trim the code to the only function that is used
    match_obj = re.search(constants.FN_MATCHING_REGEX, antibot_js_code_full)
    if not match_obj:
        return None
    antibot_js_code_trim = match_obj.group(0)

    # inject the code into the javascript
    injector_js_code = readFileContentToString(
        os.path.join(package_dir, constants.INJECTOR_LOCATION))
    injector_header_js_code = readFileContentToString(
        os.path.join(package_dir, constants.INJECTOR_HEADER_LOCATION))
    runnable_js_code = (injector_header_js_code
                        + antibot_js_code_trim
                        + injector_js_code)

    # save the runnable js code
    # NOTE(review): RENNABLE_FILENAME is not among the constants visible in
    # this diff -- confirm it is defined (the name looks like a typo of
    # RUNNABLE_FILENAME).
    runnable_file_loc = os.path.join(package_dir, constants.RENNABLE_FILENAME)
    # Explicit UTF-8 so the downloaded script is written the same way on
    # every platform, whatever the locale's default encoding is.
    with open(runnable_file_loc, "w", encoding='utf-8') as runnable_file:
        runnable_file.write(runnable_js_code)

    # run the js code using local node.js; check=True surfaces a failing
    # script as CalledProcessError instead of a JSON error on empty output
    res = subprocess.run(
        ["node", runnable_file_loc], capture_output=True, check=True)

    # produce the reese84 object
    token = json.loads(res.stdout)

    # invoke the get token api to get the reese84 token
    token_json_res = requests.post(
        constants.TOKEN_INTERROGATION_URL,
        headers=constants.BASIC_REQ_HEADER,
        json=token,
        timeout=timeout)
    return token_json_res.json()

ticketscraping/scraping.py

Lines changed: 38 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,43 @@
11
import os
2-
import re
2+
import sched
3+
import time
34
import json
45
import requests
56
import subprocess
67
from . import constants
7-
8-
9-
def readFileContentToString(filename):
10-
f = open(filename, 'r')
11-
content = f.read()
12-
f.close()
13-
return content
14-
15-
def getReese84Token():
16-
# fetch the javascript that generates the reese84
17-
antibot_js_code_full = requests.get(constants.ANTIBOT_JS_CODE_URL).text
18-
19-
# trim the code to the function that is only used
20-
match_obj = re.search(constants.FN_MATCHING_REGEX, antibot_js_code_full)
21-
if not match_obj:
22-
return None
23-
start, end = match_obj.span()
24-
antibot_js_code_trim = antibot_js_code_full[start:end]
25-
26-
# inject the code to the javascript
27-
injector_js_code_loc = os.path.join(
28-
os.path.dirname(__file__), constants.INJECTOR_LOCATION)
29-
injector_header_js_code_loc = os.path.join(os.path.dirname(
30-
__file__), constants.INJECTOR_HEADER_LOCATION)
31-
injector_js_code, injector_header_js_code = readFileContentToString(
32-
injector_js_code_loc), readFileContentToString(injector_header_js_code_loc)
33-
runnable_js_code = injector_header_js_code + \
34-
antibot_js_code_trim + injector_js_code
35-
36-
# save the runnable js code
37-
runnable_file_loc = os.path.join(os.path.dirname(
38-
__file__), constants.RENNABLE_FILENAME)
39-
runnable_file = open(runnable_file_loc, "w")
40-
runnable_file.write(runnable_js_code)
41-
runnable_file.close()
42-
43-
# run the js code using local node.js
44-
res = subprocess.run(
45-
["node", runnable_file_loc], capture_output=True)
46-
token_str = res.stdout
47-
48-
# produce the reese84 object
49-
token = json.loads(token_str)
50-
print(token)
51-
52-
# invoke the get token api to get the reese84 token
53-
# TO-DO
54-
55-
56-
# invoke the top-picks api to get the tickets
57-
# TO-DO
8+
from threading import Semaphore
9+
from .prepare_reese84token import getReese84Token
10+
11+
s = sched.scheduler(time.time, time.sleep)
12+
token_semaphore = Semaphore(1)
13+
reese84_token = {}
14+
15+
16+
def obtainReese84Token(sch):
    """Refresh the shared reese84 token and queue the next refresh.

    On success the module-level ``reese84_token`` is replaced and the next
    refresh is queued ``renewInSec - constants.TOKEN_RENEW_SEC_OFFSET``
    seconds from now. When ``getReese84Token()`` returns ``None`` the
    previous token is kept and a retry is queued after a short delay.

    Args:
        sch: ``sched.scheduler`` on which the next run is queued.
    """
    global reese84_token
    retry_delay_sec = 10

    # ``with`` releases the semaphore even if the fetch raises.
    with token_semaphore:
        new_token = getReese84Token()
        if new_token is not None:
            reese84_token = new_token

    if new_token is None:
        # Indexing None below would raise TypeError and stop the scheduler;
        # keep the old token usable and try again shortly instead.
        print("failed to obtain a reese84 token; retrying")
        sch.enter(retry_delay_sec, constants.TOKEN_RENEW_PRIORITY,
                  obtainReese84Token, (sch,))
        return

    print(new_token)
    sch.enter(new_token['renewInSec'] - constants.TOKEN_RENEW_SEC_OFFSET,
              constants.TOKEN_RENEW_PRIORITY, obtainReese84Token, (sch,))
24+
25+
26+
def ticket_scraping(sch):
    """Fetch the top picks for ``constants.EVENT_ID`` once and re-queue itself.

    The response JSON is printed. A failed request or a non-JSON body is
    reported and skipped so the next run is still queued
    ``constants.TICKET_SCRAPING_INTERVAL`` seconds from now.

    Args:
        sch: ``sched.scheduler`` on which the next run is queued.
    """
    top_picks_url = (constants.get_top_picks_url(constants.EVENT_ID)
                     + constants.get_top_picks_query_params_str(2, (0, 200)))
    top_picks_header = constants.get_top_picks_header()

    try:
        # ``with`` releases the semaphore even if the request raises.
        with token_semaphore:
            res = requests.get(top_picks_url, headers=top_picks_header,
                               cookies=dict(reese84=reese84_token['token']),
                               timeout=30)
        print(res.json())
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON. One bad poll
        # must not end the polling loop.
        print(f"top-picks request failed: {err}")

    sch.enter(constants.TICKET_SCRAPING_INTERVAL,
              constants.TICKET_SCRAPING_PRIORITY, ticket_scraping, (sch,))
39+
40+
def start():
    """Run the token refresh and the scraper once each, then the scheduler.

    Both jobs queue their own next run on ``s``, so ``s.run()`` keeps
    finding pending events and this call blocks while they keep doing so.
    """
    for job in (obtainReese84Token, ticket_scraping):
        job(s)
    s.run()

tmtracker/urls.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@
1515
"""
1616
from django.contrib import admin
1717
from django.urls import path
18-
from ticketscraping.scraping import getReese84Token
18+
from ticketscraping.scraping import start
1919
from utils import get_db_handle
20+
from datetime import datetime
21+
import requests
2022

2123
get_db_handle('tickets')
print("=== database connection is established ===")
print(f"server started at {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")

# start() ends in a scheduler loop whose jobs keep re-queuing themselves, so
# calling it inline would block this module's import and the urlpatterns
# below would never be defined. Run it on a daemon thread instead.
import threading  # noqa: E402 - kept next to its only use

threading.Thread(target=start, name="ticket-scraper", daemon=True).start()
2427

2528
urlpatterns = [
2629
path('admin/', admin.site.urls),

0 commit comments

Comments
 (0)