From d1bca864c33e6e4bda07c34063aae7911a84dca4 Mon Sep 17 00:00:00 2001 From: Jack Li Date: Sun, 16 Oct 2022 19:00:19 -0500 Subject: [PATCH 1/2] Resolve issue 8 due to lack of attributes --- apps/ticketscraping/scraping.py | 2 +- apps/ticketscraping/seat_analysis.py | 132 +++++++++++++++++---------- 2 files changed, 87 insertions(+), 47 deletions(-) diff --git a/apps/ticketscraping/scraping.py b/apps/ticketscraping/scraping.py index 0b18d5c..18403e4 100644 --- a/apps/ticketscraping/scraping.py +++ b/apps/ticketscraping/scraping.py @@ -87,7 +87,7 @@ def ticket_scraping(self): cookies=dict(reese84=self.token_gen.reese84_token['token'])) # print(res.json()) res_obj = res.json() - store_seats(res_obj, {'subscribe_req_id': self.subscribe_id}) + store_seats(res_obj, self.subscribe_id) print("Got the ticket info from TM. /", res.status_code) self.scheduler.enter(constants.TICKET_SCRAPING_INTERVAL, constants.TICKET_SCRAPING_PRIORITY, self.ticket_scraping) diff --git a/apps/ticketscraping/seat_analysis.py b/apps/ticketscraping/seat_analysis.py index 9be47b4..2acb7c8 100644 --- a/apps/ticketscraping/seat_analysis.py +++ b/apps/ticketscraping/seat_analysis.py @@ -3,7 +3,7 @@ from ..ticketscraping import constants -def store_seats(data, scheduler_config): +def store_seats(data, subscriber_id): # prune top-picks data structure pruned_picks = prune_pick_attributes(data) @@ -12,7 +12,7 @@ def store_seats(data, scheduler_config): append_scraping_config_ref, map_prices_to_seats, remove_embedded_field - ], pruned_picks, scheduler_config) + ], pruned_picks, subscriber_id) # store in db # print(res) @@ -28,61 +28,98 @@ def pipe(fns: list, *args): out = fn(out) return out +def get_value_from_map(map: dict, *args, **kwargs): + # input validation + if type(map) is not dict: + return kwargs.get('default', None) + res = kwargs.get('default', None) + for attr in args: + res = map.get(attr) + if res is not None: + break + return res + +def get_value_from_nested_map(map: dict, *args, 
**kwargs): + # input validation + if type(map) is not dict: + return kwargs.get('default', None) + res = None + m = map + count = 0 + for attr in args: + res = m.get(attr) + count += 1 + if res is None: + break + elif type(res) is dict: + m = res + else: + break + return res if res is not None and count == len(args) else kwargs.get('default', None) + +def get_fn_return(fn, *args, **kwargs): + res = kwargs.get('default', None) + try: + res = fn(*args) + except: + pass + finally: + return res def prune_pick_attributes(data): - def prune_pick_offer_attributes(pick): + def prune_pick_offer_attributes(pick: dict): return { - 'type': pick['type'], - 'selection': pick['selection'], - 'quality': pick['quality'], - 'section': pick['section'], - 'row': pick['row'], - 'offerGroups': pick['offerGroups'], - 'area': pick['area'], - 'maxQuantity': pick['maxQuantity'], + 'type': get_value_from_map(pick, 'type'), + 'selection': get_value_from_map(pick, 'selection'), + 'quality': get_value_from_map(pick, 'quality'), + 'section': get_value_from_map(pick, 'section'), + 'row': get_value_from_map(pick, 'row'), + 'offerGroups': get_value_from_map(pick, 'offerGroups', 'offers'), + 'area': get_value_from_map(pick, 'area'), + 'maxQuantity': get_value_from_map(pick, 'maxQuantity'), } - def prune_pick_embedded_attributes(embedded): + def prune_pick_embedded_attributes(embedded: dict): def prune_pick_embedded_offer_attributes(item): return { - 'expired_date': parser.parse(item['meta']['expires']), - 'offerId': item['offerId'], - 'rank': item['rank'], - 'online': item['online'], - 'protected': item['protected'], - 'rollup': item['rollup'], - 'inventoryType': item['inventoryType'], - 'offerType': item['offerType'], - 'currency': item['currency'], - 'listPrice': item['listPrice'], - 'faceValue': item['faceValue'], - 'totalPrice': item['totalPrice'], - 'noChargesPrice': item['noChargesPrice'], - # 'listingId': item['listingId'], - # 'listingVersionId': item['listingVersionId'], - # 'charges': 
item['charges'], - # 'sellableQuantities': item['sellableQuantities'], - # 'section': item['section'], - # 'row': item['row'], - # 'seatFrom': item['seatFrom'], - # 'seatTo': item['seatTo'], - # 'ticketTypeId': item['ticketTypeId'] + 'expired_date': get_fn_return(parser.parse, get_value_from_nested_map(item, 'meta', 'expires'), default=None), + 'offerId': get_value_from_map(item, 'offerId'), + 'rank': get_value_from_map(item, 'rank'), + 'online': get_value_from_map(item, 'online'), + 'protected': get_value_from_map(item, 'protected'), + 'rollup': get_value_from_map(item, 'rollup'), + 'inventoryType': get_value_from_map(item, 'inventoryType'), + 'offerType': get_value_from_map(item, 'offerType'), + 'currency': get_value_from_map(item, 'currency'), + 'listPrice': get_value_from_map(item, 'listPrice'), + 'faceValue': get_value_from_map(item, 'faceValue'), + 'totalPrice': get_value_from_map(item, 'totalPrice'), + 'noChargesPrice': get_value_from_map(item, 'noChargesPrice'), + # 'listingId': get_value_from_map(item, 'listingId'), + # 'listingVersionId': get_value_from_map(item, 'listingVersionId'), + # 'charges': get_value_from_map(item, 'charges'), + # 'sellableQuantities': get_value_from_map(item, 'sellableQuantities'), + # 'section': get_value_from_map(item, 'section'), + # 'row': get_value_from_map(item, 'row'), + # 'seatFrom': get_value_from_map(item, 'seatFrom'), + # 'seatTo': get_value_from_map(item, 'seatTo'), + # 'ticketTypeId': get_value_from_map(item, 'ticketTypeId') } return { - 'offer': list(map(prune_pick_embedded_offer_attributes, embedded['offer'])) + 'offer': list(map(prune_pick_embedded_offer_attributes, get_value_from_map(embedded, 'offer', default=dict()))) } return { - 'expired_date': parser.parse(data['meta']['expires']), - 'eventId': data['eventId'], - 'offset': data['offset'], - 'total': data['total'], - 'picks': list(map(prune_pick_offer_attributes, data['picks'])), - '_embedded': prune_pick_embedded_attributes(data['_embedded']) + 
'expired_date': get_fn_return(parser.parse, get_value_from_nested_map(data, 'meta', 'expires'), default=None), + 'eventId': get_value_from_map(data, 'eventId'), + 'offset': get_value_from_map(data, 'offset'), + 'total': get_value_from_map(data, 'total'), + 'picks': list(map(prune_pick_offer_attributes, get_value_from_map(data, 'picks', default=dict()))), + '_embedded': prune_pick_embedded_attributes(get_value_from_map(data, '_embedded', default=dict())) } -def append_scraping_config_ref(data, scheduler_config): - data['scraping_config_ref'] = scheduler_config +def append_scraping_config_ref(data, config_id): + data['scraping_config_ref'] = config_id return data @@ -90,10 +127,12 @@ def map_prices_to_seats(data): def map_prices_to_seat_helper(offer_table: dict): def __map_prices_to_seat_helper(pick): offerGroups = pick['offerGroups'] + if offerGroups is None or len(offerGroups) == 0: + return {'offer_available': False} offerGroup = offerGroups[0] - offerIds = offerGroup['offers'] - offerSeatCols = offerGroup['seats'] - if len(offerGroups) == 0 or len(offerIds) == 0: + offerIds = get_value_from_map(offerGroup, 'offers', default=[offerGroup]) + offerSeatCols = get_value_from_map(offerGroup, 'seats') + if len(offerIds) == 0: return {'offer_available': False} offerId = offerIds[0] offerObj = offer_table.get(offerId) @@ -101,6 +140,7 @@ def __map_prices_to_seat_helper(pick): del res['offerGroups'] return res return __map_prices_to_seat_helper + offer_dict = {offer['offerId']: offer for offer in data['_embedded']['offer']} picks_list = list( map(map_prices_to_seat_helper(offer_dict), data['picks'])) From 016c2ce77383e0c2a680e9bf5a2f50e2d72cc031 Mon Sep 17 00:00:00 2001 From: Jack Li Date: Sun, 16 Oct 2022 22:26:04 -0500 Subject: [PATCH 2/2] Add periodic task --- apps/storage/storage.py | 4 +- apps/ticketscraping/constants.py | 6 ++- apps/ticketscraping/models/pick.py | 32 ++++++++++++ apps/ticketscraping/scraping.py | 15 ++++-- apps/ticketscraping/seat_analysis.py | 14 
+++--- apps/ticketscraping/tasks/periodic.py | 71 +++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 apps/ticketscraping/models/pick.py create mode 100644 apps/ticketscraping/tasks/periodic.py diff --git a/apps/storage/storage.py b/apps/storage/storage.py index f1f3a95..ef1d48d 100644 --- a/apps/storage/storage.py +++ b/apps/storage/storage.py @@ -10,7 +10,9 @@ def insert_one(collection_name, doc: dict, db_name="tickets"): return insert_one__(coll, doc) # insert many -def insert_many(collection_name, docs: list, db_name="tickets"): +def insert_many(collection_name, docs: list[dict], db_name="tickets"): + if len(docs) == 0: + return True db = get_db_handle(db_name) coll = db[collection_name] # additional attributes diff --git a/apps/ticketscraping/constants.py b/apps/ticketscraping/constants.py index 80ea73f..87d60d7 100644 --- a/apps/ticketscraping/constants.py +++ b/apps/ticketscraping/constants.py @@ -13,7 +13,9 @@ def get_top_picks_url( "referer": "https://www.ticketmaster.com/"} DATABASE = { "EVENTS": "events", - "TOP_PICKS": "top-picks" + "TOP_PICKS": "top-picks", + "BEST_AVAILABLE_SEATS": "best-available-seats", + "BEST_HISTORY_SEATS": "best-history-seats" } def get_top_picks_header(): return { **BASIC_REQ_HEADER, @@ -31,7 +33,7 @@ def get_top_picks_query_params(qty, priceInterval): return { 'embed': ['area', 'offer', 'description'], 'apikey': 'b462oi7fic6pehcdkzony5bxhe', 'apisecret': 'pquzpfrfz7zd2ylvtz3w5dtyse', - 'limit': 25, + 'limit': 100, 'offset': 0, 'sort': '-quality', } diff --git a/apps/ticketscraping/models/pick.py b/apps/ticketscraping/models/pick.py new file mode 100644 index 0000000..22c4678 --- /dev/null +++ b/apps/ticketscraping/models/pick.py @@ -0,0 +1,32 @@ +class Pick(): + def __init__(self, type, selection, quality, section, row, area, maxQuantity, offer, seat_columns, _id=None, scraping_id=None): + self._id = _id + self.scraping_id = scraping_id + self.type = type + self.selection = 
selection + self.quality = quality + self.section = section + self.row = row + self.area = area + self.maxQuantity = maxQuantity + self.offer = offer + self.price = offer.get('listPrice') + self.seat_columns = seat_columns + + def setScrapingId(self, scraping_id: str): + self.scraping_id = scraping_id + + def __eq__(self, other): + return (self.section == other.section and self.row == other.row and + ((type(self.seat_columns) is list and len( + self.seat_columns) > 0 and type(other.seat_columns) is list and len( + other.seat_columns) > 0 and self.seat_columns[0] == other.seat_columns[0]) or + (self.seat_columns is None and other.seat_columns is None)) and + self.price == other.price) + + def __hash__(self): + return hash((self.section, + self.row, + self.seat_columns[0] if type(self.seat_columns) is list and len( + self.seat_columns) > 0 else None, + self.price)) diff --git a/apps/ticketscraping/scraping.py b/apps/ticketscraping/scraping.py index 18403e4..2a7efed 100644 --- a/apps/ticketscraping/scraping.py +++ b/apps/ticketscraping/scraping.py @@ -8,7 +8,8 @@ from threading import Semaphore from .prepare_reese84token import getReese84Token from ..storage.storage import * -from .seat_analysis import store_seats +from .seat_analysis import format_seats +from .tasks.periodic import run_periodic_task class Reese84TokenUpdating(): def __init__(self): @@ -75,10 +76,11 @@ def flag_for_termination(self): def ticket_scraping(self): if self.token_gen.token_semaphore._value <= 0: - # retry after a delay + # phase: retry after a delay self.scheduler.enter(constants.TICKET_SCRAPING_TOKEN_AWAIT_MAX_INTERVAL, constants.TICKET_SCRAPING_PRIORITY, self.ticket_scraping) return + # scrape the top-picks from ticketmaster top_picks_url = constants.get_top_picks_url(self.event_id) top_picks_q_params = constants.get_top_picks_query_params( self.num_seats, self.price_range) @@ -86,8 +88,13 @@ def ticket_scraping(self): res = requests.get(top_picks_url, headers=top_picks_header, 
params=top_picks_q_params, cookies=dict(reese84=self.token_gen.reese84_token['token'])) # print(res.json()) - res_obj = res.json() - store_seats(res_obj, self.subscribe_id) + + # prune and format the received picks + picks_obj = format_seats(res.json(), self.subscribe_id) + + # periodic task: update collections best_available_seats and best_history_seats + run_periodic_task(picks_obj, self.subscribe_id) + print("Got the ticket info from TM. /", res.status_code) self.scheduler.enter(constants.TICKET_SCRAPING_INTERVAL, constants.TICKET_SCRAPING_PRIORITY, self.ticket_scraping) diff --git a/apps/ticketscraping/seat_analysis.py b/apps/ticketscraping/seat_analysis.py index 2acb7c8..8c1494f 100644 --- a/apps/ticketscraping/seat_analysis.py +++ b/apps/ticketscraping/seat_analysis.py @@ -1,9 +1,9 @@ from dateutil import parser -from ..storage.storage import insert_one -from ..ticketscraping import constants +# from ..storage.storage import insert_one +# from ..ticketscraping import constants -def store_seats(data, subscriber_id): +def format_seats(data, subscriber_id): # prune top-picks data structure pruned_picks = prune_pick_attributes(data) @@ -14,10 +14,10 @@ def store_seats(data, subscriber_id): remove_embedded_field ], pruned_picks, subscriber_id) - # store in db - # print(res) - insert_one(constants.DATABASE['TOP_PICKS'], res) - pass + # # store in db + # # print(res) + # insert_one(constants.DATABASE['TOP_PICKS'], res) + return res def pipe(fns: list, *args): out = args diff --git a/apps/ticketscraping/tasks/periodic.py b/apps/ticketscraping/tasks/periodic.py new file mode 100644 index 0000000..d0ce8ac --- /dev/null +++ b/apps/ticketscraping/tasks/periodic.py @@ -0,0 +1,71 @@ +from ...storage.storage import find_many, insert_many, delete_many +from ...ticketscraping import constants +from ..models.pick import Pick + +def generate_picks_set_from_picks(picks): + def __helper(pick: dict): + return Pick(_id=pick.get('_id'), + scraping_id=pick.get('scraping_id'), + 
type=pick['type'], + selection=pick['selection'], + quality=pick['quality'], + section=pick['section'], + row=pick['row'], + area=pick['area'], + maxQuantity=pick['maxQuantity'], + offer=pick['offer'], + seat_columns=pick['seat_columns']) + + if type(picks) is dict: + return set(map(__helper, picks['picks'])) + elif type(picks) is list: + return set(map(__helper, picks)) + else: + raise Exception('argument type error') + +def get_current_best_available(scraping_id: str): + return find_many(constants.DATABASE['BEST_AVAILABLE_SEATS'], {"scraping_id": scraping_id}) +def remove_best_seats(seats: set[Pick]): + ids = [] + for seat in seats: + ids.append(seat._id) + return delete_many(constants.DATABASE['BEST_AVAILABLE_SEATS'], {"_id" : {"$in": ids}}) +def insert_best_seats(seats: set[Pick], scraping_id: str): + for seat in seats: + seat.setScrapingId(scraping_id) + return insert_many(constants.DATABASE['BEST_AVAILABLE_SEATS'], list(map(lambda seat: vars(seat), seats))) +def insert_history_seats(seats: set[Pick]): + return insert_many(constants.DATABASE['BEST_HISTORY_SEATS'], list(map(lambda seat: vars(seat), seats))) + + + +def run_periodic_task(picks: dict, scraping_id: str): + # B the list of new best available seats + new_best_avail = generate_picks_set_from_picks(picks) + # A be the list of current best available seats + cur_best_avail = generate_picks_set_from_picks(get_current_best_available(scraping_id)) + + # Compute C := A-B which is the seats + overwritten_seats = cur_best_avail - new_best_avail + + # Compute D := B-A which is the new seats + new_seats = new_best_avail - cur_best_avail + + print(f"size of B is {len(new_best_avail)}") + print(f"size of A is {len(cur_best_avail)}") + print(f"size of C is {len(overwritten_seats)}") + print(f"size of D is {len(new_seats)}") + + # Remove C from best_available_seats + remove_best_seats(overwritten_seats) + + # Insert D to best_available_seats + insert_best_seats(new_seats, scraping_id) + + # Save C to 
best_history_seats. + insert_history_seats(overwritten_seats) + + # TODO + # Use D to invoke a handler to analyze them against the best_history_seats asynchronously. + + pass \ No newline at end of file