Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add async task
  • Loading branch information
Jackiebibili committed Oct 17, 2022
commit cd4083601cf3bdaa030d80c3637098cbe94f42bd
10 changes: 8 additions & 2 deletions apps/storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,18 @@ def find_one_and_update__(coll: collection.Collection, filter: dict, update=dict
def find_one_and_delete__(coll: collection.Collection, filter: dict):
    """Atomically remove and return the first document matching *filter*."""
    return coll.find_one_and_delete(filter=filter)

def find_one__(coll: collection.Collection, filter: dict, projection):
return coll.find_one(filter=filter, projection=projection)
def find_one__(coll: collection.Collection, filter: dict, projection, **kwargs):
    """Fetch a single document matching *filter*, shaped by *projection*.

    Any extra keyword arguments (e.g. ``sort``) are forwarded to pymongo.
    """
    document = coll.find_one(filter=filter, projection=projection, **kwargs)
    return document

def find_many__(coll: collection.Collection, filter: dict, projection, **kwargs):
    """Return a cursor over documents matching *filter*, shaped by *projection*.

    Any extra keyword arguments (e.g. ``sort``) are forwarded to pymongo.
    """
    cursor = coll.find(filter=filter, projection=projection, **kwargs)
    return cursor

def count_docs__(coll: collection.Collection, filter: dict):
    """Count the documents in *coll* that match *filter*."""
    return coll.count_documents(filter)

def estimated_count_docs__(coll: collection.Collection):
    """Fast, metadata-based estimate of the total document count in *coll*."""
    estimate = coll.estimated_document_count()
    return estimate

def watch__(coll: collection.Collection, **kwargs):
    """Open a change stream on *coll*; *kwargs* are forwarded to pymongo."""
    stream = coll.watch(**kwargs)
    return stream

Expand Down
16 changes: 16 additions & 0 deletions apps/storage/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .storage import *
import pymongo

# find the max value in a collection
def find_max(collection_name, filter: dict, sort_key: str, db_name="tickets"):
    """Return the matching document with the largest *sort_key* value.

    Returns ``None`` when no document matches *filter*.
    """
    descending = [(sort_key, pymongo.DESCENDING)]
    return find_one(collection_name, filter, db_name=db_name, sort=descending)

# find the min value in a collection
def find_min(collection_name, filter: dict, sort_key: str, db_name="tickets"):
    """Return the matching document with the smallest *sort_key* value.

    Returns ``None`` when no document matches *filter*.
    """
    ascending = [(sort_key, pymongo.ASCENDING)]
    return find_one(collection_name, filter, db_name=db_name, sort=ascending)

def find_many_ascending_order(collection_name, filter: dict, sort_key: str, db_name="tickets"):
    """Return all matching documents sorted by *sort_key*, smallest first."""
    ascending = [(sort_key, pymongo.ASCENDING)]
    return find_many(collection_name, filter, db_name=db_name, sort=ascending)
16 changes: 14 additions & 2 deletions apps/storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,29 @@ def find_one_and_delete(collection_name, filter: dict, db_name="tickets"):
return find_one_and_delete__(coll, filter)

# find one
def find_one(collection_name, filter: dict, projection=None, db_name="tickets"):
# find one
def find_one(collection_name, filter: dict, projection=None, db_name="tickets", **kwargs):
    """Look up a single document in *collection_name* within *db_name*.

    Extra keyword arguments (e.g. ``sort``) are forwarded to pymongo.
    """
    coll = get_db_handle(db_name)[collection_name]
    return find_one__(coll, filter, projection, **kwargs)

# find many
# find many
def find_many(collection_name, filter: dict, projection=None, db_name="tickets", **kwargs):
    """Return every matching document as a fully materialized list.

    Extra keyword arguments (e.g. ``sort``) are forwarded to pymongo.
    """
    coll = get_db_handle(db_name)[collection_name]
    cursor = find_many__(coll, filter, projection, **kwargs)
    return list(cursor)

# count with filter
# count with filter
def count_docs(collection_name, filter: dict, db_name="tickets"):
    """Count the documents in *collection_name* matching *filter*."""
    coll = get_db_handle(db_name)[collection_name]
    return count_docs__(coll, filter)

# count all docs in a collection
# count all docs in a collection
def estimated_count_docs(collection_name, db_name="tickets"):
    """Fast, metadata-based estimate of the collection's total document count."""
    coll = get_db_handle(db_name)[collection_name]
    return estimated_count_docs__(coll)

# watch changes
def watch(collection_name, db_name="tickets", **kwargs):
db = get_db_handle(db_name)
Expand Down
5 changes: 5 additions & 0 deletions apps/ticketscraping/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ def get_top_picks_url(
"BEST_AVAILABLE_SEATS": "best-available-seats",
"BEST_HISTORY_SEATS": "best-history-seats"
}

# metric thresholds
PERCENT_OF_CHANGE = 0.5
PERCENTILE_HISTORY_PRICES = 0.5

def get_top_picks_header(): return {
**BASIC_REQ_HEADER,
"tmps-correlation-id": str(uuid4())
Expand Down
83 changes: 83 additions & 0 deletions apps/ticketscraping/tasks/asynchronous.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from ...storage.storage import count_docs
from ...storage.query import find_max, find_min, find_many_ascending_order
from ...ticketscraping import constants
from ..models.pick import Pick

# metric 1


def percent_of_change_metric(pick: Pick, scraping_id: str) -> float:
    """Metric 1: position of the pick's price within the historical price range.

    Computes ``(pick.price - min_price) / (max_price - min_price)`` over the
    best-history seats recorded for *scraping_id*.

    Raises:
        Exception: when there is no history data, the min/max prices are
            identical (degenerate range), or the result exceeds
            ``constants.PERCENT_OF_CHANGE``.
    """
    # Find the % of change
    max_seat = find_max(constants.DATABASE['BEST_HISTORY_SEATS'], {
        "scraping_id": scraping_id}, 'price')
    min_seat = find_min(constants.DATABASE['BEST_HISTORY_SEATS'], {
        "scraping_id": scraping_id}, 'price')

    # find_max/find_min delegate to find_one, which returns None when no
    # document matches - abort with a clear message instead of crashing
    # with AttributeError on None.get below.
    if max_seat is None or min_seat is None:
        raise Exception('no history seats data')

    max_price = max_seat.get('price', 0)
    min_price = min_seat.get('price', 0)

    # min and max are identical - abort
    if max_price == min_price:
        raise Exception('min and max prices are identical')

    percent_change = (pick.price - min_price) / (max_price - min_price)

    # price of change exceeds the metric value - abort
    if percent_change > constants.PERCENT_OF_CHANGE:
        raise Exception(
            f'price of change ({percent_change}) exceeds the metric value')

    return percent_change

# metric 2


def percentile_metric(pick: Pick, scraping_id: str) -> float:
    """Metric 2: the pick's price percentile among the history seats.

    ``rank`` is the number of history seats priced at or below the pick;
    the percentile is ``rank / total_count``.

    Raises:
        Exception: when there is no history data, or the percentile exceeds
            ``constants.PERCENTILE_HISTORY_PRICES``.
    """
    rank = count_docs(constants.DATABASE['BEST_HISTORY_SEATS'],
                      {"scraping_id": scraping_id, "price": {"$lte": pick.price}})
    total_count = count_docs(constants.DATABASE['BEST_HISTORY_SEATS'],
                             {"scraping_id": scraping_id})

    # no history seats data - abort
    if total_count == 0:
        raise Exception('no history seats data')

    percentile = rank / total_count

    # percentile of history prices exceeds the metric value - abort
    if percentile > constants.PERCENTILE_HISTORY_PRICES:
        # BUG FIX: the original string lacked the f prefix, so the literal
        # text "{percentile}" was emitted instead of the computed value.
        raise Exception(
            f'percentile of history prices ({percentile}) exceeds the metric value')

    return percentile


def get_exact_same_seats(pick: Pick, scraping_id: str):
    """History records for the exact same seat (section/row/columns), oldest first."""
    seat_filter = {
        "scraping_id": scraping_id,
        "section": pick.section,
        "row": pick.row,
        "seat_columns": pick.seat_columns,
    }
    return find_many_ascending_order(
        constants.DATABASE['BEST_HISTORY_SEATS'], seat_filter, 'last_modified')


def run_async_task(pick: Pick, scraping_id: str):
    """Analyze a newly scraped seat against the best-history seats.

    Runs both price metrics and fetches the price history of the exact same
    seat.  The metrics signal "do not alert" by raising; this task is
    best-effort, so any exception is logged and swallowed deliberately.
    """
    try:
        # Find the % of change
        percent_change = percent_of_change_metric(pick, scraping_id)
        # Find the percentile of the seat based on some criteria(e.g. rank or price).
        percentile = percentile_metric(pick, scraping_id)
        # If found the exact same seat based on(sec, row?, seat?), get the history price(s) of the seat.
        same_seats = get_exact_same_seats(pick, scraping_id)

        print(f"percent change: {percent_change*100}")
        print(f"percentile: {percentile*100}")
        # f prefixes dropped from the placeholder-free strings (F541); output unchanged.
        print("same seats in chronological order")
        print(f"new seat price: {pick.price}")
        print("history seat prices:")
        print([seat.get('price', -1) for seat in same_seats])

        # TODO
        # Alert the user based on alert conditions

    except Exception as ex:
        # Metrics raise to abort the analysis; log and continue.
        print(ex)
5 changes: 3 additions & 2 deletions apps/ticketscraping/tasks/periodic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from ...storage.storage import find_many, insert_many, delete_many
from ...ticketscraping import constants
from ..models.pick import Pick
from .asynchronous import run_async_task

def generate_picks_set_from_picks(picks):
def __helper(pick: dict):
Expand Down Expand Up @@ -65,7 +66,7 @@ def run_periodic_task(picks: dict, scraping_id: str):
# Save C to best_history_seats.
insert_history_seats(overwritten_seats)

# TODO
# Use D to invoke a handler to analyze them against the best_history_seats asynchronously.

for seat in new_seats:
run_async_task(seat, scraping_id)
pass