Add async task

Jackiebibili · Jackiebibili · commit cd4083601cf3 · 2022-10-17T13:17:57.000-05:00
diff --git a/apps/storage/base.py b/apps/storage/base.py
@@ -30,12 +30,18 @@ def find_one_and_update__(coll: collection.Collection, filter: dict, update=dict
 def find_one_and_delete__(coll: collection.Collection, filter: dict):
    return coll.find_one_and_delete(filter)
 
-def find_one__(coll: collection.Collection, filter: dict, projection):
-   return coll.find_one(filter=filter, projection=projection)
+def find_one__(coll: collection.Collection, filter: dict, projection, **kwargs):
+   return coll.find_one(filter=filter, projection=projection, **kwargs)
 
 def find_many__(coll: collection.Collection, filter: dict, projection, **kwargs):
    return coll.find(filter=filter, projection=projection, **kwargs)
 
+def count_docs__(coll: collection.Collection, filter: dict):
+   return coll.count_documents(filter=filter)
+
+def estimated_count_docs__(coll: collection.Collection):
+   return coll.estimated_document_count()
+
 def watch__(coll: collection.Collection, **kwargs):
    return coll.watch(**kwargs)
 
diff --git a/apps/storage/query.py b/apps/storage/query.py
@@ -0,0 +1,16 @@
+from .storage import *
+import pymongo
+
+# find the max value in a collection
+def find_max(collection_name, filter: dict, sort_key: str, db_name="tickets"):
+   sort_seq = [(sort_key, pymongo.DESCENDING)]
+   return find_one(collection_name, filter, db_name=db_name, sort=sort_seq)
+
+# find the min value in a collection
+def find_min(collection_name, filter: dict, sort_key: str, db_name="tickets"):
+   sort_seq = [(sort_key, pymongo.ASCENDING)]
+   return find_one(collection_name, filter, db_name=db_name, sort=sort_seq)
+
+def find_many_ascending_order(collection_name, filter: dict, sort_key: str, db_name="tickets"):
+   sort_seq = [(sort_key, pymongo.ASCENDING)]
+   return find_many(collection_name, filter, db_name=db_name, sort=sort_seq)
diff --git a/apps/storage/storage.py b/apps/storage/storage.py
@@ -51,17 +51,29 @@ def find_one_and_delete(collection_name, filter: dict, db_name="tickets"):
    return find_one_and_delete__(coll, filter)
 
 # find one
-def find_one(collection_name, filter: dict, projection=None, db_name="tickets"):
+def find_one(collection_name, filter: dict, projection=None, db_name="tickets", **kwargs):
    db = get_db_handle(db_name)
    coll = db[collection_name]
-   return find_one__(coll, filter, projection)
+   return find_one__(coll, filter, projection, **kwargs)
 
 # find many
 def find_many(collection_name, filter: dict, projection=None, db_name="tickets", **kwargs):
    db = get_db_handle(db_name)
    coll = db[collection_name]
    return list(find_many__(coll, filter, projection, **kwargs))
 
+# count with filter
+def count_docs(collection_name, filter: dict, db_name="tickets"):
+   db = get_db_handle(db_name)
+   coll = db[collection_name]
+   return count_docs__(coll, filter)
+
+# count all docs in a collection
+def estimated_count_docs(collection_name, db_name="tickets"):
+   db = get_db_handle(db_name)
+   coll = db[collection_name]
+   return estimated_count_docs__(coll)
+
 # watch changes
 def watch(collection_name, db_name="tickets", **kwargs):
    db = get_db_handle(db_name)
diff --git a/apps/ticketscraping/constants.py b/apps/ticketscraping/constants.py
@@ -17,6 +17,11 @@ def get_top_picks_url(
     "BEST_AVAILABLE_SEATS": "best-available-seats",
     "BEST_HISTORY_SEATS": "best-history-seats"
 }
+
+# metric thresholds
+PERCENT_OF_CHANGE = 0.5
+PERCENTILE_HISTORY_PRICES = 0.5
+
 def get_top_picks_header(): return {
     **BASIC_REQ_HEADER,
     "tmps-correlation-id": str(uuid4())
diff --git a/apps/ticketscraping/tasks/asynchronous.py b/apps/ticketscraping/tasks/asynchronous.py
@@ -0,0 +1,83 @@
+from ...storage.storage import count_docs
+from ...storage.query import find_max, find_min, find_many_ascending_order
+from ...ticketscraping import constants
+from ..models.pick import Pick
+
+# metric 1
+
+
+def percent_of_change_metric(pick: Pick, scraping_id: str) -> float:
+    # Find the % of change
+    max_seat = find_max(constants.DATABASE['BEST_HISTORY_SEATS'], {
+        "scraping_id": scraping_id}, 'price')
+    min_seat = find_min(constants.DATABASE['BEST_HISTORY_SEATS'], {
+        "scraping_id": scraping_id}, 'price')
+    max_price = max_seat.get('price', 0)
+    min_price = min_seat.get('price', 0)
+
+    # min and max are identical - abort
+    if max_price == min_price:
+        raise Exception('min and max prices are identical')
+
+    percent_change = (pick.price - min_price) / (max_price - min_price)
+
+    # price of change exceeds the metric value - abort
+    if percent_change > constants.PERCENT_OF_CHANGE:
+        raise Exception(
+            f'price of change ({percent_change}) exceeds the metric value')
+
+    return percent_change
+
+# metric 2
+
+
+def percentile_metric(pick: Pick, scraping_id: str) -> float:
+    rank = count_docs(constants.DATABASE['BEST_HISTORY_SEATS'],
+                      {"scraping_id": scraping_id, "price": {"$lte": pick.price}})
+    total_count = count_docs(constants.DATABASE['BEST_HISTORY_SEATS'],
+                             {
+        "scraping_id": scraping_id})
+
+    # no history seats data - abort
+    if total_count == 0:
+        raise Exception('no history seats data')
+
+    percentile = rank / total_count
+
+    # percentile of history prices exceeds the metric value - abort
+    if percentile > constants.PERCENTILE_HISTORY_PRICES:
+        raise Exception(
+            'percentile of history prices ({percentile}) exceeds the metric value')
+
+    return percentile
+
+
+def get_exact_same_seats(pick: Pick, scraping_id: str):
+    return find_many_ascending_order(constants.DATABASE['BEST_HISTORY_SEATS'],
+                                     {"scraping_id": scraping_id, "section": pick.section,
+                                      "row": pick.row, "seat_columns": pick.seat_columns},
+                                     'last_modified')
+
+
+def run_async_task(pick: Pick, scraping_id: str):
+    try:
+        # Find the % of change
+        percent_change = percent_of_change_metric(pick, scraping_id)
+        # Find the percentile of the seat based on some criteria(e.g. rank or price).
+        percentile = percentile_metric(pick, scraping_id)
+        # If found the exact same seat based on(sec, row?, seat?), get the history price(s) of the seat.
+        same_seats = get_exact_same_seats(pick, scraping_id)
+
+        print(f"percent change: {percent_change*100}")
+        print(f"percentile: {percentile*100}")
+        print(f"same seats in chronological order")
+        print(f"new seat price: {pick.price}")
+        print(f"history seat prices:")
+        print(list(map(lambda seat: seat.get('price', -1), same_seats)))
+
+        # TODO
+        # Alert the user based on alert conditions
+
+    except Exception as ex:
+        print(ex)
+    pass
diff --git a/apps/ticketscraping/tasks/periodic.py b/apps/ticketscraping/tasks/periodic.py
@@ -1,6 +1,7 @@
 from ...storage.storage import find_many, insert_many, delete_many
 from ...ticketscraping import constants
 from ..models.pick import Pick
+from .asynchronous import run_async_task
 
 def generate_picks_set_from_picks(picks):
    def __helper(pick: dict):
@@ -65,7 +66,7 @@ def run_periodic_task(picks: dict, scraping_id: str):
    # Save C to best_history_seats.
    insert_history_seats(overwritten_seats)
 
-   # TODO
    # Use D to invoke a handler to analyze them against the best_history_seats asynchronously.
-   
+   for seat in new_seats:
+      run_async_task(seat, scraping_id)
    pass