diff --git a/app/location/__init__.py b/app/location/__init__.py index 1da5e9e5..f46cead3 100644 --- a/app/location/__init__.py +++ b/app/location/__init__.py @@ -2,30 +2,163 @@ from ..coordinates import Coordinates from ..utils import countries from ..utils.populations import country_population +from abc import abstractmethod +class Builder: + def build_base(self) -> None: + pass + def build_stat(self) -> None: + pass + def build_geo(self) -> None: + pass + def build_timelines(self) -> None: + pass +class Location_Builder(Builder): + def __init__(self, basinfo, statistic, geoinfo) -> None: + self.reset() + + def reset(self) -> None: + self._location = Location() + + def locate(self) -> Location: + location = self._location + self.reset() + return location + + def build_base(self, baseinfo) -> None: + self.baseinfo = baseinfo + def build_stat(self, statistic) -> None: + self.statistic = statistic + def build_geo(self, geoinfo) -> None: + self.geoinfo = geoinfo + +class TimelinedLocation_Builder(Builder): + def __init__(self, baseinfo, statistic, geoinfo, timelines) -> None: + self.reset() + + def reset(self) -> None: + self._location = TimelinedLocation() + + def locate(self) -> TimelinedLocation: + timelined_location = self._timelined_location + self.reset() + return timelined_location + def build_base(self, baseinfo) -> None: + self.baseinfo = baseinfo + def build_stat(self, statistic) -> None: + self.statistic = statistic + def build_geo(self, geoinfo) -> None: + self.geoinfo = geoinfo + def build_timelines(self, timelines) -> None: + self.timelines = timelines + +class Location: + + def __init__(self): + self.id = None + self.last_updated = None + self.country_code = None + self.country_population = None + self.serialize = None + self.geoinfo = None + self.last_updated = None + self.statistic = None + + def set_base(self, id, last_updated, serialize) -> None: + self.id = id + self.last_updated = last_updated + self.serialize = serialize + def set_geoinfo(self, geoinfo): + self._geoinfo = geoinfo + def set_statistic(self, statistic): + self._statistic = statistic + +class TimelinedLocation: + + def set_base(self, id, last_updated, serialize) -> None: + self.id = id + self.last_updated = last_updated + self.serialize = serialize + def set_geoinfo(self, geoinfo): + self._geoinfo = geoinfo + def set_timelines(self, timelines): + self._timelines = timelines + +class Director: + def __init__(self) -> None: + self._builder = None + + def set_builder(self, builder: Builder) -> None: + self._builder = builder + + def build_location(self) -> None: + location = Location() + self.builder.build_location() + self.builder.build_base() + self.builder.build_stat() + self.builder.build_geo() + + def build_timelinedlocation(self) -> None: + self.builder.build_location() + self.builder.build_base() + self.builder.build_stat() + self.builder.build_geo() + self.builder.build_timelines() + +class Stastistic: + def __init__(self, confirmed, deaths, recovered): + self.confirmed = confirmed + self.deaths = deaths + self.recovered = recovered + +class GeoInfo: + def __init__(self, country, province, coorinates): + self.country = country + self.province = province + self.coordinates = coorinates + self.country_code = (countries.country_code(self.geoinfo.country) or countries.DEFAULT_COUNTRY_CODE).upper() + self.country_population = country_population(self.country_code) + +class BaseInfo: + def __init__(self, id, last_updated, serialize): + self.id = id + self.last_updated = last_updated + self.serialize = { + # 
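# A minimal runnable sketch of what the Builder / Director wiring above appears
# to intend: as written, Location_Builder discards the parts it is given
# (build_base and friends store onto the builder, never onto the product) and
# Director calls self.builder and build_location(), neither of which exists.
# The names LocationBuilder / Director follow the spellings imported by
# app/location/csbs.py; the small _SketchLocation product is a stand-in used
# only for illustration, not the project's Location class.
from abc import ABC, abstractmethod


class _SketchLocation:
    """Stand-in product assembled by the builder (illustrative only)."""

    def __init__(self):
        self.baseinfo = None
        self.statistic = None
        self.geoinfo = None


class Builder(ABC):
    @abstractmethod
    def build_base(self, baseinfo): ...

    @abstractmethod
    def build_stat(self, statistic): ...

    @abstractmethod
    def build_geo(self, geoinfo): ...


class LocationBuilder(Builder):
    def __init__(self):
        self.reset()

    def reset(self):
        self._location = _SketchLocation()

    def build_base(self, baseinfo):
        self._location.baseinfo = baseinfo

    def build_stat(self, statistic):
        self._location.statistic = statistic

    def build_geo(self, geoinfo):
        self._location.geoinfo = geoinfo

    def locate(self):
        location = self._location
        self.reset()
        return location


class Director:
    def __init__(self):
        self._builder = None

    def set_builder(self, builder: Builder):
        self._builder = builder  # expects a builder instance, not the class

    def build_location(self, baseinfo, statistic, geoinfo):
        self._builder.build_base(baseinfo)
        self._builder.build_stat(statistic)
        self._builder.build_geo(geoinfo)
        return self._builder.locate()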
General info. + "id": self.id, + "country": self.geoinfo.country, + "country_code": self.country_code, + "country_population": self.country_population, + "province": self.geoinfo.province, + # Coordinates. + "coordinates": self.geoinfo.coordinates.serialize(), + # Last updated. + "last_updated": self.last_updated, + # Latest data (statistics). + "latest": { + "confirmed": self.statistic.confirmed, + "deaths": self.statistic.deaths, + "recovered": self.statistic.recovered, + }, + } # pylint: disable=redefined-builtin,invalid-name -class Location: # pylint: disable=too-many-instance-attributes +'''class Location: # pylint: disable=too-many-instance-attributes """ A location in the world affected by the coronavirus. """ def __init__( - self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered, + self, id, geoinfo, last_updated, statistic ): # pylint: disable=too-many-arguments # General info. self.id = id - self.country = country.strip() - self.province = province.strip() - self.coordinates = coordinates - + self.geoinfo = geoinfo # Last update. self.last_updated = last_updated # Statistics. - self.confirmed = confirmed - self.deaths = deaths - self.recovered = recovered + self.statistic = statistic @property def country_code(self): @@ -35,7 +168,7 @@ def country_code(self): :returns: The country code. :rtype: str """ - return (countries.country_code(self.country) or countries.DEFAULT_COUNTRY_CODE).upper() + return (countries.country_code(self.geoinfo.country) or countries.DEFAULT_COUNTRY_CODE).upper() @property def country_population(self): @@ -57,19 +190,19 @@ def serialize(self): return { # General info. "id": self.id, - "country": self.country, + "country": self.geoinfo.country, "country_code": self.country_code, "country_population": self.country_population, - "province": self.province, + "province": self.geoinfo.province, # Coordinates. - "coordinates": self.coordinates.serialize(), + "coordinates": self.geoinfo.coordinates.serialize(), # Last updated. "last_updated": self.last_updated, # Latest data (statistics). "latest": { - "confirmed": self.confirmed, - "deaths": self.deaths, - "recovered": self.recovered, + "confirmed": self.statistic.confirmed, + "deaths": self.statistic.deaths, + "recovered": self.statistic.recovered, }, } @@ -80,13 +213,11 @@ class TimelinedLocation(Location): """ # pylint: disable=too-many-arguments - def __init__(self, id, country, province, coordinates, last_updated, timelines): + def __init__(self, id, geoinfo, last_updated, timelines): super().__init__( # General info. id, - country, - province, - coordinates, + geoinfo, last_updated, # Statistics (retrieve latest from timelines). confirmed=timelines.get("confirmed").latest or 0, @@ -122,3 +253,4 @@ def serialize(self, timelines=False): # Return the serialized location. return serialized +''' \ No newline at end of file diff --git a/app/location/csbs.py b/app/location/csbs.py index 649e8b22..43710aa2 100644 --- a/app/location/csbs.py +++ b/app/location/csbs.py @@ -1,29 +1,20 @@ """app.locations.csbs.py""" -from . import Location +from . import Director, LocationBuilder, BaseInfo, GeoInfo, Statistic -class CSBSLocation(Location): +class CSBSLocation: """ A CSBS (county) location. """ - # pylint: disable=too-many-arguments,redefined-builtin def __init__(self, id, state, county, coordinates, last_updated, confirmed, deaths): - super().__init__( - # General info. - id, - "US", - state, - coordinates, - last_updated, - # Statistics. 
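# A hedged sketch of the three value objects introduced in this hunk, with the
# apparent slips corrected: "Stastistic" -> Statistic, "coorinates" ->
# coordinates, and GeoInfo reading self.country rather than self.geoinfo.country
# (GeoInfo has no geoinfo attribute). BaseInfo takes only id and last_updated,
# matching how csbs.py and nyt.py construct it; countries and
# country_population are the helpers already imported at the top of this
# module, and the serialized view is assumed to be produced by the assembled
# location rather than stored on BaseInfo.
from ..utils import countries
from ..utils.populations import country_population


class Statistic:
    def __init__(self, confirmed, deaths, recovered):
        self.confirmed = confirmed
        self.deaths = deaths
        self.recovered = recovered


class GeoInfo:
    def __init__(self, country, province, coordinates):
        self.country = country
        self.province = province
        self.coordinates = coordinates
        self.country_code = (
            countries.country_code(self.country) or countries.DEFAULT_COUNTRY_CODE
        ).upper()
        self.country_population = country_population(self.country_code)


class BaseInfo:
    def __init__(self, id, last_updated):  # pylint: disable=redefined-builtin
        self.id = id
        self.last_updated = last_updated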
- confirmed=confirmed, - deaths=deaths, - recovered=0, - ) - - self.state = state - self.county = county + director = Director() + baseinfo = BaseInfo(id=id,last_updated=last_updated) + geoinfo = GeoInfo(county="US", province=state, coordinates=coordinates) + statistic = Statistic(confirmed=confirmed, deaths=deaths, recovered=0) + locationBuilder = LocationBuilder(baseinfo=baseinfo, statistic=statistic, geoinfo=geoinfo) + director.set_builder(LocationBuilder) + csbs = director.build_location() def serialize(self, timelines=False): # pylint: disable=arguments-differ,unused-argument """ diff --git a/app/location/nyt.py b/app/location/nyt.py index ad92212e..e3b85eb7 100644 --- a/app/location/nyt.py +++ b/app/location/nyt.py @@ -1,5 +1,5 @@ """app.locations.nyt.py""" -from . import TimelinedLocation +from . import Director, TimelinedLocationBuilder, BaseInfo, GeoInfo, Statistic class NYTLocation(TimelinedLocation): @@ -9,10 +9,13 @@ class NYTLocation(TimelinedLocation): # pylint: disable=too-many-arguments,redefined-builtin def __init__(self, id, state, county, coordinates, last_updated, timelines): - super().__init__(id, "US", state, coordinates, last_updated, timelines) - self.state = state - self.county = county + director = Director() + baseinfo = BaseInfo(id=id,last_updated=last_updated) + geoinfo = GeoInfo(county="US", province=state, coordinates=coordinates) + locationBuilder = TimelinedLocationBuilder(baseinfo=baseinfo, geoinfo=geoinfo, timelines=timelines) + director.set_builder(TimelinedLocationBuilder) + nyt = director.build_location() def serialize(self, timelines=False): # pylint: disable=arguments-differ,unused-argument """ diff --git a/app/services/location/__init__.py b/app/services/location/__init__.py index 6d292b54..e7ec5859 100644 --- a/app/services/location/__init__.py +++ b/app/services/location/__init__.py @@ -1,28 +1,20 @@ """app.services.location""" -from abc import ABC, abstractmethod - -class LocationService(ABC): +class LocationService: """ Service for retrieving locations. """ - @abstractmethod async def get_all(self): - """ - Gets and returns all of the locations. + # Get the locations. + locations = await get_locations() + return locations - :returns: The locations. - :rtype: List[Location] - """ - raise NotImplementedError + async def get(self, loc_id): # pylint: disable=arguments-differ + # Get location at the index equal to the provided id. + locations = await self.get_all() + return locations[loc_id] @abstractmethod - async def get(self, id): # pylint: disable=redefined-builtin,invalid-name - """ - Gets and returns location with the provided id. - - :returns: The location. - :rtype: Location - """ - raise NotImplementedError + async def get_locations(): + raise NotImplementedError \ No newline at end of file diff --git a/app/services/location/csbs.py b/app/services/location/csbs.py index 444ebad6..015b3f88 100644 --- a/app/services/location/csbs.py +++ b/app/services/location/csbs.py @@ -19,84 +19,74 @@ class CSBSLocationService(LocationService): """ Service for retrieving locations from csbs """ - - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to the provided id. 
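# In the CSBSLocation.__init__ above, the assembled object is bound to a local
# name (csbs) and immediately discarded, GeoInfo is given county="US" where
# country="US" is meant, and Director.set_builder receives the class rather
# than an instance. A hedged sketch of the intended wiring, assuming the
# corrected Director / LocationBuilder / BaseInfo / GeoInfo / Statistic
# sketched earlier; NYTLocation in app/location/nyt.py would follow the same
# shape with TimelinedLocationBuilder and timelines.
from . import BaseInfo, Director, GeoInfo, LocationBuilder, Statistic


class CSBSLocation:
    """A CSBS (county) location."""

    # pylint: disable=too-many-arguments,redefined-builtin
    def __init__(self, id, state, county, coordinates, last_updated, confirmed, deaths):
        director = Director()
        director.set_builder(LocationBuilder())  # an instance, not the class
        self.location = director.build_location(
            BaseInfo(id=id, last_updated=last_updated),
            Statistic(confirmed=confirmed, deaths=deaths, recovered=0),
            GeoInfo(country="US", province=state, coordinates=coordinates),
        )
        self.state = state
        self.county = county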
- locations = await self.get_all() - return locations[loc_id] - - + def __init__(self): + super().__init__() # Base URL for fetching data -BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv" - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Retrieves county locations; locations are cached for 1 hour - - :returns: The locations. - :rtype: dict - """ - data_id = "csbs.locations" - LOGGER.info(f"{data_id} Requesting data...") - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - locations = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - async with httputils.CLIENT_SESSION.get(BASE_URL) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - locations = [] - - for i, item in enumerate(data): - # General info. - state = item["State Name"] - county = item["County Name"] - - # Ensure country is specified. - if county in {"Unassigned", "Unknown"}: - continue - - # Date string without "EDT" at end. - last_update = " ".join(item["Last Update"].split(" ")[0:2]) - - # Append to locations. - locations.append( - CSBSLocation( - # General info. - i, - state, - county, - # Coordinates. - Coordinates(item["Latitude"], item["Longitude"]), - # Last update (parse as ISO). - datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z", - # Statistics. - int(item["Confirmed"] or 0), - int(item["Death"] or 0), + BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv" + + + @cached(cache=TTLCache(maxsize=1, ttl=1800)) + async def get_locations(): + """ + Retrieves county locations; locations are cached for 1 hour + + :returns: The locations. + :rtype: dict + """ + data_id = "csbs.locations" + LOGGER.info(f"{data_id} Requesting data...") + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + locations = [] + + for i, item in enumerate(data): + # General info. + state = item["State Name"] + county = item["County Name"] + + # Ensure country is specified. + if county in {"Unassigned", "Unknown"}: + continue + + # Date string without "EDT" at end. + last_update = " ".join(item["Last Update"].split(" ")[0:2]) + + # Append to locations. + locations.append( + CSBSLocation( + # General info. + i, + state, + county, + # Coordinates. + Coordinates(item["Latitude"], item["Longitude"]), + # Last update (parse as ISO). + datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z", + # Statistics. + int(item["Confirmed"] or 0), + int(item["Death"] or 0), + ) ) - ) - LOGGER.info(f"{data_id} Data normalized") - # save the results to distributed cache - # TODO: fix json serialization - try: - await load_cache(data_id, locations) - except TypeError as type_err: - LOGGER.error(type_err) - - # Return the locations. 
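# The rewritten LocationService drops the "from abc import ABC, abstractmethod"
# import yet still decorates get_locations with @abstractmethod, and get_all()
# calls a bare get_locations() that is not defined in that module. A hedged
# sketch of a base class that works as a template method: get_all/get live in
# the base (as the removed per-service copies above did) and delegate to an
# abstract hook that each concrete service overrides.
from abc import ABC, abstractmethod


class LocationService(ABC):
    """Service for retrieving locations."""

    async def get_all(self):
        # Get all of the locations from the concrete service.
        return await self.get_locations()

    async def get(self, loc_id):
        # Get location at the index equal to the provided id.
        locations = await self.get_all()
        return locations[loc_id]

    @abstractmethod
    async def get_locations(self):
        """Fetch and normalize the locations for this data source."""
        raise NotImplementedError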
- return locations + LOGGER.info(f"{data_id} Data normalized") + # save the results to distributed cache + # TODO: fix json serialization + try: + await load_cache(data_id, locations) + except TypeError as type_err: + LOGGER.error(type_err) + + # Return the locations. + return locations diff --git a/app/services/location/jhu.py b/app/services/location/jhu.py index ebed3960..043aab67 100644 --- a/app/services/location/jhu.py +++ b/app/services/location/jhu.py @@ -25,204 +25,195 @@ class JhuLocationService(LocationService): """ Service for retrieving locations from Johns Hopkins CSSE (https://github.com/CSSEGISandData/COVID-19). """ - - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to provided id. - locations = await self.get_all() - return locations[loc_id] - + def __init__(self): + super().__init__() # --------------------------------------------------------------- # Base URL for fetching category. -BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/" - - -@cached(cache=TTLCache(maxsize=4, ttl=1800)) -async def get_category(category): - """ - Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis. - - :returns: The data for category. - :rtype: dict - """ - # Adhere to category naming standard. - category = category.lower() - data_id = f"jhu.{category}" - - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - results = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - # URL to request data from. - url = BASE_URL + "time_series_covid19_%s_global.csv" % category - - # Request the data - LOGGER.info(f"{data_id} Requesting data...") - async with httputils.CLIENT_SESSION.get(url) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - # Parse the CSV. - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - # The normalized locations. + BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/" + + + @cached(cache=TTLCache(maxsize=4, ttl=1800)) + async def get_category(category): + """ + Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis. + + :returns: The data for category. + :rtype: dict + """ + # Adhere to category naming standard. + category = category.lower() + data_id = f"jhu.{category}" + + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + results = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + # URL to request data from. + url = BASE_URL + "time_series_covid19_%s_global.csv" % category + + # Request the data + LOGGER.info(f"{data_id} Requesting data...") + async with httputils.CLIENT_SESSION.get(url) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + # Parse the CSV. + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + # The normalized locations. + locations = [] + + for item in data: + # Filter out all the dates. 
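# Indenting BASE_URL and the @cached coroutines into the service class bodies
# (as this hunk and the JHU/NYT hunks do) leaves them as class-scope statements
# that never receive self, and the BASE_URL lookups inside those coroutines
# stop resolving, because class bodies are not enclosing scopes for the
# functions defined in them. A hedged sketch of the alternative: keep the
# cached fetcher at module scope, unchanged, and satisfy the LocationService
# hook sketched above by delegating to it. fetch_csbs_locations is an
# illustrative stand-in name for the existing module-level get_locations()
# with its @cached(TTLCache(...)) decorator.
class CSBSLocationService(LocationService):
    """Service for retrieving locations from CSBS."""

    async def get_locations(self):
        return await fetch_csbs_locations()


async def fetch_csbs_locations():
    """Stand-in for the original cached, module-level get_locations()."""
    return []  # the original CSV-fetching body would be unchanged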
+ dates = dict(filter(lambda element: date_util.is_date(element[0]), item.items())) + + # Make location history from dates. + history = {date: int(float(amount or 0)) for date, amount in dates.items()} + + # Country for this location. + country = item["Country/Region"] + + # Latest data insert value. + latest = list(history.values())[-1] + + # Normalize the item and append to locations. + locations.append( + { + # General info. + "country": country, + "country_code": countries.country_code(country), + "province": item["Province/State"], + # Coordinates. + "coordinates": {"lat": item["Lat"], "long": item["Long"],}, + # History. + "history": history, + # Latest statistic. + "latest": int(latest or 0), + } + ) + LOGGER.debug(f"{data_id} Data normalized") + + # Latest total. + latest = sum(map(lambda location: location["latest"], locations)) + + # Return the final data. + results = { + "locations": locations, + "latest": latest, + "last_updated": datetime.utcnow().isoformat() + "Z", + "source": "https://github.com/ExpDev07/coronavirus-tracker-api", + } + # save the results to distributed cache + await load_cache(data_id, results) + + LOGGER.info(f"{data_id} results:\n{pf(results, depth=1)}") + return results + + + @cached(cache=TTLCache(maxsize=1, ttl=1800)) + async def get_locations(): + """ + Retrieves the locations from the categories. The locations are cached for 1 hour. + + :returns: The locations. + :rtype: List[Location] + """ + data_id = "jhu.locations" + LOGGER.info(f"pid:{PID}: {data_id} Requesting data...") + # Get all of the data categories locations. + confirmed = await get_category("confirmed") + deaths = await get_category("deaths") + recovered = await get_category("recovered") + + locations_confirmed = confirmed["locations"] + locations_deaths = deaths["locations"] + locations_recovered = recovered["locations"] + + # Final locations to return. locations = [] - - for item in data: - # Filter out all the dates. - dates = dict(filter(lambda element: date_util.is_date(element[0]), item.items())) - - # Make location history from dates. - history = {date: int(float(amount or 0)) for date, amount in dates.items()} - - # Country for this location. - country = item["Country/Region"] - - # Latest data insert value. - latest = list(history.values())[-1] - - # Normalize the item and append to locations. + # *************************************************************************** + # TODO: This iteration approach assumes the indexes remain the same + # and opens us to a CRITICAL ERROR. The removal of a column in the data source + # would break the API or SHIFT all the data confirmed, deaths, recovery producting + # incorrect data to consumers. + # *************************************************************************** + # Go through locations. + for index, location in enumerate(locations_confirmed): + # Get the timelines. + + # TEMP: Fix for merging recovery data. See TODO above for more details. + key = (location["country"], location["province"]) + + timelines = { + "confirmed": location["history"], + "deaths": parse_history(key, locations_deaths, index), + "recovered": parse_history(key, locations_recovered, index), + } + + # Grab coordinates. + coordinates = location["coordinates"] + + # Create location (supporting timelines) and append. locations.append( - { + TimelinedLocation( # General info. - "country": country, - "country_code": countries.country_code(country), - "province": item["Province/State"], + index, + location["country"], + location["province"], # Coordinates. 
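# The get_category() loop above builds a per-location history by keeping only
# the CSV columns whose header is a date (date_util.is_date in the real code)
# and coercing each value to an int. A small self-contained illustration of
# that step; _looks_like_date is a stand-in for app.utils.date's is_date.
from datetime import datetime


def _looks_like_date(text):
    try:
        datetime.strptime(text, "%m/%d/%y")
        return True
    except ValueError:
        return False


def row_history(row):
    """Return {date: count} for the date columns of one JHU CSV row."""
    dates = {key: value for key, value in row.items() if _looks_like_date(key)}
    return {date: int(float(amount or 0)) for date, amount in dates.items()}


# Example (trimmed row):
# row_history({"Country/Region": "Norway", "Lat": "60.5", "1/22/20": "0", "1/23/20": "1"})
# -> {"1/22/20": 0, "1/23/20": 1}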
- "coordinates": {"lat": item["Lat"], "long": item["Long"],}, - # History. - "history": history, - # Latest statistic. - "latest": int(latest or 0), - } + Coordinates(latitude=coordinates["lat"], longitude=coordinates["long"]), + # Last update. + datetime.utcnow().isoformat() + "Z", + # Timelines (parse dates as ISO). + { + "confirmed": Timeline( + timeline={ + datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount + for date, amount in timelines["confirmed"].items() + } + ), + "deaths": Timeline( + timeline={ + datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount + for date, amount in timelines["deaths"].items() + } + ), + "recovered": Timeline( + timeline={ + datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount + for date, amount in timelines["recovered"].items() + } + ), + }, + ) ) - LOGGER.debug(f"{data_id} Data normalized") - - # Latest total. - latest = sum(map(lambda location: location["latest"], locations)) - - # Return the final data. - results = { - "locations": locations, - "latest": latest, - "last_updated": datetime.utcnow().isoformat() + "Z", - "source": "https://github.com/ExpDev07/coronavirus-tracker-api", - } - # save the results to distributed cache - await load_cache(data_id, results) - - LOGGER.info(f"{data_id} results:\n{pf(results, depth=1)}") - return results - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Retrieves the locations from the categories. The locations are cached for 1 hour. + LOGGER.info(f"{data_id} Data normalized") - :returns: The locations. - :rtype: List[Location] - """ - data_id = "jhu.locations" - LOGGER.info(f"pid:{PID}: {data_id} Requesting data...") - # Get all of the data categories locations. - confirmed = await get_category("confirmed") - deaths = await get_category("deaths") - recovered = await get_category("recovered") - - locations_confirmed = confirmed["locations"] - locations_deaths = deaths["locations"] - locations_recovered = recovered["locations"] - - # Final locations to return. - locations = [] - # *************************************************************************** - # TODO: This iteration approach assumes the indexes remain the same - # and opens us to a CRITICAL ERROR. The removal of a column in the data source - # would break the API or SHIFT all the data confirmed, deaths, recovery producting - # incorrect data to consumers. - # *************************************************************************** - # Go through locations. - for index, location in enumerate(locations_confirmed): - # Get the timelines. - - # TEMP: Fix for merging recovery data. See TODO above for more details. - key = (location["country"], location["province"]) - - timelines = { - "confirmed": location["history"], - "deaths": parse_history(key, locations_deaths, index), - "recovered": parse_history(key, locations_recovered, index), - } - - # Grab coordinates. - coordinates = location["coordinates"] - - # Create location (supporting timelines) and append. - locations.append( - TimelinedLocation( - # General info. - index, - location["country"], - location["province"], - # Coordinates. - Coordinates(latitude=coordinates["lat"], longitude=coordinates["long"]), - # Last update. - datetime.utcnow().isoformat() + "Z", - # Timelines (parse dates as ISO). 
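# The repeated Timeline(...) constructions in this hunk perform the same
# "%m/%d/%y" -> ISO-8601 "Z" key conversion three times. A hedged sketch of a
# small helper the loop could call once per category; to_iso_timeline is an
# illustrative name, and Timeline is the project's existing timeline class.
from datetime import datetime


def to_iso_timeline(history):
    """Re-key a {"m/d/yy": amount} history with ISO-8601 UTC timestamps."""
    return {
        datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
        for date, amount in history.items()
    }


# Usage inside the loop (sketch):
# {
#     "confirmed": Timeline(timeline=to_iso_timeline(timelines["confirmed"])),
#     "deaths": Timeline(timeline=to_iso_timeline(timelines["deaths"])),
#     "recovered": Timeline(timeline=to_iso_timeline(timelines["recovered"])),
# }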
- { - "confirmed": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["confirmed"].items() - } - ), - "deaths": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["deaths"].items() - } - ), - "recovered": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["recovered"].items() - } - ), - }, - ) - ) - LOGGER.info(f"{data_id} Data normalized") + # Finally, return the locations. + return locations - # Finally, return the locations. - return locations + def parse_history(key: tuple, locations: list, index: int): + """ + Helper for validating and extracting history content from + locations data based on index. Validates with the current country/province + key to make sure no index/column issue. -def parse_history(key: tuple, locations: list, index: int): - """ - Helper for validating and extracting history content from - locations data based on index. Validates with the current country/province - key to make sure no index/column issue. + TEMP: solution because implement a more efficient and better approach in the refactor. + """ + location_history = {} + try: + if key == (locations[index]["country"], locations[index]["province"]): + location_history = locations[index]["history"] + except (IndexError, KeyError): + LOGGER.debug(f"iteration data merge error: {index} {key}") - TEMP: solution because implement a more efficient and better approach in the refactor. - """ - location_history = {} - try: - if key == (locations[index]["country"], locations[index]["province"]): - location_history = locations[index]["history"] - except (IndexError, KeyError): - LOGGER.debug(f"iteration data merge error: {index} {key}") - - return location_history + return location_history diff --git a/app/services/location/nyt.py b/app/services/location/nyt.py index 1f25ec34..e56843e8 100644 --- a/app/services/location/nyt.py +++ b/app/services/location/nyt.py @@ -21,125 +21,115 @@ class NYTLocationService(LocationService): Service for retrieving locations from New York Times (https://github.com/nytimes/covid-19-data). """ - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to provided id. - locations = await self.get_all() - return locations[loc_id] - - # --------------------------------------------------------------- - + def __init__(self): + super().__init__() # Base URL for fetching category. -BASE_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv" - - -def get_grouped_locations_dict(data): - """ - Helper function to group history for locations into one dict. 
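# parse_history() above only returns the history at `index` when the
# (country, province) key matches; otherwise it yields an empty dict, so a
# shifted or missing row degrades to zeros instead of borrowing another
# location's numbers. A tiny illustration with made-up rows, assuming
# parse_history stays a module-level helper as in the original file:
_sample_rows = [
    {"country": "Norway", "province": "", "history": {"1/22/20": 0}},
    {"country": "Sweden", "province": "", "history": {"1/22/20": 1}},
]
assert parse_history(("Norway", ""), _sample_rows, 0) == {"1/22/20": 0}  # key matches
assert parse_history(("Sweden", ""), _sample_rows, 0) == {}  # mismatch is dropped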
- - :returns: The complete data for each unique US county - :rdata: dict - """ - grouped_locations = {} - - # in increasing order of dates - for row in data: - county_state = (row["county"], row["state"]) - date = row["date"] - confirmed = row["cases"] - deaths = row["deaths"] - - # initialize if not existing - if county_state not in grouped_locations: - grouped_locations[county_state] = {"confirmed": [], "deaths": []} - - # append confirmed tuple to county_state (date, # confirmed) - grouped_locations[county_state]["confirmed"].append((date, confirmed)) - # append deaths tuple to county_state (date, # deaths) - grouped_locations[county_state]["deaths"].append((date, deaths)) - - return grouped_locations - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Returns a list containing parsed NYT data by US county. The data is cached for 1 hour. - - :returns: The complete data for US Counties. - :rtype: dict - """ - data_id = "nyt.locations" - # Request the data. - LOGGER.info(f"{data_id} Requesting data...") - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - locations = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - async with httputils.CLIENT_SESSION.get(BASE_URL) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - # Parse the CSV. - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - # Group together locations (NYT data ordered by dates not location). - grouped_locations = get_grouped_locations_dict(data) - - # The normalized locations. - locations = [] - - for idx, (county_state, histories) in enumerate(grouped_locations.items()): - # Make location history for confirmed and deaths from dates. - # List is tuples of (date, amount) in order of increasing dates. - confirmed_list = histories["confirmed"] - confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} - - deaths_list = histories["deaths"] - deaths_history = {date: int(amount or 0) for date, amount in deaths_list} - - # Normalize the item and append to locations. - locations.append( - NYTLocation( - id=idx, - state=county_state[1], - county=county_state[0], - coordinates=Coordinates(None, None), # NYT does not provide coordinates - last_updated=datetime.utcnow().isoformat() + "Z", # since last request - timelines={ - "confirmed": Timeline( - timeline={ - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in confirmed_history.items() - } - ), - "deaths": Timeline( - timeline={ - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in deaths_history.items() - } - ), - "recovered": Timeline(), - }, + BASE_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv" + + + def get_grouped_locations_dict(data): + """ + Helper function to group history for locations into one dict. 
+ + :returns: The complete data for each unique US county + :rdata: dict + """ + grouped_locations = {} + + # in increasing order of dates + for row in data: + county_state = (row["county"], row["state"]) + date = row["date"] + confirmed = row["cases"] + deaths = row["deaths"] + + # initialize if not existing + if county_state not in grouped_locations: + grouped_locations[county_state] = {"confirmed": [], "deaths": []} + + # append confirmed tuple to county_state (date, # confirmed) + grouped_locations[county_state]["confirmed"].append((date, confirmed)) + # append deaths tuple to county_state (date, # deaths) + grouped_locations[county_state]["deaths"].append((date, deaths)) + + return grouped_locations + + + @cached(cache=TTLCache(maxsize=1, ttl=1800)) + async def get_locations(): + """ + Returns a list containing parsed NYT data by US county. The data is cached for 1 hour. + + :returns: The complete data for US Counties. + :rtype: dict + """ + data_id = "nyt.locations" + # Request the data. + LOGGER.info(f"{data_id} Requesting data...") + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + # Parse the CSV. + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + # Group together locations (NYT data ordered by dates not location). + grouped_locations = get_grouped_locations_dict(data) + + # The normalized locations. + locations = [] + + for idx, (county_state, histories) in enumerate(grouped_locations.items()): + # Make location history for confirmed and deaths from dates. + # List is tuples of (date, amount) in order of increasing dates. + confirmed_list = histories["confirmed"] + confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} + + deaths_list = histories["deaths"] + deaths_history = {date: int(amount or 0) for date, amount in deaths_list} + + # Normalize the item and append to locations. + locations.append( + NYTLocation( + id=idx, + state=county_state[1], + county=county_state[0], + coordinates=Coordinates(None, None), # NYT does not provide coordinates + last_updated=datetime.utcnow().isoformat() + "Z", # since last request + timelines={ + "confirmed": Timeline( + timeline={ + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in confirmed_history.items() + } + ), + "deaths": Timeline( + timeline={ + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in deaths_history.items() + } + ), + "recovered": Timeline(), + }, + ) ) - ) - LOGGER.info(f"{data_id} Data normalized") - # save the results to distributed cache - # TODO: fix json serialization - try: - await load_cache(data_id, locations) - except TypeError as type_err: - LOGGER.error(type_err) - - return locations + LOGGER.info(f"{data_id} Data normalized") + # save the results to distributed cache + # TODO: fix json serialization + try: + await load_cache(data_id, locations) + except TypeError as type_err: + LOGGER.error(type_err) + + return locations
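# A worked example of the grouping step above, assuming
# get_grouped_locations_dict stays a module-level helper as in the original
# file; the two rows are made up purely for illustration.
_nyt_sample = [
    {"date": "2020-03-01", "county": "Kings", "state": "New York", "cases": "1", "deaths": "0"},
    {"date": "2020-03-02", "county": "Kings", "state": "New York", "cases": "4", "deaths": "1"},
]
assert get_grouped_locations_dict(_nyt_sample) == {
    ("Kings", "New York"): {
        "confirmed": [("2020-03-01", "1"), ("2020-03-02", "4")],
        "deaths": [("2020-03-01", "0"), ("2020-03-02", "1")],
    }
}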