diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000..f772ce8c --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,10 @@ +version = 1 + +test_patterns = ["tests/**"] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" diff --git a/.gitignore b/.gitignore index 9c41818c..ab6f17ff 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +locustfile.py # Translations *.mo diff --git a/app/data/__init__.py b/app/data/__init__.py index 265bf3d3..60a75dac 100644 --- a/app/data/__init__.py +++ b/app/data/__init__.py @@ -4,7 +4,11 @@ from ..services.location.nyt import NYTLocationService # Mapping of services to data-sources. -DATA_SOURCES = {"jhu": JhuLocationService(), "csbs": CSBSLocationService(), "nyt": NYTLocationService()} +DATA_SOURCES = { + "jhu": JhuLocationService(), + "csbs": CSBSLocationService(), + "nyt": NYTLocationService(), +} def data_source(source): diff --git a/app/io.py b/app/io.py index 3bd443b6..2a563b15 100644 --- a/app/io.py +++ b/app/io.py @@ -10,7 +10,11 @@ def save( - name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs + name: str, + content: Union[str, Dict, List], + write_mode: str = "w", + indent: int = 2, + **json_dumps_kwargs, ) -> pathlib.Path: """Save content to a file. If content is a dictionary, use json.dumps().""" path = DATA / name @@ -35,7 +39,12 @@ class AIO: @classmethod async def save( - cls, name: str, content: Union[str, Dict, List], write_mode: str = "w", indent: int = 2, **json_dumps_kwargs + cls, + name: str, + content: Union[str, Dict, List], + write_mode: str = "w", + indent: int = 2, + **json_dumps_kwargs, ): """Save content to a file. If content is a dictionary, use json.dumps().""" path = DATA / name diff --git a/app/location/__init__.py b/app/location/__init__.py index d12f28c3..1da5e9e5 100644 --- a/app/location/__init__.py +++ b/app/location/__init__.py @@ -11,7 +11,7 @@ class Location: # pylint: disable=too-many-instance-attributes """ def __init__( - self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered + self, id, country, province, coordinates, last_updated, confirmed, deaths, recovered, ): # pylint: disable=too-many-arguments # General info. self.id = id @@ -66,7 +66,11 @@ def serialize(self): # Last updated. "last_updated": self.last_updated, # Latest data (statistics). - "latest": {"confirmed": self.confirmed, "deaths": self.deaths, "recovered": self.recovered}, + "latest": { + "confirmed": self.confirmed, + "deaths": self.deaths, + "recovered": self.recovered, + }, } diff --git a/app/main.py b/app/main.py index 9b335df3..b43b4aae 100644 --- a/app/main.py +++ b/app/main.py @@ -59,7 +59,11 @@ # Enable CORS. APP.add_middleware( - CORSMiddleware, allow_credentials=True, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], + CORSMiddleware, + allow_credentials=True, + allow_origins=["*"], + allow_methods=["*"], + allow_headers=["*"], ) APP.add_middleware(GZipMiddleware, minimum_size=1000) diff --git a/app/routers/v1.py b/app/routers/v1.py index 662514a0..517bc625 100644 --- a/app/routers/v1.py +++ b/app/routers/v1.py @@ -19,7 +19,11 @@ async def all_categories(): "deaths": deaths, "recovered": recovered, # Latest. 
- "latest": {"confirmed": confirmed["latest"], "deaths": deaths["latest"], "recovered": recovered["latest"],}, + "latest": { + "confirmed": confirmed["latest"], + "deaths": deaths["latest"], + "recovered": recovered["latest"], + }, } diff --git a/app/routers/v2.py b/app/routers/v2.py index de5a5312..fe9d2475 100644 --- a/app/routers/v2.py +++ b/app/routers/v2.py @@ -65,11 +65,17 @@ async def get_locations( # Do filtering. try: - locations = [location for location in locations if str(getattr(location, key)).lower() == str(value)] + locations = [ + location + for location in locations + if str(getattr(location, key)).lower() == str(value) + ] except AttributeError: pass if not locations: - raise HTTPException(404, detail=f"Source `{source}` does not have the desired location data.") + raise HTTPException( + 404, detail=f"Source `{source}` does not have the desired location data.", + ) # Return final serialized data. return { @@ -84,7 +90,9 @@ async def get_locations( # pylint: disable=invalid-name @V2.get("/locations/{id}", response_model=LocationResponse) -async def get_location_by_id(request: Request, id: int, source: Sources = "jhu", timelines: bool = True): +async def get_location_by_id( + request: Request, id: int, source: Sources = "jhu", timelines: bool = True +): """ Getting specific location by id. """ diff --git a/app/services/location/csbs.py b/app/services/location/csbs.py index 68bdb01c..ddfe48b7 100644 --- a/app/services/location/csbs.py +++ b/app/services/location/csbs.py @@ -6,6 +6,7 @@ from asyncache import cached from cachetools import TTLCache +from ...caches import check_cache, load_cache from ...coordinates import Coordinates from ...location.csbs import CSBSLocation from ...utils import httputils @@ -34,7 +35,7 @@ async def get(self, loc_id): # pylint: disable=arguments-differ BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv" -@cached(cache=TTLCache(maxsize=1, ttl=3600)) +@cached(cache=TTLCache(maxsize=1, ttl=1800)) async def get_locations(): """ Retrieves county locations; locations are cached for 1 hour @@ -44,48 +45,58 @@ async def get_locations(): """ data_id = "csbs.locations" LOGGER.info(f"{data_id} Requesting data...") - async with httputils.CLIENT_SESSION.get(BASE_URL) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - locations = [] - - for i, item in enumerate(data): - # General info. - state = item["State Name"] - county = item["County Name"] - - # Ensure country is specified. - if county in {"Unassigned", "Unknown"}: - continue - - # Coordinates. - coordinates = Coordinates(item["Latitude"], item["Longitude"]) # pylint: disable=unused-variable - - # Date string without "EDT" at end. - last_update = " ".join(item["Last Update"].split(" ")[0:2]) - - # Append to locations. - locations.append( - CSBSLocation( - # General info. - i, - state, - county, - # Coordinates. - Coordinates(item["Latitude"], item["Longitude"]), - # Last update (parse as ISO). - datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z", - # Statistics. 
- int(item["Confirmed"] or 0), - int(item["Death"] or 0), + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + locations = [] + + for i, item in enumerate(data): + # General info. + state = item["State Name"] + county = item["County Name"] + + # Ensure country is specified. + if county in {"Unassigned", "Unknown"}: + continue + + # Date string without "EDT" at end. + last_update = " ".join(item["Last Update"].split(" ")[0:2]) + + # Append to locations. + locations.append( + CSBSLocation( + # General info. + i, + state, + county, + # Coordinates. + Coordinates(item["Latitude"], item["Longitude"]), + # Last update (parse as ISO). + datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z", + # Statistics. + int(item["Confirmed"] or 0), + int(item["Death"] or 0), + ) ) - ) - LOGGER.info(f"{data_id} Data normalized") - + LOGGER.info(f"{data_id} Data normalized") + # save the results to distributed cache + # TODO: fix json serialization + try: + await load_cache(data_id, locations) + except TypeError as type_err: + LOGGER.error(type_err) + # Return the locations. return locations diff --git a/app/services/location/jhu.py b/app/services/location/jhu.py index 1a11e8ac..6f488742 100644 --- a/app/services/location/jhu.py +++ b/app/services/location/jhu.py @@ -41,12 +41,10 @@ async def get(self, loc_id): # pylint: disable=arguments-differ # Base URL for fetching category. -BASE_URL = ( - "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/" -) +BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/" -@cached(cache=TTLCache(maxsize=128, ttl=1800)) +@cached(cache=TTLCache(maxsize=4, ttl=1800)) async def get_category(category): """ Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis. @@ -129,7 +127,7 @@ async def get_category(category): return results -@cached(cache=TTLCache(maxsize=1024, ttl=1800)) +@cached(cache=TTLCache(maxsize=1, ttl=1800)) async def get_locations(): """ Retrieves the locations from the categories. The locations are cached for 1 hour. @@ -142,22 +140,31 @@ async def get_locations(): # Get all of the data categories locations. confirmed = await get_category("confirmed") deaths = await get_category("deaths") - # recovered = await get_category("recovered") + recovered = await get_category("recovered") locations_confirmed = confirmed["locations"] locations_deaths = deaths["locations"] - # locations_recovered = recovered["locations"] + locations_recovered = recovered["locations"] # Final locations to return. locations = [] - + # *************************************************************************** + # TODO: This iteration approach assumes the indexes remain the same + # and opens us to a CRITICAL ERROR. The removal of a column in the data source + # would break the API or SHIFT all the data confirmed, deaths, recovery producting + # incorrect data to consumers. + # *************************************************************************** # Go through locations. 
     for index, location in enumerate(locations_confirmed):
         # Get the timelines.
+
+        # TEMP: Fix for merging recovery data. See TODO above for more details.
+        key = (location["country"], location["province"])
+
         timelines = {
-            "confirmed": locations_confirmed[index]["history"],
-            "deaths": locations_deaths[index]["history"],
-            # 'recovered' : locations_recovered[index]['history'],
+            "confirmed": location["history"],
+            "deaths": parse_history(key, locations_deaths, index),
+            "recovered": parse_history(key, locations_recovered, index),
         }
 
         # Grab coordinates.
@@ -188,7 +195,12 @@ async def get_locations():
                             for date, amount in timelines["deaths"].items()
                         }
                     ),
-                    "recovered": Timeline({}),
+                    "recovered": Timeline(
+                        {
+                            datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
+                            for date, amount in timelines["recovered"].items()
+                        }
+                    ),
                 },
             )
         )
@@ -196,3 +208,21 @@ async def get_locations():
 
     # Finally, return the locations.
     return locations
+
+
+def parse_history(key: tuple, locations: list, index: int):
+    """
+    Helper for validating and extracting history content from
+    locations data based on index. Validates against the current country/province
+    key to make sure there is no index/column mismatch.
+
+    TEMP: solution until we implement a more efficient and better approach in the refactor.
+    """
+    location_history = {}
+    try:
+        if key == (locations[index]["country"], locations[index]["province"]):
+            location_history = locations[index]["history"]
+    except (IndexError, KeyError):
+        LOGGER.debug(f"iteration data merge error: {index} {key}")
+
+    return location_history
diff --git a/app/services/location/nyt.py b/app/services/location/nyt.py
index 8b70c5cc..52b565ae 100644
--- a/app/services/location/nyt.py
+++ b/app/services/location/nyt.py
@@ -6,6 +6,7 @@
 from asyncache import cached
 from cachetools import TTLCache
 
+from ...caches import check_cache, load_cache
 from ...coordinates import Coordinates
 from ...location.nyt import NYTLocation
 from ...timeline import Timeline
@@ -66,7 +67,7 @@ def get_grouped_locations_dict(data):
     return grouped_locations
 
 
-@cached(cache=TTLCache(maxsize=128, ttl=3600))
+@cached(cache=TTLCache(maxsize=1, ttl=1800))
 async def get_locations():
     """
     Returns a list containing parsed NYT data by US county. The data is cached for 1 hour.
@@ -77,55 +78,68 @@ async def get_locations():
     data_id = "nyt.locations"
     # Request the data.
     LOGGER.info(f"{data_id} Requesting data...")
-    async with httputils.CLIENT_SESSION.get(BASE_URL) as response:
-        text = await response.text()
-
-    LOGGER.debug(f"{data_id} Data received")
-
-    # Parse the CSV.
-    data = list(csv.DictReader(text.splitlines()))
-    LOGGER.debug(f"{data_id} CSV parsed")
-
-    # Group together locations (NYT data ordered by dates not location).
-    grouped_locations = get_grouped_locations_dict(data)
-
-    # The normalized locations.
-    locations = []
-
-    for idx, (county_state, histories) in enumerate(grouped_locations.items()):
-        # Make location history for confirmed and deaths from dates.
-        # List is tuples of (date, amount) in order of increasing dates.
-        confirmed_list = histories["confirmed"]
-        confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list}
-
-        deaths_list = histories["deaths"]
-        deaths_history = {date: int(amount or 0) for date, amount in deaths_list}
-
-        # Normalize the item and append to locations.
- locations.append( - NYTLocation( - id=idx, - state=county_state[1], - county=county_state[0], - coordinates=Coordinates(None, None), # NYT does not provide coordinates - last_updated=datetime.utcnow().isoformat() + "Z", # since last request - timelines={ - "confirmed": Timeline( - { - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in confirmed_history.items() - } - ), - "deaths": Timeline( - { - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in deaths_history.items() - } - ), - "recovered": Timeline({}), - }, + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + # Parse the CSV. + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + # Group together locations (NYT data ordered by dates not location). + grouped_locations = get_grouped_locations_dict(data) + + # The normalized locations. + locations = [] + + for idx, (county_state, histories) in enumerate(grouped_locations.items()): + # Make location history for confirmed and deaths from dates. + # List is tuples of (date, amount) in order of increasing dates. + confirmed_list = histories["confirmed"] + confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} + + deaths_list = histories["deaths"] + deaths_history = {date: int(amount or 0) for date, amount in deaths_list} + + # Normalize the item and append to locations. + locations.append( + NYTLocation( + id=idx, + state=county_state[1], + county=county_state[0], + coordinates=Coordinates(None, None), # NYT does not provide coordinates + last_updated=datetime.utcnow().isoformat() + "Z", # since last request + timelines={ + "confirmed": Timeline( + { + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in confirmed_history.items() + } + ), + "deaths": Timeline( + { + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in deaths_history.items() + } + ), + "recovered": Timeline({}), + }, + ) ) - ) - LOGGER.info(f"{data_id} Data normalized") + LOGGER.info(f"{data_id} Data normalized") + # save the results to distributed cache + # TODO: fix json serialization + try: + await load_cache(data_id, locations) + except TypeError as type_err: + LOGGER.error(type_err) return locations diff --git a/app/utils/populations.py b/app/utils/populations.py index 24f0fa4e..c02f15a9 100644 --- a/app/utils/populations.py +++ b/app/utils/populations.py @@ -28,7 +28,9 @@ def fetch_populations(save=False): # Fetch the countries. try: - countries = requests.get(GEONAMES_URL, params={"username": "dperic"}, timeout=1.25).json()["geonames"] + countries = requests.get(GEONAMES_URL, params={"username": "dperic"}, timeout=1.25).json()[ + "geonames" + ] # Go through all the countries and perform the mapping. 
         for country in countries:
             mappings.update({country["countryCode"]: int(country["population"]) or None})
diff --git a/pyproject.toml b/pyproject.toml
index b6bc6af6..df1ad168 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.black]
-line-length = 120
+line-length = 100
 target-version = ['py36', 'py37', 'py38']
 include = '\.pyi?$'
 exclude = '''
@@ -23,7 +23,7 @@ multi_line_output = 3
 include_trailing_comma = "True"
 force_grid_wrap = 0
 use_parentheses = "True"
-line_length = 120
+line_length = 100
 
 [tool.pylint.master]
 extension-pkg-whitelist = "pydantic"
@@ -42,7 +42,7 @@ logging-modules = "logging"
 allow-wildcard-with-all = "no"
 [tool.pylint.format]
 indent-after-paren = "4"
-max-line-length = "120" # matches black setting
+max-line-length = "100" # matches black setting
 max-module-lines = "800"
 no-space-check = '''
     trailing-comma,
diff --git a/tasks.py b/tasks.py
index ae1f09cd..0f6d6995 100644
--- a/tasks.py
+++ b/tasks.py
@@ -72,12 +72,21 @@ def test(ctx):
 @invoke.task
 def generate_reqs(ctx):
     """Generate requirements.txt"""
-    reqs = ["pipenv lock -r > requirements.txt", "pipenv lock -r --dev > requirements-dev.txt"]
+    reqs = [
+        "pipenv lock -r > requirements.txt",
+        "pipenv lock -r --dev > requirements-dev.txt",
+    ]
     [ctx.run(req) for req in reqs]
 
 
 @invoke.task
-def docker(ctx, build=False, run=False, tag="covid-tracker-api:latest", name=f"covid-api-{random.randint(0,999)}"):
+def docker(
+    ctx,
+    build=False,
+    run=False,
+    tag="covid-tracker-api:latest",
+    name=f"covid-api-{random.randint(0,999)}",
+):
     """Build and run docker container."""
     if not any([build, run]):
         raise invoke.Exit(message="Specify either --build or --run", code=1)
diff --git a/tests/test_io.py b/tests/test_io.py
index c5d16c3a..ba926011 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -10,7 +10,11 @@
     [
         ("test_file.txt", string.ascii_lowercase, {}),
         ("test_json_file.json", {"a": 0, "b": 1, "c": 2}, {}),
-        ("test_custom_json.json", {"z": -1, "b": 1, "y": -2, "a": 0}, {"indent": 4, "sort_keys": True}),
+        (
+            "test_custom_json.json",
+            {"z": -1, "b": 1, "y": -2, "a": 0},
+            {"indent": 4, "sort_keys": True},
+        ),
     ],
 )
 
diff --git a/tests/test_jhu.py b/tests/test_jhu.py
index 3790218d..f6af4b9e 100644
--- a/tests/test_jhu.py
+++ b/tests/test_jhu.py
@@ -22,3 +22,27 @@ async def test_get_locations(mock_client_session):
     # `jhu.get_locations()` creates id based on confirmed list
     location_confirmed = await jhu.get_category("confirmed")
     assert len(output) == len(location_confirmed["locations"])
+
+    # `jhu.get_locations()` creates id based on deaths list
+    location_deaths = await jhu.get_category("deaths")
+    assert len(output) == len(location_deaths["locations"])
+
+    # `jhu.get_locations()` creates id based on recovered list
+    location_recovered = await jhu.get_category("recovered")
+    assert len(output) == len(location_recovered["locations"])
+
+
+@pytest.mark.parametrize(
+    "key, locations, index, expected",
+    [
+        (("Thailand", "TH"), [{"country": "Thailand", "province": "TH", "history": {"test": "yes"}}], 0, {"test": "yes"}),  # Success
+        (("Deutschland", "DE"), [{"country": "Deutschland", "province": "DE", "history": {"test": "no"}}], 1, {}),  # IndexError
+        (("US", "NJ"), [{"country": "Deutschland", "province": "DE", "history": {"test": "no"}}], 0, {}),  # Invalid Key Merge
+    ],
+)
+def test_parse_history(key, locations, index, expected):
+    """
+    Test validating and extracting history content from
+    locations data based on index.
+ """ + assert jhu.parse_history(key, locations, index) == expected diff --git a/tests/test_location.py b/tests/test_location.py index 567eddcd..50129a52 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -48,7 +48,12 @@ def test_location_class( # Location. location_obj = location.TimelinedLocation( - test_id, country, province, coords, now, {"confirmed": confirmed, "deaths": deaths, "recovered": recovered,} + test_id, + country, + province, + coords, + now, + {"confirmed": confirmed, "deaths": deaths, "recovered": recovered,}, ) assert location_obj.country_code == country_code diff --git a/tests/test_routes.py b/tests/test_routes.py index 52d26843..5c81641b 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -112,8 +112,7 @@ async def test_v2_locations(self): with open(filepath, "r") as file: expected_json_output = file.read() - # TODO: Why is this failing? - # assert return_data == json.loads(expected_json_output) + assert return_data == json.loads(expected_json_output) async def test_v2_locations_id(self): state = "locations" @@ -126,12 +125,13 @@ async def test_v2_locations_id(self): return_data = response.json() - filepath = "tests/expected_output/v2_{state}_id_{test_id}.json".format(state=state, test_id=test_id) + filepath = "tests/expected_output/v2_{state}_id_{test_id}.json".format( + state=state, test_id=test_id + ) with open(filepath, "r") as file: expected_json_output = file.read() - # TODO: Why is this failing? - # assert return_data == expected_json_output + assert return_data == json.loads(expected_json_output) @pytest.mark.asyncio @@ -151,7 +151,9 @@ async def test_v2_locations_id(self): ({"source": "jhu", "country_code": "US"}, 404), ], ) -async def test_locations_status_code(async_api_client, query_params, expected_status, mock_client_session): +async def test_locations_status_code( + async_api_client, query_params, expected_status, mock_client_session +): response = await async_api_client.get("/v2/locations", query_string=query_params) print(f"GET {response.url}\n{response}") @@ -183,4 +185,4 @@ async def test_latest(async_api_client, query_params, mock_client_session): assert response.status_code == 200 assert response_json["latest"]["confirmed"] - assert response_json["latest"]["deaths"] + assert response_json["latest"]["deaths"] \ No newline at end of file diff --git a/tests/test_timeline.py b/tests/test_timeline.py index 056286aa..79612f5a 100644 --- a/tests/test_timeline.py +++ b/tests/test_timeline.py @@ -21,7 +21,12 @@ def test_timeline_class(): assert history_data.latest == 7 # validate order - assert list(dict(history_data.timeline).keys()) == ["1/22/20", "1/23/20", "1/24/20", "1/25/20"] + assert list(dict(history_data.timeline).keys()) == [ + "1/22/20", + "1/23/20", + "1/24/20", + "1/25/20", + ] # validate serialize check_serialize = {