diff --git a/README.md b/README.md index fb5ad988..3345eacc 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,24 @@ Support multiple data-sources. ![Covid-19 Recovered](https://covid19-badges.herokuapp.com/recovered/latest) ![Covid-19 Deaths](https://covid19-badges.herokuapp.com/deaths/latest) +## New York Times is now available as a source! + +**Specify source parameter with ?source=nyt. NYT also provides a timeseries! To view timelines of cases by US counties use ?source=nyt&timelines=true** + ## Recovered cases showing 0 -**JHU (our main data provider) [no longer provides data for amount of recoveries](https://github.com/CSSEGISandData/COVID-19/issues/1250), and as a result, the API will be showing 0 for this statistic. Apolegies for any inconvenience. Hopefully we'll be able to find an alternative data-source that offers this.** +**JHU (our main data provider) [no longer provides data for amount of recoveries](https://github.com/CSSEGISandData/COVID-19/issues/1250), and as a result, the API will be showing 0 for this statistic. Apologies for any inconvenience. Hopefully we'll be able to find an alternative data-source that offers this.** ## Available data-sources: -Currently 2 different data-sources are available to retrieve the data: +Currently 3 different data-sources are available to retrieve the data: * **jhu** - https://github.com/CSSEGISandData/COVID-19 - Worldwide Data repository operated by the Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE). * **csbs** - https://www.csbs.org/information-covid-19-coronavirus - U.S. County data that comes from the Conference of State Bank Supervisors. +* **nyt** - https://github.com/nytimes/covid-19-data - The New York Times is releasing a series of data files with cumulative counts of coronavirus cases in the United States, at the state and county level, over time. + __jhu__ data-source will be used as a default source if you don't specify a *source parameter* in your request. 
## API Reference @@ -71,7 +77,8 @@ __Sample response__ { "sources": [ "jhu", - "csbs" + "csbs", + "nyt" ] } ``` @@ -87,7 +94,7 @@ GET /v2/latest __Query String Parameters__ | __Query string parameter__ | __Description__ | __Type__ | | -------------------------- | -------------------------------------------------------------------------------- | -------- | -| source | The data-source where data will be retrieved from *(jhu/csbs)*. Default is *jhu* | String | +| source | The data-source where data will be retrieved from *(jhu/csbs/nyt)*. Default is *jhu* | String | __Sample response__ ```json @@ -117,7 +124,7 @@ __Path Parameters__ __Query String Parameters__ | __Query string parameter__ | __Description__ | __Type__ | | -------------------------- | -------------------------------------------------------------------------------- | -------- | -| source | The data-source where data will be retrieved from *(jhu/csbs)*. Default is *jhu* | String | +| source | The data-source where data will be retrieved from *(jhu/csbs/nyt)*. Default is *jhu* | String | #### Example Request ```http @@ -160,7 +167,7 @@ GET /v2/locations __Query String Parameters__ | __Query string parameter__ | __Description__ | __Type__ | | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | -| source | The data-source where data will be retrieved from.
__Value__ can be: *jhu/csbs*. __Default__ is *jhu* | String | +| source | The data-source where data will be retrieved from.
__Value__ can be: *jhu/csbs/nyt*. __Default__ is *jhu* | String | | country_code | The ISO ([alpha-2 country_code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)) to the Country/Province for which you're calling the Endpoint | String | | timelines | To set the visibility of timelines (*daily tracking*).
__Value__ can be: *0/1*. __Default__ is *0* (timelines are not visible) | Integer | diff --git a/app/data/__init__.py b/app/data/__init__.py index aef58e8c..265bf3d3 100644 --- a/app/data/__init__.py +++ b/app/data/__init__.py @@ -1,9 +1,10 @@ """app.data""" from ..services.location.csbs import CSBSLocationService from ..services.location.jhu import JhuLocationService +from ..services.location.nyt import NYTLocationService # Mapping of services to data-sources. -DATA_SOURCES = {"jhu": JhuLocationService(), "csbs": CSBSLocationService()} +DATA_SOURCES = {"jhu": JhuLocationService(), "csbs": CSBSLocationService(), "nyt": NYTLocationService()} def data_source(source): diff --git a/app/enums/sources.py b/app/enums/sources.py index b4538c45..9fc00744 100644 --- a/app/enums/sources.py +++ b/app/enums/sources.py @@ -8,3 +8,4 @@ class Sources(str, Enum): jhu = "jhu" csbs = "csbs" + nyt = "nyt" diff --git a/app/location/nyt.py b/app/location/nyt.py new file mode 100644 index 00000000..ad92212e --- /dev/null +++ b/app/location/nyt.py @@ -0,0 +1,32 @@ +"""app.locations.nyt.py""" +from . import TimelinedLocation + + +class NYTLocation(TimelinedLocation): + """ + A NYT (county) Timelinedlocation. + """ + + # pylint: disable=too-many-arguments,redefined-builtin + def __init__(self, id, state, county, coordinates, last_updated, timelines): + super().__init__(id, "US", state, coordinates, last_updated, timelines) + + self.state = state + self.county = county + + def serialize(self, timelines=False): # pylint: disable=arguments-differ,unused-argument + """ + Serializes the location into a dict. + + :returns: The serialized location. + :rtype: dict + """ + serialized = super().serialize(timelines) + + # Update with new fields. + serialized.update( + {"state": self.state, "county": self.county,} + ) + + # Return the serialized location. 
+ return serialized diff --git a/app/services/location/nyt.py b/app/services/location/nyt.py new file mode 100644 index 00000000..7f73c1de --- /dev/null +++ b/app/services/location/nyt.py @@ -0,0 +1,123 @@ +"""app.services.location.nyt.py""" +import csv +from datetime import datetime + +from asyncache import cached +from cachetools import TTLCache + +from ...coordinates import Coordinates +from ...location.nyt import NYTLocation +from ...timeline import Timeline +from ...utils import httputils +from . import LocationService + + +class NYTLocationService(LocationService): + """ + Service for retrieving locations from New York Times (https://github.com/nytimes/covid-19-data). + """ + + async def get_all(self): + # Get the locations. + locations = await get_locations() + return locations + + async def get(self, loc_id): # pylint: disable=arguments-differ + # Get location at the index equal to provided id. + locations = await self.get_all() + return locations[loc_id] + + +# --------------------------------------------------------------- + + +# Base URL for fetching category. +BASE_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv" + + +def get_grouped_locations_dict(data): + """ + Helper function to group history for locations into one dict. 
+ + :returns: The complete data for each unique US county + :rtype: dict + """ + grouped_locations = {} + + # in increasing order of dates + for row in data: + county_state = (row["county"], row["state"]) + date = row["date"] + confirmed = row["cases"] + deaths = row["deaths"] + + # initialize if not existing + if county_state not in grouped_locations: + grouped_locations[county_state] = {"confirmed": [], "deaths": []} + + # append confirmed tuple to county_state (date, # confirmed) + grouped_locations[county_state]["confirmed"].append((date, confirmed)) + # append deaths tuple to county_state (date, # deaths) + grouped_locations[county_state]["deaths"].append((date, deaths)) + + return grouped_locations + + +@cached(cache=TTLCache(maxsize=1024, ttl=3600)) +async def get_locations(): + """ + Returns a list containing parsed NYT data by US county. The data is cached for 1 hour. + + :returns: The complete data for US Counties. + :rtype: list + """ + + # Request the data. + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + # Parse the CSV. + data = list(csv.DictReader(text.splitlines())) + + # Group together locations (NYT data ordered by dates not location). + grouped_locations = get_grouped_locations_dict(data) + + # The normalized locations. + locations = [] + + for idx, (county_state, histories) in enumerate(grouped_locations.items()): + # Make location history for confirmed and deaths from dates. + # List is tuples of (date, amount) in order of increasing dates. + confirmed_list = histories["confirmed"] + confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} + + deaths_list = histories["deaths"] + deaths_history = {date: int(amount or 0) for date, amount in deaths_list} + + # Normalize the item and append to locations. 
+ locations.append( + NYTLocation( + id=idx, + state=county_state[1], + county=county_state[0], + coordinates=Coordinates(None, None), # NYT does not provide coordinates + last_updated=datetime.utcnow().isoformat() + "Z", # since last request + timelines={ + "confirmed": Timeline( + { + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in confirmed_history.items() + } + ), + "deaths": Timeline( + { + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in deaths_history.items() + } + ), + "recovered": Timeline({}), + }, + ) + ) + + return locations diff --git a/tests/example_data/counties.csv b/tests/example_data/counties.csv new file mode 100644 index 00000000..0e76bf0d --- /dev/null +++ b/tests/example_data/counties.csv @@ -0,0 +1,49 @@ +date,county,state,fips,cases,deaths +2020-01-21,Snohomish,Washington,53061,1,0 +2020-01-22,Snohomish,Washington,53061,1,0 +2020-01-23,Snohomish,Washington,53061,1,0 +2020-01-24,Cook,Illinois,17031,1,0 +2020-01-24,Snohomish,Washington,53061,1,0 +2020-01-25,Orange,California,06059,1,0 +2020-01-25,Cook,Illinois,17031,1,0 +2020-01-25,Snohomish,Washington,53061,1,0 +2020-01-26,Maricopa,Arizona,04013,1,0 +2020-01-26,Los Angeles,California,06037,1,0 +2020-01-26,Orange,California,06059,1,0 +2020-01-26,Cook,Illinois,17031,1,0 +2020-01-26,Snohomish,Washington,53061,1,0 +2020-01-27,Maricopa,Arizona,04013,1,0 +2020-01-27,Los Angeles,California,06037,1,0 +2020-01-27,Orange,California,06059,1,0 +2020-01-27,Cook,Illinois,17031,1,0 +2020-01-27,Snohomish,Washington,53061,1,0 +2020-01-28,Maricopa,Arizona,04013,1,0 +2020-01-28,Los Angeles,California,06037,1,0 +2020-01-28,Orange,California,06059,1,0 +2020-01-28,Cook,Illinois,17031,1,0 +2020-01-28,Snohomish,Washington,53061,1,0 +2020-01-29,Maricopa,Arizona,04013,1,0 +2020-01-29,Los Angeles,California,06037,1,0 +2020-01-29,Orange,California,06059,1,0 +2020-01-29,Cook,Illinois,17031,1,0 +2020-01-29,Snohomish,Washington,53061,1,0 
+2020-01-30,Maricopa,Arizona,04013,1,0 +2020-01-30,Los Angeles,California,06037,1,0 +2020-01-30,Orange,California,06059,1,0 +2020-01-30,Cook,Illinois,17031,2,0 +2020-01-30,Snohomish,Washington,53061,1,0 +2020-01-31,Maricopa,Arizona,04013,1,0 +2020-01-31,Los Angeles,California,06037,1,0 +2020-01-31,Orange,California,06059,1,0 +2020-01-31,Santa Clara,California,06085,1,0 +2020-01-31,Cook,Illinois,17031,2,0 +2020-01-31,Snohomish,Washington,53061,1,0 +2020-02-28,Snohomish,Washington,53061,2,0 +2020-03-10,Snohomish,Washington,53061,61,0 +2020-03-11,Snohomish,Washington,53061,69,1 +2020-03-12,Snohomish,Washington,53061,107,3 +2020-03-15,Snohomish,Washington,53061,175,3 +2020-03-17,Snohomish,Washington,53061,265,4 +2020-03-18,Snohomish,Washington,53061,309,5 +2020-03-19,Snohomish,Washington,53061,347,6 +2020-03-20,Snohomish,Washington,53061,384,7 \ No newline at end of file diff --git a/tests/expected_output/nyt_locations.json b/tests/expected_output/nyt_locations.json new file mode 100644 index 00000000..3af82c40 --- /dev/null +++ b/tests/expected_output/nyt_locations.json @@ -0,0 +1,302 @@ +[ + { + "id": 0, + "country": "US", + "country_code": "US", + "province": "Washington", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 384, + "deaths": 7, + "recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 384, + "timeline": { + "2020-01-21T00:00:00Z": 1, + "2020-01-22T00:00:00Z": 1, + "2020-01-23T00:00:00Z": 1, + "2020-01-24T00:00:00Z": 1, + "2020-01-25T00:00:00Z": 1, + "2020-01-26T00:00:00Z": 1, + "2020-01-27T00:00:00Z": 1, + "2020-01-28T00:00:00Z": 1, + "2020-01-29T00:00:00Z": 1, + "2020-01-30T00:00:00Z": 1, + "2020-01-31T00:00:00Z": 1, + "2020-02-28T00:00:00Z": 2, + "2020-03-10T00:00:00Z": 61, + "2020-03-11T00:00:00Z": 69, + "2020-03-12T00:00:00Z": 107, + "2020-03-15T00:00:00Z": 175, + "2020-03-17T00:00:00Z": 265, + "2020-03-18T00:00:00Z": 309, + "2020-03-19T00:00:00Z": 
347, + "2020-03-20T00:00:00Z": 384 + } + }, + "deaths": { + "latest": 7, + "timeline": { + "2020-01-21T00:00:00Z": 0, + "2020-01-22T00:00:00Z": 0, + "2020-01-23T00:00:00Z": 0, + "2020-01-24T00:00:00Z": 0, + "2020-01-25T00:00:00Z": 0, + "2020-01-26T00:00:00Z": 0, + "2020-01-27T00:00:00Z": 0, + "2020-01-28T00:00:00Z": 0, + "2020-01-29T00:00:00Z": 0, + "2020-01-30T00:00:00Z": 0, + "2020-01-31T00:00:00Z": 0, + "2020-02-28T00:00:00Z": 0, + "2020-03-10T00:00:00Z": 0, + "2020-03-11T00:00:00Z": 1, + "2020-03-12T00:00:00Z": 3, + "2020-03-15T00:00:00Z": 3, + "2020-03-17T00:00:00Z": 4, + "2020-03-18T00:00:00Z": 5, + "2020-03-19T00:00:00Z": 6, + "2020-03-20T00:00:00Z": 7 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "Washington", + "county": "Snohomish" + }, + { + "id": 1, + "country": "US", + "country_code": "US", + "province": "Illinois", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 2, + "deaths": 0, + "recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 2, + "timeline": { + "2020-01-24T00:00:00Z": 1, + "2020-01-25T00:00:00Z": 1, + "2020-01-26T00:00:00Z": 1, + "2020-01-27T00:00:00Z": 1, + "2020-01-28T00:00:00Z": 1, + "2020-01-29T00:00:00Z": 1, + "2020-01-30T00:00:00Z": 2, + "2020-01-31T00:00:00Z": 2 + } + }, + "deaths": { + "latest": 0, + "timeline": { + "2020-01-24T00:00:00Z": 0, + "2020-01-25T00:00:00Z": 0, + "2020-01-26T00:00:00Z": 0, + "2020-01-27T00:00:00Z": 0, + "2020-01-28T00:00:00Z": 0, + "2020-01-29T00:00:00Z": 0, + "2020-01-30T00:00:00Z": 0, + "2020-01-31T00:00:00Z": 0 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "Illinois", + "county": "Cook" + }, + { + "id": 2, + "country": "US", + "country_code": "US", + "province": "California", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 1, + "deaths": 0, + 
"recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 1, + "timeline": { + "2020-01-25T00:00:00Z": 1, + "2020-01-26T00:00:00Z": 1, + "2020-01-27T00:00:00Z": 1, + "2020-01-28T00:00:00Z": 1, + "2020-01-29T00:00:00Z": 1, + "2020-01-30T00:00:00Z": 1, + "2020-01-31T00:00:00Z": 1 + } + }, + "deaths": { + "latest": 0, + "timeline": { + "2020-01-25T00:00:00Z": 0, + "2020-01-26T00:00:00Z": 0, + "2020-01-27T00:00:00Z": 0, + "2020-01-28T00:00:00Z": 0, + "2020-01-29T00:00:00Z": 0, + "2020-01-30T00:00:00Z": 0, + "2020-01-31T00:00:00Z": 0 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "California", + "county": "Orange" + }, + { + "id": 3, + "country": "US", + "country_code": "US", + "province": "Arizona", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 1, + "deaths": 0, + "recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 1, + "timeline": { + "2020-01-26T00:00:00Z": 1, + "2020-01-27T00:00:00Z": 1, + "2020-01-28T00:00:00Z": 1, + "2020-01-29T00:00:00Z": 1, + "2020-01-30T00:00:00Z": 1, + "2020-01-31T00:00:00Z": 1 + } + }, + "deaths": { + "latest": 0, + "timeline": { + "2020-01-26T00:00:00Z": 0, + "2020-01-27T00:00:00Z": 0, + "2020-01-28T00:00:00Z": 0, + "2020-01-29T00:00:00Z": 0, + "2020-01-30T00:00:00Z": 0, + "2020-01-31T00:00:00Z": 0 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "Arizona", + "county": "Maricopa" + }, + { + "id": 4, + "country": "US", + "country_code": "US", + "province": "California", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 1, + "deaths": 0, + "recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 1, + "timeline": { + "2020-01-26T00:00:00Z": 1, + "2020-01-27T00:00:00Z": 1, + "2020-01-28T00:00:00Z": 1, + "2020-01-29T00:00:00Z": 1, + "2020-01-30T00:00:00Z": 1, + 
"2020-01-31T00:00:00Z": 1 + } + }, + "deaths": { + "latest": 0, + "timeline": { + "2020-01-26T00:00:00Z": 0, + "2020-01-27T00:00:00Z": 0, + "2020-01-28T00:00:00Z": 0, + "2020-01-29T00:00:00Z": 0, + "2020-01-30T00:00:00Z": 0, + "2020-01-31T00:00:00Z": 0 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "California", + "county": "Los Angeles" + }, + { + "id": 5, + "country": "US", + "country_code": "US", + "province": "California", + "coordinates": { + "latitude": null, + "longitude": null + }, + "last_updated": "2020-04-12T19:14:59.638001Z", + "latest": { + "confirmed": 1, + "deaths": 0, + "recovered": 0 + }, + "timelines": { + "confirmed": { + "latest": 1, + "timeline": { + "2020-01-31T00:00:00Z": 1 + } + }, + "deaths": { + "latest": 0, + "timeline": { + "2020-01-31T00:00:00Z": 0 + } + }, + "recovered": { + "latest": 0, + "timeline": {} + } + }, + "state": "California", + "county": "Santa Clara" + } +] \ No newline at end of file diff --git a/tests/test_nyt.py b/tests/test_nyt.py new file mode 100644 index 00000000..ca9c9dca --- /dev/null +++ b/tests/test_nyt.py @@ -0,0 +1,42 @@ +import json +from unittest import mock + +import pytest + +from app.location import TimelinedLocation +from app.location.nyt import NYTLocation +from app.services.location import nyt +from tests.conftest import mocked_strptime_isoformat + +DATETIME_STRING = "2020-04-12T19:14:59.638001" + + +@pytest.mark.asyncio +async def test_get_locations(mock_client_session): + with mock.patch("app.services.location.nyt.datetime") as mock_datetime: + mock_datetime.utcnow.return_value.isoformat.return_value = DATETIME_STRING + mock_datetime.strptime.side_effect = mocked_strptime_isoformat + locations = await nyt.get_locations() + + assert isinstance(locations, list) + + serialized_locations = [] + for location in locations: + assert isinstance(location, NYTLocation) + assert isinstance(location, TimelinedLocation) + + # Making sure country population is a non-zero value + 
assert location.country_population != 0 + serialized_location = location.serialize(timelines=True) + # Not checking for exact value of country population + del serialized_location["country_population"] + + serialized_locations.append(serialized_location) + + produced_json_output = json.dumps(serialized_locations) + + with open("tests/expected_output/nyt_locations.json", "r") as file: + expected_json_output = file.read() + + # translate them into python lists for ordering + assert json.loads(expected_json_output) == json.loads(produced_json_output) diff --git a/tests/test_routes.py b/tests/test_routes.py index 605ce2c0..52d26843 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -140,11 +140,14 @@ async def test_v2_locations_id(self): [ ({"source": "csbs"}, 200), ({"source": "jhu"}, 200), + ({"source": "nyt"}, 200), ({"timelines": True}, 200), ({"timelines": "true"}, 200), ({"timelines": 1}, 200), ({"source": "jhu", "timelines": True}, 200), + ({"source": "nyt", "timelines": True}, 200), ({"source": "csbs", "country_code": "US"}, 200), + ({"source": "nyt", "country_code": "US"}, 200), ({"source": "jhu", "country_code": "US"}, 404), ], ) @@ -162,10 +165,12 @@ async def test_locations_status_code(async_api_client, query_params, expected_st [ {"source": "csbs"}, {"source": "jhu"}, + {"source": "nyt"}, {"timelines": True}, {"timelines": "true"}, {"timelines": 1}, {"source": "jhu", "timelines": True}, + {"source": "nyt", "timelines": True}, ], ) async def test_latest(async_api_client, query_params, mock_client_session):