diff --git a/app/location/country.py b/app/location/country.py
new file mode 100644
index 00000000..25391786
--- /dev/null
+++ b/app/location/country.py
@@ -0,0 +1,456 @@
+"""
+Country utilities: resolve country names to ISO 3166-1 alpha-2 codes and populations.
+"""
+import json
+import logging
+
+import requests
+
+import app.io
+
+LOGGER = logging.getLogger(__name__)
+
+GEONAMES_URL = "http://api.geonames.org/countryInfoJSON"
+GEONAMES_BACKUP_PATH = "geonames_population_mappings.json"
+
+
+# Fetching of the populations.
+def fetch_populations(save=False):
+    """
+    Returns a dictionary containing the population of each country, fetched from GeoNames.
+    https://www.geonames.org/
+
+    TODO: only skip writing to the filesystem when deployed with gunicorn, or handle concurrent access, or use a DB.
+
+    :returns: The mapping of populations.
+    :rtype: dict
+    """
+    LOGGER.info("Fetching populations...")
+
+    # Mapping of populations.
+    mappings = {}
+
+    # Fetch the countries.
+    try:
+        countries = requests.get(GEONAMES_URL, params={"username": "dperic"}, timeout=1.25).json()[
+            "geonames"
+        ]
+        # Go through all the countries and perform the mapping.
+        for country in countries:
+            mappings.update({country["countryCode"]: int(country["population"]) or None})
+
+        if mappings and save:
+            LOGGER.info(f"Saving population data to {app.io.save(GEONAMES_BACKUP_PATH, mappings)}")
+    except (json.JSONDecodeError, KeyError, requests.exceptions.Timeout) as err:
+        LOGGER.warning(f"Error pulling population data. {err.__class__.__name__}: {err}")
+        # Fall back to the last backup saved to disk.
+        mappings = app.io.load(GEONAMES_BACKUP_PATH)
+        LOGGER.info(f"Using backup data from {GEONAMES_BACKUP_PATH}")
+    # Finally, return the mappings.
+    LOGGER.info("Fetched populations")
+    return mappings
+
+
+# Mapping of alpha-2 country codes to population.
+POPULATIONS = fetch_populations()
+
+
+# Retrieving.
+def country_population(country_code, default=None):
+    """
+    Fetches the population of the country with the provided country code.
+
+    :returns: The population.
+    :rtype: int
+    """
+    return POPULATIONS.get(country_code, default)
+
+
+# Default country code.
+DEFAULT_COUNTRY_CODE = "XX"
+
+# Mapping of country names to alpha-2 codes according to
+# https://en.wikipedia.org/wiki/ISO_3166-1.
+ # As a reference see also https://github.com/TakahikoKawasaki/nv-i18n (in Java) + # fmt: off + COUNTRY_NAME__COUNTRY_CODE = { + "Afghanistan" : "AF", + "Åland Islands" : "AX", + "Albania" : "AL", + "Algeria" : "DZ", + "American Samoa" : "AS", + "Andorra" : "AD", + "Angola" : "AO", + "Anguilla" : "AI", + "Antarctica" : "AQ", + "Antigua and Barbuda" : "AG", + "Argentina" : "AR", + "Armenia" : "AM", + "Aruba" : "AW", + "Australia" : "AU", + "Austria" : "AT", + "Azerbaijan" : "AZ", + " Azerbaijan" : "AZ", + "Bahamas" : "BS", + "The Bahamas" : "BS", + "Bahamas, The" : "BS", + "Bahrain" : "BH", + "Bangladesh" : "BD", + "Barbados" : "BB", + "Belarus" : "BY", + "Belgium" : "BE", + "Belize" : "BZ", + "Benin" : "BJ", + "Bermuda" : "BM", + "Bhutan" : "BT", + "Bolivia, Plurinational State of" : "BO", + "Bolivia" : "BO", + "Bonaire, Sint Eustatius and Saba" : "BQ", + "Caribbean Netherlands" : "BQ", + "Bosnia and Herzegovina" : "BA", + # "Bosnia–Herzegovina" : "BA", + "Bosnia" : "BA", + "Botswana" : "BW", + "Bouvet Island" : "BV", + "Brazil" : "BR", + "British Indian Ocean Territory" : "IO", + "Brunei Darussalam" : "BN", + "Brunei" : "BN", + "Bulgaria" : "BG", + "Burkina Faso" : "BF", + "Burundi" : "BI", + "Cambodia" : "KH", + "Cameroon" : "CM", + "Canada" : "CA", + "Cape Verde" : "CV", + "Cabo Verde" : "CV", + "Cayman Islands" : "KY", + "Central African Republic" : "CF", + "Chad" : "TD", + "Chile" : "CL", + "China" : "CN", + "Mainland China" : "CN", + "Christmas Island" : "CX", + "Cocos (Keeling) Islands" : "CC", + "Colombia" : "CO", + "Comoros" : "KM", + "Congo" : "CG", + "Congo (Brazzaville)" : "CG", + "Republic of the Congo" : "CG", + "Congo, the Democratic Republic of the" : "CD", + "Congo (Kinshasa)" : "CD", + "DR Congo" : "CD", + "Cook Islands" : "CK", + "Costa Rica" : "CR", + "Côte d'Ivoire" : "CI", + "Cote d'Ivoire" : "CI", + "Ivory Coast" : "CI", + "Croatia" : "HR", + "Cuba" : "CU", + "Curaçao" : "CW", + "Curacao" : "CW", + "Cyprus" : "CY", + "Czech Republic" : "CZ", + "Czechia" : "CZ", + "Denmark" : "DK", + "Djibouti" : "DJ", + "Dominica" : "DM", + "Dominican Republic" : "DO", + "Dominican Rep" : "DO", + "Ecuador" : "EC", + "Egypt" : "EG", + "El Salvador" : "SV", + "Equatorial Guinea" : "GQ", + "Eritrea" : "ER", + "Estonia" : "EE", + "Ethiopia" : "ET", + "Falkland Islands (Malvinas)" : "FK", + "Falkland Islands" : "FK", + "Faroe Islands" : "FO", + "Faeroe Islands" : "FO", + "Fiji" : "FJ", + "Finland" : "FI", + "France" : "FR", + "French Guiana" : "GF", + "French Polynesia" : "PF", + "French Southern Territories" : "TF", + "Gabon" : "GA", + "Gambia" : "GM", + "The Gambia" : "GM", + "Gambia, The" : "GM", + "Georgia" : "GE", + "Germany" : "DE", + "Deutschland" : "DE", + "Ghana" : "GH", + "Gibraltar" : "GI", + "Greece" : "GR", + "Greenland" : "GL", + "Grenada" : "GD", + "Guadeloupe" : "GP", + "Guam" : "GU", + "Guatemala" : "GT", + "Guernsey" : "GG", + "Guinea" : "GN", + "Guinea-Bissau" : "GW", + "Guyana" : "GY", + "Haiti" : "HT", + "Heard Island and McDonald Islands" : "HM", + "Holy See (Vatican City State)" : "VA", + "Holy See" : "VA", + "Vatican City" : "VA", + "Honduras" : "HN", + "Hong Kong" : "HK", + "Hong Kong SAR" : "HK", + "Hungary" : "HU", + "Iceland" : "IS", + "India" : "IN", + "Indonesia" : "ID", + "Iran, Islamic Republic of" : "IR", + "Iran" : "IR", + "Iran (Islamic Republic of)" : "IR", + "Iraq" : "IQ", + "Ireland" : "IE", + "Republic of Ireland" : "IE", + "Isle of Man" : "IM", + "Israel" : "IL", + "Italy" : "IT", + "Jamaica" : "JM", + "Japan" : "JP", + "Jersey" : "JE", + # 
Guernsey and Jersey form Channel Islands. Conjoin Guernsey on Jersey. + # Jersey has higher population. + # https://en.wikipedia.org/wiki/Channel_Islands + "Guernsey and Jersey" : "JE", + "Channel Islands" : "JE", + # "Channel Islands" : "GB", + "Jordan" : "JO", + "Kazakhstan" : "KZ", + "Kenya" : "KE", + "Kiribati" : "KI", + "Korea, Democratic People's Republic of" : "KP", + "North Korea" : "KP", + "Korea, Republic of" : "KR", + "Korea, South" : "KR", + "South Korea" : "KR", + "Republic of Korea" : "KR", + "Kosovo, Republic of" : "XK", + "Kosovo" : "XK", + "Kuwait" : "KW", + "Kyrgyzstan" : "KG", + "Lao People's Democratic Republic" : "LA", + "Laos" : "LA", + "Latvia" : "LV", + "Lebanon" : "LB", + "Lesotho" : "LS", + "Liberia" : "LR", + "Libya" : "LY", + "Liechtenstein" : "LI", + "Lithuania" : "LT", + "Luxembourg" : "LU", + "Macao" : "MO", + # TODO Macau is probably a typo. Report it to CSSEGISandData/COVID-19 + "Macau" : "MO", + "Macao SAR" : "MO", + "North Macedonia" : "MK", + "Macedonia" : "MK", + "Madagascar" : "MG", + "Malawi" : "MW", + "Malaysia" : "MY", + "Maldives" : "MV", + "Mali" : "ML", + "Malta" : "MT", + "Marshall Islands" : "MH", + "Martinique" : "MQ", + "Mauritania" : "MR", + "Mauritius" : "MU", + "Mayotte" : "YT", + "Mexico" : "MX", + "Micronesia, Federated States of" : "FM", + "F.S. Micronesia" : "FM", + "Micronesia" : "FM", + "Moldova, Republic of" : "MD", + "Republic of Moldova" : "MD", + "Moldova" : "MD", + "Monaco" : "MC", + "Mongolia" : "MN", + "Montenegro" : "ME", + "Montserrat" : "MS", + "Morocco" : "MA", + "Mozambique" : "MZ", + "Myanmar" : "MM", + "Burma" : "MM", + "Namibia" : "NA", + "Nauru" : "NR", + "Nepal" : "NP", + "Netherlands" : "NL", + "New Caledonia" : "NC", + "New Zealand" : "NZ", + "Nicaragua" : "NI", + "Niger" : "NE", + "Nigeria" : "NG", + "Niue" : "NU", + "Norfolk Island" : "NF", + "Northern Mariana Islands" : "MP", + "Norway" : "NO", + "Oman" : "OM", + "Pakistan" : "PK", + "Palau" : "PW", + "Palestine, State of" : "PS", + "Palestine" : "PS", + "occupied Palestinian territory" : "PS", + "State of Palestine" : "PS", + "The West Bank and Gaza" : "PS", + "West Bank and Gaza" : "PS", + "Panama" : "PA", + "Papua New Guinea" : "PG", + "Paraguay" : "PY", + "Peru" : "PE", + "Philippines" : "PH", + "Pitcairn" : "PN", + "Poland" : "PL", + "Portugal" : "PT", + "Puerto Rico" : "PR", + "Qatar" : "QA", + "Réunion" : "RE", + "Reunion" : "RE", + "Romania" : "RO", + "Russian Federation" : "RU", + "Russia" : "RU", + "Rwanda" : "RW", + "Saint Barthélemy" : "BL", + "Saint Barthelemy" : "BL", + "Saint Helena, Ascension and Tristan da Cunha" : "SH", + "Saint Helena" : "SH", + "Saint Kitts and Nevis" : "KN", + "Saint Kitts & Nevis" : "KN", + "Saint Lucia" : "LC", + "Saint Martin (French part)" : "MF", + "Saint Martin" : "MF", + "St. Martin" : "MF", + "Saint Pierre and Miquelon" : "PM", + "Saint Pierre & Miquelon" : "PM", + "Saint Vincent and the Grenadines" : "VC", + "St. 
Vincent & Grenadines" : "VC",
+    "Samoa" : "WS",
+    "San Marino" : "SM",
+    "Sao Tome and Principe" : "ST",
+    "São Tomé and Príncipe" : "ST",
+    "Sao Tome & Principe" : "ST",
+    "Saudi Arabia" : "SA",
+    "Senegal" : "SN",
+    "Serbia" : "RS",
+    "Seychelles" : "SC",
+    "Sierra Leone" : "SL",
+    "Singapore" : "SG",
+    "Sint Maarten (Dutch part)" : "SX",
+    "Sint Maarten" : "SX",
+    "Slovakia" : "SK",
+    "Slovenia" : "SI",
+    "Solomon Islands" : "SB",
+    "Somalia" : "SO",
+    "South Africa" : "ZA",
+    "South Georgia and the South Sandwich Islands" : "GS",
+    "South Sudan" : "SS",
+    "Spain" : "ES",
+    "Sri Lanka" : "LK",
+    "Sudan" : "SD",
+    "Suriname" : "SR",
+    "Svalbard and Jan Mayen" : "SJ",
+    "Eswatini" : "SZ",  # previous name "Swaziland"
+    "Swaziland" : "SZ",
+    "Sweden" : "SE",
+    "Switzerland" : "CH",
+    "Syrian Arab Republic" : "SY",
+    "Syria" : "SY",
+    "Taiwan, Province of China" : "TW",
+    "Taiwan*" : "TW",
+    "Taipei and environs" : "TW",
+    "Taiwan" : "TW",
+    "Tajikistan" : "TJ",
+    "Tanzania, United Republic of" : "TZ",
+    "Tanzania" : "TZ",
+    "Thailand" : "TH",
+    "Timor-Leste" : "TL",
+    "East Timor" : "TL",
+    "Togo" : "TG",
+    "Tokelau" : "TK",
+    "Tonga" : "TO",
+    "Trinidad and Tobago" : "TT",
+    "Tunisia" : "TN",
+    "Turkey" : "TR",
+    "Turkmenistan" : "TM",
+    "Turks and Caicos Islands" : "TC",
+    "Turks and Caicos" : "TC",
+    "Tuvalu" : "TV",
+    "Uganda" : "UG",
+    "Ukraine" : "UA",
+    "United Arab Emirates" : "AE",
+    "Emirates" : "AE",
+    "United Kingdom" : "GB",
+    "UK" : "GB",
+    # Conjoin North Ireland on United Kingdom
+    "North Ireland" : "GB",
+    "United States" : "US",
+    "US" : "US",
+    "United States Minor Outlying Islands" : "UM",
+    "Uruguay" : "UY",
+    "Uzbekistan" : "UZ",
+    "Vanuatu" : "VU",
+    "Venezuela, Bolivarian Republic of" : "VE",
+    "Venezuela" : "VE",
+    "Viet Nam" : "VN",
+    "Vietnam" : "VN",
+    "Virgin Islands, British" : "VG",
+    "British Virgin Islands" : "VG",
+    "Virgin Islands, U.S." : "VI",
+    "U.S. Virgin Islands" : "VI",
+    "Wallis and Futuna" : "WF",
+    "Wallis & Futuna" : "WF",
+    "Western Sahara" : "EH",
+    "Yemen" : "YE",
+    "Zambia" : "ZM",
+    "Zimbabwe" : "ZW",
+
+    # see also
+    # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent_(data_file)#Data_file
+    # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent
+    "United Nations Neutral Zone" : "XD",
+    "Iraq-Saudi Arabia Neutral Zone" : "XE",
+    "Spratly Islands" : "XS",
+
+    # "Diamond Princess" : default_country_code,
+    # TODO "Disputed Territory" conflicts with `default_country_code`
+    # "Disputed Territory" : "XX",
+
+    # "Others" has no mapping, i.e. the default val is used
+
+    # ships:
+    # "Cruise Ship"
+    # "MS Zaandam"
+}
+
+
+# fmt: on
+def country_code(value):
+    """
+    Return the two-letter (alpha-2) country code according to https://en.wikipedia.org/wiki/ISO_3166-1.
+    Defaults to "XX".
+    """
+    code = COUNTRY_NAME__COUNTRY_CODE.get(value, DEFAULT_COUNTRY_CODE)
+    if code == DEFAULT_COUNTRY_CODE:
+        # log at sub DEBUG level
+        LOGGER.log(5, f"No country code found for '{value}'. Using '{code}'!")
+
+    return code
+
+
+class Country:
+    """
+    A country position, i.e. its latitude/longitude coordinates.
+    """
+
+    def __init__(self, latitude=None, longitude=None):
+        self.latitude = latitude
+        self.longitude = longitude
+
+    def serialize(self):
+        """
+        Serializes the coordinates into a dict.
+
+        :returns: The serialized coordinates.
+ :rtype: dict + """ + return {"latitude": self.latitude, "longitude": self.longitude} + + def __str__(self): + return "lat: %s, long: %s" % (self.latitude, self.longitude) \ No newline at end of file diff --git a/app/services/__init__.py b/app/services/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/app/services/location/__init__.py b/app/services/location/__init__.py deleted file mode 100644 index 6d292b54..00000000 --- a/app/services/location/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -"""app.services.location""" -from abc import ABC, abstractmethod - - -class LocationService(ABC): - """ - Service for retrieving locations. - """ - - @abstractmethod - async def get_all(self): - """ - Gets and returns all of the locations. - - :returns: The locations. - :rtype: List[Location] - """ - raise NotImplementedError - - @abstractmethod - async def get(self, id): # pylint: disable=redefined-builtin,invalid-name - """ - Gets and returns location with the provided id. - - :returns: The location. - :rtype: Location - """ - raise NotImplementedError diff --git a/app/services/location/csbs.py b/app/services/location/csbs.py deleted file mode 100644 index 444ebad6..00000000 --- a/app/services/location/csbs.py +++ /dev/null @@ -1,102 +0,0 @@ -"""app.services.location.csbs.py""" -import csv -import logging -from datetime import datetime - -from asyncache import cached -from cachetools import TTLCache - -from ...caches import check_cache, load_cache -from ...coordinates import Coordinates -from ...location.csbs import CSBSLocation -from ...utils import httputils -from . import LocationService - -LOGGER = logging.getLogger("services.location.csbs") - - -class CSBSLocationService(LocationService): - """ - Service for retrieving locations from csbs - """ - - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to the provided id. - locations = await self.get_all() - return locations[loc_id] - - -# Base URL for fetching data -BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv" - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Retrieves county locations; locations are cached for 1 hour - - :returns: The locations. - :rtype: dict - """ - data_id = "csbs.locations" - LOGGER.info(f"{data_id} Requesting data...") - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - locations = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - async with httputils.CLIENT_SESSION.get(BASE_URL) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - locations = [] - - for i, item in enumerate(data): - # General info. - state = item["State Name"] - county = item["County Name"] - - # Ensure country is specified. - if county in {"Unassigned", "Unknown"}: - continue - - # Date string without "EDT" at end. - last_update = " ".join(item["Last Update"].split(" ")[0:2]) - - # Append to locations. - locations.append( - CSBSLocation( - # General info. - i, - state, - county, - # Coordinates. - Coordinates(item["Latitude"], item["Longitude"]), - # Last update (parse as ISO). - datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z", - # Statistics. 
- int(item["Confirmed"] or 0), - int(item["Death"] or 0), - ) - ) - LOGGER.info(f"{data_id} Data normalized") - # save the results to distributed cache - # TODO: fix json serialization - try: - await load_cache(data_id, locations) - except TypeError as type_err: - LOGGER.error(type_err) - - # Return the locations. - return locations diff --git a/app/services/location/jhu.py b/app/services/location/jhu.py deleted file mode 100644 index ebed3960..00000000 --- a/app/services/location/jhu.py +++ /dev/null @@ -1,228 +0,0 @@ -"""app.services.location.jhu.py""" -import csv -import logging -import os -from datetime import datetime -from pprint import pformat as pf - -from asyncache import cached -from cachetools import TTLCache - -from ...caches import check_cache, load_cache -from ...coordinates import Coordinates -from ...location import TimelinedLocation -from ...models import Timeline -from ...utils import countries -from ...utils import date as date_util -from ...utils import httputils -from . import LocationService - -LOGGER = logging.getLogger("services.location.jhu") -PID = os.getpid() - - -class JhuLocationService(LocationService): - """ - Service for retrieving locations from Johns Hopkins CSSE (https://github.com/CSSEGISandData/COVID-19). - """ - - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to provided id. - locations = await self.get_all() - return locations[loc_id] - - -# --------------------------------------------------------------- - - -# Base URL for fetching category. -BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/" - - -@cached(cache=TTLCache(maxsize=4, ttl=1800)) -async def get_category(category): - """ - Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis. - - :returns: The data for category. - :rtype: dict - """ - # Adhere to category naming standard. - category = category.lower() - data_id = f"jhu.{category}" - - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - results = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - # URL to request data from. - url = BASE_URL + "time_series_covid19_%s_global.csv" % category - - # Request the data - LOGGER.info(f"{data_id} Requesting data...") - async with httputils.CLIENT_SESSION.get(url) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - # Parse the CSV. - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - # The normalized locations. - locations = [] - - for item in data: - # Filter out all the dates. - dates = dict(filter(lambda element: date_util.is_date(element[0]), item.items())) - - # Make location history from dates. - history = {date: int(float(amount or 0)) for date, amount in dates.items()} - - # Country for this location. - country = item["Country/Region"] - - # Latest data insert value. - latest = list(history.values())[-1] - - # Normalize the item and append to locations. - locations.append( - { - # General info. - "country": country, - "country_code": countries.country_code(country), - "province": item["Province/State"], - # Coordinates. - "coordinates": {"lat": item["Lat"], "long": item["Long"],}, - # History. 
- "history": history, - # Latest statistic. - "latest": int(latest or 0), - } - ) - LOGGER.debug(f"{data_id} Data normalized") - - # Latest total. - latest = sum(map(lambda location: location["latest"], locations)) - - # Return the final data. - results = { - "locations": locations, - "latest": latest, - "last_updated": datetime.utcnow().isoformat() + "Z", - "source": "https://github.com/ExpDev07/coronavirus-tracker-api", - } - # save the results to distributed cache - await load_cache(data_id, results) - - LOGGER.info(f"{data_id} results:\n{pf(results, depth=1)}") - return results - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Retrieves the locations from the categories. The locations are cached for 1 hour. - - :returns: The locations. - :rtype: List[Location] - """ - data_id = "jhu.locations" - LOGGER.info(f"pid:{PID}: {data_id} Requesting data...") - # Get all of the data categories locations. - confirmed = await get_category("confirmed") - deaths = await get_category("deaths") - recovered = await get_category("recovered") - - locations_confirmed = confirmed["locations"] - locations_deaths = deaths["locations"] - locations_recovered = recovered["locations"] - - # Final locations to return. - locations = [] - # *************************************************************************** - # TODO: This iteration approach assumes the indexes remain the same - # and opens us to a CRITICAL ERROR. The removal of a column in the data source - # would break the API or SHIFT all the data confirmed, deaths, recovery producting - # incorrect data to consumers. - # *************************************************************************** - # Go through locations. - for index, location in enumerate(locations_confirmed): - # Get the timelines. - - # TEMP: Fix for merging recovery data. See TODO above for more details. - key = (location["country"], location["province"]) - - timelines = { - "confirmed": location["history"], - "deaths": parse_history(key, locations_deaths, index), - "recovered": parse_history(key, locations_recovered, index), - } - - # Grab coordinates. - coordinates = location["coordinates"] - - # Create location (supporting timelines) and append. - locations.append( - TimelinedLocation( - # General info. - index, - location["country"], - location["province"], - # Coordinates. - Coordinates(latitude=coordinates["lat"], longitude=coordinates["long"]), - # Last update. - datetime.utcnow().isoformat() + "Z", - # Timelines (parse dates as ISO). - { - "confirmed": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["confirmed"].items() - } - ), - "deaths": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["deaths"].items() - } - ), - "recovered": Timeline( - timeline={ - datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount - for date, amount in timelines["recovered"].items() - } - ), - }, - ) - ) - LOGGER.info(f"{data_id} Data normalized") - - # Finally, return the locations. - return locations - - -def parse_history(key: tuple, locations: list, index: int): - """ - Helper for validating and extracting history content from - locations data based on index. Validates with the current country/province - key to make sure no index/column issue. - - TEMP: solution because implement a more efficient and better approach in the refactor. 
- """ - location_history = {} - try: - if key == (locations[index]["country"], locations[index]["province"]): - location_history = locations[index]["history"] - except (IndexError, KeyError): - LOGGER.debug(f"iteration data merge error: {index} {key}") - - return location_history diff --git a/app/services/location/locationservice.py b/app/services/location/locationservice.py new file mode 100644 index 00000000..ce06b75c --- /dev/null +++ b/app/services/location/locationservice.py @@ -0,0 +1,482 @@ +from abc import ABC, abstractmethod +import csv +import logging +import os +from datetime import datetime +from pprint import pformat as pf + +from asyncache import cached +from cachetools import TTLCache + +from ...caches import check_cache, load_cache +from ...coordinates import Coordinates +from ...location.csbs import CSBSLocation +from ...location.nyt import NYTLocation +from ...location import TimelinedLocation +from ...utils import countries +from ...utils import date as date_util +from ...utils import httputils +from . import LocationService +from ...models import Timeline + + +class LocationService(ABC): + """ + Service for retrieving locations. + """ + + @abstractmethod + async def get_all(self): + """ + Gets and returns all of the locations. + + :returns: The locations. + :rtype: List[Location] + """ + raise NotImplementedError + + @abstractmethod + async def get(self, id): # pylint: disable=redefined-builtin,invalid-name + """ + Gets and returns location with the provided id. + + :returns: The location. + :rtype: Location + """ + raise NotImplementedError + + +class LocationService(object): + def __init__(self, service): + self.service = service + + +LOGGER = logging.getLogger("services.location.csbs") + + +class CSBSLocationService(LocationService): + """ + Service for retrieving locations from csbs + """ + + async def get_all(self): + # Get the locations. + locations = await get_locations() + return locations + + async def get(self, loc_id): # pylint: disable=arguments-differ + # Get location at the index equal to the provided id. + locations = await self.get_all() + return locations[loc_id] + + +# Base URL for fetching data +BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv" + + +@cached(cache=TTLCache(maxsize=1, ttl=1800)) +async def get_locations(): + """ + Retrieves county locations; locations are cached for 1 hour + + :returns: The locations. + :rtype: dict + """ + data_id = "csbs.locations" + LOGGER.info(f"{data_id} Requesting data...") + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGER.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGER.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGER.debug(f"{data_id} Data received") + + data = list(csv.DictReader(text.splitlines())) + LOGGER.debug(f"{data_id} CSV parsed") + + locations = [] + + for i, item in enumerate(data): + # General info. + state = item["State Name"] + county = item["County Name"] + + # Ensure country is specified. + if county in {"Unassigned", "Unknown"}: + continue + + # Date string without "EDT" at end. + last_update = " ".join(item["Last Update"].split(" ")[0:2]) + + # Append to locations. + locations.append( + CSBSLocation( + # General info. + i, + state, + county, + # Coordinates. + Coordinates(item["Latitude"], item["Longitude"]), + # Last update (parse as ISO). 
+
+
+LOGGER = logging.getLogger("services.location.csbs")
+
+
+class CSBSLocationService(LocationService):
+    """
+    Service for retrieving locations from csbs
+    """
+
+    async def get_all(self):
+        # Get the locations.
+        locations = await get_csbs_locations()
+        return locations
+
+    async def get(self, loc_id):  # pylint: disable=arguments-differ
+        # Get location at the index equal to the provided id.
+        locations = await self.get_all()
+        return locations[loc_id]
+
+
+# Base URL for fetching data
+CSBS_BASE_URL = "https://facts.csbs.org/covid-19/covid19_county.csv"
+
+
+@cached(cache=TTLCache(maxsize=1, ttl=1800))
+async def get_csbs_locations():
+    """
+    Retrieves county locations; locations are cached locally for 30 minutes.
+
+    :returns: The locations.
+    :rtype: List[Location]
+    """
+    data_id = "csbs.locations"
+    LOGGER.info(f"{data_id} Requesting data...")
+    # check shared cache
+    cache_results = await check_cache(data_id)
+    if cache_results:
+        LOGGER.info(f"{data_id} using shared cache results")
+        locations = cache_results
+    else:
+        LOGGER.info(f"{data_id} shared cache empty")
+        async with httputils.CLIENT_SESSION.get(CSBS_BASE_URL) as response:
+            text = await response.text()
+
+        LOGGER.debug(f"{data_id} Data received")
+
+        data = list(csv.DictReader(text.splitlines()))
+        LOGGER.debug(f"{data_id} CSV parsed")
+
+        locations = []
+
+        for i, item in enumerate(data):
+            # General info.
+            state = item["State Name"]
+            county = item["County Name"]
+
+            # Ensure county is specified.
+            if county in {"Unassigned", "Unknown"}:
+                continue
+
+            # Date string without "EDT" at end.
+            last_update = " ".join(item["Last Update"].split(" ")[0:2])
+
+            # Append to locations.
+            locations.append(
+                CSBSLocation(
+                    # General info.
+                    i,
+                    state,
+                    county,
+                    # Coordinates.
+                    Coordinates(item["Latitude"], item["Longitude"]),
+                    # Last update (parse as ISO).
+                    datetime.strptime(last_update, "%Y-%m-%d %H:%M").isoformat() + "Z",
+                    # Statistics.
+                    int(item["Confirmed"] or 0),
+                    int(item["Death"] or 0),
+                )
+            )
+        LOGGER.info(f"{data_id} Data normalized")
+        # save the results to distributed cache
+        # TODO: fix json serialization
+        try:
+            await load_cache(data_id, locations)
+        except TypeError as type_err:
+            LOGGER.error(type_err)
+
+    # Return the locations.
+    return locations
+
+
+LOGGERJHU = logging.getLogger("services.location.jhu")
+PID = os.getpid()
+
+
+class JhuLocationService(LocationService):
+    """
+    Service for retrieving locations from Johns Hopkins CSSE (https://github.com/CSSEGISandData/COVID-19).
+    """
+
+    async def get_all(self):
+        # Get the locations.
+        locations = await get_jhu_locations()
+        return locations
+
+    async def get(self, loc_id):  # pylint: disable=arguments-differ
+        # Get location at the index equal to provided id.
+        locations = await self.get_all()
+        return locations[loc_id]
+
+
+# ---------------------------------------------------------------
+
+
+# Base URL for fetching category.
+JHU_BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/2019-nCoV/master/csse_covid_19_data/csse_covid_19_time_series/"
+
+
+@cached(cache=TTLCache(maxsize=4, ttl=1800))
+async def get_category(category):
+    """
+    Retrieves the data for the provided category. The data is cached for 30 minutes locally, 1 hour via shared Redis.
+
+    :returns: The data for category.
+    :rtype: dict
+    """
+    # Adhere to category naming standard.
+    category = category.lower()
+    data_id = f"jhu.{category}"
+
+    # check shared cache
+    cache_results = await check_cache(data_id)
+    if cache_results:
+        LOGGERJHU.info(f"{data_id} using shared cache results")
+        results = cache_results
+    else:
+        LOGGERJHU.info(f"{data_id} shared cache empty")
+        # URL to request data from.
+        url = JHU_BASE_URL + "time_series_covid19_%s_global.csv" % category
+
+        # Request the data
+        LOGGERJHU.info(f"{data_id} Requesting data...")
+        async with httputils.CLIENT_SESSION.get(url) as response:
+            text = await response.text()
+
+        LOGGERJHU.debug(f"{data_id} Data received")
+
+        # Parse the CSV.
+        data = list(csv.DictReader(text.splitlines()))
+        LOGGERJHU.debug(f"{data_id} CSV parsed")
+
+        # The normalized locations.
+        locations = []
+
+        for item in data:
+            # Filter out all the dates.
+            dates = dict(filter(lambda element: date_util.is_date(element[0]), item.items()))
+
+            # Make location history from dates.
+            history = {date: int(float(amount or 0)) for date, amount in dates.items()}
+
+            # Country for this location.
+            country = item["Country/Region"]
+
+            # Latest data insert value.
+            latest = list(history.values())[-1]
+
+            # Normalize the item and append to locations.
+            locations.append(
+                {
+                    # General info.
+                    "country": country,
+                    "country_code": countries.country_code(country),
+                    "province": item["Province/State"],
+                    # Coordinates.
+                    "coordinates": {"lat": item["Lat"], "long": item["Long"]},
+                    # History.
+                    "history": history,
+                    # Latest statistic.
+                    "latest": int(latest or 0),
+                }
+            )
+        LOGGERJHU.debug(f"{data_id} Data normalized")
+
+        # Latest total.
+        latest = sum(map(lambda location: location["latest"], locations))
+
+        # Return the final data.
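+        # (Payload shape, for reference — identical to what the deleted jhu.py returned:
+        #  {"locations": [...], "latest": <int>, "last_updated": "<ISO8601>Z", "source": "<url>"}.)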
+        results = {
+            "locations": locations,
+            "latest": latest,
+            "last_updated": datetime.utcnow().isoformat() + "Z",
+            "source": "https://github.com/ExpDev07/coronavirus-tracker-api",
+        }
+        # save the results to distributed cache
+        await load_cache(data_id, results)
+
+    LOGGERJHU.info(f"{data_id} results:\n{pf(results, depth=1)}")
+    return results
+
+
+@cached(cache=TTLCache(maxsize=1, ttl=1800))
+async def get_jhu_locations():
+    """
+    Retrieves the locations from the categories. The locations are cached locally for 30 minutes.
+
+    :returns: The locations.
+    :rtype: List[Location]
+    """
+    data_id = "jhu.locations"
+    LOGGERJHU.info(f"pid:{PID}: {data_id} Requesting data...")
+    # Get all of the data categories locations.
+    confirmed = await get_category("confirmed")
+    deaths = await get_category("deaths")
+    recovered = await get_category("recovered")
+
+    locations_confirmed = confirmed["locations"]
+    locations_deaths = deaths["locations"]
+    locations_recovered = recovered["locations"]
+
+    # Final locations to return.
+    locations = []
+    # ***************************************************************************
+    # TODO: This iteration approach assumes the indexes remain the same
+    # and opens us to a CRITICAL ERROR. The removal of a column in the data source
+    # would break the API or SHIFT all the confirmed, deaths and recovery data,
+    # producing incorrect data for consumers.
+    # ***************************************************************************
+    # Go through locations.
+    for index, location in enumerate(locations_confirmed):
+        # Get the timelines.
+
+        # TEMP: Fix for merging recovery data. See TODO above for more details.
+        key = (location["country"], location["province"])
+
+        timelines = {
+            "confirmed": location["history"],
+            "deaths": parse_history(key, locations_deaths, index),
+            "recovered": parse_history(key, locations_recovered, index),
+        }
+
+        # Grab coordinates.
+        coordinates = location["coordinates"]
+
+        # Create location (supporting timelines) and append.
+        locations.append(
+            TimelinedLocation(
+                # General info.
+                index,
+                location["country"],
+                location["province"],
+                # Coordinates.
+                Coordinates(latitude=coordinates["lat"], longitude=coordinates["long"]),
+                # Last update.
+                datetime.utcnow().isoformat() + "Z",
+                # Timelines (parse dates as ISO).
+                {
+                    "confirmed": Timeline(
+                        timeline={
+                            datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
+                            for date, amount in timelines["confirmed"].items()
+                        }
+                    ),
+                    "deaths": Timeline(
+                        timeline={
+                            datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
+                            for date, amount in timelines["deaths"].items()
+                        }
+                    ),
+                    "recovered": Timeline(
+                        timeline={
+                            datetime.strptime(date, "%m/%d/%y").isoformat() + "Z": amount
+                            for date, amount in timelines["recovered"].items()
+                        }
+                    ),
+                },
+            )
+        )
+    LOGGERJHU.info(f"{data_id} Data normalized")
+
+    # Finally, return the locations.
+    return locations
+
+
+def parse_history(key: tuple, locations: list, index: int):
+    """
+    Helper for validating and extracting history content from
+    locations data based on index. Validates against the current country/province
+    key to guard against index/column misalignment.
+
+    TEMP: interim solution; a more efficient approach is planned for the refactor.
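+
+    Example (illustrative):
+
+        key = ("US", "New York")
+        deaths_history = parse_history(key, locations_deaths, index)
+        # -> {} if the row at `index` no longer matches `key`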
+ """ + location_history = {} + try: + if key == (locations[index]["country"], locations[index]["province"]): + location_history = locations[index]["history"] + except (IndexError, KeyError): + LOGGERJHU.debug(f"iteration data merge error: {index} {key}") + + return location_history + +LOGGERNYT = logging.getLoggerNYT("services.location.nyt") + + +class NYTLocationService(LocationService): + """ + Service for retrieving locations from New York Times (https://github.com/nytimes/covid-19-data). + """ + + async def get_all(self): + # Get the locations. + locations = await get_locations() + return locations + + async def get(self, loc_id): # pylint: disable=arguments-differ + # Get location at the index equal to provided id. + locations = await self.get_all() + return locations[loc_id] + + +# --------------------------------------------------------------- + + +# Base URL for fetching category. +BASE_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv" + + +def get_grouped_locations_dict(data): + """ + Helper function to group history for locations into one dict. + + :returns: The complete data for each unique US county + :rdata: dict + """ + grouped_locations = {} + + # in increasing order of dates + for row in data: + county_state = (row["county"], row["state"]) + date = row["date"] + confirmed = row["cases"] + deaths = row["deaths"] + + # initialize if not existing + if county_state not in grouped_locations: + grouped_locations[county_state] = {"confirmed": [], "deaths": []} + + # append confirmed tuple to county_state (date, # confirmed) + grouped_locations[county_state]["confirmed"].append((date, confirmed)) + # append deaths tuple to county_state (date, # deaths) + grouped_locations[county_state]["deaths"].append((date, deaths)) + + return grouped_locations + + +@cached(cache=TTLCache(maxsize=1, ttl=1800)) +async def get_locations(): + """ + Returns a list containing parsed NYT data by US county. The data is cached for 1 hour. + + :returns: The complete data for US Counties. + :rtype: dict + """ + data_id = "nyt.locations" + # Request the data. + LOGGERNYT.info(f"{data_id} Requesting data...") + # check shared cache + cache_results = await check_cache(data_id) + if cache_results: + LOGGERNYT.info(f"{data_id} using shared cache results") + locations = cache_results + else: + LOGGERNYT.info(f"{data_id} shared cache empty") + async with httputils.CLIENT_SESSION.get(BASE_URL) as response: + text = await response.text() + + LOGGERNYT.debug(f"{data_id} Data received") + + # Parse the CSV. + data = list(csv.DictReader(text.splitlines())) + LOGGERNYT.debug(f"{data_id} CSV parsed") + + # Group together locations (NYT data ordered by dates not location). + grouped_locations = get_grouped_locations_dict(data) + + # The normalized locations. + locations = [] + + for idx, (county_state, histories) in enumerate(grouped_locations.items()): + # Make location history for confirmed and deaths from dates. + # List is tuples of (date, amount) in order of increasing dates. + confirmed_list = histories["confirmed"] + confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} + + deaths_list = histories["deaths"] + deaths_history = {date: int(amount or 0) for date, amount in deaths_list} + + # Normalize the item and append to locations. 
+ locations.append( + NYTLocation( + id=idx, + state=county_state[1], + county=county_state[0], + coordinates=Coordinates(None, None), # NYT does not provide coordinates + last_updated=datetime.utcnow().isoformat() + "Z", # since last request + timelines={ + "confirmed": Timeline( + timeline={ + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in confirmed_history.items() + } + ), + "deaths": Timeline( + timeline={ + datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount + for date, amount in deaths_history.items() + } + ), + "recovered": Timeline(), + }, + ) + ) + LOGGERNYT.info(f"{data_id} Data normalized") + # save the results to distributed cache + # TODO: fix json serialization + try: + await load_cache(data_id, locations) + except TypeError as type_err: + LOGGERNYT.error(type_err) + + return locations \ No newline at end of file diff --git a/app/services/location/nyt.py b/app/services/location/nyt.py deleted file mode 100644 index 1f25ec34..00000000 --- a/app/services/location/nyt.py +++ /dev/null @@ -1,145 +0,0 @@ -"""app.services.location.nyt.py""" -import csv -import logging -from datetime import datetime - -from asyncache import cached -from cachetools import TTLCache - -from ...caches import check_cache, load_cache -from ...coordinates import Coordinates -from ...location.nyt import NYTLocation -from ...models import Timeline -from ...utils import httputils -from . import LocationService - -LOGGER = logging.getLogger("services.location.nyt") - - -class NYTLocationService(LocationService): - """ - Service for retrieving locations from New York Times (https://github.com/nytimes/covid-19-data). - """ - - async def get_all(self): - # Get the locations. - locations = await get_locations() - return locations - - async def get(self, loc_id): # pylint: disable=arguments-differ - # Get location at the index equal to provided id. - locations = await self.get_all() - return locations[loc_id] - - -# --------------------------------------------------------------- - - -# Base URL for fetching category. -BASE_URL = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv" - - -def get_grouped_locations_dict(data): - """ - Helper function to group history for locations into one dict. - - :returns: The complete data for each unique US county - :rdata: dict - """ - grouped_locations = {} - - # in increasing order of dates - for row in data: - county_state = (row["county"], row["state"]) - date = row["date"] - confirmed = row["cases"] - deaths = row["deaths"] - - # initialize if not existing - if county_state not in grouped_locations: - grouped_locations[county_state] = {"confirmed": [], "deaths": []} - - # append confirmed tuple to county_state (date, # confirmed) - grouped_locations[county_state]["confirmed"].append((date, confirmed)) - # append deaths tuple to county_state (date, # deaths) - grouped_locations[county_state]["deaths"].append((date, deaths)) - - return grouped_locations - - -@cached(cache=TTLCache(maxsize=1, ttl=1800)) -async def get_locations(): - """ - Returns a list containing parsed NYT data by US county. The data is cached for 1 hour. - - :returns: The complete data for US Counties. - :rtype: dict - """ - data_id = "nyt.locations" - # Request the data. 
- LOGGER.info(f"{data_id} Requesting data...") - # check shared cache - cache_results = await check_cache(data_id) - if cache_results: - LOGGER.info(f"{data_id} using shared cache results") - locations = cache_results - else: - LOGGER.info(f"{data_id} shared cache empty") - async with httputils.CLIENT_SESSION.get(BASE_URL) as response: - text = await response.text() - - LOGGER.debug(f"{data_id} Data received") - - # Parse the CSV. - data = list(csv.DictReader(text.splitlines())) - LOGGER.debug(f"{data_id} CSV parsed") - - # Group together locations (NYT data ordered by dates not location). - grouped_locations = get_grouped_locations_dict(data) - - # The normalized locations. - locations = [] - - for idx, (county_state, histories) in enumerate(grouped_locations.items()): - # Make location history for confirmed and deaths from dates. - # List is tuples of (date, amount) in order of increasing dates. - confirmed_list = histories["confirmed"] - confirmed_history = {date: int(amount or 0) for date, amount in confirmed_list} - - deaths_list = histories["deaths"] - deaths_history = {date: int(amount or 0) for date, amount in deaths_list} - - # Normalize the item and append to locations. - locations.append( - NYTLocation( - id=idx, - state=county_state[1], - county=county_state[0], - coordinates=Coordinates(None, None), # NYT does not provide coordinates - last_updated=datetime.utcnow().isoformat() + "Z", # since last request - timelines={ - "confirmed": Timeline( - timeline={ - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in confirmed_history.items() - } - ), - "deaths": Timeline( - timeline={ - datetime.strptime(date, "%Y-%m-%d").isoformat() + "Z": amount - for date, amount in deaths_history.items() - } - ), - "recovered": Timeline(), - }, - ) - ) - LOGGER.info(f"{data_id} Data normalized") - # save the results to distributed cache - # TODO: fix json serialization - try: - await load_cache(data_id, locations) - except TypeError as type_err: - LOGGER.error(type_err) - - return locations diff --git a/app/utils/countries.py b/app/utils/countries.py deleted file mode 100644 index 9fb4f98a..00000000 --- a/app/utils/countries.py +++ /dev/null @@ -1,380 +0,0 @@ -"""app.utils.countries.py""" -import logging - -LOGGER = logging.getLogger(__name__) - -# Default country code. -DEFAULT_COUNTRY_CODE = "XX" - -# Mapping of country names to alpha-2 codes according to -# https://en.wikipedia.org/wiki/ISO_3166-1. 
-# As a reference see also https://github.com/TakahikoKawasaki/nv-i18n (in Java) -# fmt: off -COUNTRY_NAME__COUNTRY_CODE = { - "Afghanistan" : "AF", - "Åland Islands" : "AX", - "Albania" : "AL", - "Algeria" : "DZ", - "American Samoa" : "AS", - "Andorra" : "AD", - "Angola" : "AO", - "Anguilla" : "AI", - "Antarctica" : "AQ", - "Antigua and Barbuda" : "AG", - "Argentina" : "AR", - "Armenia" : "AM", - "Aruba" : "AW", - "Australia" : "AU", - "Austria" : "AT", - "Azerbaijan" : "AZ", - " Azerbaijan" : "AZ", - "Bahamas" : "BS", - "The Bahamas" : "BS", - "Bahamas, The" : "BS", - "Bahrain" : "BH", - "Bangladesh" : "BD", - "Barbados" : "BB", - "Belarus" : "BY", - "Belgium" : "BE", - "Belize" : "BZ", - "Benin" : "BJ", - "Bermuda" : "BM", - "Bhutan" : "BT", - "Bolivia, Plurinational State of" : "BO", - "Bolivia" : "BO", - "Bonaire, Sint Eustatius and Saba" : "BQ", - "Caribbean Netherlands" : "BQ", - "Bosnia and Herzegovina" : "BA", - # "Bosnia–Herzegovina" : "BA", - "Bosnia" : "BA", - "Botswana" : "BW", - "Bouvet Island" : "BV", - "Brazil" : "BR", - "British Indian Ocean Territory" : "IO", - "Brunei Darussalam" : "BN", - "Brunei" : "BN", - "Bulgaria" : "BG", - "Burkina Faso" : "BF", - "Burundi" : "BI", - "Cambodia" : "KH", - "Cameroon" : "CM", - "Canada" : "CA", - "Cape Verde" : "CV", - "Cabo Verde" : "CV", - "Cayman Islands" : "KY", - "Central African Republic" : "CF", - "Chad" : "TD", - "Chile" : "CL", - "China" : "CN", - "Mainland China" : "CN", - "Christmas Island" : "CX", - "Cocos (Keeling) Islands" : "CC", - "Colombia" : "CO", - "Comoros" : "KM", - "Congo" : "CG", - "Congo (Brazzaville)" : "CG", - "Republic of the Congo" : "CG", - "Congo, the Democratic Republic of the" : "CD", - "Congo (Kinshasa)" : "CD", - "DR Congo" : "CD", - "Cook Islands" : "CK", - "Costa Rica" : "CR", - "Côte d'Ivoire" : "CI", - "Cote d'Ivoire" : "CI", - "Ivory Coast" : "CI", - "Croatia" : "HR", - "Cuba" : "CU", - "Curaçao" : "CW", - "Curacao" : "CW", - "Cyprus" : "CY", - "Czech Republic" : "CZ", - "Czechia" : "CZ", - "Denmark" : "DK", - "Djibouti" : "DJ", - "Dominica" : "DM", - "Dominican Republic" : "DO", - "Dominican Rep" : "DO", - "Ecuador" : "EC", - "Egypt" : "EG", - "El Salvador" : "SV", - "Equatorial Guinea" : "GQ", - "Eritrea" : "ER", - "Estonia" : "EE", - "Ethiopia" : "ET", - "Falkland Islands (Malvinas)" : "FK", - "Falkland Islands" : "FK", - "Faroe Islands" : "FO", - "Faeroe Islands" : "FO", - "Fiji" : "FJ", - "Finland" : "FI", - "France" : "FR", - "French Guiana" : "GF", - "French Polynesia" : "PF", - "French Southern Territories" : "TF", - "Gabon" : "GA", - "Gambia" : "GM", - "The Gambia" : "GM", - "Gambia, The" : "GM", - "Georgia" : "GE", - "Germany" : "DE", - "Deutschland" : "DE", - "Ghana" : "GH", - "Gibraltar" : "GI", - "Greece" : "GR", - "Greenland" : "GL", - "Grenada" : "GD", - "Guadeloupe" : "GP", - "Guam" : "GU", - "Guatemala" : "GT", - "Guernsey" : "GG", - "Guinea" : "GN", - "Guinea-Bissau" : "GW", - "Guyana" : "GY", - "Haiti" : "HT", - "Heard Island and McDonald Islands" : "HM", - "Holy See (Vatican City State)" : "VA", - "Holy See" : "VA", - "Vatican City" : "VA", - "Honduras" : "HN", - "Hong Kong" : "HK", - "Hong Kong SAR" : "HK", - "Hungary" : "HU", - "Iceland" : "IS", - "India" : "IN", - "Indonesia" : "ID", - "Iran, Islamic Republic of" : "IR", - "Iran" : "IR", - "Iran (Islamic Republic of)" : "IR", - "Iraq" : "IQ", - "Ireland" : "IE", - "Republic of Ireland" : "IE", - "Isle of Man" : "IM", - "Israel" : "IL", - "Italy" : "IT", - "Jamaica" : "JM", - "Japan" : "JP", - "Jersey" : "JE", - # Guernsey 
and Jersey form Channel Islands. Conjoin Guernsey on Jersey. - # Jersey has higher population. - # https://en.wikipedia.org/wiki/Channel_Islands - "Guernsey and Jersey" : "JE", - "Channel Islands" : "JE", - # "Channel Islands" : "GB", - "Jordan" : "JO", - "Kazakhstan" : "KZ", - "Kenya" : "KE", - "Kiribati" : "KI", - "Korea, Democratic People's Republic of" : "KP", - "North Korea" : "KP", - "Korea, Republic of" : "KR", - "Korea, South" : "KR", - "South Korea" : "KR", - "Republic of Korea" : "KR", - "Kosovo, Republic of" : "XK", - "Kosovo" : "XK", - "Kuwait" : "KW", - "Kyrgyzstan" : "KG", - "Lao People's Democratic Republic" : "LA", - "Laos" : "LA", - "Latvia" : "LV", - "Lebanon" : "LB", - "Lesotho" : "LS", - "Liberia" : "LR", - "Libya" : "LY", - "Liechtenstein" : "LI", - "Lithuania" : "LT", - "Luxembourg" : "LU", - "Macao" : "MO", - # TODO Macau is probably a typo. Report it to CSSEGISandData/COVID-19 - "Macau" : "MO", - "Macao SAR" : "MO", - "North Macedonia" : "MK", - "Macedonia" : "MK", - "Madagascar" : "MG", - "Malawi" : "MW", - "Malaysia" : "MY", - "Maldives" : "MV", - "Mali" : "ML", - "Malta" : "MT", - "Marshall Islands" : "MH", - "Martinique" : "MQ", - "Mauritania" : "MR", - "Mauritius" : "MU", - "Mayotte" : "YT", - "Mexico" : "MX", - "Micronesia, Federated States of" : "FM", - "F.S. Micronesia" : "FM", - "Micronesia" : "FM", - "Moldova, Republic of" : "MD", - "Republic of Moldova" : "MD", - "Moldova" : "MD", - "Monaco" : "MC", - "Mongolia" : "MN", - "Montenegro" : "ME", - "Montserrat" : "MS", - "Morocco" : "MA", - "Mozambique" : "MZ", - "Myanmar" : "MM", - "Burma" : "MM", - "Namibia" : "NA", - "Nauru" : "NR", - "Nepal" : "NP", - "Netherlands" : "NL", - "New Caledonia" : "NC", - "New Zealand" : "NZ", - "Nicaragua" : "NI", - "Niger" : "NE", - "Nigeria" : "NG", - "Niue" : "NU", - "Norfolk Island" : "NF", - "Northern Mariana Islands" : "MP", - "Norway" : "NO", - "Oman" : "OM", - "Pakistan" : "PK", - "Palau" : "PW", - "Palestine, State of" : "PS", - "Palestine" : "PS", - "occupied Palestinian territory" : "PS", - "State of Palestine" : "PS", - "The West Bank and Gaza" : "PS", - "West Bank and Gaza" : "PS", - "Panama" : "PA", - "Papua New Guinea" : "PG", - "Paraguay" : "PY", - "Peru" : "PE", - "Philippines" : "PH", - "Pitcairn" : "PN", - "Poland" : "PL", - "Portugal" : "PT", - "Puerto Rico" : "PR", - "Qatar" : "QA", - "Réunion" : "RE", - "Reunion" : "RE", - "Romania" : "RO", - "Russian Federation" : "RU", - "Russia" : "RU", - "Rwanda" : "RW", - "Saint Barthélemy" : "BL", - "Saint Barthelemy" : "BL", - "Saint Helena, Ascension and Tristan da Cunha" : "SH", - "Saint Helena" : "SH", - "Saint Kitts and Nevis" : "KN", - "Saint Kitts & Nevis" : "KN", - "Saint Lucia" : "LC", - "Saint Martin (French part)" : "MF", - "Saint Martin" : "MF", - "St. Martin" : "MF", - "Saint Pierre and Miquelon" : "PM", - "Saint Pierre & Miquelon" : "PM", - "Saint Vincent and the Grenadines" : "VC", - "St. 
Vincent & Grenadines" : "VC", - "Samoa" : "WS", - "San Marino" : "SM", - "Sao Tome and Principe" : "ST", - "São Tomé and Príncipe" : "ST", - "Sao Tome & Principe" : "ST", - "Saudi Arabia" : "SA", - "Senegal" : "SN", - "Serbia" : "RS", - "Seychelles" : "SC", - "Sierra Leone" : "SL", - "Singapore" : "SG", - "Sint Maarten (Dutch part)" : "SX", - "Sint Maarten" : "SX", - "Slovakia" : "SK", - "Slovenia" : "SI", - "Solomon Islands" : "SB", - "Somalia" : "SO", - "South Africa" : "ZA", - "South Georgia and the South Sandwich Islands" : "GS", - "South Sudan" : "SS", - "Spain" : "ES", - "Sri Lanka" : "LK", - "Sudan" : "SD", - "Suriname" : "SR", - "Svalbard and Jan Mayen" : "SJ", - "Eswatini" : "SZ", # previous name "Swaziland" - "Swaziland" : "SZ", - "Sweden" : "SE", - "Switzerland" : "CH", - "Syrian Arab Republic" : "SY", - "Syria" : "SY", - "Taiwan, Province of China" : "TW", - "Taiwan*" : "TW", - "Taipei and environs" : "TW", - "Taiwan" : "TW", - "Tajikistan" : "TJ", - "Tanzania, United Republic of" : "TZ", - "Tanzania" : "TZ", - "Thailand" : "TH", - "Timor-Leste" : "TL", - "East Timor" : "TL", - "Togo" : "TG", - "Tokelau" : "TK", - "Tonga" : "TO", - "Trinidad and Tobago" : "TT", - "Tunisia" : "TN", - "Turkey" : "TR", - "Turkmenistan" : "TM", - "Turks and Caicos Islands" : "TC", - "Turks and Caicos" : "TC", - "Tuvalu" : "TV", - "Uganda" : "UG", - "Ukraine" : "UA", - "United Arab Emirates" : "AE", - "Emirates" : "AE", - "United Kingdom" : "GB", - "UK" : "GB", - # Conjoin North Ireland on United Kingdom - "North Ireland" : "GB", - "United States" : "US", - "US" : "US", - "United States Minor Outlying Islands" : "UM", - "Uruguay" : "UY", - "Uzbekistan" : "UZ", - "Vanuatu" : "VU", - "Venezuela, Bolivarian Republic of" : "VE", - "Venezuela" : "VE", - "Viet Nam" : "VN", - "Vietnam" : "VN", - "Virgin Islands, British" : "VG", - "British Virgin Islands" : "VG", - "Virgin Islands, U.S." : "VI", - "U.S. Virgin Islands" : "VI", - "Wallis and Futuna" : "WF", - "Wallis & Futuna" : "WF", - "Western Sahara" : "EH", - "Yemen" : "YE", - "Zambia" : "ZM", - "Zimbabwe" : "ZW", - - # see also - # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent_(data_file)#Data_file - # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent - "United Nations Neutral Zone" : "XD", - "Iraq-Saudi Arabia Neutral Zone" : "XE", - "Spratly Islands" : "XS", - - # "Diamond Princess" : default_country_code, - # TODO "Disputed Territory" conflicts with `default_country_code` - # "Disputed Territory" : "XX", - - # "Others" has no mapping, i.e. the default val is used - - # ships: - # "Cruise Ship" - # "MS Zaandam" -} - -# fmt: on -def country_code(value): - """ - Return two letter country code (Alpha-2) according to https://en.wikipedia.org/wiki/ISO_3166-1 - Defaults to "XX". - """ - code = COUNTRY_NAME__COUNTRY_CODE.get(value, DEFAULT_COUNTRY_CODE) - if code == DEFAULT_COUNTRY_CODE: - # log at sub DEBUG level - LOGGER.log(5, f"No country code found for '{value}'. Using '{code}'!") - - return code diff --git a/app/utils/populations.py b/app/utils/populations.py deleted file mode 100644 index c02f15a9..00000000 --- a/app/utils/populations.py +++ /dev/null @@ -1,60 +0,0 @@ -"""app.utils.populations.py""" -import json -import logging - -import requests - -import app.io - -LOGGER = logging.getLogger(__name__) -GEONAMES_URL = "http://api.geonames.org/countryInfoJSON" -GEONAMES_BACKUP_PATH = "geonames_population_mappings.json" - -# Fetching of the populations. 
-def fetch_populations(save=False):
-    """
-    Returns a dictionary containing the population of each country fetched from the GeoNames.
-    https://www.geonames.org/
-
-    TODO: only skip writing to the filesystem when deployed with gunicorn, or handle concurent access, or use DB.
-
-    :returns: The mapping of populations.
-    :rtype: dict
-    """
-    LOGGER.info("Fetching populations...")
-
-    # Mapping of populations
-    mappings = {}
-
-    # Fetch the countries.
-    try:
-        countries = requests.get(GEONAMES_URL, params={"username": "dperic"}, timeout=1.25).json()[
-            "geonames"
-        ]
-        # Go through all the countries and perform the mapping.
-        for country in countries:
-            mappings.update({country["countryCode"]: int(country["population"]) or None})
-
-        if mappings and save:
-            LOGGER.info(f"Saving population data to {app.io.save(GEONAMES_BACKUP_PATH, mappings)}")
-    except (json.JSONDecodeError, KeyError, requests.exceptions.Timeout) as err:
-        LOGGER.warning(f"Error pulling population data. {err.__class__.__name__}: {err}")
-        mappings = app.io.load(GEONAMES_BACKUP_PATH)
-        LOGGER.info(f"Using backup data from {GEONAMES_BACKUP_PATH}")
-    # Finally, return the mappings.
-    LOGGER.info("Fetched populations")
-    return mappings
-
-
-# Mapping of alpha-2 codes country codes to population.
-POPULATIONS = fetch_populations()
-
-# Retrieving.
-def country_population(country_code, default=None):
-    """
-    Fetches the population of the country with the provided country code.
-
-    :returns: The population.
-    :rtype: int
-    """
-    return POPULATIONS.get(country_code, default)
diff --git a/app/utils/utils.py b/app/utils/utils.py
new file mode 100644
index 00000000..0cd4e4ef
--- /dev/null
+++ b/app/utils/utils.py
@@ -0,0 +1,44 @@
+"""app.utils.utils.py"""
+import logging
+
+from aiohttp import ClientSession
+from dateutil.parser import parse
+
+LOGGER = logging.getLogger(__name__)
+
+# Singleton aiohttp.ClientSession instance.
+CLIENT_SESSION: ClientSession
+
+
+async def setup_client_session():
+    """Set up the application-global aiohttp.ClientSession instance.
+
+    aiohttp recommends that only one ClientSession exist for the lifetime of an application.
+    See: https://docs.aiohttp.org/en/stable/client_quickstart.html#make-a-request
+
+    """
+    global CLIENT_SESSION  # pylint: disable=global-statement
+    LOGGER.info("Setting up global aiohttp.ClientSession.")
+    CLIENT_SESSION = ClientSession()
+
+
+async def teardown_client_session():
+    """Close the application-global aiohttp.ClientSession."""
+    global CLIENT_SESSION  # pylint: disable=global-statement
+    LOGGER.info("Closing global aiohttp.ClientSession.")
+    await CLIENT_SESSION.close()
+
+
+def is_date(string, fuzzy=False):
+    """
+    Return whether the string can be interpreted as a date.
+    - https://stackoverflow.com/a/25341965/7120095
+
+    :param string: str, string to check for date
+    :param fuzzy: bool, ignore unknown tokens in string if True
+    """
+    try:
+        parse(string, fuzzy=fuzzy)
+        return True
+    except ValueError:
+        return False
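+
+# Illustrative wiring (assumes a FastAPI/Starlette `app` object; the real
+# registration happens in the application entry point):
+#
+#     from app.utils import utils
+#     app.add_event_handler("startup", utils.setup_client_session)
+#     app.add_event_handler("shutdown", utils.teardown_client_session)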