"""Map country names to ISO 3166-1 alpha-2 country codes.

Per the accompanying change to ``app/data/__init__.py``, ``get_data`` imports
this module as ``cc`` and calls ``cc.country_code(country)`` to attach a
``'country_code'`` field to every location it returns.
"""
import logging

_logger = logging.getLogger(__name__)

# Fallback returned by country_code() when a name cannot be resolved.
# NOTE: despite its name this holds a *code*, not a country name; the
# identifier is kept unchanged so existing references keep working.
default_country_name = "XX"

# Country name -> two letter (alpha-2) code.
# NOTE(review): the identifier presumably means "ISO 3166-1"; kept as-is
# because other modules may reference it.
is_3166_1 = {
    "Afghanistan": "AF",
    "Åland Islands": "AX",
    "Albania": "AL",
    "Algeria": "DZ",
    "American Samoa": "AS",
    "Andorra": "AD",
    "Angola": "AO",
    "Anguilla": "AI",
    "Antarctica": "AQ",
    "Antigua and Barbuda": "AG",
    "Argentina": "AR",
    "Armenia": "AM",
    "Aruba": "AW",
    "Australia": "AU",
    "Austria": "AT",
    "Azerbaijan": "AZ",
    "Bahamas": "BS",
    "Bahrain": "BH",
    "Bangladesh": "BD",
    "Barbados": "BB",
    "Belarus": "BY",
    "Belgium": "BE",
    "Belize": "BZ",
    "Benin": "BJ",
    "Bermuda": "BM",
    "Bhutan": "BT",
    "Bolivia, Plurinational State of": "BO",
    "Bonaire, Sint Eustatius and Saba": "BQ",
    "Bosnia and Herzegovina": "BA",
    "Botswana": "BW",
    "Bouvet Island": "BV",
    "Brazil": "BR",
    "British Indian Ocean Territory": "IO",
    "Brunei Darussalam": "BN",
    "Bulgaria": "BG",
    "Burkina Faso": "BF",
    "Burundi": "BI",
    "Cambodia": "KH",
    "Cameroon": "CM",
    "Canada": "CA",
    "Cape Verde": "CV",
    "Cayman Islands": "KY",
    "Central African Republic": "CF",
    "Chad": "TD",
    "Chile": "CL",
    "China": "CN",
    "Christmas Island": "CX",
    "Cocos (Keeling) Islands": "CC",
    "Colombia": "CO",
    "Comoros": "KM",
    "Congo": "CG",
    "Congo, the Democratic Republic of the": "CD",
    "Cook Islands": "CK",
    "Costa Rica": "CR",
    "Côte d'Ivoire": "CI",
    "Croatia": "HR",
    "Cuba": "CU",
    "Curaçao": "CW",
    "Cyprus": "CY",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Djibouti": "DJ",
    "Dominica": "DM",
    "Dominican Republic": "DO",
    "Ecuador": "EC",
    "Egypt": "EG",
    "El Salvador": "SV",
    "Equatorial Guinea": "GQ",
    "Eritrea": "ER",
    "Estonia": "EE",
    "Ethiopia": "ET",
    "Falkland Islands (Malvinas)": "FK",
    "Faroe Islands": "FO",
    "Fiji": "FJ",
    "Finland": "FI",
    "France": "FR",
    "French Guiana": "GF",
    "French Polynesia": "PF",
    "French Southern Territories": "TF",
    "Gabon": "GA",
    "Gambia": "GM",
    "Georgia": "GE",
    "Germany": "DE",
    "Ghana": "GH",
    "Gibraltar": "GI",
    "Greece": "GR",
    "Greenland": "GL",
    "Grenada": "GD",
    "Guadeloupe": "GP",
    "Guam": "GU",
    "Guatemala": "GT",
    "Guernsey": "GG",
    "Guinea": "GN",
    "Guinea-Bissau": "GW",
    "Guyana": "GY",
    "Haiti": "HT",
    "Heard Island and McDonald Islands": "HM",
    "Holy See (Vatican City State)": "VA",
    "Honduras": "HN",
    "Hong Kong": "HK",
    "Hungary": "HU",
    "Iceland": "IS",
    "India": "IN",
    "Indonesia": "ID",
    "Iran, Islamic Republic of": "IR",
    "Iraq": "IQ",
    "Ireland": "IE",
    "Isle of Man": "IM",
    "Israel": "IL",
    "Italy": "IT",
    "Jamaica": "JM",
    "Japan": "JP",
    "Jersey": "JE",
    "Jordan": "JO",
    "Kazakhstan": "KZ",
    "Kenya": "KE",
    "Kiribati": "KI",
    "Korea, Democratic People's Republic of": "KP",
    "Korea, Republic of": "KR",
    "Kuwait": "KW",
    "Kyrgyzstan": "KG",
    "Lao People's Democratic Republic": "LA",
    "Latvia": "LV",
    "Lebanon": "LB",
    "Lesotho": "LS",
    "Liberia": "LR",
    "Libya": "LY",
    "Liechtenstein": "LI",
    "Lithuania": "LT",
    "Luxembourg": "LU",
    "Macao": "MO",
    "North Macedonia": "MK",
    "Madagascar": "MG",
    "Malawi": "MW",
    "Malaysia": "MY",
    "Maldives": "MV",
    "Mali": "ML",
    "Malta": "MT",
    "Marshall Islands": "MH",
    "Martinique": "MQ",
    "Mauritania": "MR",
    "Mauritius": "MU",
    "Mayotte": "YT",
    "Mexico": "MX",
    "Micronesia, Federated States of": "FM",
    "Moldova, Republic of": "MD",
    "Monaco": "MC",
    "Mongolia": "MN",
    "Montenegro": "ME",
    "Montserrat": "MS",
    "Morocco": "MA",
    "Mozambique": "MZ",
    "Myanmar": "MM",
    "Namibia": "NA",
    "Nauru": "NR",
    "Nepal": "NP",
    "Netherlands": "NL",
    "New Caledonia": "NC",
    "New Zealand": "NZ",
    "Nicaragua": "NI",
    "Niger": "NE",
    "Nigeria": "NG",
    "Niue": "NU",
    "Norfolk Island": "NF",
    "Northern Mariana Islands": "MP",
    "Norway": "NO",
    "Oman": "OM",
    "Pakistan": "PK",
    "Palau": "PW",
    "Palestine, State of": "PS",
    "Panama": "PA",
    "Papua New Guinea": "PG",
    "Paraguay": "PY",
    "Peru": "PE",
    "Philippines": "PH",
    "Pitcairn": "PN",
    "Poland": "PL",
    "Portugal": "PT",
    "Puerto Rico": "PR",
    "Qatar": "QA",
    "Réunion": "RE",
    "Romania": "RO",
    "Russian Federation": "RU",
    "Rwanda": "RW",
    "Saint Barthélemy": "BL",
    "Saint Helena, Ascension and Tristan da Cunha": "SH",
    "Saint Kitts and Nevis": "KN",
    "Saint Lucia": "LC",
    "Saint Martin (French part)": "MF",
    "Saint Pierre and Miquelon": "PM",
    "Saint Vincent and the Grenadines": "VC",
    "Samoa": "WS",
    "San Marino": "SM",
    "Sao Tome and Principe": "ST",
    "Saudi Arabia": "SA",
    "Senegal": "SN",
    "Serbia": "RS",
    "Seychelles": "SC",
    "Sierra Leone": "SL",
    "Singapore": "SG",
    "Sint Maarten (Dutch part)": "SX",
    "Slovakia": "SK",
    "Slovenia": "SI",
    "Solomon Islands": "SB",
    "Somalia": "SO",
    "South Africa": "ZA",
    "South Georgia and the South Sandwich Islands": "GS",
    "South Sudan": "SS",
    "Spain": "ES",
    "Sri Lanka": "LK",
    "Sudan": "SD",
    "Suriname": "SR",
    "Svalbard and Jan Mayen": "SJ",
    "Swaziland": "SZ",
    "Sweden": "SE",
    "Switzerland": "CH",
    "Syrian Arab Republic": "SY",
    "Taiwan, Province of China": "TW",
    "Tajikistan": "TJ",
    "Tanzania, United Republic of": "TZ",
    "Thailand": "TH",
    "Timor-Leste": "TL",
    "Togo": "TG",
    "Tokelau": "TK",
    "Tonga": "TO",
    "Trinidad and Tobago": "TT",
    "Tunisia": "TN",
    "Turkey": "TR",
    "Turkmenistan": "TM",
    "Turks and Caicos Islands": "TC",
    "Tuvalu": "TV",
    "Uganda": "UG",
    "Ukraine": "UA",
    "United Arab Emirates": "AE",
    "United Kingdom": "GB",
    "United States": "US",
    "United States Minor Outlying Islands": "UM",
    "Uruguay": "UY",
    "Uzbekistan": "UZ",
    "Vanuatu": "VU",
    "Venezuela, Bolivarian Republic of": "VE",
    "Viet Nam": "VN",
    "Virgin Islands, British": "VG",
    "Virgin Islands, U.S.": "VI",
    "Wallis and Futuna": "WF",
    "Western Sahara": "EH",
    "Yemen": "YE",
    "Zambia": "ZM",
    "Zimbabwe": "ZW",
}

# Alternative names and spellings seen in the input data, mapped to the
# country name used as a key in ``is_3166_1``.
country_name_synonyms = {
    "Mainland China": "China",
    "Czechia": "Czech Republic",
    "South Korea": "Korea, Republic of",
    "Taiwan": "Taiwan, Province of China",
    "US": "United States",
    # "Macau" is an alternative spelling of the name listed here as "Macao".
    # TODO: consider reporting the spelling to CSSEGISandData/COVID-19.
    "Macau": "Macao",
    "Vietnam": "Viet Nam",
    "UK": "United Kingdom",
    "Russia": "Russian Federation",
    "Iran": "Iran, Islamic Republic of",
    "Saint Barthelemy": "Saint Barthélemy",
    "Palestine": "Palestine, State of",
    # "Others" has no mapping
}


def country_code(country):
    """Return the two letter (alpha-2) country code for a country name.

    Codes follow https://en.wikipedia.org/wiki/ISO_3166-1. The name is looked
    up in ``is_3166_1`` first and, failing that, translated through
    ``country_name_synonyms`` and looked up again.

    Args:
        country: Country name as found in the source data. Leading and
            trailing whitespace on a string is ignored. Non-string values
            (for example ``None`` for a missing field) are treated as
            unknown rather than raising.

    Returns:
        The alpha-2 code, or ``default_country_name`` ("XX") when the name
        cannot be resolved; a warning is logged in that case.
    """
    # Source rows may carry stray padding around the name; without the strip
    # an otherwise valid name would fall through to the default.
    name = country.strip() if isinstance(country, str) else country

    code = is_3166_1.get(name)
    if code is None:
        # .get() on both tables: a synonym that points at a name missing
        # from is_3166_1 degrades to the default instead of a KeyError.
        code = is_3166_1.get(country_name_synonyms.get(name))

    if code is None:
        # Lazy %-formatting copes with any value, where the previous string
        # concatenation raised TypeError for non-string input.
        _logger.warning(
            "No country_code found for '%s'. Using '%s'",
            country,
            default_country_name,
        )
        return default_country_name

    return code