diff --git a/app/location/__init__.py b/app/location/__init__.py index cafcf547..4782fddb 100644 --- a/app/location/__init__.py +++ b/app/location/__init__.py @@ -1,5 +1,5 @@ from ..coordinates import Coordinates -from ..utils import countrycodes +from ..utils import countries from ..utils.populations import country_population @@ -31,7 +31,7 @@ def country_code(self): :returns: The country code. :rtype: str """ - return (countrycodes.country_code(self.country) or countrycodes.default_code).upper() + return (countries.country_code(self.country) or countries.default_country_code).upper() @property def country_population(self): diff --git a/app/services/location/jhu.py b/app/services/location/jhu.py index 9fde386f..ef99dddc 100644 --- a/app/services/location/jhu.py +++ b/app/services/location/jhu.py @@ -7,7 +7,7 @@ from ...coordinates import Coordinates from ...location import TimelinedLocation from ...timeline import Timeline -from ...utils import countrycodes +from ...utils import countries from ...utils import date as date_util from . import LocationService @@ -80,7 +80,7 @@ def get_category(category): { # General info. "country": country, - "country_code": countrycodes.country_code(country), + "country_code": countries.country_code(country), "province": item["Province/State"], # Coordinates. "coordinates": {"lat": item["Lat"], "long": item["Long"],}, diff --git a/app/utils/countrycodes.py b/app/utils/countries.py similarity index 73% rename from app/utils/countrycodes.py rename to app/utils/countries.py index f3e90e8f..f4f9fad0 100644 --- a/app/utils/countrycodes.py +++ b/app/utils/countries.py @@ -4,13 +4,13 @@ LOGGER = logging.getLogger(__name__) # Default country code. -default_code = "XX" +default_country_code = "XX" # Mapping of country names to alpha-2 codes according to # https://en.wikipedia.org/wiki/ISO_3166-1. # As a reference see also https://github.com/TakahikoKawasaki/nv-i18n (in Java) # fmt: off -is_3166_1 = { +country_name__country_code = { "Afghanistan" : "AF", "Åland Islands" : "AX", "Albania" : "AL", @@ -27,7 +27,10 @@ "Australia" : "AU", "Austria" : "AT", "Azerbaijan" : "AZ", + " Azerbaijan" : "AZ", "Bahamas" : "BS", + "The Bahamas" : "BS", + "Bahamas, The" : "BS", "Bahrain" : "BH", "Bangladesh" : "BD", "Barbados" : "BB", @@ -38,13 +41,18 @@ "Bermuda" : "BM", "Bhutan" : "BT", "Bolivia, Plurinational State of" : "BO", + "Bolivia" : "BO", "Bonaire, Sint Eustatius and Saba" : "BQ", + "Caribbean Netherlands" : "BQ", "Bosnia and Herzegovina" : "BA", + # "Bosnia–Herzegovina" : "BA", + "Bosnia" : "BA", "Botswana" : "BW", "Bouvet Island" : "BV", "Brazil" : "BR", "British Indian Ocean Territory" : "IO", "Brunei Darussalam" : "BN", + "Brunei" : "BN", "Bulgaria" : "BG", "Burkina Faso" : "BF", "Burundi" : "BI", @@ -52,29 +60,40 @@ "Cameroon" : "CM", "Canada" : "CA", "Cape Verde" : "CV", + "Cabo Verde" : "CV", "Cayman Islands" : "KY", "Central African Republic" : "CF", "Chad" : "TD", "Chile" : "CL", "China" : "CN", + "Mainland China" : "CN", "Christmas Island" : "CX", "Cocos (Keeling) Islands" : "CC", "Colombia" : "CO", "Comoros" : "KM", "Congo" : "CG", + "Congo (Brazzaville)" : "CG", + "Republic of the Congo" : "CG", "Congo, the Democratic Republic of the" : "CD", + "Congo (Kinshasa)" : "CD", + "DR Congo" : "CD", "Cook Islands" : "CK", "Costa Rica" : "CR", "Côte d'Ivoire" : "CI", + "Cote d'Ivoire" : "CI", + "Ivory Coast" : "CI", "Croatia" : "HR", "Cuba" : "CU", "Curaçao" : "CW", + "Curacao" : "CW", "Cyprus" : "CY", "Czech Republic" : "CZ", + "Czechia" : "CZ", "Denmark" : "DK", "Djibouti" : "DJ", "Dominica" : "DM", "Dominican Republic" : "DO", + "Dominican Rep" : "DO", "Ecuador" : "EC", "Egypt" : "EG", "El Salvador" : "SV", @@ -83,7 +102,9 @@ "Estonia" : "EE", "Ethiopia" : "ET", "Falkland Islands (Malvinas)" : "FK", + "Falkland Islands" : "FK", "Faroe Islands" : "FO", + "Faeroe Islands" : "FO", "Fiji" : "FJ", "Finland" : "FI", "France" : "FR", @@ -92,8 +113,11 @@ "French Southern Territories" : "TF", "Gabon" : "GA", "Gambia" : "GM", + "The Gambia" : "GM", + "Gambia, The" : "GM", "Georgia" : "GE", "Germany" : "DE", + "Deutschland" : "DE", "Ghana" : "GH", "Gibraltar" : "GI", "Greece" : "GR", @@ -109,31 +133,49 @@ "Haiti" : "HT", "Heard Island and McDonald Islands" : "HM", "Holy See (Vatican City State)" : "VA", + "Holy See" : "VA", + "Vatican City" : "VA", "Honduras" : "HN", "Hong Kong" : "HK", + "Hong Kong SAR" : "HK", "Hungary" : "HU", "Iceland" : "IS", "India" : "IN", "Indonesia" : "ID", "Iran, Islamic Republic of" : "IR", + "Iran" : "IR", + "Iran (Islamic Republic of)" : "IR", "Iraq" : "IQ", "Ireland" : "IE", + "Republic of Ireland" : "IE", "Isle of Man" : "IM", "Israel" : "IL", "Italy" : "IT", "Jamaica" : "JM", "Japan" : "JP", "Jersey" : "JE", + # Guernsey and Jersey form Channel Islands. Conjoin Guernsey on Jersey. + # Jersey has higher population. + # https://en.wikipedia.org/wiki/Channel_Islands + "Guernsey and Jersey" : "JE", + "Channel Islands" : "JE", + # "Channel Islands" : "GB", "Jordan" : "JO", "Kazakhstan" : "KZ", "Kenya" : "KE", "Kiribati" : "KI", "Korea, Democratic People's Republic of" : "KP", + "North Korea" : "KP", "Korea, Republic of" : "KR", + "Korea, South" : "KR", + "South Korea" : "KR", + "Republic of Korea" : "KR", "Kosovo, Republic of" : "XK", + "Kosovo" : "XK", "Kuwait" : "KW", "Kyrgyzstan" : "KG", "Lao People's Democratic Republic" : "LA", + "Laos" : "LA", "Latvia" : "LV", "Lebanon" : "LB", "Lesotho" : "LS", @@ -143,7 +185,11 @@ "Lithuania" : "LT", "Luxembourg" : "LU", "Macao" : "MO", + # TODO Macau is probably a typo. Report it to CSSEGISandData/COVID-19 + "Macau" : "MO", + "Macao SAR" : "MO", "North Macedonia" : "MK", + "Macedonia" : "MK", "Madagascar" : "MG", "Malawi" : "MW", "Malaysia" : "MY", @@ -157,7 +203,11 @@ "Mayotte" : "YT", "Mexico" : "MX", "Micronesia, Federated States of" : "FM", + "F.S. Micronesia" : "FM", + "Micronesia" : "FM", "Moldova, Republic of" : "MD", + "Republic of Moldova" : "MD", + "Moldova" : "MD", "Monaco" : "MC", "Mongolia" : "MN", "Montenegro" : "ME", @@ -182,6 +232,10 @@ "Pakistan" : "PK", "Palau" : "PW", "Palestine, State of" : "PS", + "Palestine" : "PS", + "occupied Palestinian territory" : "PS", + "State of Palestine" : "PS", + "The West Bank and Gaza" : "PS", "Panama" : "PA", "Papua New Guinea" : "PG", "Paraguay" : "PY", @@ -193,19 +247,30 @@ "Puerto Rico" : "PR", "Qatar" : "QA", "Réunion" : "RE", + "Reunion" : "RE", "Romania" : "RO", "Russian Federation" : "RU", + "Russia" : "RU", "Rwanda" : "RW", "Saint Barthélemy" : "BL", + "Saint Barthelemy" : "BL", "Saint Helena, Ascension and Tristan da Cunha" : "SH", + "Saint Helena" : "SH", "Saint Kitts and Nevis" : "KN", + "Saint Kitts & Nevis" : "KN", "Saint Lucia" : "LC", "Saint Martin (French part)" : "MF", + "Saint Martin" : "MF", + "St. Martin" : "MF", "Saint Pierre and Miquelon" : "PM", + "Saint Pierre & Miquelon" : "PM", "Saint Vincent and the Grenadines" : "VC", + "St. Vincent & Grenadines" : "VC", "Samoa" : "WS", "San Marino" : "SM", "Sao Tome and Principe" : "ST", + "São Tomé and Príncipe" : "ST", + "Sao Tome & Principe" : "ST", "Saudi Arabia" : "SA", "Senegal" : "SN", "Serbia" : "RS", @@ -213,6 +278,7 @@ "Sierra Leone" : "SL", "Singapore" : "SG", "Sint Maarten (Dutch part)" : "SX", + "Sint Maarten" : "SX", "Slovakia" : "SK", "Slovenia" : "SI", "Solomon Islands" : "SB", @@ -226,14 +292,21 @@ "Suriname" : "SR", "Svalbard and Jan Mayen" : "SJ", "Eswatini" : "SZ", # previous name "Swaziland" + "Swaziland" : "SZ", "Sweden" : "SE", "Switzerland" : "CH", "Syrian Arab Republic" : "SY", + "Syria" : "SY", "Taiwan, Province of China" : "TW", + "Taiwan*" : "TW", + "Taipei and environs" : "TW", + "Taiwan" : "TW", "Tajikistan" : "TJ", "Tanzania, United Republic of" : "TZ", + "Tanzania" : "TZ", "Thailand" : "TH", "Timor-Leste" : "TL", + "East Timor" : "TL", "Togo" : "TG", "Tokelau" : "TK", "Tonga" : "TO", @@ -242,21 +315,32 @@ "Turkey" : "TR", "Turkmenistan" : "TM", "Turks and Caicos Islands" : "TC", + "Turks and Caicos" : "TC", "Tuvalu" : "TV", "Uganda" : "UG", "Ukraine" : "UA", "United Arab Emirates" : "AE", + "Emirates" : "AE", "United Kingdom" : "GB", + "UK" : "GB", + # Conjoin North Ireland on United Kingdom + "North Ireland" : "GB", "United States" : "US", + "US" : "US", "United States Minor Outlying Islands" : "UM", "Uruguay" : "UY", "Uzbekistan" : "UZ", "Vanuatu" : "VU", "Venezuela, Bolivarian Republic of" : "VE", + "Venezuela" : "VE", "Viet Nam" : "VN", + "Vietnam" : "VN", "Virgin Islands, British" : "VG", + "British Virgin Islands" : "VG", "Virgin Islands, U.S." : "VI", + "U.S. Virgin Islands" : "VI", "Wallis and Futuna" : "WF", + "Wallis & Futuna" : "WF", "Western Sahara" : "EH", "Yemen" : "YE", "Zambia" : "ZM", @@ -265,123 +349,25 @@ # see also # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent_(data_file)#Data_file # https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent - "United Nations Neutral Zone" : "XD", - "Iraq-Saudi Arabia Neutral Zone" : "XE", - "Spratly Islands" : "XS", + "United Nations Neutral Zone" : "XD", + "Iraq-Saudi Arabia Neutral Zone" : "XE", + "Spratly Islands" : "XS", - # TODO "Disputed Territory" conflicts with `default_code` - # "Disputed Territory" : "XX", -} + # TODO "Disputed Territory" conflicts with `default_country_code` + # "Disputed Territory" : "XX", -# Mapping of alternative names, spelling, typos to the names of countries used -# by the ISO 3166-1 norm -synonyms = { - "Mainland China" : "China", - "Czechia" : "Czech Republic", - "Channel Islands" : "United Kingdom", - "Republic of Korea" : "Korea, Republic of", - "Republic of Moldova" : "Moldova, Republic of", - "Taiwan" : "Taiwan, Province of China", - "US" : "United States", - # TODO Macau is probably a typo. Report it to CSSEGISandData/COVID-19 - "Macau" : "Macao", - "Macao SAR" : "Macao", - "Vietnam" : "Viet Nam", - "UK" : "United Kingdom", - "Russia" : "Russian Federation", - "Iran (Islamic Republic of)" : "Iran, Islamic Republic of", - "Saint Barthelemy" : "Saint Barthélemy", - "Saint Martin" : "Saint Martin (French part)", - "Palestine" : "Palestine, State of", - "occupied Palestinian territory" : "Palestine, State of", - "State of Palestine" : "Palestine, State of", - "The West Bank and Gaza" : "Palestine, State of", - "Holy See" : "Holy See (Vatican City State)", - "Brunei" : "Brunei Darussalam", - "Hong Kong SAR" : "Hong Kong", - "Taipei and environs" : "Taiwan, Province of China", - "South Korea" : "Korea, Republic of", - "Iran" : "Iran, Islamic Republic of", - "Vatican City" : "Holy See (Vatican City State)", - "DR Congo" : "Congo, the Democratic Republic of the", - "Republic of the Congo" : "Congo", - "Tanzania" : "Tanzania, United Republic of", - "Venezuela" : "Venezuela, Bolivarian Republic of", - "North Korea" : "Korea, Democratic People's Republic of", - "Syria" : "Syrian Arab Republic", - "Bolivia" : "Bolivia, Plurinational State of", - "Laos" : "Lao People's Democratic Republic", - "Moldova" : "Moldova, Republic of", - "Eswatini" : "Swaziland", - "Cabo Verde" : "Cape Verde", - "Sao Tome & Principe" : "Sao Tome and Principe", - "Micronesia" : "Micronesia, Federated States of", - "St. Vincent & Grenadines" : "Saint Vincent and the Grenadines", - "U.S. Virgin Islands" : "Virgin Islands, U.S.", - "Saint Kitts & Nevis" : "Saint Kitts and Nevis", - "Faeroe Islands" : "Faroe Islands", - "Sint Maarten" : "Sint Maarten (Dutch part)", - "Turks and Caicos" : "Turks and Caicos Islands", - "Saint Martin" : "Saint Martin (French part)", - "British Virgin Islands" : "Virgin Islands, British", - "Wallis & Futuna" : "Wallis and Futuna", - "Saint Helena" : "Saint Helena, Ascension and Tristan da Cunha", - "Saint Pierre & Miquelon" : "Saint Pierre and Miquelon", - "Falkland Islands" : "Falkland Islands (Malvinas)", - "Republic of Ireland" : "Ireland", - "Ivory Coast" : "Côte d'Ivoire", - " Azerbaijan" : "Azerbaijan", - # Conjoin North Ireland on United Kingdom - "North Ireland" : "United Kingdom", - "East Timor" : "Timor-Leste", - "São Tomé and Príncipe" : "Sao Tome and Principe", - # Guernsey and Jersey form Channel Islands. Conjoin Guernsey on Jersey. - # Jersey has higher population. - # https://en.wikipedia.org/wiki/Channel_Islands - "Guernsey and Jersey" : "Jersey", - "Channel Islands" : "Jersey", - "Caribbean Netherlands" : "Bonaire, Sint Eustatius and Saba", - "F.S. Micronesia" : "Micronesia, Federated States of", - "Emirates" : "United Arab Emirates", - # "Bosnia–Herzegovina" : "Bosnia and Herzegovina", - "Bosnia" : "Bosnia and Herzegovina", - "Dominican Rep" : "Dominican Republic", - "Macedonia" : "North Macedonia", - "Korea, South" : "Korea, Republic of", - "Cote d'Ivoire" : "Côte d'Ivoire", - "St. Martin" : "Saint Martin (French part)", - "Congo (Kinshasa)" : "Congo, the Democratic Republic of the", - "Taiwan*" : "Taiwan, Province of China", - "Reunion" : "Réunion", - "Curacao" : "Curaçao", - "Congo (Brazzaville)" : "Congo", - "Deutschland" : "Germany", - "The Bahamas" : "Bahamas", - "The Gambia" : "Gambia", - "Kosovo" : "Kosovo, Republic of", - "Swaziland" : "Eswatini", - "Gambia, The" : "Gambia", - "Bahamas, The" : "Bahamas", # "Others" has no mapping, i.e. the default val is used # "Cruise Ship" has no mapping, i.e. the default val is used } # fmt: on -def country_code(country): +def country_code(s): """ Return two letter country code (Alpha-2) according to https://en.wikipedia.org/wiki/ISO_3166-1 Defaults to "XX". """ - # Look in synonyms if not found. - if not country in is_3166_1 and country in synonyms: - country = synonyms[country] - - # Get country or fallback to default_code. - country_code = is_3166_1.get(country, default_code) - - # Default picked? - if country_code == default_code: - LOGGER.warning(f"No country_code found for '{country}'. Using '{country_code}'!") + country_code = country_name__country_code.get(s, default_country_code) + if country_code == default_country_code: + LOGGER.warning(f"No country code found for '{s}'. Using '{country_code}'!") - # Return. return country_code diff --git a/app/utils/populations.py b/app/utils/populations.py index 8a78ec50..ea72c334 100644 --- a/app/utils/populations.py +++ b/app/utils/populations.py @@ -5,7 +5,7 @@ import requests from cachetools import TTLCache, cached -from .countrycodes import country_code +from .countries import country_code LOGGER = logging.getLogger(__name__) diff --git a/tests/test_countries.py b/tests/test_countries.py new file mode 100644 index 00000000..2c9ba65e --- /dev/null +++ b/tests/test_countries.py @@ -0,0 +1,24 @@ +import pytest + +from app.utils import countries + + +""" +Todo: + * Test cases for capturing of stdout/stderr +""" + + +@pytest.mark.parametrize( + "country_name,expected_country_code", + [ + ("Germany", "DE"), + ("Bolivia, Plurinational State of", "BO"), + ("Korea, Democratic People's Republic of", "KP"), + ("US", "US"), + ("BlaBla", countries.default_country_code), + ("Others", countries.default_country_code), + ], +) +def test_countries_country_name__country_code(country_name, expected_country_code): + assert countries.country_code(country_name) == expected_country_code diff --git a/tests/test_countrycodes.py b/tests/test_countrycodes.py deleted file mode 100644 index 1b132266..00000000 --- a/tests/test_countrycodes.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest - -from app.utils import countrycodes - - -""" -Todo: - * Test cases for capturing of stdout/stderr -""" - - -@pytest.mark.parametrize( - "country_name,expected_country_code", - [ - ("Germany", "DE"), - ("Bolivia, Plurinational State of", "BO"), - ("Korea, Democratic People's Republic of", "KP"), - ("BlaBla", "XX"), - ], -) -def test_countrycodes_is_3166_1(country_name, expected_country_code): - assert countrycodes.country_code(country_name) == expected_country_code - - -@pytest.mark.parametrize( - "country_name_synonym, expected_country_code", - [("Deutschland", "DE"), ("Iran (Islamic Republic of)", "IR"), ("British Virgin Islands", "VG")], -) -def test_countrycodes_synonym(country_name_synonym, expected_country_code): - assert countrycodes.country_code(country_name_synonym) == expected_country_code