https://www.geonames.org から取れる、人口500人以上の都市の名前に限定すると、
Santa Maria Magdalena Cahuacan
import logging import tempfile import zipfile from collections import Counter import httpx FILE_NAME_BASE = 'cities500' GEONAME_FIELDS = ( 'geoname_id', 'name', 'ascii_name', 'alternate_names', 'latitude', 'longitude', 'feature_class', 'feature_code', 'country_code', 'cc2', 'admin1_code', 'admin2_code', 'admin3_code', 'admin4_code', 'population', 'elevation', 'dem', 'timezone', 'modification_date', ) def retrieve_cities(): """Retrieve city names from a remote server.""" response = httpx.get(f'https://download.geonames.org/export/dump/{FILE_NAME_BASE}.zip') response.raise_for_status() tmpdir = tempfile.TemporaryDirectory() with open(tmpdir.name + f'/{FILE_NAME_BASE}.zip', 'wb') as f: f.write(response.content) with zipfile.ZipFile(tmpdir.name + f'/{FILE_NAME_BASE}.zip', 'r') as z: z.extractall(tmpdir.name) with open(tmpdir.name + f'/{FILE_NAME_BASE}.txt', 'r') as f: for line in f: yield line.split('\t') def count_characters(to_check='ascii_name', filter_func=lambda _: True): """Count characters in city names.""" cities = {} for city_fields in retrieve_cities(): city = dict(zip(GEONAME_FIELDS, city_fields)) if not filter_func(city): continue counter = Counter() for c in city[to_check]: counter[c] += 1 cities[city['geoname_id']] = {'characters': counter, 'city': city} return cities def count_chars_of_city_names(cities, char=None): """Find the city with the most occurrences of a given character.""" cities_by_char_count = {} max_count = 0 max_count_char = None for city_id, data in cities.items(): if 'characters' not in data or not data['characters']: logging.debug(f'No characters found for city {city_id}', data) continue count = 0 if char and char in data['characters']: count = data['characters'][char] cities_by_char_count.setdefault(count, []).append(data) elif char is None: most_common = data['characters'].most_common(1)[0] char, count = most_common cities_by_char_count.setdefault(count, []).append(data) if count > max_count: max_count = count max_count_char = char cities_by_char_count.setdefault(count, []).append(data) return cities_by_char_count.get(max_count, []), max_count_char def not_contain_invalid_chars(city): return ( '(' not in city.get('ascii_name', '') and '/' not in city.get('ascii_name', '') ) def main(): cities = count_characters(filter_func=not_contain_invalid_chars) for char in 'abcdefghijklmnopqrstuvwxyz': cities_counted, char = count_chars_of_city_names(cities, char) max_count = cities_counted[0]['characters'][char] print(f'The character "{char}" appears the most ({max_count} times) in the following cities:') for city in cities_counted: print("\t", city['city']['ascii_name']) if __name__ == "__main__": main()
Ulaanbaatar(ウランバートル)でいいの?
https://www.geonames.org から取れる、人口500人以上の都市の名前に限定すると、 La Calzada de Calatrava が8文字の `a` を含んで最大。 import tempfileimport zipfilefrom collections import Counterimport httpxFILE_NAME_...
じゃあ任意の文字が最も多い割合で含まれる都市は……?
津 終了
志布志市志布志町
ランヴァイル・プルグウィンギル・ゴゲリフウィルンドロブル・ランティシリオゴゴゴホ(Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch)