geodata.xml is now generated correctly
Data is downloaded from geonames.org and processed with an AWK script and a Perl script. The result is shipped as part of the distribution, so the average user (or a packager) doesn't have to download that much data.
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -37,6 +37,7 @@ Makefile.in | ||||
|  | ||||
| # Geonames related things | ||||
| /data/geonames/*.txt | ||||
| /data/geonames/*.zip | ||||
| /data/geonames/geodata.xml | ||||
|  | ||||
| # Translation related files | ||||
|   | ||||
							
								
								
									
										2
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								TODO
									
									
									
									
									
								
							| @@ -9,4 +9,4 @@ | ||||
| ** Hidden ascendent | ||||
| ** Vertex | ||||
| ** Symbols of Uranus and Pluto | ||||
|  | ||||
| * Custom city database (or multiple databases) or adding custom locations | ||||
|   | ||||
| @@ -13,6 +13,9 @@ AM_PROG_CC_C_O | ||||
| AM_PROG_AR | ||||
| AC_PROG_LIBTOOL | ||||
| AC_PATH_PROGS(PERL, [perl5 perl]) | ||||
| AC_PATH_PROGS(WGET, [wget]) | ||||
| AC_PATH_PROGS(CURL, [curl]) | ||||
| AC_PATH_PROGS(UNZIP, [unzip]) | ||||
| have_geonames_perl_modules=no | ||||
| AX_PROG_PERL_MODULES([XML::Writer IO::File], [have_geonames_perl_modules=yes], AC_MSG_WARN([XML::Writer and IO::File perl modules are required if you want to regenerate geodata.xml!])) | ||||
| AC_SUBST([have_geonames_perl_modules]) | ||||
|   | ||||
| @@ -5,7 +5,58 @@ EXTRA_DIST = geodata.xml | ||||
|  | ||||
| MAINTAINERCLEANFILES = geodata.xml | ||||
|  | ||||
| geodata.xml: | ||||
| countryInfoURL  = "http://download.geonames.org/export/dump/countryInfo.txt" | ||||
| timeZonesURL    = "http://download.geonames.org/export/dump/timeZones.txt" | ||||
| allCountriesURL = "http://download.geonames.org/export/dump/allCountries.zip" | ||||
|  | ||||
| # Fetch countryInfo.txt, preferring wget and falling back to curl. | ||||
| # The tool paths are quoted inside `test -x`: if configure left $(WGET) or | ||||
| # $(CURL) empty, an unquoted expansion collapses to the one-argument form | ||||
| # `test -x`, which is always true, so the fallback would never be reached. | ||||
| # `@` hides the command and `-` makes make ignore a failed download. | ||||
| countryInfo.txt: | ||||
| 	@-if test -x "$(WGET)"; then \ | ||||
| 	    $(WGET) $(countryInfoURL); \ | ||||
| 	else \ | ||||
| 	    if test -x "$(CURL)"; then \ | ||||
| 	        $(CURL) $(countryInfoURL) > "$@" ; \ | ||||
| 	    else \ | ||||
| 	        echo "wget and curl could not be found in your PATH."; \ | ||||
| 	        echo "One of them is needed to create geodata.xml!"; \ | ||||
| 	    fi; \ | ||||
| 	fi | ||||
|  | ||||
| # Fetch timeZones.txt, preferring wget and falling back to curl. | ||||
| # The tool paths are quoted inside `test -x`: if configure left $(WGET) or | ||||
| # $(CURL) empty, an unquoted expansion collapses to the one-argument form | ||||
| # `test -x`, which is always true, so the fallback would never be reached. | ||||
| # `@` hides the command and `-` makes make ignore a failed download. | ||||
| timeZones.txt: | ||||
| 	@-if test -x "$(WGET)"; then \ | ||||
| 	    $(WGET) $(timeZonesURL); \ | ||||
| 	else \ | ||||
| 	    if test -x "$(CURL)"; then \ | ||||
| 	        $(CURL) $(timeZonesURL) > "$@" ; \ | ||||
| 	    else \ | ||||
| 	        echo "wget and curl could not be found in your PATH."; \ | ||||
| 	        echo "One of them is needed to create geodata.xml!"; \ | ||||
| 	    fi; \ | ||||
| 	fi | ||||
|  | ||||
| # Fetch allCountries.zip, preferring wget and falling back to curl. | ||||
| # The tool paths are quoted inside `test -x`: if configure left $(WGET) or | ||||
| # $(CURL) empty, an unquoted expansion collapses to the one-argument form | ||||
| # `test -x`, which is always true, so the fallback would never be reached. | ||||
| # `@` hides the command and `-` makes make ignore a failed download. | ||||
| allCountries.zip: | ||||
| 	@-if test -x "$(WGET)"; then \ | ||||
| 	    $(WGET) $(allCountriesURL); \ | ||||
| 	else \ | ||||
| 	    if test -x "$(CURL)"; then \ | ||||
| 	        $(CURL) $(allCountriesURL) > "$@" ; \ | ||||
| 	    else \ | ||||
| 	        echo "wget and curl could not be found in your PATH."; \ | ||||
| 	        echo "One of them is needed to create geodata.xml!"; \ | ||||
| 	    fi; \ | ||||
| 	fi | ||||
|  | ||||
| # Unpack allCountries.zip and, if allCountries.txt is then present, filter it | ||||
| # through geonames_process.awk into cities.txt. | ||||
| # $(UNZIP) is quoted inside `test -x`: if configure left it empty, an unquoted | ||||
| # expansion collapses to the one-argument form `test -x`, which is always | ||||
| # true, and the "unzip could not be found" message would never be printed. | ||||
| cities.txt: allCountries.zip | ||||
| 	$(AM_V_GEN) if test -x "$(UNZIP)"; then \ | ||||
| 	    $(UNZIP) allCountries.zip; \ | ||||
| 	else \ | ||||
| 	    echo "unzip could not be found in your PATH."; \ | ||||
| 	    echo "It is needed to create geodata.xml!"; \ | ||||
| 	fi; \ | ||||
| 	if test -f allCountries.txt; then \ | ||||
| 	    $(AWK) -f geonames_process.awk allCountries.txt > $@ ; \ | ||||
| 	fi | ||||
|  | ||||
| geodata.xml: countryInfo.txt timeZones.txt cities.txt | ||||
| 	$(AM_V_GEN) if test -x "$(PERL)"; then \ | ||||
| 	    if test "x$(have_geonames_perl_modules)" = "xyes" -o "x$(I_HAVE_PERL_MODULES)" = "xyes"; then \ | ||||
| 	        $(PERL) geonames_process.pl; \ | ||||
|   | ||||
							
								
								
									
										7
									
								
								data/geonames/geonames_process.awk
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								data/geonames/geonames_process.awk
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| # Reduce a tab-separated geonames dump to the six columns that | ||||
| # geonames_process.pl splits out of cities.txt. | ||||
| BEGIN { FS = "\t" } | ||||
|  | ||||
| # Drop every record whose 7th field is not "P", whose 8th field is not | ||||
| # "PPL", or whose 15th field is below 1000 (field 15 is presumably the | ||||
| # population column -- confirm against the geonames readme). | ||||
| $7 != "P" || $8 != "PPL" || $15 < 1000 { next } | ||||
|  | ||||
| # Emit fields 9, 2, 5, 6, 16 and 18, joined with the input separator. | ||||
| { print $9 FS $2 FS $5 FS $6 FS $16 FS $18 } | ||||
| @@ -4,25 +4,22 @@ use strict; | ||||
| use IO::File; | ||||
| use XML::Writer; | ||||
|  | ||||
| # TODO: Download http://download.geonames.org/export/dump/countryInfo.txt | ||||
| # TODO: Download http://download.geonames.org/export/dump/timeZones.txt | ||||
| # TODO: Download http://download.geonames.org/export/dump/allCountries.zip | ||||
| # TODO: Unzip allCountries.zip | ||||
|  | ||||
| my %time_zones = (); | ||||
| my %countries = (); | ||||
|  | ||||
| open(TIMEZONES, 'timeZones.txt') or die("Cannot open timeZones.txt: $!\n"); | ||||
| while (<TIMEZONES>) { | ||||
|     my ($country_code, $timezone_id, $gmt_offset_january, $gmt_offset_july, $gmt_offset_raw) = split(/\t/, $_); | ||||
|     next if ($country_code !~ /^[A-Z]{2}$/); | ||||
|  | ||||
|     $time_zones{$country_code . '_' . $timezone_id} = {offset => $gmt_offset_january, dst_offset => $gmt_offset_july}; | ||||
|     $time_zones{$timezone_id} = {offset => $gmt_offset_january, dst_offset => $gmt_offset_july}; | ||||
| } | ||||
| close(TIMEZONES); | ||||
|  | ||||
| open(COUNTRIES, 'countryInfo.txt') or die("Cannot open countryInfo.txt: $!\n"); | ||||
| while (<COUNTRIES>) { | ||||
|     my ($country_code, $iso3, $iso_numeric, $fips, $name, $capital, $area, $population, $continent, $tld, $currency_code, $currency_name, $phone, $postal_code_format, $postal_code_regex, $languages, $geonameid, $neighbours, $equivalent_fips_code) = split(/\t/, $_); | ||||
|     next if ($country_code !~ /^[A-Z]{2}$/); | ||||
|  | ||||
|     if ($country_code =~ /^[A-Z]{2}$/) { | ||||
|         $countries{$country_code} = $name; | ||||
| @@ -30,7 +27,7 @@ while (<COUNTRIES>) { | ||||
| } | ||||
| close(COUNTRIES); | ||||
|  | ||||
| open(GEONAMES, "HU.txt") or die("Cannot open HU.txt: $!\n"); | ||||
| open(GEONAMES, "cities.txt") or die("Cannot open cities.txt: $!\n"); | ||||
|  | ||||
| my $xml_file = IO::File->new('>geodata.xml'); | ||||
| my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0); | ||||
| @@ -38,23 +35,36 @@ my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0); | ||||
| $writer->xmlDecl('utf-8'); | ||||
| $writer->startTag('geodata'); | ||||
|  | ||||
| # TODO: process all files, not just HU.txt! | ||||
| while (<GEONAMES>) { | ||||
|     my ($geonameid, $name, $asciiname, $alternatenames, $latitude, $longitude, $feature_class, $feature_code, $country_code, $alt_country_code, $admin1, $admin2, $admin3, $admin4, $population, $elevation, $dem, $timezone, $mod_date) = split(/\t/, $_); | ||||
|     chomp($_); | ||||
|     my ($country_code, $name, $latitude, $longitude, $elevation, $timezone) = split(/\t/, $_); | ||||
|  | ||||
|     if (($feature_class eq 'P') && ($feature_code eq 'PPL')) { | ||||
|         $writer->emptyTag('place', | ||||
|                 'name'            => $name, | ||||
|                 'latitude'        => $latitude, | ||||
|                 'longitude'       => $longitude, | ||||
|                 'elevation'       => $elevation, | ||||
|                 'country'         => $countries{$country_code}, | ||||
|                 'time_offset'     => $time_zones{$country_code . '_' . $timezone}->{offset}, | ||||
|                 'time_offset_dst' => $time_zones{$country_code . '_' . $timezone}->{dst_offset} | ||||
|             ); | ||||
|     if (!exists($countries{$country_code})) { | ||||
|         print "Unknown country code: $country_code\n"; | ||||
|         next; | ||||
|     } | ||||
|  | ||||
|     if (!exists($time_zones{$timezone})) { | ||||
|         print "Unknown time zone: $timezone\n"; | ||||
|         next; | ||||
|     } | ||||
|  | ||||
|     $writer->emptyTag('p', | ||||
|             'n'   => $name, | ||||
|             'lat' => $latitude, | ||||
|             'lon' => $longitude, | ||||
|             'alt' => $elevation, | ||||
|             'c'   => $country_code, | ||||
|             'tzo' => $time_zones{$timezone}->{offset}, | ||||
|             'tzd' => $time_zones{$timezone}->{dst_offset} | ||||
|         ); | ||||
|  | ||||
|     print $., "\n" if ($. % 19083 == 0); | ||||
| } | ||||
| close GEONAMES; | ||||
|  | ||||
| $writer->endTag('geodata'); | ||||
| $writer->end(); | ||||
| $xml_file->close(); | ||||
|  | ||||
| close GEONAMES; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user