geodata.xml is now generated correctly
Data is downloaded from geonames.org and processed with an AWK script and a Perl script. The result is part of the distribution, so the average user (or a packager) doesn't have to download that much data.
This commit is contained in:
parent
45e4308692
commit
12d2e57c55
1
.gitignore
vendored
1
.gitignore
vendored
@ -37,6 +37,7 @@ Makefile.in
|
||||
|
||||
# Geonames related things
|
||||
/data/geonames/*.txt
|
||||
/data/geonames/*.zip
|
||||
/data/geonames/geodata.xml
|
||||
|
||||
# Translation related files
|
||||
|
2
TODO
2
TODO
@ -9,4 +9,4 @@
|
||||
** Hidden ascendent
|
||||
** Vertex
|
||||
** Symbols of Uranus and Pluto
|
||||
|
||||
* Custom city database (or multiple databases) or adding custom locations
|
||||
|
@ -13,6 +13,9 @@ AM_PROG_CC_C_O
|
||||
AM_PROG_AR
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PATH_PROGS(PERL, [perl5 perl])
|
||||
AC_PATH_PROGS(WGET, [wget])
|
||||
AC_PATH_PROGS(CURL, [curl])
|
||||
AC_PATH_PROGS(UNZIP, [unzip])
|
||||
have_geonames_perl_modules=no
|
||||
AX_PROG_PERL_MODULES([XML::Writer IO::File], [have_geonames_perl_modules=yes], AC_MSG_WARN([XML::Writer and IO::File perl modules are required if you want to regenerate geodata.xml!]))
|
||||
AC_SUBST([have_geonames_perl_modules])
|
||||
|
@ -5,7 +5,58 @@ EXTRA_DIST = geodata.xml
|
||||
|
||||
MAINTAINERCLEANFILES = geodata.xml
|
||||
|
||||
geodata.xml:
|
||||
countryInfoURL = "http://download.geonames.org/export/dump/countryInfo.txt"
|
||||
timeZonesURL = "http://download.geonames.org/export/dump/timeZones.txt"
|
||||
allCountriesURL = "http://download.geonames.org/export/dump/allCountries.zip"
|
||||
|
||||
countryInfo.txt:
|
||||
@-if test -x $(WGET); then \
|
||||
$(WGET) $(countryInfoURL); \
|
||||
else \
|
||||
if test -x $(CURL); then \
|
||||
$(CURL) $(countryInfoURL) > "$@" ; \
|
||||
else \
|
||||
echo "wget and curl could not be found in your PATH."; \
|
||||
echo "One of them is needed to create geodata.xml!"; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
timeZones.txt:
|
||||
@-if test -x $(WGET); then \
|
||||
$(WGET) $(timeZonesURL); \
|
||||
else \
|
||||
if test -x $(CURL); then \
|
||||
$(CURL) $(timeZonesURL) > "$@" ; \
|
||||
else \
|
||||
echo "wget and curl could not be found in your PATH."; \
|
||||
echo "One of them is needed to create geodata.xml!"; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
allCountries.zip:
|
||||
@-if test -x $(WGET); then \
|
||||
$(WGET) $(allCountriesURL); \
|
||||
else \
|
||||
if test -x $(CURL); then \
|
||||
$(CURL) $(allCountriesURL) > "$@" ; \
|
||||
else \
|
||||
echo "wget and curl could not be found in your PATH."; \
|
||||
echo "One of them is needed to create geodata.xml!"; \
|
||||
fi; \
|
||||
fi;
|
||||
|
||||
cities.txt: allCountries.zip
|
||||
$(AM_V_GEN) if test -x $(UNZIP); then \
|
||||
$(UNZIP) allCountries.zip; \
|
||||
else \
|
||||
echo "unzip could not be found in your PATH."; \
|
||||
echo "It is needed to create geodata.xml!"; \
|
||||
fi; \
|
||||
if test -f allCountries.txt; then \
|
||||
$(AWK) -f geonames_process.awk allCountries.txt > $@ ; \
|
||||
fi
|
||||
|
||||
geodata.xml: countryInfo.txt timeZones.txt cities.txt
|
||||
$(AM_V_GEN) if test -x "$(PERL)"; then \
|
||||
if test "x$(have_geonames_perl_modules)" = "xyes" -o "x$(I_HAVE_PERL_MODULES)" = "xyes"; then \
|
||||
$(PERL) geonames_process.pl; \
|
||||
|
7
data/geonames/geonames_process.awk
Normal file
7
data/geonames/geonames_process.awk
Normal file
@ -0,0 +1,7 @@
|
||||
BEGIN {
|
||||
FS="\t"
|
||||
}
|
||||
{
|
||||
if ($7 != "P" || $8 != "PPL" || $15 < 1000) next
|
||||
print $9 FS $2 FS $5 FS $6 FS $16 FS $18
|
||||
}
|
@ -4,25 +4,22 @@ use strict;
|
||||
use IO::File;
|
||||
use XML::Writer;
|
||||
|
||||
# TODO: Download http://download.geonames.org/export/dump/countryInfo.txt
|
||||
# TODO: Download http://download.geonames.org/export/dump/timeZones.txt
|
||||
# TODO: Download http://download.geonames.org/export/dump/allCountries.zip
|
||||
# TODO: Unzip allCountries.zip
|
||||
|
||||
my %time_zones = ();
|
||||
my %countries = ();
|
||||
|
||||
open(TIMEZONES, 'timeZones.txt') or die("Cannot open timeZones.txt: $!\n");
|
||||
while (<TIMEZONES>) {
|
||||
my ($country_code, $timezone_id, $gmt_offset_january, $gmt_offset_july, $gmt_offset_raw) = split(/\t/, $_);
|
||||
next if ($country_code !~ /^[A-Z]{2}$/);
|
||||
|
||||
$time_zones{$country_code . '_' . $timezone_id} = {offset => $gmt_offset_january, dst_offset => $gmt_offset_july};
|
||||
$time_zones{$timezone_id} = {offset => $gmt_offset_january, dst_offset => $gmt_offset_july};
|
||||
}
|
||||
close(TIMEZONES);
|
||||
|
||||
open(COUNTRIES, 'countryInfo.txt') or die("Cannot open countryInfo.txt: $!\n");
|
||||
while (<COUNTRIES>) {
|
||||
my ($country_code, $iso3, $iso_numeric, $fips, $name, $capital, $area, $population, $continent, $tld, $currency_code, $currency_name, $phone, $postal_code_format, $postal_code_regex, $languages, $geonameid, $neighbours, $equivalent_fips_code) = split(/\t/, $_);
|
||||
next if ($country_code !~ /^[A-Z]{2}$/);
|
||||
|
||||
if ($country_code =~ /^[A-Z]{2}$/) {
|
||||
$countries{$country_code} = $name;
|
||||
@ -30,7 +27,7 @@ while (<COUNTRIES>) {
|
||||
}
|
||||
close(COUNTRIES);
|
||||
|
||||
open(GEONAMES, "HU.txt") or die("Cannot open HU.txt: $!\n");
|
||||
open(GEONAMES, "cities.txt") or die("Cannot open cities.txt: $!\n");
|
||||
|
||||
my $xml_file = IO::File->new('>geodata.xml');
|
||||
my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0);
|
||||
@ -38,23 +35,36 @@ my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0);
|
||||
$writer->xmlDecl('utf-8');
|
||||
$writer->startTag('geodata');
|
||||
|
||||
# TODO: process all files, not just HU.txt!
|
||||
while (<GEONAMES>) {
|
||||
my ($geonameid, $name, $asciiname, $alternatenames, $latitude, $longitude, $feature_class, $feature_code, $country_code, $alt_country_code, $admin1, $admin2, $admin3, $admin4, $population, $elevation, $dem, $timezone, $mod_date) = split(/\t/, $_);
|
||||
chomp($_);
|
||||
my ($country_code, $name, $latitude, $longitude, $elevation, $timezone) = split(/\t/, $_);
|
||||
|
||||
if (($feature_class eq 'P') && ($feature_code eq 'PPL')) {
|
||||
$writer->emptyTag('place',
|
||||
'name' => $name,
|
||||
'latitude' => $latitude,
|
||||
'longitude' => $longitude,
|
||||
'elevation' => $elevation,
|
||||
'country' => $countries{$country_code},
|
||||
'time_offset' => $time_zones{$country_code . '_' . $timezone}->{offset},
|
||||
'time_offset_dst' => $time_zones{$country_code . '_' . $timezone}->{dst_offset}
|
||||
if (!exists($countries{$country_code})) {
|
||||
print "Unknown country code: $country_code\n";
|
||||
next;
|
||||
}
|
||||
|
||||
if (!exists($time_zones{$timezone})) {
|
||||
print "Unknown time zone: $timezone\n";
|
||||
next;
|
||||
}
|
||||
|
||||
$writer->emptyTag('p',
|
||||
'n' => $name,
|
||||
'lat' => $latitude,
|
||||
'lon' => $longitude,
|
||||
'alt' => $elevation,
|
||||
'c' => $country_code,
|
||||
'tzo' => $time_zones{$timezone}->{offset},
|
||||
'tzd' => $time_zones{$timezone}->{dst_offset}
|
||||
);
|
||||
|
||||
print $., "\n" if ($. % 19083 == 0);
|
||||
}
|
||||
}
|
||||
close GEONAMES;
|
||||
|
||||
$writer->endTag('geodata');
|
||||
$writer->end();
|
||||
$xml_file->close();
|
||||
|
||||
close GEONAMES;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user