Created geonames_process.pl Perl script to download and process geonames.org data

It is *NOT* ready for distribution yet! Either the script must be finished
so that it downloads the real data itself, or separate rules must be added
to create (download) dependency files like timeZones.txt, allCountries.zip
and countryInfo.txt.
This commit is contained in:
Gergely Polonkai 2013-09-06 16:22:16 +02:00
parent ecfac69a67
commit 6595f0f961
5 changed files with 166 additions and 1 deletions

7
.gitignore vendored
View File

@ -15,7 +15,8 @@ Makefile.in
/src/astrognome /src/astrognome
/ltmain.sh /ltmain.sh
/autom4te.cache/ /autom4te.cache/
/m4/ /m4/*
!m4/ax_prog_perl_modules.m4
/missing /missing
/stamp-h1 /stamp-h1
/config.guess /config.guess
@ -30,6 +31,10 @@ Makefile.in
/compile /compile
/tags /tags
# Geonames related things
/data/geonames/*.txt
/data/geonames/geodata.xml
# Translation related files # Translation related files
/po/POTFILES /po/POTFILES
/po/Makevars.template /po/Makevars.template

View File

@ -12,6 +12,10 @@ AC_PROG_CC
AM_PROG_CC_C_O AM_PROG_CC_C_O
AM_PROG_AR AM_PROG_AR
AC_PROG_LIBTOOL AC_PROG_LIBTOOL
dnl Perl and the two modules below are only needed to regenerate
dnl geodata.xml, so a missing module produces a warning rather than a
dnl configure failure. The result is exported to the Makefiles as
dnl have_geonames_perl_modules (yes or no).
AC_PATH_PROGS([PERL], [perl5 perl])
have_geonames_perl_modules=no
AX_PROG_PERL_MODULES([XML::Writer IO::File],
                     [have_geonames_perl_modules=yes],
                     [AC_MSG_WARN([XML::Writer and IO::File perl modules are required if you want to regenerate geodata.xml!])])
AC_SUBST([have_geonames_perl_modules])
IT_PROG_INTLTOOL([0.35.0]) IT_PROG_INTLTOOL([0.35.0])
GETTEXT_PACKAGE=astrognome GETTEXT_PACKAGE=astrognome
AC_SUBST(GETTEXT_PACKAGE) AC_SUBST(GETTEXT_PACKAGE)
@ -48,6 +52,7 @@ AC_CONFIG_FILES([
Makefile Makefile
src/Makefile src/Makefile
po/Makefile.in po/Makefile.in
data/geonames/Makefile
docs/reference/astrognome/Makefile docs/reference/astrognome/Makefile
]) ])
AC_OUTPUT AC_OUTPUT

19
data/geonames/Makefile.am Normal file
View File

@ -0,0 +1,19 @@
# Install the generated place database into $(pkgdatadir).
geodatadir = $(pkgdatadir)
geodata_DATA = geodata.xml

# Ship both the generated file and the script that regenerates it.
EXTRA_DIST = geodata.xml geonames_process.pl
MAINTAINERCLEANFILES = geodata.xml

# Regenerate geodata.xml with geonames_process.pl. The script reads its
# input files from, and writes geodata.xml to, the current directory.
#
# Every failure path exits non-zero so that make stops instead of
# reporting success for a target that was never created.
# configure's module check can be overridden by setting
# I_HAVE_PERL_MODULES=yes in the environment.
geodata.xml:
	$(AM_V_GEN) if test -x "$(PERL)"; then \
	    if test "x$(have_geonames_perl_modules)" = "xyes" || test "x$(I_HAVE_PERL_MODULES)" = "xyes"; then \
	        $(PERL) $(srcdir)/geonames_process.pl; \
	    else \
	        echo "XML::Writer and IO::File perl modules are required to process geonames data." >&2; \
	        echo "configure reported they are not installed. If you are sure they are," >&2; \
	        echo "set the I_HAVE_PERL_MODULES environment variable to yes" >&2; \
	        exit 1; \
	    fi; \
	else \
	    echo "perl5 is required to create geodata.xml!" >&2; \
	    exit 1; \
	fi

View File

@ -0,0 +1,59 @@
#! /usr/bin/perl
#
# Build geodata.xml from geonames.org dump files.
#
# Reads timeZones.txt, countryInfo.txt and HU.txt from the current directory
# and writes geodata.xml there, containing one <place> element for every
# record whose feature class is P and whose feature code is PPL.
#
# No encoding layers are applied: text is copied through byte for byte, so
# the inputs are assumed to already be in the encoding announced by the XML
# declaration (utf-8).
#
# The script dies (non-zero exit status) if any file cannot be opened or the
# output cannot be written completely.
use strict;
use warnings;

use IO::File;
use XML::Writer;

# TODO: Download http://download.geonames.org/export/dump/countryInfo.txt
# TODO: Download http://download.geonames.org/export/dump/timeZones.txt
# TODO: Download http://download.geonames.org/export/dump/allCountries.zip
# TODO: Unzip allCountries.zip

# Tab-separated columns of a geonames record, in file order.
my @geoname_columns = qw(
    geonameid name asciiname alternatenames latitude longitude
    feature_class feature_code country_code alt_country_code
    admin1 admin2 admin3 admin4 population elevation dem timezone mod_date
);

# Open $path for reading and return the file handle.
# Dies with the system error message if the file cannot be opened.
sub open_input {
    my ($path) = @_;

    open(my $handle, '<', $path)
        or die "Cannot open $path for reading: $!";

    return $handle;
}

# Return $value, or the empty string when it is undefined, so that the XML
# writer is never handed an undefined attribute value.
sub or_empty {
    my ($value) = @_;

    return defined $value ? $value : '';
}

# Open every input before geodata.xml is created, so that a missing
# dependency file does not leave a truncated geodata.xml behind.
my $timezones_fh = open_input('timeZones.txt');
my $countries_fh = open_input('countryInfo.txt');
# TODO: process all files, not just HU.txt!
my $geonames_fh = open_input('HU.txt');

# Maps "<country code>_<time zone id>" to { offset => ..., dst_offset => ... },
# taken from the January and July GMT offset columns respectively.
my %time_zones;
while (my $line = <$timezones_fh>) {
    chomp $line;
    my ($country_code, $timezone_id, $gmt_offset_january, $gmt_offset_july)
        = split(/\t/, $line);

    # Skip blank or truncated lines instead of storing undefined offsets.
    next unless defined $gmt_offset_july;

    $time_zones{$country_code . '_' . $timezone_id} = {
        offset     => $gmt_offset_january,
        dst_offset => $gmt_offset_july,
    };
}
close($timezones_fh);

# Maps the two-letter country code (first column) to the country name
# (fifth column). Rows whose first column is not exactly two upper-case
# letters are ignored.
my %countries;
while (my $line = <$countries_fh>) {
    chomp $line;
    my ($country_code, $name) = (split(/\t/, $line))[0, 4];

    next unless defined($country_code) && $country_code =~ /^[A-Z]{2}$/;

    $countries{$country_code} = $name;
}
close($countries_fh);

my $xml_file = IO::File->new('geodata.xml', 'w')
    or die "Cannot open geodata.xml for writing: $!";
my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0);

$writer->xmlDecl('utf-8');
$writer->startTag('geodata');

while (my $line = <$geonames_fh>) {
    chomp $line;

    # The -1 limit keeps trailing empty columns, so a well-formed line
    # always fills every column name.
    my %record;
    @record{@geoname_columns} = split(/\t/, $line, -1);

    # timezone is the last column used below; if it is missing the line is
    # blank or truncated.
    next unless defined $record{timezone};
    next unless $record{feature_class} eq 'P' && $record{feature_code} eq 'PPL';

    # Fall back to an empty record for unknown time zones rather than
    # autovivifying a new entry in %time_zones.
    my $zone = $time_zones{$record{country_code} . '_' . $record{timezone}} || {};

    $writer->emptyTag('place',
        'name'            => $record{name},
        'latitude'        => $record{latitude},
        'longitude'       => $record{longitude},
        'elevation'       => $record{elevation},
        'country'         => or_empty($countries{$record{country_code}}),
        'time_offset'     => or_empty($zone->{offset}),
        'time_offset_dst' => or_empty($zone->{dst_offset}),
    );
}
close($geonames_fh);

$writer->endTag('geodata');

# end() lets the writer verify that the document is complete; buffered write
# errors only surface when the handle is closed, so check that too.
$writer->end();
$xml_file->close()
    or die "Cannot finish writing geodata.xml: $!";

View File

@ -0,0 +1,77 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_perl_modules.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_PERL_MODULES([MODULES], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
#
# DESCRIPTION
#
# Checks to see if the given perl modules are available. If true the shell
# commands in ACTION-IF-TRUE are executed. If not the shell commands in
# ACTION-IF-FALSE are run. Note if $PERL is not set (for example by
# calling AC_CHECK_PROG, or AC_PATH_PROG), AC_CHECK_PROG(PERL, perl, perl)
# will be run.
#
# MODULES is a space separated list of module names. To check for a
# minimum version of a module, append the version number to the module
# name, separated by an equals sign.
#
# Example:
#
# AX_PROG_PERL_MODULES( Text::Wrap Net::LDAP=1.0.3, ,
# AC_MSG_WARN(Need some Perl modules))
#
# LICENSE
#
# Copyright (c) 2009 Dean Povey <povey@wedgetail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
# Register the AC_-prefixed name as an obsolete alias of the AX_-prefixed macro.
AU_ALIAS([AC_PROG_PERL_MODULES], [AX_PROG_PERL_MODULES])
AC_DEFUN([AX_PROG_PERL_MODULES],[dnl
dnl Collect the requested modules into ax_perl_modules as a list of
dnl single-quoted shell words. The equals sign in a name=version entry is
dnl replaced by a space so the word can be pasted after perl's use keyword.
m4_define([ax_perl_modules])
m4_foreach([ax_perl_module], m4_split(m4_normalize([$1])),
[
m4_append([ax_perl_modules],
[']m4_bpatsubst(ax_perl_module,=,[ ])[' ])
])
# Make sure we have perl
if test -z "$PERL"; then
AC_CHECK_PROG(PERL,perl,perl)
fi
if test "x$PERL" != x; then
ax_perl_modules_failed=0
# Probe every requested module by asking perl to load it. A single failing
# module marks the whole check as failed, but the loop still reports the rest.
for ax_perl_module in ax_perl_modules; do
AC_MSG_CHECKING(for perl module $ax_perl_module)
# Would be nice to log result here, but can't rely on autoconf internals
$PERL -e "use $ax_perl_module; exit" > /dev/null 2>&1
if test $? -ne 0; then
AC_MSG_RESULT(no);
ax_perl_modules_failed=1
else
AC_MSG_RESULT(ok);
fi
done
# Run optional shell commands
# The colon keeps each branch syntactically valid when the caller passes an
# empty action.
if test "$ax_perl_modules_failed" = 0; then
:
$2
else
:
$3
fi
else
# Without perl neither ACTION-IF-TRUE nor ACTION-IF-FALSE is run; only this
# warning is emitted.
AC_MSG_WARN(could not find perl)
fi])dnl