From 6595f0f9614c410e73483527c56d4463e20075d8 Mon Sep 17 00:00:00 2001 From: "Gergely POLONKAI (W00d5t0ck)" Date: Fri, 6 Sep 2013 16:22:16 +0200 Subject: [PATCH] Created geonames_process.pl Perl script to download and process geonames.org data It is *NOT* ready for distribution yet! The script must be finished with downloading real data, or separate rules must be added to create (download) dependency files like timeZones.txt, allCountries.zip and countryInfo.txt. --- .gitignore | 7 ++- configure.ac | 5 ++ data/geonames/Makefile.am | 19 ++++++++ data/geonames/geonames_process.pl | 59 +++++++++++++++++++++++ m4/ax_prog_perl_modules.m4 | 77 +++++++++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 data/geonames/Makefile.am create mode 100644 data/geonames/geonames_process.pl create mode 100644 m4/ax_prog_perl_modules.m4 diff --git a/.gitignore b/.gitignore index c7811a4..ab14003 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,8 @@ Makefile.in /src/astrognome /ltmain.sh /autom4te.cache/ -/m4/ +/m4/* +!m4/ax_prog_perl_modules.m4 /missing /stamp-h1 /config.guess @@ -30,6 +31,10 @@ Makefile.in /compile /tags +# Geonames related things +/data/geonames/*.txt +/data/geonames/geodata.xml + # Translation related files /po/POTFILES /po/Makevars.template diff --git a/configure.ac b/configure.ac index 86249f8..34bedfa 100644 --- a/configure.ac +++ b/configure.ac @@ -12,6 +12,10 @@ AC_PROG_CC AM_PROG_CC_C_O AM_PROG_AR AC_PROG_LIBTOOL +AC_PATH_PROGS(PERL, [perl5 perl]) +have_geonames_perl_modules=no +AX_PROG_PERL_MODULES([XML::Writer IO::File], [have_geonames_perl_modules=yes], AC_MSG_WARN([XML::Writer and IO::File perl modules are required if you want to regenerate geodata.xml!])) +AC_SUBST([have_geonames_perl_modules]) IT_PROG_INTLTOOL([0.35.0]) GETTEXT_PACKAGE=astrognome AC_SUBST(GETTEXT_PACKAGE) @@ -48,6 +52,7 @@ AC_CONFIG_FILES([ Makefile src/Makefile po/Makefile.in + data/geonames/Makefile docs/reference/astrognome/Makefile ]) 
AC_OUTPUT
diff --git a/data/geonames/Makefile.am b/data/geonames/Makefile.am
new file mode 100644
index 0000000..0941a1a
--- /dev/null
+++ b/data/geonames/Makefile.am
@@ -0,0 +1,24 @@
+geodatadir = $(pkgdatadir)
+geodata_DATA = geodata.xml
+
+EXTRA_DIST = geodata.xml geonames_process.pl
+
+MAINTAINERCLEANFILES = geodata.xml
+
+# Regenerate geodata.xml with geonames_process.pl. Every failure branch exits
+# with a non-zero status, so make stops here instead of reporting success
+# without having produced the file.
+geodata.xml:
+	$(AM_V_GEN) if test -x "$(PERL)"; then \
+		if test "x$(have_geonames_perl_modules)" = "xyes" || test "x$(I_HAVE_PERL_MODULES)" = "xyes"; then \
+			$(PERL) geonames_process.pl; \
+		else \
+			echo "XML::Writer and IO::File perl modules are required to process geonames data." >&2; \
+			echo "configure reported they are not installed. If you are sure they are," >&2; \
+			echo "set the I_HAVE_PERL_MODULES environment variable to yes" >&2; \
+			exit 1; \
+		fi; \
+	else \
+		echo "perl5 is required to create geodata.xml!" >&2; \
+		exit 1; \
+	fi
diff --git a/data/geonames/geonames_process.pl b/data/geonames/geonames_process.pl
new file mode 100644
index 0000000..da1262a
--- /dev/null
+++ b/data/geonames/geonames_process.pl
@@ -0,0 +1,108 @@
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+use IO::File;
+use XML::Writer;
+
+# TODO: Download http://download.geonames.org/export/dump/countryInfo.txt
+# TODO: Download http://download.geonames.org/export/dump/timeZones.txt
+# TODO: Download http://download.geonames.org/export/dump/allCountries.zip
+# TODO: Unzip allCountries.zip
+
+# Open one of the tab separated input files for reading. A missing input is
+# fatal: without it the script cannot produce meaningful output.
+sub open_dump {
+    my ($path) = @_;
+
+    open(my $fh, '<', $path) or die "Cannot open $path: $!\n";
+
+    return $fh;
+}
+
+# Split one input line into its tab separated fields. The -1 limit keeps
+# trailing empty fields as empty strings instead of dropping them.
+sub fields_of {
+    my ($line) = @_;
+
+    chomp $line;
+
+    return split(/\t/, $line, -1);
+}
+
+# Map an undefined value to an empty string, so a failed lookup becomes an
+# empty XML attribute instead of an "uninitialized value" warning.
+sub or_empty {
+    my ($value) = @_;
+
+    return defined $value ? $value : '';
+}
+
+my %time_zones = ();
+my %countries = ();
+
+# Index the January and July GMT offsets by "<country code>_<time zone ID>".
+my $timezones_fh = open_dump('timeZones.txt');
+while (my $line = <$timezones_fh>) {
+    my ($country_code, $timezone_id, $gmt_offset_january, $gmt_offset_july) = fields_of($line);
+
+    # Skip blank and truncated lines
+    next unless defined $gmt_offset_july;
+
+    $time_zones{$country_code . '_' . $timezone_id} = {offset => $gmt_offset_january, dst_offset => $gmt_offset_july};
+}
+close($timezones_fh);
+
+# Map country codes (first column) to country names (fifth column).
+my $countries_fh = open_dump('countryInfo.txt');
+while (my $line = <$countries_fh>) {
+    my ($country_code, $name) = (fields_of($line))[0, 4];
+
+    # Only lines that start with a two letter upper case code are stored
+    if (defined $country_code && $country_code =~ /^[A-Z]{2}$/) {
+        $countries{$country_code} = $name;
+    }
+}
+close($countries_fh);
+
+# TODO: process all files, not just HU.txt!
+# Open the input before creating the output, so a missing input does not
+# leave a truncated geodata.xml behind.
+my $geonames_fh = open_dump('HU.txt');
+
+my $xml_file = IO::File->new('geodata.xml', 'w')
+    or die "Cannot create geodata.xml: $!\n";
+my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0);
+
+$writer->xmlDecl('utf-8');
+$writer->startTag('geodata');
+
+while (my $line = <$geonames_fh>) {
+    # Columns: 1 name, 4 latitude, 5 longitude, 6 feature class,
+    # 7 feature code, 8 country code, 15 elevation, 17 time zone ID
+    my ($name, $latitude, $longitude, $feature_class, $feature_code, $country_code, $elevation, $timezone) = (fields_of($line))[1, 4 .. 8, 15, 17];
+
+    # Skip truncated lines, then keep only feature class P with code PPL
+    next unless defined $timezone;
+    next unless ($feature_class eq 'P') && ($feature_code eq 'PPL');
+
+    # Fall back to an empty record when the time zone is unknown
+    my $zone = $time_zones{$country_code . '_' . $timezone} || {};
+
+    $writer->emptyTag('place',
+                      'name' => $name,
+                      'latitude' => $latitude,
+                      'longitude' => $longitude,
+                      'elevation' => $elevation,
+                      'country' => or_empty($countries{$country_code}),
+                      'time_offset' => or_empty($zone->{offset}),
+                      'time_offset_dst' => or_empty($zone->{dst_offset})
+                  );
+}
+close($geonames_fh);
+
+$writer->endTag('geodata');
+
+# Let XML::Writer verify that the document is complete, then make sure that
+# everything has really been written out.
+$writer->end();
+$xml_file->close() or die "Cannot write geodata.xml: $!\n";
diff --git a/m4/ax_prog_perl_modules.m4 b/m4/ax_prog_perl_modules.m4
new file mode 100644
index 0000000..11a326c
--- /dev/null
+++ b/m4/ax_prog_perl_modules.m4
@@ -0,0 +1,77 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_prog_perl_modules.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PROG_PERL_MODULES([MODULES], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+#
+# DESCRIPTION
+#
+#   Checks to see if the given perl modules are available. If true the shell
+#   commands in ACTION-IF-TRUE are executed. If not the shell commands in
+#   ACTION-IF-FALSE are run. Note if $PERL is not set (for example by
+#   calling AC_CHECK_PROG, or AC_PATH_PROG), AC_CHECK_PROG(PERL, perl, perl)
+#   will be run.
+#
+#   MODULES is a space separated list of module names. To check for a
+#   minimum version of a module, append the version number to the module
+#   name, separated by an equals sign.
+#
+#   Example:
+#
+#     AX_PROG_PERL_MODULES( Text::Wrap Net::LDAP=1.0.3, ,
+#                           AC_MSG_WARN(Need some Perl modules)
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Dean Povey
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 7
+
+AU_ALIAS([AC_PROG_PERL_MODULES], [AX_PROG_PERL_MODULES])
+AC_DEFUN([AX_PROG_PERL_MODULES],[dnl
+
+m4_define([ax_perl_modules])
+m4_foreach([ax_perl_module], m4_split(m4_normalize([$1])),
+          [
+           m4_append([ax_perl_modules],
+                     [']m4_bpatsubst(ax_perl_module,=,[ ])[' ])
+          ])
+
+# Make sure we have perl: search for it unless the caller already set PERL
+if test -z "$PERL"; then
+AC_CHECK_PROG(PERL,perl,perl)
+fi
+
+if test "x$PERL" != x; then
+  ax_perl_modules_failed=0
+  for ax_perl_module in ax_perl_modules; do
+    AC_MSG_CHECKING(for perl module $ax_perl_module)
+
+    # Try to load the module. Would be nice to log result here, but can't rely on autoconf internals
+    $PERL -e "use $ax_perl_module; exit" > /dev/null 2>&1
+    if test $? -ne 0; then
+      AC_MSG_RESULT(no);
+      ax_perl_modules_failed=1
+   else
+      AC_MSG_RESULT(ok);
+    fi
+  done
+
+  # Run the optional shell commands: the second argument when every module loaded, the third otherwise
+  if test "$ax_perl_modules_failed" = 0; then
+    :
+    $2
+  else
+    :
+    $3
+  fi
+else
+  AC_MSG_WARN(could not find perl)
+fi])dnl