Created geonames_process.pl Perl script to download and process geonames.org data

It is *NOT* ready for distribution yet! Either the script must be finished
so that it downloads the real data itself, or separate rules must be added
to create (download) the dependency files such as timeZones.txt,
allCountries.zip and countryInfo.txt.
pull/18/head
Gergely Polonkai 10 years ago
parent ecfac69a67
commit 6595f0f961

7
.gitignore vendored

@ -15,7 +15,8 @@ Makefile.in
/src/astrognome
/ltmain.sh
/autom4te.cache/
/m4/
/m4/*
!m4/ax_prog_perl_modules.m4
/missing
/stamp-h1
/config.guess
@ -30,6 +31,10 @@ Makefile.in
/compile
/tags
# Geonames related things
/data/geonames/*.txt
/data/geonames/geodata.xml
# Translation related files
/po/POTFILES
/po/Makevars.template

@ -12,6 +12,10 @@ AC_PROG_CC
AM_PROG_CC_C_O
AM_PROG_AR
AC_PROG_LIBTOOL
dnl Perl and the XML::Writer / IO::File modules are only needed to regenerate
dnl data/geonames/geodata.xml, so a missing module is a warning, not an error.
dnl have_geonames_perl_modules is substituted into the Makefiles as yes or no.
AC_PATH_PROGS([PERL], [perl5 perl])
have_geonames_perl_modules=no
AX_PROG_PERL_MODULES([XML::Writer IO::File],
                     [have_geonames_perl_modules=yes],
                     [AC_MSG_WARN([XML::Writer and IO::File perl modules are required if you want to regenerate geodata.xml!])])
AC_SUBST([have_geonames_perl_modules])
IT_PROG_INTLTOOL([0.35.0])
GETTEXT_PACKAGE=astrognome
AC_SUBST(GETTEXT_PACKAGE)
@ -48,6 +52,7 @@ AC_CONFIG_FILES([
Makefile
src/Makefile
po/Makefile.in
data/geonames/Makefile
docs/reference/astrognome/Makefile
])
AC_OUTPUT

@ -0,0 +1,19 @@
# Install the generated place database into $(pkgdatadir).
geodatadir = $(pkgdatadir)
geodata_DATA = geodata.xml

# Ship both the generated database and the script that regenerates it, so
# geodata.xml can still be rebuilt after "make maintainer-clean".
EXTRA_DIST = geodata.xml geonames_process.pl

MAINTAINERCLEANFILES = geodata.xml

# Regenerate geodata.xml with geonames_process.pl.  This needs an executable
# perl and the XML::Writer and IO::File modules; configure records the module
# check in have_geonames_perl_modules, and I_HAVE_PERL_MODULES=yes overrides
# a negative result.  The rule fails when a prerequisite is missing, so make
# never treats an ungenerated geodata.xml as up to date.
geodata.xml:
	$(AM_V_GEN) if test -x "$(PERL)"; then \
		if test "x$(have_geonames_perl_modules)" = "xyes" || test "x$(I_HAVE_PERL_MODULES)" = "xyes"; then \
			$(PERL) $(srcdir)/geonames_process.pl; \
		else \
			echo "XML::Writer and IO::File perl modules are required to process geonames data." >&2; \
			echo "configure reported they are not installed. If you are sure they are," >&2; \
			echo "set the I_HAVE_PERL_MODULES environment variable to yes" >&2; \
			exit 1; \
		fi; \
	else \
		echo "perl5 is required to create geodata.xml!" >&2; \
		exit 1; \
	fi

@ -0,0 +1,59 @@
#! /usr/bin/perl -w
#
# geonames_process.pl - convert geonames.org dump files into geodata.xml.
#
# Input files, read from the current directory (all tab separated):
#   timeZones.txt   - country code, time zone id, January GMT offset,
#                     July GMT offset, raw GMT offset
#   countryInfo.txt - country code in the first column, country name in
#                     the fifth
#   HU.txt          - geonames place records
#
# Output: geodata.xml, a <geodata> root element holding one empty <place/>
# element for every record whose feature class is "P" and whose feature
# code is "PPL".

use strict;
use warnings;

use IO::File;
use XML::Writer;

# TODO: Download http://download.geonames.org/export/dump/countryInfo.txt
# TODO: Download http://download.geonames.org/export/dump/timeZones.txt
# TODO: Download http://download.geonames.org/export/dump/allCountries.zip
# TODO: Unzip allCountries.zip

my $TIME_ZONES_FILE = 'timeZones.txt';
my $COUNTRIES_FILE  = 'countryInfo.txt';
# TODO: process all files, not just HU.txt!
my $GEONAMES_FILE   = 'HU.txt';
my $OUTPUT_FILE     = 'geodata.xml';

# Open $path for reading and return the handle; dies if it cannot be opened.
sub open_input {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "Cannot open $path for reading: $!";

    return $fh;
}

# Strip the line terminator from $line and return its tab separated fields.
# The -1 limit keeps trailing empty fields, so they come back as empty
# strings instead of being dropped.
sub split_record {
    my ($line) = @_;

    $line =~ s/\r?\n\z//;

    return split(/\t/, $line, -1);
}

# Return $value, or the empty string when $value is undefined.
sub or_empty {
    my ($value) = @_;

    return defined($value) ? $value : '';
}

# Read the time zone table from $path.  Returns a hash reference keyed by
# "<country code>_<time zone id>"; each value is a hash reference with the
# keys "offset" (January GMT offset) and "dst_offset" (July GMT offset).
sub read_time_zones {
    my ($path) = @_;
    my %time_zones;

    my $fh = open_input($path);
    while (my $line = <$fh>) {
        my ($country_code, $timezone_id, $gmt_offset_january, $gmt_offset_july)
            = split_record($line);

        # Blank or truncated lines do not describe a time zone.
        next unless defined($gmt_offset_july);

        $time_zones{$country_code . '_' . $timezone_id} = {
            offset     => $gmt_offset_january,
            dst_offset => $gmt_offset_july,
        };
    }
    close($fh) or die "Cannot close $path: $!";

    return \%time_zones;
}

# Read the country table from $path.  Returns a hash reference mapping
# two-letter upper case country codes to country names.
sub read_countries {
    my ($path) = @_;
    my %countries;

    my $fh = open_input($path);
    while (my $line = <$fh>) {
        my ($country_code, $name) = (split_record($line))[0, 4];

        next unless defined($country_code) && defined($name);

        # Anything that is not a two-letter code (such as the "#" comment
        # lines of the file) is skipped.
        if ($country_code =~ /^[A-Z]{2}$/) {
            $countries{$country_code} = $name;
        }
    }
    close($fh) or die "Cannot close $path: $!";

    return \%countries;
}

my $time_zones = read_time_zones($TIME_ZONES_FILE);
my $countries  = read_countries($COUNTRIES_FILE);

# Open the place list before creating the output, so that a missing input
# does not leave a truncated geodata.xml behind.
my $geonames = open_input($GEONAMES_FILE);

my $xml_file = IO::File->new($OUTPUT_FILE, 'w')
    or die "Cannot open $OUTPUT_FILE for writing: $!";
my $writer = XML::Writer->new(OUTPUT => $xml_file, NEWLINES => 0);

$writer->xmlDecl('utf-8');
$writer->startTag('geodata');

while (my $line = <$geonames>) {
    # Columns: 1 name, 4 latitude, 5 longitude, 6 feature class,
    # 7 feature code, 8 country code, 15 elevation, 17 time zone.
    my ($name, $latitude, $longitude, $feature_class, $feature_code,
        $country_code, $elevation, $timezone)
        = (split_record($line))[1, 4, 5, 6, 7, 8, 15, 17];

    next unless (defined($feature_code)
        && ($feature_class eq 'P')
        && ($feature_code eq 'PPL'));

    # Unknown countries and time zones yield empty attributes rather than
    # "uninitialized value" warnings.
    my $zone;
    if (defined($country_code) && defined($timezone)) {
        $zone = $time_zones->{$country_code . '_' . $timezone};
    }
    $zone = {} unless defined($zone);

    my $country;
    $country = $countries->{$country_code} if defined($country_code);

    $writer->emptyTag('place',
        'name'            => or_empty($name),
        'latitude'        => or_empty($latitude),
        'longitude'       => or_empty($longitude),
        'elevation'       => or_empty($elevation),
        'country'         => or_empty($country),
        'time_offset'     => or_empty($zone->{offset}),
        'time_offset_dst' => or_empty($zone->{dst_offset})
    );
}
close($geonames) or die "Cannot close $GEONAMES_FILE: $!";

$writer->endTag('geodata');

# Let the writer verify the document is complete, then flush it to disk;
# buffered write errors only surface when the handle is closed.
$writer->end();
$xml_file->close() or die "Cannot close $OUTPUT_FILE: $!";

@ -0,0 +1,77 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_perl_modules.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_PERL_MODULES([MODULES], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
#
# DESCRIPTION
#
# Checks to see if the given perl modules are available. If true the shell
# commands in ACTION-IF-TRUE are executed. If not the shell commands in
# ACTION-IF-FALSE are run. Note if $PERL is not set (for example by
# calling AC_CHECK_PROG, or AC_PATH_PROG), AC_CHECK_PROG(PERL, perl, perl)
# will be run.
#
# MODULES is a space separated list of module names. To check for a
# minimum version of a module, append the version number to the module
# name, separated by an equals sign.
#
# Example:
#
# AX_PROG_PERL_MODULES( Text::Wrap Net::LDAP=1.0.3, ,
# AC_MSG_WARN(Need some Perl modules)
#
# LICENSE
#
# Copyright (c) 2009 Dean Povey <povey@wedgetail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
# The obsolete AC_PROG_PERL_MODULES name is kept as an alias of the AX_ macro.
AU_ALIAS([AC_PROG_PERL_MODULES], [AX_PROG_PERL_MODULES])
# Implementation notes:
#
#  - The MODULES argument is normalized, split on whitespace and rebuilt as
#    a list of single-quoted shell words in which each "=" is replaced by a
#    space, so Net::LDAP=1.0.3 is tested as: use Net::LDAP 1.0.3
#  - Each module is tested by running $PERL -e "use MODULE; exit" with its
#    output discarded; a non-zero exit status marks the whole check as
#    failed, but the remaining modules are still checked and reported.
#  - ACTION-IF-TRUE runs only if every module loaded, ACTION-IF-FALSE if at
#    least one did not.
#  - If no perl is found, only a warning is printed and neither action runs.
AC_DEFUN([AX_PROG_PERL_MODULES],[dnl
m4_define([ax_perl_modules])
m4_foreach([ax_perl_module], m4_split(m4_normalize([$1])),
[
m4_append([ax_perl_modules],
[']m4_bpatsubst(ax_perl_module,=,[ ])[' ])
])
# Make sure we have perl
if test -z "$PERL"; then
AC_CHECK_PROG(PERL,perl,perl)
fi
if test "x$PERL" != x; then
ax_perl_modules_failed=0
for ax_perl_module in ax_perl_modules; do
AC_MSG_CHECKING(for perl module $ax_perl_module)
# Would be nice to log result here, but can't rely on autoconf internals
$PERL -e "use $ax_perl_module; exit" > /dev/null 2>&1
if test $? -ne 0; then
AC_MSG_RESULT(no);
ax_perl_modules_failed=1
else
AC_MSG_RESULT(ok);
fi
done
# Run optional shell commands
if test "$ax_perl_modules_failed" = 0; then
:
$2
else
:
$3
fi
else
AC_MSG_WARN(could not find perl)
fi])dnl
Loading…
Cancel
Save