PostgreSQL. ltree. Заливка базы.

Nov 23, 2014 16:28

А стало быть решил я немного развлечься с ltree. А т.к. тупой робот, то нечего умнее и оригинальнее КЛАДР-а для иерархий я не придумал. Ну вот так. А стало быть первоначальная заливка данных в этом посте.
Скопитырено отседова. Ну и, естественно, немного подредактировано.
Собственно, сам скрипт.
[kladr.sh - скачивание и первоначальная заливка данных в БД]

#!/bin/bash
# Get Kladr and upload it to PostgreSQL

NO7ZIP=65
NOARJ=66
NOXBASE=67
NOPGPASS=68
NODBACCESS=69
NODB=70
NOLTREE=71
NOPOSTGRES=72

OLDPATH=$(pwd)
Z7=$(which 7z)
ARJ=$(which arj)
DBFDUMP=$(which dbfdump)

[ -z $Z7 ] && {
echo "Install 7zip before actions"
exit $NO7ZIP
}

[ -z $ARJ ] && {
echo "Install arj before actions"
exit $NOARJ
}

[ -z $DBFDUMP ] && {
echo "Install DBD-XBase before actions"
exit $NOXBASE
}

[ -z `find ~/ -name ".pgpass" -perm 600` ] && {
echo "You need ~/.pgpass with mode 600."
echo "See documentation about this file in postgresql documentation."
exit $NODBACCESS
}

[ -z `which psql` ] && {
echo "You need install a postgresql client"
exit $NOPOSTGRES
}

PGCMD="psql -h127.0.0.1 -w -Ukladr kladr"
$PGCMD -tc "select 'OK' as OK;" | sed -e '/^$/d' -e 's/\s\+//' || {
echo "You need DB kladr whith owner kladr"
echo "Access on DB kladr for user kladr should be in ~/.pgpass"
exit $NODB
}

$PGCMD -tc "\dT"|cut -d' ' -f4|grep ltree > /dev/null 2>&1 || {
$PGCMD -c "CREATE EXTENSION ltree;" || {
echo "You need ltree extension"
echo "Check documentation for set up it"
exit $NOLTREE
}
}

URLS="http://www.gnivc.ru/html/gnivcsoft/KLADR/4/osob4.0.doc
http://www.gnivc.ru/html/gnivcsoft/KLADR/Read_me.doc
http://www.gnivc.ru/html/gnivcsoft/KLADR/DOCUM.ARJ
http://www.gnivc.ru/html/gnivcsoft/KLADR/licenziya_kladr.doc
http://www.gnivc.ru/html/gnivcsoft/KLADR/Base.7z"

[ -d /var/tmp/kladr ] && rm -rf /var/tmp/kladr/* || mkdir /var/tmp/kladr
cd /var/tmp/kladr

for url in $URLS; do
wget $url
done

[ -d tmp ] || mkdir tmp
cd tmp
7z e ../Base.7z

$PGCMD -f ~/prj/kladr/drop-tables.sql

for fold in *.DBF; do
fnew=${fold,,}
TABLE=${fnew/\.dbf/}
fset=${fnew/dbf/set}
fsql=${fset/\.set/\.sql}
fcsv=${fset/\.set/\.csv}
dbfdump ${fold} |iconv -fcp866 -tutf8 > ${fcsv}
dbfdump --info ${fold}|sed -e '1,/Field info:/d'|sed -e '1d' > ${fset}
echo "CREATE TABLE $TABLE (" >> $fsql
tac $fset | awk '{print " "$2,"varchar("$4"),"}'|sed -e '1s/,$//'|tac >> $fsql
echo ");" >> $fsql
$PGCMD < $fsql
$PGCMD -c "\copy $TABLE FROM '$fcsv' DELIMITER ':'"
case $TABLE in
"kladr"|"street"|"doma")
echo "$TABLE - first"
$PGCMD -c "alter table $TABLE add column hierarch ltree;"
$PGCMD -c "update $TABLE set hierarch = ltree(case
when substring(code from 3 for 9) = '000000000' then
substring(code from 1 for 2)
when substring(code from 6 for 6) = '000000' then
substring(code from 1 for 2) || '.' || substring(code from 3 for 3)
when substring(code from 9 for 3) = '000' then
substring(code from 1 for 2) || '.' || substring(code from 3 for 3) || '.' || substring(code from 6 for 3)
else substring(code from 1 for 2) || '.' || substring(code from 3 for 3) || '.' || substring(code from 6 for 3) || '.' || substring(code from 9 for 3) end);"
$PGCMD -c "alter table $TABLE add column level int2;"
;;&
"street"|"doma")
echo "$TABLE - 2"
$PGCMD -c "alter table $TABLE add column streetcode char(4);"
$PGCMD -c "update $TABLE set streetcode=substring(code from 12 for 4);"
;;&
"doma")
echo "$TABLE - 3"
$PGCMD -c "alter table $TABLE add column domcode char(4);"
$PGCMD -c "update $TABLE set domcode=substring(code from 16 for 4);"
;;
esac
done

$PGCMD -f ~/prj/kladr/alter-tables.sql

cd $OLDPATH
exit 0


Первоначальная зачистка.
[drop-tables.sql - первоначальная зачистка данных]
DROP TABLE IF EXISTS flat;
DROP TABLE IF EXISTS doma_hist;
DROP TABLE IF EXISTS street_hist;
DROP TABLE IF EXISTS kladr_hist;
-- DROP TABLE IF EXISTS kladr_hierarch;
DROP TABLE IF EXISTS doma;
DROP TABLE IF EXISTS street;
DROP TABLE IF EXISTS kladr;
DROP TABLE IF EXISTS altnames;
DROP TABLE IF EXISTS socrbase;


И манипуляции с базой: создание нужных индексов, таблиц и прочих функций/процедур.
[alter-tables.sql - более следующая обработка данных]
select 'alter table altnames' as alteraltnames;
alter table altnames alter column level type int2 using (level::int2);

select 'alter table socrbase' as altersocrbase;
alter table socrbase alter column kod_t_st type char(3);
alter table socrbase alter column level type int2 using (level::int2);
alter table socrbase add constraint socrbase_scname_level_pk primary key (scname,level);

select 'alter table kladr' as alterkladr;
alter table kladr alter column code type char(13);
alter table kladr add column parent_code char(13);
alter table kladr add constraint socrname_fk foreign key (socr,level) references socrbase (scname,level);
update kladr set level=nlevel(hierarch);

select 'create table kladr_hist' as createkladrhist;
create table kladr_hist (like kladr);
insert into kladr_hist (select * from kladr where substring(code from 12 for 2) != '00');
delete from kladr where substring(code from 12 for 2) != '00';
alter table kladr_hist add constraint socrname_fk foreign key (socr,level) references socrbase (scname,level);

select 'alter table kladr' as alterkladr;
alter table kladr add constraint kladr_hierarch_pk primary key (hierarch);
create index kladr_hierarch_gist on kladr USING gist (hierarch);
create unique index kladr_code_idx on kladr (substring(code from 1 for 11));

select 'create table street_hist' as createstreethist;
create table street_hist (like street);
insert into street_hist (
select s.name,s.socr,s.code,s.index,s.gninmb,s.uno,s.ocatd,s.hierarch,s.level,s.streetcode
from street as s left outer join kladr as k using (hierarch)
where k.hierarch IS NULL
);
delete from street where code in (
select s.code from street as s left outer join kladr as k using(hierarch) where k.hierarch IS NULL);
insert into street_hist (
select * from street where substring(code from 16 for 2)!='00');
delete from street where substring(code from 16 for 2)!='00';
update street_hist set level='5';

select 'alter table street' as alterstreet;
alter table street alter column code type char(17);
update street set level='5';
alter table street add constraint socrname_fk foreign key (socr,level) references socrbase (scname,level);
alter table street add constraint street_hierarch_fk foreign key (hierarch) references kladr (hierarch);
alter table street add constraint street_hierarch_sc_pk primary key (hierarch, streetcode);
create index street_hierarch_gist on street USING gist (hierarch);
create index street_code_idx on street (substring(code from 1 for 15));

select 'alter table doma' as alterdoma;
alter table doma alter column code type char(19);
update doma set level='6';

select 'create table doma_hist' as createdomahist;
create table doma_hist (like doma);
insert into doma_hist
select d.name,d.korp,d.socr,d.code,d.index,d.gninmb,d.uno,d.ocatd,d.hierarch,d.level,d.streetcode,d.domcode
from doma as d left outer join kladr as k using (hierarch)
where k.hierarch IS NULL;
delete from doma where code in (
select d.code
from doma as d left outer join kladr as k using (hierarch)
where k.hierarch IS NULL
);
insert into doma_hist
select d.name,d.korp,d.socr,d.code,d.index,d.gninmb,d.uno,d.ocatd,d.hierarch,d.level,d.streetcode,d.domcode
from doma as d left outer join street as s using(hierarch,streetcode)
where s.code is null;
delete from doma where code in (
select d.code
from doma as d left outer join street as s using(hierarch,streetcode)
where s.code is null
);
update doma_hist set level='6';

select 'alter table doma' as alterdoma;
insert into street (name,socr,code,hierarch,streetcode,level)
select distinct k.name,k.socr, substring(d.code from 1 for 15) || '00' as code, k.hierarch, d.streetcode, k.level
from doma as d inner join kladr as k using (hierarch)
where d.streetcode='0000';
alter table doma add constraint socrname_fk foreign key (socr,level) references socrbase (scname,level);
alter table doma add constraint doma_hierarch_sc_fk foreign key (hierarch, streetcode) references street (hierarch,streetcode);
alter table doma add constraint doma_hierarch_sc_dc_pk primary key (hierarch, streetcode, domcode);
create index doma_hierarch_gist on doma USING gist (hierarch);
create unique index doma_code_idx on doma (substring(code from 1 for 19));

-- Fill parent_code on kladr
-- get parent_code for hierarch
create or replace function get_parent_code(hrrh ltree, lvl int2, OUT pcode char(13)) as $$
declare phrrh ltree;
begin
case lvl
when 2 then
phrrh := subpath(hrrh,0,1);
when 3 then
if subpath(hrrh,1,1) = '000' then
phrrh := subpath(hrrh,0,1);
else
phrrh := subpath(hrrh,0,2);
end if;
when 4 then
if subpath(hrrh,1,2) = '000.000' then
phrrh := subpath(hrrh,0,1);
elsif subpath(hrrh,2,1) = '000' then
phrrh := subpath(hrrh,0,2);
else
phrrh := subpath(hrrh,0,3);
end if;
else
phrrh := ''::ltree;
end case;
if lvl = 1 then
pcode := '';
else
execute 'select code from kladr where hierarch = $1' into pcode using phrrh;
end if;
end;
$$ LANGUAGE plpgsql;

create or replace function initkh() returns bigint as $$
declare
rowcount bigint;
pcode char(13);
lvl int2;
hrrh ltree;
phrrh ltree;
begin
rowcount := 0;
for hrrh,lvl in select hierarch,level from kladr where level > 1 loop
case lvl
when 2 then
phrrh := subpath(hrrh,0,1);
when 3 then
if subpath(hrrh,1,1) = '000' then
phrrh := subpath(hrrh,0,1);
else
phrrh := subpath(hrrh,0,2);
end if;
when 4 then
if subpath(hrrh,1,2) = '000.000' then
phrrh := subpath(hrrh,0,1);
elsif subpath(hrrh,2,1) = '000' then
phrrh := subpath(hrrh,0,2);
else
phrrh := subpath(hrrh,0,3);
end if;
end case;
rowcount := rowcount+1;
execute 'select code from kladr where hierarch = $1' into pcode using phrrh;
execute 'update kladr set parent_code = $1 where hierarch = $2' using pcode,hrrh;
end loop;
return rowcount;
end;
$$ LANGUAGE plpgsql;
-- It is no used
-- select 'create table kladr_hierarch' as createkladrhierarch;
-- create table kladr_hierarch (
-- parent_code char[13] NOT NULL,
-- parent_hierarch ltree NOT NULL,
-- obj_code char[13] NOT NULL,
-- obj_hierarch ltree NOT NULL,
-- obj_level int2 NOT NULL
-- );
-- insert into kladr_hierarch select '','',code,hierarch,level from kladr where level = 1;

-- Fisrt edition of filling of table kladr_hierarch
-- create or replace function inithierarch() returns bigint as $$
-- declare
-- pcode char(13);
-- phierarch ltree;
-- ocode char(13);
-- ohierarch ltree;
-- olevel int2;
-- rowcount bigint;
-- workrow record;
-- initstring cursor for select code,hierarch,level from kladr where level != 1;
-- begin
-- rowcount := 0;
-- for workrow in initstring loop
-- case workrow.level
-- when 2 then
-- phierarch := subpath(workrow.hierarch,0,1);
-- when 3 then
-- if subpath(workrow.hierarch,1,1) = '000' then
-- phierarch := subpath(workrow.hierarch,0,1);
-- else
-- phierarch := subpath(workrow.hierarch,0,2);
-- end if;
-- when 4 then
-- if subpath(workrow.hierarch,1,2) = '000.000' then
-- phierarch := subpath(workrow.hierarch,0,1);
-- elsif subpath(workrow.hierarch,2,1) = '000' then
-- phierarch := subpath(workrow.hierarch,0,2);
-- else
-- phierarch := subpath(workrow.hierarch,0,3);
-- end if
-- end case;
-- rowcount := rowcount + 1;
-- execute select code from kladr where hierarch = phierarch into pcode;
-- ocode := workrow.code;
-- ohierarch := workrow.hierarch;
-- olevel := workrow.level;
-- insert into kladr_hierarch (parent_code,parent_hierarch,obj_code,obj_hierarch,obj_level)
-- values (pcode,phierarch,ocode,ohierarch,olevel);
-- end loop;
-- return rowcount;
-- end;
-- $$ LANGUAGE plpgsql;

-- Second edition of filling of table kladr_hierarch
-- create or replace function get_parent_data (hrrh ltree, lvl int2, OUT phrrh ltree, OUT pcode char(13)) as $$
-- begin
-- case lvl
-- when 2 then
-- phrrh := subpath(hrrh,0,1);
-- when 3 then
-- if subpath(hrrh,1,1) = '000' then
-- phrrh := subpath(hrrh,0,1);
-- else
-- phrrh := subpath(hrrh,0,2);
-- end if;
-- when 4 then
-- if subpath(hrrh,1,2) = '000.000' then
-- phrrh := subpath(hrrh,0,1);
-- elsif subpath(hrrh,2,1) = '000' then
-- phrrh := subpath(hrrh,0,2);
-- else
-- phrrh := subpath(hrrh,0,3);
-- end if;
-- end case;
-- execute 'select code from kladr where hierarch = $1' into pcode using phrrh;
-- end;
-- $$ LANGUAGE plpgsql;

-- create or replace function initkh() returns bigint as $$
-- declare
-- phrrh ltree;
-- ohrrh ltree;
-- pcode char(13);
-- ocode char(13);
-- olevel int2;
-- rowcount int8;
-- begin
-- rowcount := 0;
-- for ocode,ohrrh,olevel in select code,hierarch,level from kladr where level != 1 loop
-- execute 'select * from get_parent_data($1,$2)' into (phrrh,pcode) using ohrrh,olevel;
-- insert into kladr_hierarch (parent_code,parent_hierarch,obj_code,obj_hierarch,obj_level)
-- values (pcode,phrrh,ocode,ohrrh,olevel);
-- rowcount := rowcount + 1;
-- end loop;
-- return rowcount;
-- end
-- $$ language plpgsql;

-- there is a function regexp_replace...



На самом деле один нужный индекс здесь не создается по причине ручного заполнения поля parent_code в таблице kladr. Но это в следующем номере программы. :)

PostgreSQL. ltree. Заливка базы.
PostgreSQL. ltree. Инициализация поля parent_code.
PostgreSQL. ltree. Выборки потомков.

иерархии, postgresql, postgres, ltree

Previous post Next post
Up