Recursive SQL statement (PostgreSQL 9.1.4)
It's a big task, split it up to make it more manageable. I would put that in a plpgsql function with RETURN TABLE
:
Create a temporary table for your "Calculation Process" matrix using a crosstab queryYou need the
tablefunc
module installed for that. Run (once per database):CREATE EXTENSION tablefunc;
Update the temp table field by field.
- Return table.
The following demo is fully functional and tested with PostgreSQL 9.1.4. Building on the table definition provided in the question:
-- DROP FUNCTION f_forcast();CREATE OR REPLACE FUNCTION f_forcast() RETURNS TABLE ( granularity date ,entry_accounts numeric ,entry_amount numeric ,d1 numeric ,d2 numeric ,d3 numeric ,d4 numeric ,d5 numeric ,d6 numeric) AS$BODY$BEGIN--== Create temp table with result of crosstab() ==--CREATE TEMP TABLE matrix ON COMMIT DROP ASSELECT *FROM crosstab ( 'SELECT granularity, entry_accounts, entry_amount ,distance_in_months, recovery_amount FROM vintage_data ORDER BY 1, 2', 'SELECT DISTINCT distance_in_months FROM vintage_data ORDER BY 1')AS tbl ( granularity date ,entry_accounts numeric ,entry_amount numeric ,d1 numeric ,d2 numeric ,d3 numeric ,d4 numeric ,d5 numeric ,d6 numeric );ANALYZE matrix; -- update statistics to help calculations--== Calculations ==---- I implemented the first calculation for X1 and leave the rest to you.-- Can probably be generalized in a loop or even a single statement.UPDATE matrix mSET d4 = ( SELECT (sum(x.d1) + sum(x.d2) + sum(x.d3) + sum(x.d4)) /(sum(x.d1) + sum(x.d2) + sum(x.d3)) - 1 -- removed redundant sum(entry_amount) from equation FROM ( SELECT * FROM matrix a WHERE a.granularity < m.granularity ORDER BY a.granularity DESC LIMIT 3 ) x ) * (m.d1 + m.d2 + m.d3)WHERE m.granularity = '2012-04-30';--- Next update X2 ..--== Return results ==--RETURN QUERYTABLE matrixORDER BY 1;END;$BODY$ LANGUAGE plpgsql;
Call:
SELECT * FROM f_forcast();
I have simplified quite a bit, removing some redundant steps in the calculation.
The solution employs a variety of advanced techniques. You need to know your way around PostgreSQL to work with this.
-- -- rank the dates. -- , also fetch the the fields that seem to depend on them. -- (this should have been done in the data model) --CREATE VIEW date_rank AS ( SELECT uniq.granularity,uniq.entry_accounts,uniq.entry_amount , row_number() OVER(ORDER BY 0) AS zrank FROM ( SELECT DISTINCT granularity, entry_accounts, entry_amount FROM vintage_data) AS uniq );-- SELECT * FROM date_rank ORDER BY granularity; -- -- transform to an x*y matrix, avoiding the date key and the slack columns --CREATE VIEW matrix_data AS ( SELECT vd.distance_in_months AS xxx , dr.zrank AS yyy , vd.recovery_amount AS val FROM vintage_data vd JOIN date_rank dr ON dr.granularity = vd.granularity );-- SELECT * FROM matrix_data; -- -- In order to perform the reversed transformation: -- make the view insertable. -- INSERTS to matrix_data will percolate back into the vintage_data table -- (don't try this at home ;-) --CREATE RULE magic_by_the_plasser AS ON INSERT TO matrix_data DO INSTEAD ( INSERT INTO vintage_data (granularity,distance_in_months,entry_accounts,entry_amount,recovery_amount) SELECT dr.granularity, new.xxx, dr.entry_accounts, dr.entry_amount, new.val FROM date_rank dr WHERE dr.zrank = new.yyy ; ); -- -- This CTE creates the weights for a Pascal-triangle ---- EXPLAIN -- ANALYZEWITH RECURSIVE pascal AS ( WITH empty AS ( -- -- "cart" is a cathesian product of X*Y -- its function is similar to a "calendar table": -- filling in the missing X,Y pairs, making the matrix "square". -- (well: rectangular, but in the given case nX==nY) -- WITH cart AS ( WITH mmx AS ( WITH xx AS ( SELECT MIN(xxx) AS x0 , MAX(xxx) AS x1 FROM matrix_data) SELECT generate_series(xx.x0,xx.x1) AS xxx FROM xx ) , mmy AS ( WITH yy AS ( SELECT MIN(yyy) AS y0 , MAX(yyy) AS y1 FROM matrix_data) SELECT generate_series(yy.y0,yy.y1) AS yyy FROM yy ) SELECT * FROM mmx JOIN mmy ON (1=1) -- Carthesian product here! ) -- -- The (x,y) pairs that are not present in the current matrix -- SELECT * FROM cart ca WHERE NOT EXISTS ( SELECT * FROM matrix_data nx WHERE nx.xxx = ca.xxx AND nx.yyy = ca.yyy ) ) SELECT md.yyy AS src_y , md.xxx AS src_x , md.yyy AS dst_y , md.xxx AS dst_x -- The filled-in matrix cells have weight 1 , 1::numeric AS weight FROM matrix_data md UNION ALL SELECT pa.src_y AS src_y , pa.src_x AS src_x , em.yyy AS dst_y , em.xxx AS dst_x -- the derived matrix cells inherit weight/2 from both their parents , (pa.weight/2) AS weight FROM pascal pa JOIN empty em ON ( em.yyy = pa.dst_y+1 AND em.xxx = pa.dst_x) OR ( em.yyy = pa.dst_y AND em.xxx = pa.dst_x+1 ) )INSERT INTO matrix_data(yyy,xxx,val)SELECT pa.dst_y,pa.dst_x ,SUM(ma.val*pa.weight)FROM pascal paJOIN matrix_data ma ON pa.src_y = ma.yyy AND pa.src_x = ma.xxx -- avoid the filled-in matrix cells (which map to themselves)WHERE NOT (pa.src_y = pa.dst_y AND pa.src_x = pa.dst_x)GROUP BY pa.dst_y,pa.dst_x ; -- -- This will also get rid of the matrix_data view and the rule. --DROP VIEW date_rank CASCADE;-- SELECT * FROM matrix_data ;SELECT * FROM vintage_data ORDER BY granularity, distance_in_months;
RESULT:
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "vintage_data_pkey" for table "vintage_data"CREATE TABLENOTICE: ALTER TABLE / ADD UNIQUE will create implicit index "mx_xy" for table "vintage_data"ALTER TABLEINSERT 0 21VACUUMCREATE VIEWCREATE VIEWCREATE RULEINSERT 0 15NOTICE: drop cascades to view matrix_dataDROP VIEW granularity | distance_in_months | entry_accounts | entry_amount | recovery_amount -------------+--------------------+----------------+--------------+--------------------------- 2012-01-31 | 1 | 200 | 100000 | 1000 2012-01-31 | 2 | 200 | 100000 | 2000 2012-01-31 | 3 | 200 | 100000 | 3000 2012-01-31 | 4 | 200 | 100000 | 3500 2012-01-31 | 5 | 200 | 100000 | 3400 2012-01-31 | 6 | 200 | 100000 | 3300 2012-02-28 | 1 | 250 | 150000 | 1200 2012-02-28 | 2 | 250 | 150000 | 1600 2012-02-28 | 3 | 250 | 150000 | 1800 2012-02-28 | 4 | 250 | 150000 | 1200 2012-02-28 | 5 | 250 | 150000 | 1600 2012-02-28 | 6 | 250 | 150000 | 2381.25000000000000000000 2012-03-31 | 1 | 200 | 90000 | 1300 2012-03-31 | 2 | 200 | 90000 | 1200 2012-03-31 | 3 | 200 | 90000 | 1400 2012-03-31 | 4 | 200 | 90000 | 1000 2012-03-31 | 5 | 200 | 90000 | 2200.00000000000000000000 2012-03-31 | 6 | 200 | 90000 | 2750.00000000000000000000 2012-04-30 | 1 | 300 | 180000 | 1600 2012-04-30 | 2 | 300 | 180000 | 1500 2012-04-30 | 3 | 300 | 180000 | 4000 2012-04-30 | 4 | 300 | 180000 | 2500.00000000000000000000 2012-04-30 | 5 | 300 | 180000 | 2350.00000000000000000000 2012-04-30 | 6 | 300 | 180000 | 2550.00000000000000000000 2012-05-31 | 1 | 400 | 225000 | 2200 2012-05-31 | 2 | 400 | 225000 | 6000 2012-05-31 | 3 | 400 | 225000 | 5000.00000000000000000000 2012-05-31 | 4 | 400 | 225000 | 3750.00000000000000000000 2012-05-31 | 5 | 400 | 225000 | 3050.00000000000000000000 2012-05-31 | 6 | 400 | 225000 | 2800.00000000000000000000 2012-06-30 | 1 | 100 | 60000 | 1000 2012-06-30 | 2 | 100 | 60000 | 3500.00000000000000000000 2012-06-30 | 3 | 100 | 60000 | 4250.00000000000000000000 2012-06-30 | 4 | 100 | 60000 | 4000.00000000000000000000 2012-06-30 | 5 | 100 | 60000 | 3525.00000000000000000000 2012-06-30 | 6 | 100 | 60000 | 3162.50000000000000000000(36 rows)