Hive: left join on nearest date (tag: hadoop)

HIVE left join on nearest date


-- For every row of B, attach the A row (matched on A_id) that has the latest
-- A_date not after that row's B_date. B rows without such an A row are kept
-- with NULL A_date / changed_col (left join).
--
-- The ranking and the final join are done per B row (B_id, B_date, A_id),
-- not per B_id alone: B_id can repeat with different B_date values, and
-- ranking per B_id only would give every row of a B_id the A row nearest to
-- that B_id's latest B_date -- possibly an A_date later than an earlier
-- row's own B_date.
--
-- NOTE(review): if A can hold several rows with the same A_id and A_date,
-- rn = 1 picks one of them arbitrarily; add a tie-breaker to the order by
-- if that case exists.
select
    B.B_id
   ,B.B_date
   ,B.A_id
   ,nearest_a.A_date
   ,nearest_a.changed_col
   ,B.B_value
from
    B
    left join (
        select
            ranked.B_id
           ,ranked.B_date
           ,ranked.A_id
           ,ranked.A_date
           ,ranked.changed_col
        from (
            select
                B.B_id
               ,B.B_date
               ,B.A_id
               ,A.A_date
               ,A.changed_col
               ,row_number() over (
                    partition by B.B_id, B.B_date, B.A_id
                    order by     A.A_date desc
                ) as rn
            from
                B
                inner join A
                    on A.A_id = B.A_id
            where
                -- only A rows dated on or before the B row qualify
                A.A_date <= B.B_date
        ) ranked
        where
            ranked.rn = 1
    ) nearest_a
        on  nearest_a.B_id   = B.B_id
        and nearest_a.B_date = B.B_date
        and nearest_a.A_id   = B.A_id

+------+------------+------+------------+-------------+---------+
| b_id |   b_date   | a_id |   a_date   | changed_col | b_value |
+------+------------+------+------------+-------------+---------+
| B01  | 2017-04-02 | A01  | 2017-04-01 | XYZ         |     200 |
| B01  | 2017-04-04 | A01  | 2017-04-01 | XYZ         |     500 |
+------+------------+------+------------+-------------+---------+


-- Join-free variant: stack the rows of B and A into one timeline per A_id,
-- carry the latest A row seen so far along that timeline, and keep only the
-- B rows at the end.
with timeline as (
    -- B rows, tagged 'B'; changed_col does not exist on this side
    select
        B_id
       ,B_date as dt
       ,B_value
       ,A_id
       ,'B'    as tab
       ,null   as changed_col
    from
        B

    union all

    -- A rows, tagged 'A'; B-only columns are null
    select
        null   as B_id
       ,A_date as dt
       ,null   as B_value
       ,A_id
       ,'A'    as tab
       ,changed_col
    from
        A
),

timeline_with_latest_a as (
    select
        B_id
       ,dt
       ,B_value
       ,A_id
       ,tab
        -- Running max over the A rows only (the case yields NULL for B rows).
        -- Ordering by dt, tab puts an 'A' row before a 'B' row on the same
        -- date, so an A row dated the same day as a B row is included.
        -- The struct's first field is the date, so the max is expected to be
        -- the most recent A row.
       ,max(
            case
                when tab = 'A'
                then named_struct('A_date', dt, 'changed_col', changed_col)
            end
        ) over (
            partition by A_id
            order by     dt, tab
            rows between unbounded preceding and current row
        ) as A_data
    from
        timeline
)

select
    B_id
   ,dt                 as B_date
   ,A_id
   ,A_data.A_date      as A_date
   ,A_data.changed_col
   ,B_value
from
    timeline_with_latest_a
where
    tab = 'B'

+------+------------+------+------------+-------------+---------+
| b_id |   b_date   | a_id |   a_date   | changed_col | b_value |
+------+------------+------+------------+-------------+---------+
| B01  | 2017-04-02 | A01  | 2017-04-01 | XYZ         |     200 |
| B01  | 2017-04-04 | A01  | 2017-04-01 | XYZ         |     500 |
+------+------------+------+------------+-------------+---------+


-- For every row of B, attach the A row (matched on A_id) that has the latest
-- A_date not after that row's B_date, using max() over a struct instead of a
-- ranking window. B rows without a qualifying A row are kept with NULL
-- A_date / changed_col (left join).
--
-- The aggregation and the final join are done per B row
-- (B_id, B_date, A_id), not per B_id alone: B_id can repeat with different
-- B_date values, and grouping by B_id only would give every row of a B_id
-- the A row nearest to that B_id's latest B_date -- possibly an A_date later
-- than an earlier row's own B_date.
select
    B.B_id
   ,B.B_date
   ,B.A_id
   ,nearest_a.A_data.A_date      as A_date
   ,nearest_a.A_data.changed_col as changed_col
   ,B.B_value
from
    B
    left join (
        select
            B.B_id
           ,B.B_date
           ,B.A_id
            -- The struct's first field is the date, so the max is expected
            -- to be the most recent qualifying A row; changed_col travels
            -- with it.
           ,max(named_struct('A_date', A.A_date, 'changed_col', A.changed_col)) as A_data
        from
            B
            inner join A
                on A.A_id = B.A_id
        where
            -- only A rows dated on or before the B row qualify
            A.A_date <= B.B_date
        group by
            B.B_id
           ,B.B_date
           ,B.A_id
    ) nearest_a
        on  nearest_a.B_id   = B.B_id
        and nearest_a.B_date = B.B_date
        and nearest_a.A_id   = B.A_id

+------+------------+------+------------+-------------+---------+
| b_id |   b_date   | a_id |   a_date   | changed_col | b_value |
+------+------------+------+------------+-------------+---------+
| B01  | 2017-04-02 | A01  | 2017-04-01 | XYZ         |     200 |
| B01  | 2017-04-04 | A01  | 2017-04-01 | XYZ         |     500 |
+------+------------+------+------------+-------------+---------+