Tuesday, March 30, 2021

hive with clause

the subquery twice or once) but it works. Here is the fiddle

WITH subquery as
       ( 
SELECT
    First.deal_id
    ,COALESCE( First.exchange, Second.exchange ) as exchange_id
    ,COALESCE( First.publisher, Second.publisher ) as pub_id
    ,COUNT (DISTINCT(case when Second.user_id is null then First.user_id else null END)) AS Incremental
    ,SUM (First.imps) AS First_imps
    ,SUM (Second.imps) AS Second_imps
    FROM
        (
            SELECT
                a.deal_id
                ,a.exchange
                ,a.publisher
                ,a.user_id
                ,1 AS imps
            FROM
                T1 a 
            WHERE
                a.deal_id >= 1
        ) First 
        FULL OUTER JOIN (
            SELECT
                a.exchange
                ,a.publisher
                ,a.user_id
                ,1 AS imps
            FROM
                T1 a
            WHERE
            a.deal_id = 0
        ) Second
        ON (
            First.exchange = Second.exchange
            AND First.publisher = Second.publisher
            AND First.user_id = Second.user_id
        )
        GROUP BY
        First.deal_id
        ,COALESCE( First.exchange, Second.exchange )
        ,COALESCE( First.publisher, Second.publisher )
        )

SELECT
deal.deal_id,
deal.exchange_id,
deal.pub_id,
sum(deal.incremental),
sum(deal.first_imps),
sum(coalesce(deal.second_imps, 0) + coalesce(oa.second_imps,0))
FROM 
subquery deal
LEFT JOIN 
subquery oa 
ON (deal.exchange_id = oa.exchange_id
AND deal.pub_id = oa.pub_id
AND oa.deal_id is null)
WHERE deal.deal_id is not null
GROUP BY
deal.deal_id,
deal.exchange_id,
deal.pub_id
;

No comments:

Post a Comment