dataplex-quickstart-labs/00-resources/scripts/spark-sql/retail-transactions-anomaly-detection/retail-transactions-anomaly-detection.sql (16 lines of code) (raw):

select * from procurement.offlinesales where orderid != 'orderid' limit 10; /* Count of total transactions */ select count(*) from procurement.offlinesales where orderid!='orderid'; select count(distinct product) from procurement.offlinesales where orderid!='orderid'; select product,avg(quantityordered * unitprice) as avg_sales_amount from procurement.offlinesales where orderid!='orderid' group by product order by avg_sales_amount desc; /* Anomalous Transactions */ WITH stats AS ( SELECT product, AVG(quantityordered * unitprice) AS avg_value, STDDEV(quantityordered * unitprice) / AVG(quantityordered * unitprice) AS rsd_value FROM procurement.offlinesales GROUP BY product) SELECT orderid, orderdate, product, (quantityordered * unitprice) as sales_amount, ABS(1 - (quantityordered * unitprice)/ avg_value) AS distance_from_avg FROM procurement.offlinesales INNER JOIN stats USING (product) WHERE rsd_value <= 0.2 ORDER BY distance_from_avg DESC LIMIT 10