SamplesV1/TwitterAnalysisSample-AzureMLBatchScoringActivity/Scripts/Hive/Twitteragg.hql (55 lines of code) (raw):

DROP TABLE IF EXISTS TwitterRaw; CREATE EXTERNAL TABLE TwitterRaw ( ID int, StatusId string, Tweet string, Source string, UserID string, Latitude string, Longitutde string, InReplyToScreenName string, InReplyToStatusID string, InReplyToUserID string, RetweetedStatus_StatusID string, RetweetCount string, InsertedDate string, CreatedDate string, Query string, HourOfDay string, TweetType string, Platform string, Devices string, App string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '10' STORED AS TEXTFILE LOCATION '${hiveconf:TwitterRaw}'; DROP TABLE IF EXISTS TweetCounts; CREATE EXTERNAL TABLE TweetCounts ( TweetCounts string ) ROW FORMAT delimited fields terminated by ',' LINES TERMINATED BY '10' STORED AS TEXTFILE LOCATION '${hiveconf:TweetsCount}'; INSERT OVERWRITE TABLE TweetCounts SELECT count(*) as TweetCounts from TwitterRaw; DROP TABLE IF EXISTS TweetsByDate; CREATE EXTERNAL TABLE TweetsByDate ( TweetsByDate string, CreateDate string ) ROW FORMAT delimited fields terminated by ',' LINES TERMINATED BY '10' STORED AS TEXTFILE LOCATION '${hiveconf:TweetsByDate}'; INSERT OVERWRITE TABLE TweetsByDate select count(ID) as TweetsByDate ,To_DATE(CreatedDate) from TwitterRaw group by To_DATE(CreatedDate) ; DROP TABLE IF EXISTS Tweets; CREATE EXTERNAL TABLE Tweets ( Tweets string ) ROW FORMAT delimited fields terminated by ',' LINES TERMINATED BY '10' STORED AS TEXTFILE LOCATION '${hiveconf:Tweets}'; INSERT OVERWRITE TABLE Tweets select Tweet from TwitterRaw; DROP TABLE IF EXISTS TweetsByHour; CREATE EXTERNAL TABLE TweetsByHour ( TweetsByHour string, CreatedDate string, Hour string ) ROW FORMAT delimited fields terminated by ',' LINES TERMINATED BY '10' STORED AS TEXTFILE LOCATION '${hiveconf:TweetsByHour}'; INSERT OVERWRITE TABLE TweetsByHour select count(ID) as TweetsByHour,TO_DATE(CreatedDate),HOUR(CreatedDate) from TwitterRaw group by HOUR(CreatedDate), TO_DATE(CreatedDate);