in src/java/com/twitter/search/ingester/pipeline/twitter/thriftparse/TweetEventParseHelper.java [171:347]
/**
 * Builds an {@link IngesterTwitterMessage} from a {@link TweetCreateEvent}.
 *
 * <p>The event must carry a tweet with {@code core_data} and a user with {@code profile} and
 * {@code safety} set. When the tweet is a retweet ({@code core_data.share} is set), the event
 * must also carry a source tweet with {@code core_data}: the source tweet's media, URLs,
 * mentions and hashtags are added on top of the retweet's own, and the source tweet (rather than
 * the retweet) is what gets passed to {@code setText}, {@code setInReplyTo} and
 * {@code setDeviceSourceField}. Geo and places fields are only populated for non-retweets.
 *
 * @param createEvent the tweet creation event to convert
 * @param supportedPenguinVersions passed through to the {@link IngesterTwitterMessage}
 *     constructor
 * @param debugEvents passed through to the {@link IngesterTwitterMessage} constructor; may be
 *     {@code null}
 * @return the populated message
 * @throws ThriftTweetParsingException if the event has no tweet, no tweet {@code core_data}, no
 *     user, no user {@code profile}, no user {@code safety}, or (for a retweet) no source tweet
 *     or no source tweet {@code core_data}
 */
public static IngesterTwitterMessage getTwitterMessageFromCreationEvent(
    @Nonnull TweetCreateEvent createEvent,
    @Nonnull List<PenguinVersion> supportedPenguinVersions,
    @Nullable DebugEvents debugEvents) throws ThriftTweetParsingException {
  Tweet tweet = createEvent.getTweet();
  if (tweet == null) {
    throw new ThriftTweetParsingException("No tweet field in TweetCreateEvent");
  }

  TweetCoreData coreData = tweet.getCore_data();
  if (coreData == null) {
    throw new ThriftTweetParsingException("No core_data field in Tweet in TweetCreateEvent");
  }

  User user = createEvent.getUser();
  if (user == null) {
    throw new ThriftTweetParsingException("No user field in TweetCreateEvent");
  }
  if (!user.isSetProfile()) {
    throw new ThriftTweetParsingException("No profile field in User in TweetCreateEvent");
  }
  if (!user.isSetSafety()) {
    throw new ThriftTweetParsingException("No safety field in User in TweetCreateEvent");
  }

  long twitterId = tweet.getId();
  IngesterTwitterMessage message = new IngesterTwitterMessage(
      twitterId,
      supportedPenguinVersions,
      debugEvents);

  // Set the creation time based on the tweet ID, because it has millisecond granularity,
  // and coreData.created_at_secs has only second granularity.
  message.setDate(new Date(SnowflakeIdParser.getTimestampFromTweetId(twitterId)));

  // A tweet is only flagged as sensitive content when it is marked NSFW (by admin or by user)
  // AND it carries media, URLs or cards.
  boolean isNsfw = coreData.isNsfw_admin() || coreData.isNsfw_user();
  boolean hasMediaOrUrlsOrCards =
      tweet.getMediaSize() > 0
          || tweet.getUrlsSize() > 0
          || tweet.getCardsSize() > 0
          || tweet.isSetCard2();
  message.setIsSensitiveContent(isNsfw && hasMediaOrUrlsOrCards);

  // Author fields.
  message.setFromUser(getFromUser(user));
  if (user.isSetCounts()) {
    message.setFollowersCount((int) user.getCounts().getFollowers());
  }
  message.setUserProtected(user.getSafety().isIs_protected());
  message.setUserVerified(user.getSafety().isVerified());
  message.setUserBlueVerified(user.getSafety().isIs_blue_verified());

  if (tweet.isSetLanguage()) {
    message.setLanguage(tweet.getLanguage().getLanguage()); // language ID like "en"
  }

  if (tweet.isSetSelf_thread_metadata()) {
    message.setSelfThread(true);
  }

  ExclusiveTweetControl exclusiveTweetControl = tweet.getExclusive_tweet_control();
  if (exclusiveTweetControl != null && exclusiveTweetControl.isSetConversation_author_id()) {
    message.setExclusiveConversationAuthorId(
        exclusiveTweetControl.getConversation_author_id());
  }

  // Entities taken from this tweet itself (for a retweet, the source tweet's entities are
  // added further below as well).
  setDirectedAtUser(message, coreData);
  addMentionsToMessage(message, tweet);
  addHashtagsToMessage(message, tweet);
  addMediaEntitiesToMessage(message, twitterId, tweet.getMedia());
  addUrlsToMessage(message, tweet.getUrls());
  addEscherbirdAnnotationsToMessage(message, tweet);

  message.setNullcast(coreData.isNullcast());

  if (coreData.isSetConversation_id()) {
    message.setConversationId(coreData.getConversation_id());
    NUM_TWEETS_WITH_CONVERSATION_ID.increment();
  }

  // quotes
  if (tweet.isSetQuoted_tweet()) {
    QuotedTweet quotedTweet = tweet.getQuoted_tweet();
    // Quotes without a positive tweet ID and user ID are ignored entirely.
    if (quotedTweet.getTweet_id() > 0 && quotedTweet.getUser_id() > 0) {
      if (quotedTweet.isSetPermalink()) {
        // Add the quoted tweet's permalink as one more URL entity on the message.
        String quotedURL = quotedTweet.getPermalink().getLong_url();
        UrlEntity quotedURLEntity = new UrlEntity();
        quotedURLEntity.setExpanded(quotedURL);
        quotedURLEntity.setUrl(quotedTweet.getPermalink().getShort_url());
        quotedURLEntity.setDisplay(quotedTweet.getPermalink().getDisplay_text());
        addUrlsToMessage(message, Lists.newArrayList(quotedURLEntity));
      } else {
        LOG.warn("Tweet {} has quoted tweet, but is missing quoted tweet URL: {}",
            twitterId, quotedTweet);
        NUM_TWEETS_MISSING_QUOTE_URLS.increment();
      }
      TwitterQuotedMessage quotedMessage =
          new TwitterQuotedMessage(
              quotedTweet.getTweet_id(),
              quotedTweet.getUser_id());
      message.setQuotedMessage(quotedMessage);
      NUM_TWEETS_WITH_QUOTE.increment();
    }
  }

  // card fields
  if (tweet.isSetCard2()) {
    Card2 card = tweet.getCard2();
    message.setCardName(card.getName());
    message.setCardTitle(
        CardFieldUtil.extractBindingValue(CardFieldUtil.TITLE_BINDING_KEY, card));
    message.setCardDescription(
        CardFieldUtil.extractBindingValue(CardFieldUtil.DESCRIPTION_BINDING_KEY, card));
    CardFieldUtil.deriveCardLang(message);
    message.setCardUrl(card.getUrl());
  }

  // Some fields should be set based on the "original" tweet. So if this tweet is a retweet,
  // we want to extract those fields from the retweeted tweet.
  Tweet retweetOrTweet = tweet;
  TweetCoreData retweetOrTweetCoreData = coreData;
  User retweetOrTweetUser = user;

  // retweets
  boolean isRetweet = coreData.isSetShare();
  if (isRetweet) {
    // Report a missing source tweet / core_data the same way as the other required fields
    // above, instead of failing with a NullPointerException on the dereferences below.
    Tweet sourceTweet = createEvent.getSource_tweet();
    if (sourceTweet == null) {
      throw new ThriftTweetParsingException(
          "No source_tweet field in TweetCreateEvent for a retweet");
    }
    TweetCoreData sourceCoreData = sourceTweet.getCore_data();
    if (sourceCoreData == null) {
      throw new ThriftTweetParsingException(
          "No core_data field in source Tweet in TweetCreateEvent");
    }

    retweetOrTweet = sourceTweet;
    retweetOrTweetCoreData = sourceCoreData;
    retweetOrTweetUser = createEvent.getSource_user();

    TwitterRetweetMessage retweetMessage = new TwitterRetweetMessage();
    retweetMessage.setRetweetId(twitterId);

    // The source user is optional: shared-user fields are simply left unset without it.
    if (retweetOrTweetUser != null) {
      if (retweetOrTweetUser.isSetProfile()) {
        retweetMessage.setSharedUserDisplayName(retweetOrTweetUser.getProfile().getName());
      }
      retweetMessage.setSharedUserTwitterId(retweetOrTweetUser.getId());
    }

    // created_at_secs is in seconds; Date expects milliseconds.
    retweetMessage.setSharedDate(new Date(retweetOrTweetCoreData.getCreated_at_secs() * 1000));
    retweetMessage.setSharedId(retweetOrTweet.getId());

    addMediaEntitiesToMessage(message, retweetOrTweet.getId(), retweetOrTweet.getMedia());
    addUrlsToMessage(message, retweetOrTweet.getUrls());

    // If a tweet's text is longer than 140 characters, the text for any retweet of that tweet
    // will be truncated. And if the original tweet has hashtags or mentions after character 140,
    // the Tweetypie event for the retweet will not include those hashtags/mentions, which will
    // make the retweet unsearchable by those hashtags/mentions. So in order to avoid this
    // problem, we add to the retweet all hashtags/mentions set on the original tweet.
    addMentionsToMessage(message, retweetOrTweet);
    addHashtagsToMessage(message, retweetOrTweet);

    message.setRetweetMessage(retweetMessage);
  }

  // Geo and places fields are only set when this is not a retweet.
  if (!isRetweet) {
    setGeoFields(message, retweetOrTweetCoreData, retweetOrTweetUser);
    setPlacesFields(message, retweetOrTweet);
  }

  // These are based on the "original" tweet (the source tweet for a retweet).
  setText(message, retweetOrTweetCoreData);
  setInReplyTo(message, retweetOrTweetCoreData, isRetweet);
  setDeviceSourceField(message, retweetOrTweet);

  // Profile geo enrichment fields should be set based on this tweet, even if it's a retweet.
  setProfileGeoEnrichmentFields(message, tweet);

  // The composer used to create this tweet: standard tweet creator or the camera flow.
  setComposerSource(message, tweet);

  return message;
}