in Kiosk/Views/BingNewsAnalytics.xaml.cs [139:221]
/// <summary>
/// Runs sentiment analysis and key-phrase extraction over the titles of the given news articles.
/// </summary>
/// <param name="news">Articles whose <c>Title</c> values are analyzed.</param>
/// <param name="userLanguage">
/// Language used to resolve the Text Analytics language code and to decide whether
/// key-phrase extraction is requested at all.
/// </param>
/// <returns>
/// The per-sentence sentiment scores and at most ten key phrases with their occurrence counts.
/// When no key phrases are available the list holds a single placeholder entry.
/// </returns>
private async Task<AnalyzeTextResult> AnalyzeNewsAsync(IEnumerable<NewsArticle> news, string userLanguage)
{
    var scores = new List<double>();
    List<KeyPhraseCount> topKeyPhrases;

    // Titles are joined with ". " into one document; periods inside a title are replaced with ";"
    // first (presumably so that each title ends up as exactly one sentence - confirm).
    string[] newsTitleList = news.Select(n => n.Title.Replace(".", ";")).ToArray();
    string newsTitles = string.Join(". ", newsTitleList);

    // The length is measured in text elements rather than UTF-16 code units.
    int lenInTextElements = new StringInfo(newsTitles).LengthInTextElements;

    string languageCode = GetTextAnalyticsLanguageCodeFromLanguage(userLanguage);
    bool isLangSupportedForKeyPhrases = IsLanguageSupportedByKeyPhraseAPI(userLanguage);

    if (lenInTextElements < TextAnalyticsHelper.MaximumLengthOfTextDocument)
    {
        // Everything fits into a single document: one sentiment request and (optionally) one key-phrase request.
        Task<DocumentSentiment> sentimentTask = TextAnalyticsHelper.AnalyzeSentimentAsync(newsTitles, languageCode);
        Task<KeyPhraseCollection> keyPhrasesTask = isLangSupportedForKeyPhrases
            ? TextAnalyticsHelper.ExtractKeyPhrasesAsync(newsTitles, languageCode)
            : Task.FromResult<KeyPhraseCollection>(null);

        await Task.WhenAll(sentimentTask, keyPhrasesTask);

        // Both tasks are already complete here; awaiting them just unwraps the results.
        DocumentSentiment sentimentResult = await sentimentTask;
        KeyPhraseCollection keyPhrasesResult = await keyPhrasesTask;

        scores = sentimentResult.Sentences.Select(s => GetSentimentScore(s)).ToList();
        topKeyPhrases = GetTopKeyPhraseCounts(keyPhrasesResult);
    }
    else
    {
        // The joined text is too long for one document, so the titles are sent as separate
        // documents, split over several requests.
        var sentimentTaskList = new List<Task<AnalyzeSentimentResultCollection>>();
        var keyPhrasesTaskList = new List<Task<ExtractKeyPhrasesResultCollection>>();

        int maxDocsPerRequest = Math.Min(
            TextAnalyticsHelper.MaxDocumentsPerRequestForSentimentAnalysis,
            TextAnalyticsHelper.MaxDocumentsPerRequestForKeyPhraseExtraction);

        // NOTE(review): the batch size is derived from TotalNewsCount, not from the number of
        // titles actually received - confirm that the two always agree.
        int batchSize = Math.Min((int)Math.Ceiling((decimal)TotalNewsCount / maxDocsPerRequest), maxDocsPerRequest);

        // At most MaxRequestsPerSecond batches are issued; titles beyond that are not analyzed.
        for (int i = 0; i < TextAnalyticsHelper.MaxRequestsPerSecond; i++)
        {
            string[] newsTitlesBatch = newsTitleList.Skip(i * batchSize).Take(batchSize).ToArray();
            if (newsTitlesBatch.Length == 0)
            {
                break;
            }

            sentimentTaskList.Add(TextAnalyticsHelper.AnalyzeSentimentAsync(newsTitlesBatch, languageCode));
            if (isLangSupportedForKeyPhrases)
            {
                keyPhrasesTaskList.Add(TextAnalyticsHelper.ExtractKeyPhrasesAsync(newsTitlesBatch, languageCode));
            }
        }

        var taskList = new List<Task>();
        taskList.AddRange(sentimentTaskList);
        taskList.AddRange(keyPhrasesTaskList);
        await Task.WhenAll(taskList);

        foreach (Task<AnalyzeSentimentResultCollection> completedSentimentTask in sentimentTaskList)
        {
            AnalyzeSentimentResultCollection sentimentResult = await completedSentimentTask;

            // Documents the service rejected are skipped: reading DocumentSentiment on a result
            // with HasError set throws and would discard every other document's scores.
            scores.AddRange(sentimentResult
                .Where(d => !d.HasError)
                .SelectMany(d => d.DocumentSentiment.Sentences)
                .Select(s => GetSentimentScore(s)));
        }

        var keyPhrasesList = new List<string>();
        foreach (Task<ExtractKeyPhrasesResultCollection> completedKeyPhrasesTask in keyPhrasesTaskList)
        {
            ExtractKeyPhrasesResultCollection keyPhrasesResult = await completedKeyPhrasesTask;

            // Same reasoning as above: KeyPhrases is only readable on documents without an error.
            keyPhrasesList.AddRange(keyPhrasesResult
                .Where(k => !k.HasError)
                .SelectMany(k => k.KeyPhrases));
        }

        topKeyPhrases = GetTopKeyPhraseCounts(keyPhrasesList);
    }

    return new AnalyzeTextResult
    {
        Scores = scores,
        TopKeyPhrases = topKeyPhrases
    };
}

/// <summary>
/// Groups key phrases case-insensitively, keeps the ten most frequent groups and returns them
/// ordered by phrase. Returns a single placeholder entry when <paramref name="keyPhrases"/>
/// is null or yields no phrases.
/// </summary>
private static List<KeyPhraseCount> GetTopKeyPhraseCounts(IEnumerable<string> keyPhrases)
{
    List<KeyPhraseCount> topPhrases = (keyPhrases ?? Enumerable.Empty<string>())
        .GroupBy(phrase => phrase, StringComparer.OrdinalIgnoreCase)
        .OrderByDescending(g => g.Count())
        .Take(10)
        .OrderBy(g => g.Key)
        .Select(g => new KeyPhraseCount { KeyPhrase = g.Key, Count = g.Count() })
        .ToList();

    if (topPhrases.Count > 0)
    {
        return topPhrases;
    }

    return new List<KeyPhraseCount> { new KeyPhraseCount { KeyPhrase = "Not available in this language", Count = 1 } };
}