tools/apiview/parsers/cpp-api-parser/ApiViewProcessor/CommentExtractor.cpp (668 lines of code) (raw):
// Copyright (c) Microsoft Corporation. All rights reserved.
// SPDX-License-Identifier: MIT
#include "CommentExtractor.hpp"
#include <clang/AST/ASTContext.h>
#include <clang/AST/Comment.h>
#include <clang/AST/CommentVisitor.h>
#include <iostream>
#include <iterator>
#include <list>
#include <string>
#include <vector>
using namespace clang;
// Documentation node built for a clang FullComment (see AstDocumentation::Create). It carries no
// text of its own; dumping it dumps each of its children in order.
struct AstComment : public AstDocumentation
{
  // The clang comment is accepted but not inspected.
  AstComment(comments::FullComment const* const /*comment*/) : AstDocumentation() {}

  // This node is never reported as inline content.
  bool IsInlineComment() const override { return false; }

  // Forwards the dump request, with the caller's options unchanged, to every non-null child.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
  {
    for (auto it = m_children.begin(); it != m_children.end(); ++it)
    {
      if (!*it)
      {
        continue;
      }
      (*it)->DumpNode(dumper, options);
    }
  }
};
struct AstBlockCommandComment : public AstDocumentation
{
AstBlockCommandComment(comments::BlockCommandComment const* const comment) : AstDocumentation()
{
std::string value;
value += GetCommandMarker(comment->getCommandMarker());
auto commandInfo{
clang::comments::CommandTraits::getBuiltinCommandInfo(comment->getCommandID())};
if (commandInfo->IsBriefCommand)
{
value += "brief";
}
else if (commandInfo->IsReturnsCommand)
{
value += "returns";
}
else if (commandInfo->IsThrowsCommand)
{
value += "throws";
}
else if (commandInfo->IsParamCommand)
{
throw std::runtime_error("Block command comment should never have a param command.");
}
else if (commandInfo->IsTParamCommand)
{
throw std::runtime_error("Block command comment should never have a tparam command.");
}
else if (commandInfo->IsVerbatimBlockCommand)
{
throw std::runtime_error("Block command comment should never have a verbatim command.");
}
else if (commandInfo->IsVerbatimLineCommand)
{
throw std::runtime_error("Block command comment should never have a verbatim line command.");
}
else
{
value += commandInfo->Name;
}
m_thisLine = value;
}
bool IsInlineComment() const override { return false; }
void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
{
if (options.NeedsLeadingNewline)
{
dumper->Newline();
}
if (options.NeedsLeftAlign)
{
dumper->LeftAlign();
}
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
dumper->InsertWhitespace();
dumper->InsertComment(m_thisLine);
// The first child will be the first line of the brief description, it should be joined with the
// current line.
auto child{m_children.begin()};
if (child != m_children.end() && *child)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = false;
innerOptions.NeedsLeadingNewline = false;
innerOptions.NeedsTrailingNewline = true;
innerOptions.InlineBlockComment = true;
(*child)->DumpNode(dumper, innerOptions);
child++;
}
for (; child != m_children.end(); child++)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = true;
innerOptions.NeedsLeadingNewline = true;
innerOptions.NeedsTrailingNewline = false;
if (*child)
{
(*child)->DumpNode(dumper, options);
}
}
if (options.NeedsTrailingNewline)
{
dumper->Newline();
}
}
};
struct AstParamComment : public AstDocumentation
{
AstParamComment(comments::ParamCommandComment const* comment) : AstDocumentation()
{
std::string thisLine;
thisLine += GetCommandMarker(comment->getCommandMarker());
auto commandInfo{
clang::comments::CommandTraits::getBuiltinCommandInfo(comment->getCommandID())};
if (commandInfo->IsParamCommand)
{
thisLine += "param";
}
else
{
thisLine += commandInfo->Name;
}
thisLine += " ";
// If the caller explicitly listed the direction, include that in the description.
if (comment->isDirectionExplicit())
{
thisLine += comment->getDirectionAsString(comment->getDirection());
thisLine += " ";
}
if (comment->hasParamName())
{
thisLine += comment->getParamNameAsWritten();
thisLine += " ";
}
m_thisLine = thisLine;
}
bool IsInlineComment() const override { return false; }
void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
{
if (m_thisLine == "@param format")
{
std::cout << "@param[in] format";
}
if (options.NeedsLeadingNewline)
{
dumper->Newline();
}
if (options.NeedsLeftAlign)
{
dumper->LeftAlign();
}
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
dumper->InsertWhitespace();
dumper->InsertComment(m_thisLine);
// The first child will be the first line of the parameter documentation, it should be joined
// with the current line.
auto child{m_children.begin()};
if (child != m_children.end() && *child)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = false;
innerOptions.NeedsLeadingNewline = false;
innerOptions.NeedsTrailingNewline = true;
innerOptions.InlineBlockComment = true;
(*child)->DumpNode(dumper, innerOptions);
child++;
}
for (; child != m_children.end(); child++)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = true;
innerOptions.NeedsLeadingNewline = false;
innerOptions.NeedsTrailingNewline = true;
if (*child)
{
(*child)->DumpNode(dumper, options);
}
}
if (options.NeedsTrailingNewline)
{
dumper->Newline();
}
}
};
struct AstTParamComment : public AstDocumentation
{
AstTParamComment(comments::TParamCommandComment const* comment) : AstDocumentation()
{
std::string thisLine;
thisLine += GetCommandMarker(comment->getCommandMarker());
auto commandInfo{
clang::comments::CommandTraits::getBuiltinCommandInfo(comment->getCommandID())};
if (commandInfo->IsTParamCommand)
{
thisLine += "tparam";
}
else
{
thisLine += commandInfo->Name;
}
thisLine += " ";
if (comment->hasParamName())
{
thisLine += comment->getParamNameAsWritten();
thisLine += " ";
}
m_thisLine = thisLine;
}
bool IsInlineComment() const override { return false; }
void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
{
if (options.NeedsLeadingNewline)
{
dumper->Newline();
}
if (options.NeedsLeftAlign)
{
dumper->LeftAlign();
}
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
dumper->InsertWhitespace();
dumper->InsertComment(m_thisLine);
// The first child will be the first line of the parameter documentation, it should be joined
// with the current line.
auto child{m_children.begin()};
if (child != m_children.end() && *child)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = false;
innerOptions.NeedsLeadingNewline = false;
innerOptions.NeedsTrailingNewline = true;
innerOptions.InlineBlockComment = true;
(*child)->DumpNode(dumper, innerOptions);
child++;
}
for (; child != m_children.end(); child++)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = true;
innerOptions.NeedsLeadingNewline = true;
innerOptions.NeedsTrailingNewline = false;
if (*child)
{
(*child)->DumpNode(dumper, options);
}
}
if (options.NeedsTrailingNewline)
{
dumper->Newline();
}
}
};
struct AstVerbatimBlockComment : public AstDocumentation
{
AstVerbatimBlockComment(comments::VerbatimBlockComment const* comment) : AstDocumentation()
{
std::string thisLine;
auto commandInfo{
clang::comments::CommandTraits::getBuiltinCommandInfo(comment->getCommandID())};
thisLine += GetCommandMarker(comment->getCommandMarker());
thisLine += commandInfo->Name;
auto it = comment->child_begin();
auto childLineComment = clang::dyn_cast<clang::comments::VerbatimBlockLineComment>(*it);
if (childLineComment)
{
std::string childText{childLineComment->getText()};
// If the first character of the 0th argument is a '{', then this is a code block. Append
// it to the name.
if ((childText[0] == '{') && (childText[(childText.size() - 1)] = '}'))
{
m_hasLanguageTag = true;
}
}
m_thisLine = thisLine;
m_endMarker += GetCommandMarker(comment->getCommandMarker());
m_endMarker += commandInfo->EndCommandName;
}
bool IsInlineComment() const override { return false; }
void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
{
if (options.NeedsLeadingNewline)
{
dumper->Newline();
}
if (options.NeedsLeftAlign)
{
dumper->LeftAlign();
}
dumper->InsertWhitespace();
dumper->InsertComment("* ");
dumper->InsertWhitespace();
dumper->InsertComment(m_thisLine);
// The first child will be the first line of the parameter documentation, it should be joined
// with the current line.
auto child{m_children.begin()};
if (child != m_children.end() && *child)
{
DumpNodeOptions innerOptions{options};
if (m_hasLanguageTag)
{
innerOptions.NeedsLeftAlign = false;
innerOptions.NeedsLeadingNewline = false;
innerOptions.NeedsTrailingNewline = false;
innerOptions.InlineBlockComment = true;
}
(*child)->DumpNode(dumper, innerOptions);
child++;
}
for (; child != m_children.end(); child++)
{
DumpNodeOptions innerOptions{options};
innerOptions.NeedsLeftAlign = true;
innerOptions.NeedsLeadingNewline = true;
innerOptions.NeedsTrailingNewline = false;
if (*child)
{
(*child)->DumpNode(dumper, options);
}
}
if (!m_endMarker.empty())
{
dumper->Newline();
dumper->LeftAlign();
dumper->InsertWhitespace();
dumper->InsertComment("* ");
dumper->InsertWhitespace();
dumper->InsertComment(m_endMarker);
}
if (options.NeedsTrailingNewline)
{
dumper->Newline();
}
}
private:
bool m_hasLanguageTag{false};
std::string m_endMarker;
};
// Represents an inline command marker. Examples include the \c in \c foo, or the \a in \a foo.
// The marker is the \c or \a.
//
// \p or \c should be rendered in a fixed width font
// \a or \e or \em should be rendered in a italic font
// \b should be rendered in a bold font
// \emoji should be rendered as an emoji (if possible - see
// https://gist.github.com/rxaviers/7360908).
//
// The marker itself is not stored. The command's arguments are concatenated and wrapped in a
// markdown delimiter chosen from clang's render kind: none for Normal, "**" for Bold, "*" for
// Emphasized and "`" for Monospaced.
struct AstInlineCommand : AstDocumentation
{
  // Builds the markdown text for `comment`. Throws std::runtime_error for any render kind other
  // than the four listed above.
  AstInlineCommand(const comments::InlineCommandComment* comment) : AstDocumentation()
  {
    // The opening and closing delimiters are always identical, so one string serves for both.
    std::string delimiter;
    switch (comment->getRenderKind())
    {
      case clang::comments::InlineCommandRenderKind::Normal:
        break;
      case clang::comments::InlineCommandRenderKind::Bold:
        delimiter = "**";
        break;
      case clang::comments::InlineCommandRenderKind::Emphasized:
        delimiter = "*";
        break;
      case clang::comments::InlineCommandRenderKind::Monospaced:
        delimiter = "`";
        break;
      default:
        throw std::runtime_error("Unknown inline command render kind.");
    }

    // Include the arguments to the command, with nothing inserted between them.
    std::string rendered{delimiter};
    unsigned const argCount = comment->getNumArgs();
    for (unsigned argIndex = 0u; argIndex != argCount; ++argIndex)
    {
      rendered += comment->getArgText(argIndex);
    }
    rendered += delimiter;
    m_thisLine = rendered;
  }

  // Inline commands are rendered within the surrounding text.
  bool IsInlineComment() const override { return true; }

  // Writes the markdown text and then dumps every child with the caller's options.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
  {
    dumper->InsertComment(m_thisLine);
    for (auto it = m_children.begin(); it != m_children.end(); ++it)
    {
      (*it)->DumpNode(dumper, options);
    }
  }
};
// A paragraph represents a block of text. Typically this is a paragraph of text. The children of
// the line are typically AstTextComment nodes, but they may also be AstInlineCommand nodes. If they
// are AstInlineCommand nodes, we should just insert them with no separation, if they are
// AstTextComment nodes, we should insert them with a new line and comment leader between them.
struct AstParagraphComment : AstDocumentation
{
AstParagraphComment(comments::ParagraphComment const* comment) : AstDocumentation() {}
bool IsInlineComment() const override { return false; }
void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
{
if (!options.InlineBlockComment)
{
if (options.NeedsLeadingNewline)
{
dumper->Newline();
}
if (options.NeedsLeftAlign)
{
dumper->LeftAlign();
}
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
dumper->InsertWhitespace();
// Insert a blank line before the paragraph if the previous line was not an inline comment.
dumper->Newline();
dumper->LeftAlign();
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
dumper->InsertWhitespace();
}
bool insertLineBreak = false;
for (auto& child : m_children)
{
if (child)
{
if (insertLineBreak && !child->IsInlineComment())
{
dumper->Newline();
dumper->LeftAlign();
dumper->InsertWhitespace();
dumper->InsertPunctuation('*');
}
child->DumpNode(dumper, options);
if (child->IsInlineComment())
{
insertLineBreak = false;
}
else
{
insertLineBreak = true;
}
}
}
}
};
// Documentation node for one line of text inside a verbatim block. The line's text is kept in
// m_thisLine exactly as clang reports it.
struct AstVerbatimBlockLineComment : AstDocumentation
{
  // Captures the text of `comment`.
  AstVerbatimBlockLineComment(comments::VerbatimBlockLineComment const* comment)
      : AstDocumentation()
  {
    m_thisLine = comment->getText();
  }

  // A verbatim line is never reported as inline content.
  bool IsInlineComment() const override { return false; }

  // Writes the line to `dumper`.
  //
  // `options` controls the optional leading newline, left alignment and trailing newline. The
  // " * " leader is written unless `options.InlineBlockComment` is set. Children, if any, are
  // dumped after the text with the same options.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& options) const override
  {
    if (options.NeedsLeadingNewline)
    {
      dumper->Newline();
    }
    if (options.NeedsLeftAlign)
    {
      dumper->LeftAlign();
    }

    bool const needsLeader = !options.InlineBlockComment;
    if (needsLeader)
    {
      dumper->InsertWhitespace();
      dumper->InsertPunctuation('*');
      dumper->InsertWhitespace();
    }
    dumper->InsertComment(m_thisLine);

    for (auto it = m_children.begin(); it != m_children.end(); ++it)
    {
      (*it)->DumpNode(dumper, options);
    }

    if (options.NeedsTrailingNewline)
    {
      dumper->Newline();
    }
  }
};
// Documentation node for a run of plain text. The text is kept in m_thisLine exactly as clang
// reports it.
struct AstTextComment : AstDocumentation
{
  // Captures the text of `comment`.
  AstTextComment(comments::TextComment const* comment) : AstDocumentation()
  {
    m_thisLine = comment->getText();
  }

  // Text is not reported as inline content.
  bool IsInlineComment() const override { return false; }

  // Writes the stored text to `dumper`. The options are not consulted and no comment leader or
  // newline is added here.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& /*options*/) const override
  {
    dumper->InsertComment(m_thisLine);
  }
};
// Documentation node for a clang VerbatimLineComment. Only the command marker and command name
// are rendered.
struct AstVerbatimLineComment : AstDocumentation
{
  // Appends "<marker><command>" to m_thisLine and records the command's end command name.
  AstVerbatimLineComment(comments::VerbatimLineComment const* comment) : AstDocumentation()
  {
    auto const info
        = clang::comments::CommandTraits::getBuiltinCommandInfo(comment->getCommandID());
    std::string commandText{GetCommandMarker(comment->getCommandMarker())};
    commandText += info->Name;
    m_thisLine += commandText;
    m_endMarker = info->EndCommandName;
  }

  // Verbatim line commands are rendered within the surrounding text.
  bool IsInlineComment() const override { return true; }

  // Writes the command text to `dumper`. The options are not consulted.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& /*options*/) const override
  {
    dumper->InsertComment(m_thisLine);
  }

private:
  // End command name taken from clang's command table. It is stored but not written by DumpNode.
  std::string m_endMarker;
};
// Documentation node for an HTML start tag inside a comment. Only `<a>` tags have any effect:
// their `href` attribute is turned into the start of an external link.
struct AstHtmlStartTagComment : AstDocumentation
{
  // Records whether `comment` is an `<a>` tag and, if so, the value of its `href` attribute.
  // When several `href` attributes are present the value of the last one is kept.
  AstHtmlStartTagComment(comments::HTMLStartTagComment const* comment) : AstDocumentation()
  {
    if (!(comment->getTagName() == "a"))
    {
      return;
    }
    m_isLinkHref = true;

    auto const attributeCount = comment->getNumAttrs();
    for (size_t index = 0; index < attributeCount; ++index)
    {
      auto const& attribute = comment->getAttr(index);
      if (attribute.Name == "href")
      {
        m_linkTarget = attribute.Value;
      }
    }
  }

  // HTML tags are rendered within the surrounding text.
  bool IsInlineComment() const override { return true; }

  // Opens an external link to the recorded target when the tag is an `<a>` tag; any other tag
  // writes nothing. The options are not consulted.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& /*options*/) const override
  {
    if (!m_isLinkHref)
    {
      return;
    }
    dumper->AddExternalLinkStart(m_linkTarget);
  }

private:
  // Value of the `href` attribute; empty when the tag has none.
  std::string m_linkTarget;
  // True when the tag name is "a".
  bool m_isLinkHref{false};
};
// Documentation node for an HTML end tag inside a comment. Only `</a>` has any effect: it closes
// an external link.
struct AstHtmlEndTagComment : AstDocumentation
{
  // Records whether `comment` closes an `<a>` tag.
  AstHtmlEndTagComment(comments::HTMLEndTagComment const* comment) : AstDocumentation()
  {
    bool const closesAnchor = (comment->getTagName() == "a");
    if (closesAnchor)
    {
      m_isLinkHref = true;
    }
  }

  // HTML tags are rendered within the surrounding text.
  bool IsInlineComment() const override { return true; }

  // Ends the external link when the tag is `</a>`; any other tag writes nothing. The options are
  // not consulted.
  void DumpNode(AstDumper* dumper, DumpNodeOptions const& /*options*/) const override
  {
    if (!m_isLinkHref)
    {
      return;
    }
    dumper->AddExternalLinkEnd();
  }

private:
  // True when the tag name is "a".
  bool m_isLinkHref{false};
};
// Creates the documentation node which corresponds to the kind of `comment`.
//
// Only the node itself is created; children of `comment` are not visited here. For a comment
// kind with no matching node type a message is written to llvm::errs() and nullptr is returned.
std::unique_ptr<AstDocumentation> AstDocumentation::Create(const comments::Comment* comment)
{
  using comments::CommentKind;

  std::unique_ptr<AstDocumentation> node;
  switch (comment->getCommentKind())
  {
    case CommentKind::FullComment:
      node = std::make_unique<AstComment>(cast<const comments::FullComment>(comment));
      break;
    case CommentKind::BlockCommandComment:
      node = std::make_unique<AstBlockCommandComment>(
          cast<const comments::BlockCommandComment>(comment));
      break;
    case CommentKind::ParamCommandComment:
      node = std::make_unique<AstParamComment>(
          cast<const comments::ParamCommandComment>(comment));
      break;
    case CommentKind::TParamCommandComment:
      node = std::make_unique<AstTParamComment>(
          cast<const comments::TParamCommandComment>(comment));
      break;
    case CommentKind::VerbatimBlockComment:
      node = std::make_unique<AstVerbatimBlockComment>(
          cast<const comments::VerbatimBlockComment>(comment));
      break;
    case CommentKind::InlineCommandComment:
      node = std::make_unique<AstInlineCommand>(
          cast<const comments::InlineCommandComment>(comment));
      break;
    case CommentKind::ParagraphComment:
      node = std::make_unique<AstParagraphComment>(
          cast<const comments::ParagraphComment>(comment));
      break;
    case CommentKind::TextComment:
      node = std::make_unique<AstTextComment>(cast<const comments::TextComment>(comment));
      break;
    case CommentKind::VerbatimBlockLineComment:
      node = std::make_unique<AstVerbatimBlockLineComment>(
          cast<const comments::VerbatimBlockLineComment>(comment));
      break;
    case CommentKind::VerbatimLineComment:
      node = std::make_unique<AstVerbatimLineComment>(
          cast<const comments::VerbatimLineComment>(comment));
      break;
    case CommentKind::HTMLStartTagComment:
      node = std::make_unique<AstHtmlStartTagComment>(
          cast<const comments::HTMLStartTagComment>(comment));
      break;
    case CommentKind::HTMLEndTagComment:
      node = std::make_unique<AstHtmlEndTagComment>(
          cast<const comments::HTMLEndTagComment>(comment));
      break;
    default:
      // No node type for this kind: report it and leave `node` empty.
      llvm::errs() << "Unknown comment kind: " << comment->getCommentKindName() << "\n";
      break;
  }
  return node;
}
// clang visitor to extract comments from the AST.
//
// clang comment visitors look for methods named "visit<type>Comment". If the method is found, it is
// called, otherwise the comment visitor tries the parent type of the comment. This allows us to
// specialize the visitor for different types of comments but leaves processing for most comments
// inside the visitComment method.
//
// Every visit method returns the documentation node built for the comment, or nullptr when the
// comment is skipped.
class CommentVisitor
    : public clang::comments::CommentVisitor<CommentVisitor, std::unique_ptr<AstDocumentation>> {
public:
  CommentVisitor()
      : clang::comments::CommentVisitor<CommentVisitor, std::unique_ptr<AstDocumentation>>()
  {
  }

  // Primary processor for comments. This method is called for all comments which do not have a
  // specialized visitor. Returns nullptr when AstDocumentation::Create has no node for the kind.
  std::unique_ptr<AstDocumentation> visitComment(const clang::comments::Comment* comment)
  {
    return CreateNodeWithChildren(comment);
  }

  // Process a full comment. This is the top level comment type.
  std::unique_ptr<AstDocumentation> visitFullComment(const clang::comments::FullComment* decl)
  {
    return CreateNodeWithChildren(decl);
  }

  // We want to ignore empty paragraph comments, so we need to specialize the visitor for paragraph
  // comments.
  std::unique_ptr<AstDocumentation> visitParagraphComment(
      const clang::comments::ParagraphComment* decl)
  {
    // Ignore paragraphs which contain only whitespace.
    if (decl->isWhitespace())
    {
      return nullptr;
    }
    return CreateNodeWithChildren(decl);
  }

  // We want to ignore empty text comments, so we need to specialize the visitor for text
  // comments.
  std::unique_ptr<AstDocumentation> visitTextComment(const clang::comments::TextComment* tc)
  {
    // Ignore text comments which are whitespace.
    if (tc->isWhitespace())
    {
      return nullptr;
    }
    // Only the node itself is created here; children of a text comment are not visited.
    return AstDocumentation::Create(tc);
  }

private:
  // Creates the node for `comment`, then visits each child of `comment` and attaches every
  // non-null result to the node.
  //
  // Returns nullptr, without visiting the children, when AstDocumentation::Create produced no
  // node. Previously that case dereferenced the null node while adding children.
  std::unique_ptr<AstDocumentation> CreateNodeWithChildren(
      const clang::comments::Comment* comment)
  {
    std::unique_ptr<AstDocumentation> node{AstDocumentation::Create(comment)};
    if (!node)
    {
      return nullptr;
    }
    for (auto child = comment->child_begin(); child != comment->child_end(); ++child)
    {
      auto childNode = visit(*child);
      if (childNode)
      {
        node->AddChild(std::move(childNode));
      }
    }
    return node;
  }
};
// Builds the documentation tree for the comment attached to `decl`.
//
// The comment is looked up through `context`; when there is none, nullptr is returned. Otherwise
// a CommentVisitor walks the comment and the node it produces is returned.
std::unique_ptr<AstDocumentation> ExtractCommentForDeclaration(
    clang::ASTContext const& context,
    clang::Decl const* decl)
{
  auto fullComment = context.getCommentForDecl(decl, nullptr);
  if (fullComment == nullptr)
  {
    return nullptr;
  }

  CommentVisitor visitor;
  return visitor.visit(fullComment);
}
// Returns the character which introduces a documentation command: "@" for CMK_At and "\" for
// CMK_Backslash. Throws std::runtime_error for any other marker value.
std::string_view AstDocumentation::GetCommandMarker(clang::comments::CommandMarkerKind marker)
{
  if (marker == clang::comments::CommandMarkerKind::CMK_At)
  {
    return "@";
  }
  if (marker == clang::comments::CommandMarkerKind::CMK_Backslash)
  {
    return "\\";
  }
  throw std::runtime_error("Unknown command marker kind.");
}