Libclang 教程¶
Clang 的 C 接口提供了一个相对较小的 API,它公开了用于将源代码解析为抽象语法树 (AST)、加载已解析的 AST、遍历 AST、将物理源位置与 AST 中的元素相关联以及支持基于 Clang 的开发工具的其他功能。这个 C 接口永远不会提供 Clang 的 C++ AST 中存储的全部信息,也不应该提供:其目的是维护一个在各版本之间相对稳定的 API,只提供支持开发工具所需的基本功能。libclang 的整个 C 接口可在文件 Index.h 中获得。
基本类型概述¶
所有 libclang 类型都以 CX 为前缀。
CXIndex¶
一个索引,它包含一组翻译单元,这些翻译单元通常会链接到一个可执行文件或库中。
CXTranslationUnit¶
单个翻译单元,它位于一个索引中。
CXCursor¶
一个游标,表示指向翻译单元抽象语法树中某个元素的指针。
代码示例¶
// file.cpp -- sample input that the example program parses ("file.cpp" is the path passed to clang_parseTranslationUnit).
struct foo{ // record whose declaration and fields the traversal visits
int bar; // field of type int
int* bar_pointer; // field of type pointer-to-int
};
#include <clang-c/Index.h>
#include <iostream>
int main(){
CXIndex index = clang_createIndex(0, 0); //Create index
CXTranslationUnit unit = clang_parseTranslationUnit(
index,
"file.cpp", nullptr, 0,
nullptr, 0,
CXTranslationUnit_None); //Parse "file.cpp"
if (unit == nullptr){
std::cerr << "Unable to parse translation unit. Quitting.\n";
return 0;
}
CXCursor cursor = clang_getTranslationUnitCursor(unit); //Obtain a cursor at the root of the translation unit
}
访问 AST 的元素¶
可以使用 clang_visitChildren
以先序遍历方式递归访问 AST 的元素。
// Walk the AST below `cursor` and print the display name of every cursor visited.
clang_visitChildren(
  cursor, // Cursor whose children are traversed
  [](CXCursor node, CXCursor /*parent*/, CXClientData /*client_data*/){
    // Functions returning a CXString hand ownership to the caller, so the
    // string has to be released with clang_disposeString once we are done.
    CXString name = clang_getCursorDisplayName(node);
    const char* name_text = clang_getCString(name); // Borrowed view into `name`
    std::cout << "Visiting element " << name_text << "\n";
    clang_disposeString(name);

    // Descend into this cursor's children as well.
    return CXChildVisit_Recurse;
  }, // Captureless lambda, converted to the CXCursorVisitor function pointer
  nullptr // No client data is needed by the visitor
);
CXCursorVisitor(即传给 clang_visitChildren 的可调用参数)的返回值可以是以下三者之一:
- CXChildVisit_Break:终止游标遍历。
- CXChildVisit_Continue:继续游标遍历,访问刚访问过的游标的下一个同级,而不访问其子级。
- CXChildVisit_Recurse:使用相同的访问器和客户端数据递归遍历此游标的子级。
该程序的预期输出为
Visiting element foo
Visiting element bar
Visiting element bar_pointer
从游标中提取信息¶
提取游标种类¶
CXCursorKind clang_getCursorKind(CXCursor) 描述了游标所指实体的种类。示例值:
- CXCursor_StructDecl:C 或 C++ 结构体。
- CXCursor_FieldDecl:结构体、联合体或 C++ 类中的字段。
- CXCursor_CallExpr:调用函数的表达式。
提取游标类型¶
CXType clang_getCursorType(CXCursor)
:检索 CXCursor 的类型(如果有)。
一个 CXType
表示一个完整的 C++ 类型,包括限定符和指针。它有一个成员字段 CXTypeKind kind
和其他不透明数据。
CXTypeKind kind 的示例值:
- CXType_Invalid:表示无效类型(例如,在没有类型可用时)。
- CXType_Pointer:指向另一个类型的指针。
- CXType_Int:常规 int。
- CXType_Elaborated:表示使用详细类型关键字(例如 struct S)或通过限定名称(例如 N::M::type)或两者来引用的类型。
可以使用 clang_getTypeKindSpelling(CXTypeKind)
将任何 CXTypeKind
转换为 CXString
。
一个 CXType 还持有其他必要的不透明类型信息,例如:
- 引用了哪个结构体?
- 指针指向什么类型?
- 限定符(例如 const、volatile)?
可以使用以下函数查询 CXType 的限定符:
- clang_isConstQualifiedType(CXType):用于检查 const。
- clang_isRestrictQualifiedType(CXType):用于检查 restrict。
- clang_isVolatileQualifiedType(CXType):用于检查 volatile。
代码示例¶
//structs.cpp -- sample input that the type-inspection example parses ("structs.cpp" is the path passed to clang_parseTranslationUnit).
struct A{
int value; // field of type int
};
struct B{
int value; // field of type int
A struct_value; // field whose type is the record A declared above
};
#include <clang-c/Index.h>
#include <iostream>
int main(){
CXIndex index = clang_createIndex(0, 0); //Create index
CXTranslationUnit unit = clang_parseTranslationUnit(
index,
"structs.cpp", nullptr, 0,
nullptr, 0,
CXTranslationUnit_None); //Parse "structs.cpp"
if (unit == nullptr){
std::cerr << "Unable to parse translation unit. Quitting.\n";
return 0;
}
CXCursor cursor = clang_getTranslationUnitCursor(unit); //Obtain a cursor at the root of the translation unit
clang_visitChildren(
cursor,
[](CXCursor current_cursor, CXCursor parent, CXClientData client_data){
CXType cursor_type = clang_getCursorType(current_cursor);
CXString type_kind_spelling = clang_getTypeKindSpelling(cursor_type.kind);
std::cout << "Type Kind: " << clang_getCString(type_kind_spelling);
clang_disposeString(type_kind_spelling);
if(cursor_type.kind == CXType_Pointer || // If cursor_type is a pointer
cursor_type.kind == CXType_LValueReference || // or an LValue Reference (&)
cursor_type.kind == CXType_RValueReference){ // or an RValue Reference (&&),
CXType pointed_to_type = clang_getPointeeType(cursor_type);// retrieve the pointed-to type
CXString pointed_to_type_spelling = clang_getTypeSpelling(pointed_to_type); // Spell out the entire
std::cout << "pointing to type: " << clang_getCString(pointed_to_type_spelling);// pointed-to type
clang_disposeString(pointed_to_type_spelling);
}
else if(cursor_type.kind == CXType_Record){
CXString type_spelling = clang_getTypeSpelling(cursor_type);
std::cout << ", namely " << clang_getCString(type_spelling);
clang_disposeString(type_spelling);
}
std::cout << "\n";
return CXChildVisit_Recurse;
},
nullptr
);
程序的预期输出为
Type Kind: Record, namely A
Type Kind: Int
Type Kind: Record, namely B
Type Kind: Int
Type Kind: Record, namely A
Type Kind: Record, namely A
再次强调 CXType 和 CXTypeKind 之间的区别。例如,对于以下声明:
const char* __restrict__ variable;
- 类型种类将为 CXType_Pointer,拼写为 "Pointer"。
- 类型将是一个复杂的 CXType 结构,拼写为 "const char* __restrict__"。
检索源位置¶
CXSourceRange clang_getCursorExtent(CXCursor)
返回一个 CXSourceRange
,表示源代码中的半开区间。
分别使用 clang_getRangeStart(CXSourceRange)
和 clang_getRangeEnd(CXSourceRange)
从源区间中检索起始和结束 CXSourceLocation
。
给定一个 CXSourceLocation
,使用 clang_getExpansionLocation
检索源位置的文件、行和列。
代码示例¶
// Again, file.cpp -- this comment occupies line 1, so the struct below starts on line 2 of the parsed file.
struct foo{ // record whose source extent the example reports
int bar; // field of type int
int* bar_pointer; // field of type pointer-to-int
};
// Walk the AST below `cursor` and, for every cursor visited, print its spelling
// together with the first and last source line of its extent.
clang_visitChildren(
  cursor,
  [](CXCursor current_cursor, CXCursor /*parent*/, CXClientData /*client_data*/){
    CXString cursor_spelling = clang_getCursorSpelling(current_cursor);
    std::cout << "Cursor " << clang_getCString(cursor_spelling);
    clang_disposeString(cursor_spelling); // CXStrings returned by libclang must be released

    // Half-open source range covered by the cursor.
    CXSourceRange cursor_range = clang_getCursorExtent(current_cursor);

    // Out-parameters for clang_getExpansionLocation. Only the line numbers are
    // printed below; the file, column and offset are retrieved to show the
    // complete set of values the call can provide.
    CXFile file = nullptr;
    unsigned start_line = 0, start_column = 0, start_offset = 0;
    unsigned end_line = 0, end_column = 0, end_offset = 0;

    clang_getExpansionLocation(clang_getRangeStart(cursor_range), &file, &start_line, &start_column, &start_offset);
    clang_getExpansionLocation(clang_getRangeEnd(cursor_range), &file, &end_line, &end_column, &end_offset);
    std::cout << " spanning lines " << start_line << " to " << end_line;
    std::cout << "\n";

    return CXChildVisit_Recurse; // Keep descending into child cursors
  },
  nullptr // No client data is needed by the visitor
);
该程序的预期输出为
Cursor foo spanning lines 2 to 5
Cursor bar spanning lines 3 to 3
Cursor bar_pointer spanning lines 4 to 4
完整示例代码¶
#include <clang-c/Index.h>
#include <iostream>
int main(){
CXIndex index = clang_createIndex(0, 0); //Create index
CXTranslationUnit unit = clang_parseTranslationUnit(
index,
"file.cpp", nullptr, 0,
nullptr, 0,
CXTranslationUnit_None); //Parse "file.cpp"
if (unit == nullptr){
std::cerr << "Unable to parse translation unit. Quitting.\n";
return 0;
}
CXCursor cursor = clang_getTranslationUnitCursor(unit); //Obtain a cursor at the root of the translation unit
clang_visitChildren(
cursor,
[](CXCursor current_cursor, CXCursor parent, CXClientData client_data){
CXType cursor_type = clang_getCursorType(current_cursor);
CXString type_kind_spelling = clang_getTypeKindSpelling(cursor_type.kind);
std::cout << "TypeKind: " << clang_getCString(type_kind_spelling);
clang_disposeString(type_kind_spelling);
if(cursor_type.kind == CXType_Pointer || // If cursor_type is a pointer
cursor_type.kind == CXType_LValueReference || // or an LValue Reference (&)
cursor_type.kind == CXType_RValueReference){ // or an RValue Reference (&&),
CXType pointed_to_type = clang_getPointeeType(cursor_type);// retrieve the pointed-to type
CXString pointed_to_type_spelling = clang_getTypeSpelling(pointed_to_type); // Spell out the entire
std::cout << "pointing to type: " << clang_getCString(pointed_to_type_spelling);// pointed-to type
clang_disposeString(pointed_to_type_spelling);
}
else if(cursor_type.kind == CXType_Record){
CXString type_spelling = clang_getTypeSpelling(cursor_type);
std::cout << ", namely " << clang_getCString(type_spelling);
clang_disposeString(type_spelling);
}
std::cout << "\n";
return CXChildVisit_Recurse;
},
nullptr
);
clang_visitChildren(
cursor,
[](CXCursor current_cursor, CXCursor parent, CXClientData client_data){
CXType cursor_type = clang_getCursorType(current_cursor);
CXString cursor_spelling = clang_getCursorSpelling(current_cursor);
CXSourceRange cursor_range = clang_getCursorExtent(current_cursor);
std::cout << "Cursor " << clang_getCString(cursor_spelling);
CXFile file;
unsigned start_line, start_column, start_offset;
unsigned end_line, end_column, end_offset;
clang_getExpansionLocation(clang_getRangeStart(cursor_range), &file, &start_line, &start_column, &start_offset);
clang_getExpansionLocation(clang_getRangeEnd (cursor_range), &file, &end_line , &end_column , &end_offset);
std::cout << " spanning lines " << start_line << " to " << end_line;
clang_disposeString(cursor_spelling);
std::cout << "\n";
return CXChildVisit_Recurse;
},
nullptr
);
}