Browse Source

Merge pull request #14046 from nvim-treesitter/feature/language-tree-directive-config

feat(treesitter): allow injections to be configured through directives
pull/14364/head
Thomas Vigouroux 4 weeks ago
committed by GitHub
parent
commit
e652b2987a
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 87
      runtime/doc/treesitter.txt
  2. 72
      runtime/lua/vim/treesitter/languagetree.lua
  3. 20
      runtime/lua/vim/treesitter/query.lua
  4. 2
      test/functional/treesitter/highlight_spec.lua
  5. 59
      test/functional/treesitter/parser_spec.lua

87
runtime/doc/treesitter.txt

@ -365,4 +365,91 @@ identical identifiers, highlighting both as |hl-WarningMsg|: >
((binary_expression left: (identifier) @WarningMsg.left right: (identifier) @WarningMsg.right)
(eq? @WarningMsg.left @WarningMsg.right))
Treesitter language injection (WIP) *lua-treesitter-language-injection*
NOTE: This is a partially implemented feature, and not usable as a default
solution yet. What is documented here is a temporary interface intended
for those who want to experiment with this feature and contribute to
its development.
Languages can have nested languages within them, for example javascript inside
HTML. We can "inject" a treesitter parser for a child language by configuring
injection queries. Here is an example of Javascript and CSS injected into
HTML. >
local query = [[
(script_element (raw_text) @javascript)
(style_element (raw_text) @css)
]];
local parser = vim.treesitter.get_parser(nil, nil, {
injections = {html = query}
})
parser:parse()
Any capture will be treated as the node treesitter will use for the injected
language. The capture name will be used as the language. There are a few
reserved captures that do not have this behavior:
`@language`
This will use a node's text content as the language to be injected.
`@content`
This will use the captured node's content as the injected content.
`@combined`
This will combine all matches of a pattern as one single block of content.
By default, each match of a pattern is treated as its own block of content
and parsed independently of the others.
`@<language>`
Any other capture name will be treated as both the language and the content.
`@_<name>`
Any capture with a leading "_" will not be treated as a language and will
receive no special processing. This is useful for capturing nodes for directives.
Injections can be configured using `directives` instead of using capture
names. Here is an example of a directive that resolves the language based on a
buffer variable instead of statically in the query. >
local query = require("vim.treesitter.query")
query.add_directive("inject-preprocessor!", function(_, _, bufnr, _, data)
local success, lang = pcall(vim.api.nvim_buf_get_var, bufnr, "css_preprocessor")
data.language = success and lang or "css"
end)
Here is the same HTML query using this directive. >
local query = [[
(script_element (raw_text) @javascript)
(style_element
((raw_text) @content
(#inject-preprocessor!)))
]];
local parser = vim.treesitter.get_parser(nil, nil, {
injections = {html = query}
})
parser:parse()
The following properties can be attached to the metadata object provided to
the directive.
`language`
Same as the language capture.
`content`
A list of ranges or nodes to inject as content. These ranges and/or nodes will
be treated as a combined source and will be parsed within the same context. This
differs from the `@content` capture, which only captures a single node as
content. This can also be a single number that references a captured node.
`combined`
Same as the combined capture.
vim:tw=78:ts=8:ft=help:norl:

72
runtime/lua/vim/treesitter/languagetree.lua

@ -12,14 +12,19 @@ LanguageTree.__index = LanguageTree
-- @param source Can be a bufnr or a string of text to parse
-- @param lang The language this tree represents
-- @param opts Options table
-- @param opts.queries A table of language to injection query strings.
-- This is useful for overriding the built-in runtime file
-- searching for the injection language query per language.
-- @param opts.injections A table of language to injection query strings.
-- This is useful for overriding the built-in runtime file
-- searching for the injection language query per language.
function LanguageTree.new(source, lang, opts)
language.require_language(lang)
opts = opts or {}
local custom_queries = opts.queries or {}
if opts.queries then
a.nvim_err_writeln("'queries' is no longer supported. Use 'injections' now")
opts.injections = opts.queries
end
local injections = opts.injections or {}
local self = setmetatable({
_source = source,
_lang = lang,
@ -27,8 +32,8 @@ function LanguageTree.new(source, lang, opts)
_regions = {},
_trees = {},
_opts = opts,
_injection_query = custom_queries[lang]
and query.parse_query(lang, custom_queries[lang])
_injection_query = injections[lang]
and query.parse_query(lang, injections[lang])
or query.get_query(lang, "injections"),
_valid = false,
_parser = vim._create_ts_parser(lang),
@ -297,33 +302,50 @@ function LanguageTree:_get_injections()
for pattern, match, metadata in self._injection_query:iter_matches(root_node, self._source, start_line, end_line+1) do
local lang = nil
local injection_node = nil
local combined = false
local ranges = {}
local combined = metadata.combined
-- Directives can configure how injections are captured as well as actual node captures.
-- This allows more advanced processing for determining ranges and language resolution.
if metadata.content then
local content = metadata.content
-- Allow for captured nodes to be used
if type(content) == "number" then
content = {match[content]}
end
if content then
vim.list_extend(ranges, content)
end
end
if metadata.language then
lang = metadata.language
end
-- You can specify the content and language together
-- using a tag with the language, for example
-- @javascript
for id, node in pairs(match) do
local data = metadata[id]
local name = self._injection_query.captures[id]
local offset_range = data and data.offset
-- Lang should override any other language tag
if name == "language" then
if name == "language" and not lang then
lang = query.get_node_text(node, self._source)
elseif name == "combined" then
combined = true
elseif name == "content" then
injection_node = offset_range or node
elseif name == "content" and #ranges == 0 then
table.insert(ranges, node)
-- Ignore any tags that start with "_"
-- Allows for other tags to be used in matches
elseif string.sub(name, 1, 1) ~= "_" then
if lang == nil then
if not lang then
lang = name
end
if not injection_node then
injection_node = offset_range or node
if #ranges == 0 then
table.insert(ranges, node)
end
end
end
@ -337,21 +359,21 @@ function LanguageTree:_get_injections()
injections[tree_index][lang] = {}
end
-- Key by pattern so we can either combine each node to parse in the same
-- context or treat each node independently.
-- Key this by pattern. If combined is set to true all captures of this pattern
-- will be parsed by treesitter as the same "source".
-- If combined is false, each "region" will be parsed as a single source.
if not injections[tree_index][lang][pattern] then
injections[tree_index][lang][pattern] = { combined = combined, nodes = {} }
injections[tree_index][lang][pattern] = { combined = combined, regions = {} }
end
table.insert(injections[tree_index][lang][pattern].nodes, injection_node)
table.insert(injections[tree_index][lang][pattern].regions, ranges)
end
end
local result = {}
-- Generate a map by lang of node lists.
-- Each list is a set of ranges that should be parsed
-- together.
-- Each list is a set of ranges that should be parsed together.
for _, lang_map in ipairs(injections) do
for lang, patterns in pairs(lang_map) do
if not result[lang] then
@ -360,10 +382,10 @@ function LanguageTree:_get_injections()
for _, entry in pairs(patterns) do
if entry.combined then
table.insert(result[lang], entry.nodes)
table.insert(result[lang], vim.tbl_flatten(entry.regions))
else
for _, node in ipairs(entry.nodes) do
table.insert(result[lang], {node})
for _, ranges in ipairs(entry.regions) do
table.insert(result[lang], ranges)
end
end
end

20
runtime/lua/vim/treesitter/query.lua

@ -89,17 +89,6 @@ local function read_query_files(filenames)
return table.concat(contents, '')
end
-- Metatable for per-match metadata: reading a key that has not been set yet
-- stores a fresh empty table under that key and returns it, so callers can
-- write `metadata[id].field = value` without initialising `metadata[id]`.
local match_metatable = {
  __index = function(self, name)
    local entry = {}
    -- rawset bypasses metamethods while storing the new entry.
    rawset(self, name, entry)
    return entry
  end
}

-- Creates an empty metadata table whose missing keys are filled in on access.
-- @returns table A new table using `match_metatable`.
local function new_match_metadata()
  local metadata = {}
  return setmetatable(metadata, match_metatable)
end
--- The explicitly set queries from |vim.treesitter.query.set_query()|
local explicit_queries = setmetatable({}, {
__index = function(t, k)
@ -259,7 +248,7 @@ predicate_handlers["vim-match?"] = predicate_handlers["match?"]
-- Directives store metadata or perform side effects against a match.
-- Directives should always end with a `!`.
-- Directive handlers receive the following arguments:
-- (match, pattern, bufnr, predicate)
-- (match, pattern, bufnr, predicate, metadata)
local directive_handlers = {
["set!"] = function(_, _, _, pred, metadata)
if #pred == 4 then
@ -279,7 +268,6 @@ local directive_handlers = {
local start_col_offset = pred[4] or 0
local end_row_offset = pred[5] or 0
local end_col_offset = pred[6] or 0
local key = pred[7] or "offset"
range[1] = range[1] + start_row_offset
range[2] = range[2] + start_col_offset
@ -288,7 +276,7 @@ local directive_handlers = {
-- If this produces an invalid range, we just skip it.
if range[1] < range[3] or (range[1] == range[3] and range[2] <= range[4]) then
metadata[pred[2]][key] = range
metadata.content = {range}
end
end
}
@ -420,7 +408,7 @@ function Query:iter_captures(node, source, start, stop)
local raw_iter = node:_rawquery(self.query, true, start, stop)
local function iter()
local capture, captured_node, match = raw_iter()
local metadata = new_match_metadata()
local metadata = {}
if match ~= nil then
local active = self:match_preds(match, match.pattern, source)
@ -455,7 +443,7 @@ function Query:iter_matches(node, source, start, stop)
local raw_iter = node:_rawquery(self.query, false, start, stop)
local function iter()
local pattern, match = raw_iter()
local metadata = new_match_metadata()
local metadata = {}
if match ~= nil then
local active = self:match_preds(match, pattern, source)

2
test/functional/treesitter/highlight_spec.lua

@ -445,7 +445,7 @@ describe('treesitter highlighting', function()
exec_lua [[
local parser = vim.treesitter.get_parser(0, "c", {
queries = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}
injections = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}
})
local highlighter = vim.treesitter.highlighter
test_hl = highlighter.new(parser, {queries = {c = hl_query}})

59
test/functional/treesitter/parser_spec.lua

@ -468,7 +468,7 @@ int x = INT_MAX;
it("should inject a language", function()
exec_lua([[
parser = vim.treesitter.get_parser(0, "c", {
queries = {
injections = {
c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}})
]])
@ -489,7 +489,7 @@ int x = INT_MAX;
it("should inject a language", function()
exec_lua([[
parser = vim.treesitter.get_parser(0, "c", {
queries = {
injections = {
c = "(preproc_def (preproc_arg) @c @combined) (preproc_function_def value: (preproc_arg) @c @combined)"}})
]])
@ -506,11 +506,39 @@ int x = INT_MAX;
end)
end)
-- Checks that an injection can be configured entirely from a directive:
-- the custom "inject-clang!" directive sets `language`, `combined` and
-- `content` on the match metadata, and the captures themselves are "_"-prefixed.
describe("when providing parsing information through a directive", function()
it("should inject a language", function()
exec_lua([=[
vim.treesitter.add_directive("inject-clang!", function(match, _, _, pred, metadata)
metadata.language = "c"
metadata.combined = true
metadata.content = pred[2]
end)
parser = vim.treesitter.get_parser(0, "c", {
injections = {
c = "(preproc_def ((preproc_arg) @_c (#inject-clang! @_c)))" ..
"(preproc_function_def value: ((preproc_arg) @_a (#inject-clang! @_a)))"}})
]=])
-- A child language tree for "c" must exist ...
eq("table", exec_lua("return type(parser:children().c)"))
-- ... and it must hold two trees.
eq(2, exec_lua("return #parser:children().c:trees()"))
-- Expected ranges: the root tree followed by the two injected regions.
eq({
{0, 0, 7, 0}, -- root tree
{3, 14, 5, 18}, -- VALUE 123
-- VALUE1 123
-- VALUE2 123
{1, 26, 2, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
end)
end)
describe("when using the offset directive", function()
it("should shift the range by the directive amount", function()
exec_lua([[
parser = vim.treesitter.get_parser(0, "c", {
queries = {
injections = {
c = "(preproc_def ((preproc_arg) @c (#offset! @c 0 2 0 -1))) (preproc_function_def value: (preproc_arg) @c)"}})
]])
@ -538,7 +566,7 @@ int x = INT_MAX;
it("should return the correct language tree", function()
local result = exec_lua([[
parser = vim.treesitter.get_parser(0, "c", {
queries = { c = "(preproc_def (preproc_arg) @c)"}})
injections = { c = "(preproc_def (preproc_arg) @c)"}})
local sub_tree = parser:language_for_range({1, 18, 1, 19})
@ -572,28 +600,5 @@ int x = INT_MAX;
eq(result, "value")
end)
end)
-- Checks that `#set!` with a capture argument stores a key/value pair that can
-- be read back from the metadata yielded by `iter_matches`.
-- NOTE(review): the snippet reads `metadata[pattern]` although the directive
-- targets the `@number` capture; presumably this only works because the
-- pattern index and the capture id coincide for this query — confirm.
describe("when setting for a capture match", function()
it("should set/get the data correctly", function()
-- Buffer content parsed by the query below.
insert([[
int x = 3;
]])
local result = exec_lua([[
local result
query = vim.treesitter.parse_query("c", '((number_literal) @number (#set! @number "key" "value"))')
parser = vim.treesitter.get_parser(0, "c")
for pattern, match, metadata in query:iter_matches(parser:parse()[1]:root(), 0) do
result = metadata[pattern].key
end
return result
]])
-- The value stored by `#set!` must be returned unchanged.
eq(result, "value")
end)
end)
end)
end)
Loading…
Cancel
Save