Modül:Docbunto/parser
Docbunto taglet parser for Scribunto modules.
parser(modname)(function)- Docbunto taglet parser for Scribunto modules.
- Parameter:
modname: Module page name. (string)
- Returns: Module documentation data, if any was found. (table|nil)
process_tag(str)(function • local)- Tag processor function.
- Parameter:
str: Tag string to process. (string)
- Returns: Tag object. (table)
extract_info(documentation)(function • local)- Module info extraction utility.
- Parameter:
documentation: Package doclet info. (table)
- Returns: Information name-value map. (table)
extract_type(item)(function • local)- Type extraction utility.
- Parameter:
item: Item documentation data. (table)
- Returns: Item type. (string)
extract_name(item, project)(function • local)- Name extraction utility.
- Parameters:
- Returns: Item name. (string)
deduce_name(tokens, index, opts)(function • local)- Source code utility for item name detection.
- Parameters:
- Returns: Item name. (string)
code_static_analysis(item)(function • local)- Code analysis utility.
- Parameter:
item: Item documentation data. (table)
hash_map(item)(function • local)- Array hash map conversion utility.
- Parameter:
item: Item documentation data array. (table)
- Returns: Item documentation data map. (table)
export_item(documentation, item_reference, item_index, item_alias, factory_item)(function • local)- Item export utility.
- Parameters:
correct_subitem_tag(item)(function • local)- Subitem tag correction utility.
- Parameter:
item: Item documentation data. (table)
override_item_tag(item, name, alias)(function • local)- Item override tag utility.
- Parameters:
--- Docbunto taglet parser for Scribunto modules.
-- @script parser
-- @author [[dev:User:8nml]]
-- @param {string} modname Module page name.
-- @return {table|nil} Module documentation data, if any was found.
require('strict')
-- Module dependencies.
local dtags = mw.loadData('Module:Docbunto/tags')
local lexer = require('Module:Docbunto/lexer')
-- Docbunto variables & tag tokens.
-- Single/double-letter markers that are compared against the values stored in
-- the tag table (`dtags[tag_name]`) to decide how a tag is handled.
-- Multi tag: continuation comment lines are joined onto the tag value.
local TAG_MULTI = 'M'
-- Not referenced in this file; presumably consumed by other Docbunto modules.
local TAG_ID = 'ID'
-- Not referenced in this file; presumably consumed by other Docbunto modules.
local TAG_SINGLE = 'S'
-- Type tag: its name gives the item type and its value gives the item name.
local TAG_TYPE = 'T'
-- Flag tag: the parsed tag value is replaced by boolean `true`.
local TAG_FLAG = 'N'
-- Multi-line tag: continuation comment lines are appended after a newline.
local TAG_MULTI_LINE = 'ML'
-- Docbunto processing patterns.
-- Leading run of non-terminator characters plus an optional sentence
-- terminator (several scripts' full stops are listed); used with mw.ustring.
local DOCBUNTO_SUMMARY = '^[^.։。।෴۔።]+[.։。।෴۔።]?'
-- Glue placed between comment lines that are folded into one paragraph.
local DOCBUNTO_CONCAT = ' '
-- Optional leading whitespace, an at-sign, then the captured tag name.
local DOCBUNTO_TAG = '^%s*@(%w+)'
-- Tag name followed by the captured remainder of the line.
local DOCBUNTO_TAG_VALUE = DOCBUNTO_TAG .. '(.*)'
-- Tag name, a captured bracketed modifier list, then the captured remainder.
local DOCBUNTO_TAG_MOD_VALUE = DOCBUNTO_TAG .. '%[([^%]]*)%](.*)'
-- Leading brace-delimited type expression (captured) and trailing whitespace.
local DOCBUNTO_TYPE = '^{({*[^}]+}*)}%s*'
--- Tag processor function.
-- Splits a raw tag comment into name, optional modifiers, optional type and
-- value, resolving tag aliases along the way.
-- @function process_tag
-- @param {string} str Tag string to process.
-- @return {table} Tag object.
-- @local
local function process_tag(str)
    local tag = {}

    -- Prefer the form carrying a bracketed modifier list.
    local name, modifier_list, remainder = str:match(DOCBUNTO_TAG_MOD_VALUE)
    if name then
        local flags = {}
        for flag in modifier_list:gmatch('[^%s,]+') do
            flags[flag] = true
        end
        -- The optchain modifier is folded into the plain opt modifier.
        if flags.optchain then
            flags.opt = true
            flags.optchain = nil
        end
        tag.name, tag.modifiers, tag.value = name, flags, remainder
    else
        tag.name, tag.value = str:match(DOCBUNTO_TAG_VALUE)
    end

    tag.value = mw.text.trim(tag.value)

    -- Typed shorthand tags: the first word of the value becomes a braced type.
    local type_alias = dtags._type_alias[tag.name]
    if type_alias then
        if type_alias ~= 'variable' then
            tag.value = type_alias .. ' ' .. tag.value
            tag.name = 'field'
        end
        if tag.value:match('^%S+') ~= '...' then
            tag.value = tag.value:gsub('^(%S+)', '{%1}')
        end
    end

    tag.name = dtags._alias[tag.name] or tag.name

    -- Peel a leading braced type off the value (usage tags keep it verbatim).
    if tag.name ~= 'usage' then
        local type_expr = tag.value:match(DOCBUNTO_TYPE)
        if type_expr then
            -- A leading question mark is shorthand for a nilable type.
            if type_expr:find('^%?') then
                type_expr = type_expr:sub(2) .. '|nil'
            end
            tag.type = type_expr
            tag.value = tag.value:gsub(DOCBUNTO_TYPE, '')
        end
    end

    -- Flag tags carry no text, only presence.
    if dtags[tag.name] == TAG_FLAG then
        tag.value = true
    end

    return tag
end
--- Module info extraction utility.
-- Collects the values of module information tags; a tag that occurs more than
-- once is turned into a bulleted list with one entry per occurrence.
-- @function extract_info
-- @param {table} documentation Package doclet info.
-- @return {table} Information name-value map.
-- @local
local function extract_info(documentation)
    local info = {}
    for _, tag in ipairs(documentation.tags) do
        local key = tag.name
        if dtags._module_info[key] then
            local existing = info[key]
            if not existing then
                info[key] = tag.value
            else
                -- Promote the first value to a bullet before appending more.
                if not existing:find('^%* ') then
                    existing = '* ' .. existing
                end
                info[key] = existing .. '\n* ' .. tag.value
            end
        end
    end
    return info
end
--- Type extraction utility.
-- Uses the first type tag found on the item. A variable tag additionally
-- marks the item as local, and generic non-project tags get their declared
-- type appended after a semicolon.
-- @function extract_type
-- @param {table} item Item documentation data.
-- @return {string} Item type.
-- @local
local function extract_type(item)
    for _, tag in ipairs(item.tags) do
        if dtags[tag.name] == TAG_TYPE then
            local item_type = tag.name
            if item_type == 'variable' then
                local implied_local = process_tag('@local')
                table.insert(item.tags, implied_local)
                item.tags['local'] = implied_local
            end
            local is_generic = dtags._generic_tags[item_type]
            local is_project = dtags._project_level[item_type]
            if is_generic and not is_project and tag.type then
                item_type = item_type .. '; ' .. tag.type
            end
            return item_type
        end
    end
    return nil
end
--- Name extraction utility.
-- The value of the first type tag wins. For project-level items without one,
-- the name falls back to the identifier returned by the module, or to the
-- page name with its namespace prefix removed.
-- @function extract_name
-- @param {table} item Item documentation data.
-- @param[opt] {table} opts Configuration options.
-- @param[opt] {boolean} opts.project Whether the item is project-level.
-- @return {string|nil} Item name.
-- @local
local function extract_name(item, opts)
    opts = opts or {}
    local item_name
    for _, tag in ipairs(item.tags) do
        if dtags[tag.name] == TAG_TYPE then
            item_name = tag.value
            break
        end
    end
    if item_name or not opts.project then
        return item_name
    end

    -- Project level: look at what the module returns.
    item_name = item.code:match('\nreturn%s+([%w_]+)')
    if item_name == 'p' and not item.tags['alias'] then
        local implied_alias = { name = 'alias', value = 'p' }
        item.tags['alias'] = implied_alias
        table.insert(item.tags, implied_alias)
    end
    if item_name and item_name ~= 'p' then
        return item_name
    end

    -- Derive the name from the page title. The localized namespace name is
    -- escaped so punctuation in it is not read as pattern syntax.
    local namespace_name = (mw.site.namespaces[828].name:gsub('%p', '%%%0'))
    local page_name = item.filename
        :gsub('^' .. namespace_name .. ':', '')
        :gsub('^(%u)', mw.ustring.lower)
        :gsub('/', '.'):gsub(' ', '_')
    return page_name
end
--- Source code utility for item name detection.
-- Joins the data of consecutive non-keyword tokens, walking either backwards
-- or forwards from the given index. The forward walk also stops at a token
-- that begins with an opening parenthesis.
-- @function deduce_name
-- @param {table} tokens Stream tokens for first line.
-- @param {number} index Stream token index.
-- @param {table} opts Configuration options.
-- @param[opt] {boolean} opts.lookahead Whether a variable name succeeds the index.
-- @param[opt] {boolean} opts.lookbehind Whether a variable name precedes the index.
-- @return {string} Item name.
-- @local
local function deduce_name(tokens, index, opts)
    local pieces = {}
    if opts.lookbehind then
        local pos = index
        while pos >= 1 and tokens[pos].type ~= 'keyword' do
            table.insert(pieces, 1, tokens[pos].data)
            pos = pos - 1
        end
    elseif opts.lookahead then
        local pos = index
        while pos <= #tokens do
            local token = tokens[pos]
            if token.type == 'keyword' or token.data:find('^%(') then
                break
            end
            pieces[#pieces + 1] = token.data
            pos = pos + 1
        end
    end
    return table.concat(pieces)
end
--- Code analysis utility.
-- Inspects the first line of an item's code to infer a name and type when the
-- doc block did not provide them, and marks items declared with the local
-- keyword as local. Existing name and type values on the item are kept.
-- @function code_static_analysis
-- @param {table} item Item documentation data.
-- @local
local function code_static_analysis(item)
    -- Fall back to an empty list if the lexer yields no first line.
    local tokens = lexer(item.code:match('^[^\n]*'))[1] or {}

    -- Drop whitespace tokens. Walk backwards so a removal never shifts a
    -- token that has not been examined yet.
    for index = #tokens, 1, -1 do
        if tokens[index].type == 'whitespace' then
            table.remove(tokens, index)
        end
    end

    local item_name, item_type
    for index, token in ipairs(tokens) do
        -- Only the first assignment names the item; a later assignment on the
        -- same line belongs to the item's body.
        if token.data == '=' and not item_name then
            item_name = deduce_name(tokens, index - 1, { lookbehind = true })
        end
        if token.data == 'function' then
            item_type = 'function'
            -- A named declaration has the identifier right after the keyword;
            -- an anonymous one is followed by its parameter list (or nothing).
            local following = tokens[index + 1]
            if following and following.data ~= '(' then
                item_name = deduce_name(tokens, index + 1, { lookahead = true })
            end
        end
        if token.data == '{' or token.data == '{}' then
            item_type = 'table'
        end
        if token.data == 'local' and not (item.tags['private'] or item.tags['local'] or item.type == 'type') then
            local implied_local = process_tag('@local')
            table.insert(item.tags, implied_local)
            item.tags['local'] = implied_local
        end
    end

    item.name = item.name or item_name or ''
    item.type = item.type or item_type
end
--- Array hash map conversion utility.
-- Adds name-keyed entries to the given array in place. Each entry is a clone
-- of the element; names that occur more than once map to a list of clones.
-- @function hash_map
-- @param {table} array Item documentation data array.
-- @return {table} Item documentation data map (the same table that was passed in).
-- @local
local function hash_map(array)
    for _, element in ipairs(array) do
        local key = element.name
        local existing = array[key]
        local copy = mw.clone(element)
        if not existing then
            -- First occurrence of this name.
            array[key] = copy
        elseif existing.name then
            -- Second occurrence: promote the single entry to a list.
            array[key] = { existing, copy }
        else
            -- Already a list: append.
            table.insert(existing, copy)
        end
    end
    return array
end
--- Item export utility.
-- Marks every item whose name equals the reference as exported: privacy tags
-- are stripped, a variable type becomes a member type, and the public alias
-- and hierarchy are computed.
-- @function export_item
-- @param {table} documentation Package documentation data.
-- @param {string} item_reference Identifier name for item.
-- @param {number} item_index Index of the enclosing item in the package item list.
-- @param {string} item_alias Export alias for item.
-- @param {boolean} factory_item Whether the documentation item is a factory function.
-- @local
local function export_item(documentation, item_reference, item_index, item_alias, factory_item)
    for _, item in ipairs(documentation.items) do
        if item_reference == item.name then
            item.tags['local'] = nil
            item.tags['private'] = nil
            -- Walk backwards so removing an entry cannot skip its neighbour.
            for index = #item.tags, 1, -1 do
                if dtags._privacy_tags[item.tags[index].name] then
                    table.remove(item.tags, index)
                end
            end
            -- The type may not have been inferred yet; it is defaulted later.
            if item.type then
                item.type = item.type:gsub('variable', 'member')
            end
            if factory_item then
                item.alias =
                    documentation.items[item_index].tags['factory'].value ..
                    (item_alias:find('^%[') and '' or (not item.tags['static'] and ':' or '.')) ..
                    item_alias
            else
                item.alias =
                    ((documentation.tags['alias'] or {}).value or documentation.name) ..
                    (item_alias:find('^%[') and '' or (documentation.type == 'classmod' and not item.tags['static'] and ':' or '.')) ..
                    item_alias
            end
            item.hierarchy = mw.text.split((item.alias:gsub('["\']?%]', '')), '[.:%[\'""]+')
        end
    end
end
--- Subitem tag correction utility.
-- On function items, field tags are really parameters: they are renamed and
-- merged into the item's param entry (after any existing params), and the
-- field entry is removed. Items of other types are left untouched.
-- @function correct_subitem_tag
-- @param {table} item Item documentation data.
-- @local
local function correct_subitem_tag(item)
    local field_tag = item.tags['field']
    if item.type ~= 'function' or not field_tag then
        return
    end

    -- A single tag has a name; several tags are stored as a nameless list.
    local fields = field_tag.name and { field_tag } or field_tag
    for _, tag_el in ipairs(fields) do
        tag_el.name = 'param'
    end

    local param_tag = item.tags['param']
    if not param_tag then
        -- No params yet: the fields become the params as they are.
        param_tag = field_tag
    else
        -- Promote a single param to a list, then append each field once.
        if param_tag.name then
            param_tag = { param_tag }
        end
        for _, tag_el in ipairs(fields) do
            table.insert(param_tag, tag_el)
        end
    end

    -- Store the merged result; otherwise the fields are lost below.
    item.tags['param'] = param_tag
    item.tags['field'] = nil
end
--- Item override tag utility.
-- When the item carries the named tag, the tag's value is copied onto the
-- item under the alias if one is given, or under the tag name otherwise.
-- @function override_item_tag
-- @param {table} item Item documentation data.
-- @param {string} name Tag name.
-- @param[opt] {string} alias Target alias for tag.
-- @local
local function override_item_tag(item, name, alias)
    local tag = item.tags[name]
    if not tag then
        return
    end
    local target = alias or name
    item[target] = tag.value
end
--- Export table walker.
-- Reads the entries of an export table from its token list and registers each
-- one through the item export utility. Keyed entries use the key as the alias
-- and the value as the item reference; positional entries are aliased by
-- their bracketed sequence index.
-- @function export_subtokens
-- @param {table} documentation Package documentation data.
-- @param {table} subtokens Export tokens, starting with the opening brace.
-- @param {number} item_index Index of the enclosing item in the package item list.
-- @param {boolean} factory_mode Whether the export table belongs to a factory function.
-- @local
local function export_subtokens(documentation, subtokens, item_index, factory_mode)
    local separator = { [','] = true, [';'] = true }
    local brace = { ['{'] = true, ['}'] = true }
    local item_reference, item_alias = '', ''
    local sequence_index, has_key = 0, false

    -- Registers the entry collected so far, if any, and resets the state.
    local function flush()
        if item_reference ~= '' or item_alias ~= '' then
            if not has_key then
                -- Positional entry: the collected text is the reference.
                sequence_index = sequence_index + 1
                item_reference = item_alias
                item_alias = '[' .. tostring(sequence_index) .. ']'
            end
            if item_reference ~= '' then
                export_item(documentation, item_reference, item_index, item_alias, factory_mode)
            end
        end
        item_reference, item_alias, has_key = '', '', false
    end

    -- The first subtoken is expected to be the opening brace, so start at 2.
    local index = 2
    local subtoken = subtokens[index]
    while subtoken and not brace[subtoken.data] do
        if subtoken.data == '=' then
            has_key = true
        elseif separator[subtoken.data] then
            flush()
        elseif has_key then
            item_reference = item_reference .. subtoken.data
        else
            item_alias = item_alias .. subtoken.data
        end
        index = index + 1
        subtoken = subtokens[index]
    end
    -- The last entry usually has no trailing separator.
    flush()
end

-- Docbunto package items.
-- Parses the doc comments of the given module page and returns the collected
-- documentation data, or nothing when the page has no content or no doc
-- comments. Parsing runs inside a protected call, so whatever was collected
-- before an internal error is still returned.
return function(modname)
    -- The title object is nil when the page name is invalid.
    local title = mw.title.new(modname)
    local content = title and title:getContent()
    if not content or not (content:match('%-%-%-') or content:match('%s+@%w+')) then
        return
    end

    -- Remove leading escapes.
    content = content:gsub('^%-%-+%s*<[^>]+>\n', '')
    -- Remove closing pretty comments.
    content = content:gsub('\n%-%-%-%-%-+(\n[^-]+)', '\n-- %1')

    -- Content lexing.
    local lines = lexer(content)
    local tokens = {}
    local dummy_token = {
        data = '',
        posFirst = 1,
        posLast = 1
    }
    local token_closure = 0
    for _, line in ipairs(lines) do
        if #line == 0 then
            -- Empty lines get a placeholder token; inside a long bracket it
            -- inherits the type of the token that opened it.
            dummy_token.type = token_closure == 0
                and 'whitespace'
                or tokens[#tokens].type
            table.insert(tokens, mw.clone(dummy_token))
        else
            for _, token in ipairs(line) do
                if token.data:find('^%[=*%[$') or token.data:find('^%-%-%[=*%[$') then
                    token_closure = 1
                end
                if token.data:find(']=*]') then
                    token_closure = 0
                end
                table.insert(tokens, token)
            end
        end
    end

    -- Start documentation data.
    local documentation = {}
    documentation.filename = modname
    documentation.description = ''
    documentation.code = content
    documentation.comments = {}
    documentation.tags = {}
    documentation.items = {}

    local line_no = 0
    local item_index = 0

    -- Taglet tracking variables.
    local start_mode = true
    local comment_mode = false
    local doctag_mode = false
    local export_mode = false
    local factory_mode = false
    local return_mode = false

    local comment_tail = ''
    local tag_name = ''
    local new_item = false
    local new_tag = false
    local new_item_code = false
    local pretty_comment = false
    local comment_brace = false

    local t, i = tokens[1], 1

    -- Errors raised while parsing are deliberately contained here so that a
    -- malformed page still yields the data gathered up to that point.
    pcall(function()
        while t do
            -- Taglet variable update.
            new_item = t.data:find('^%-%-%-') or t.data:find('^%-%-%[%[$')
            comment_tail = t.data:gsub('^%-%-+', '')
            tag_name = comment_tail:match(DOCBUNTO_TAG)
            tag_name = dtags._alias[tag_name] or tag_name
            new_tag = dtags[tag_name]
            pretty_comment =
                t.data:find('^%-+$') or
                t.data:find('[^-]+%-%-+%s*$') or
                t.data:find('</?nowiki>') or
                t.data:find('</?pre>')
            comment_brace =
                t.data:find('^%-%-%[%[$') or
                t.data:find('^%-%-%]%]$') or
                t.data:find('^%]%]%-%-$')
            export_mode = tag_name == 'export'
            local tags, subtokens, separator

            -- Line counter.
            if t.posFirst == 1 then
                line_no = line_no + 1
            end

            -- Data insertion logic.
            if t.type == 'comment' then
                if new_item then comment_mode = true end

                -- Module-level documentation taglet.
                if start_mode then
                    table.insert(documentation.comments, t.data)
                    if comment_mode and not new_tag and not doctag_mode and not comment_brace and not pretty_comment then
                        separator = mw.text.trim(comment_tail):find('^[{|!}:#*=]+[%s-}]+')
                            and '\n'
                            or (#documentation.description ~= 0 and DOCBUNTO_CONCAT or '')
                        documentation.description = documentation.description .. separator .. mw.text.trim(comment_tail)
                    end
                    if new_tag and not export_mode then
                        doctag_mode = true
                        table.insert(documentation.tags, process_tag(comment_tail))
                    elseif doctag_mode and not comment_brace and not pretty_comment then
                        tags = documentation.tags
                        if dtags[tags[#tags].name] == TAG_MULTI then
                            separator = mw.text.trim(comment_tail):find('^[{|!}:#*=]+[%s-}]+')
                                and '\n'
                                or DOCBUNTO_CONCAT
                            tags[#tags].value = tags[#tags].value .. separator .. mw.text.trim(comment_tail)
                        elseif dtags[tags[#tags].name] == TAG_MULTI_LINE then
                            tags[#tags].value = tags[#tags].value .. '\n' .. comment_tail
                        end
                    end
                end

                -- Documentation item detection.
                if not start_mode and (new_item or (new_tag and tokens[i - 1].type ~= 'comment')) and not export_mode then
                    table.insert(documentation.items, {})
                    item_index = item_index + 1
                    documentation.items[item_index].lineno = line_no
                    documentation.items[item_index].code = ''
                    documentation.items[item_index].comments = {}
                    documentation.items[item_index].description = ''
                    documentation.items[item_index].tags = {}
                end
                if not start_mode and comment_mode and not new_tag and not doctag_mode and not comment_brace and not pretty_comment then
                    separator = mw.text.trim(comment_tail):find('^[{|!}:#*=]+[%s-}]+')
                        and '\n'
                        or (#documentation.items[item_index].description ~= 0 and DOCBUNTO_CONCAT or '')
                    documentation.items[item_index].description =
                        documentation.items[item_index].description ..
                        separator ..
                        mw.text.trim(comment_tail)
                end
                if not start_mode and new_tag and not export_mode then
                    doctag_mode = true
                    table.insert(documentation.items[item_index].tags, process_tag(comment_tail))
                elseif not start_mode and doctag_mode and not comment_brace and not pretty_comment then
                    tags = documentation.items[item_index].tags
                    if dtags[tags[#tags].name] == TAG_MULTI then
                        separator = mw.text.trim(comment_tail):find('^[{|!}:#*=]+[%s-}]+')
                            and '\n'
                            or DOCBUNTO_CONCAT
                        tags[#tags].value = tags[#tags].value .. separator .. mw.text.trim(comment_tail)
                    elseif dtags[tags[#tags].name] == TAG_MULTI_LINE then
                        tags[#tags].value = tags[#tags].value .. '\n' .. comment_tail
                    end
                end
                if not start_mode and (comment_mode or doctag_mode) then
                    table.insert(documentation.items[item_index].comments, t.data)
                end

                -- Export tag support.
                if export_mode then
                    -- An export tag that does not start its line sits inside
                    -- a function body and is treated as a factory export.
                    factory_mode = t.posFirst ~= 1
                    if factory_mode then
                        documentation.items[item_index].exports = true
                    else
                        documentation.exports = true
                    end
                    subtokens = {}
                    -- Module exports consume the rest of the file; factory
                    -- exports stop at the next end keyword.
                    while t and (not factory_mode or t.data ~= 'end') do
                        if factory_mode then
                            documentation.items[item_index].code =
                                documentation.items[item_index].code ..
                                (t.posFirst == 1 and '\n' or '') ..
                                t.data
                        end
                        t, i = tokens[i + 1], i + 1
                        if t and t.posFirst == 1 then
                            line_no = line_no + 1
                        end
                        if t and t.type ~= 'whitespace' and t.type ~= 'keyword' and t.type ~= 'comment' then
                            table.insert(subtokens, t)
                        end
                    end
                    export_subtokens(documentation, subtokens, item_index, factory_mode)
                    -- Stop when the token stream is exhausted.
                    if not factory_mode or not t then
                        break
                    end
                    factory_mode = false
                end

            -- Data insertion logic.
            elseif comment_mode or doctag_mode then
                -- Package data post-processing.
                if start_mode then
                    documentation.tags = hash_map(documentation.tags)
                    documentation.name = extract_name(documentation, { project = true })
                    documentation.info = extract_info(documentation)
                    documentation.type = extract_type(documentation) or 'module'
                    if #documentation.description ~= 0 then
                        documentation.summary = mw.ustring.match(documentation.description, DOCBUNTO_SUMMARY)
                        documentation.description = mw.ustring.gsub(documentation.description, DOCBUNTO_SUMMARY .. '%s*', '')
                    end
                    documentation.description = documentation.description:gsub('%s%s+', '\n\n')
                    documentation.executable = dtags._code_types[documentation.type] and true or false
                    correct_subitem_tag(documentation)
                    override_item_tag(documentation, 'name')
                    override_item_tag(documentation, 'alias')
                    override_item_tag(documentation, 'summary')
                    override_item_tag(documentation, 'description')
                    override_item_tag(documentation, 'class', 'type')
                end
                -- Item data post-processing.
                if item_index ~= 0 then
                    documentation.items[item_index].tags = hash_map(documentation.items[item_index].tags)
                    documentation.items[item_index].name = extract_name(documentation.items[item_index])
                    documentation.items[item_index].type = extract_type(documentation.items[item_index])
                    if #documentation.items[item_index].description ~= 0 then
                        documentation.items[item_index].summary = mw.ustring.match(documentation.items[item_index].description, DOCBUNTO_SUMMARY)
                        documentation.items[item_index].description = mw.ustring.gsub(documentation.items[item_index].description, DOCBUNTO_SUMMARY .. '%s*', '')
                    end
                    documentation.items[item_index].description = documentation.items[item_index].description:gsub('%s%s+', '\n\n')
                    new_item_code = true
                end
                -- Documentation block reset.
                start_mode = false
                comment_mode = false
                doctag_mode = false
                export_mode = false
            end

            -- Don't concatenate module return value into item code.
            if t.data == 'return' and t.posFirst == 1 then
                return_mode = true
            end

            -- Item code concatenation.
            if item_index ~= 0 and not doctag_mode and not comment_mode and not return_mode then
                separator = #documentation.items[item_index].code ~= 0 and t.posFirst == 1 and '\n' or ''
                documentation.items[item_index].code = documentation.items[item_index].code .. separator .. t.data
                -- Code analysis on item head.
                if new_item_code and documentation.items[item_index].code:find('\n') then
                    code_static_analysis(documentation.items[item_index])
                    new_item_code = false
                end
            end

            t, i = tokens[i + 1], i + 1
        end

        documentation.lineno = line_no

        local package_name = (documentation.tags['alias'] or {}).value or documentation.name
        local package_alias = (documentation.tags['alias'] or {}).value or 'p'
        local export_ptn = '^%s([.[])'

        for _, item in ipairs(documentation.items) do
            if item.name == package_alias or (item.name and item.name:match('^' .. package_alias .. '[.[]')) then
                item.alias = item.name:gsub(export_ptn:format(package_alias), documentation.name .. '%1')
            end
            if
                item.name == package_name or
                (item.name and item.name:find(export_ptn:format(package_name))) or
                (item.alias and item.alias:find(export_ptn:format(package_name)))
            then
                item.export = true
            end
            if item.name and (item.name:find('[.:]') or item.name:find('%[[\'"]')) then
                item.hierarchy = mw.text.split((item.name:gsub('["\']?%]', '')), '[.:%[\'""]+')
            end
            item.type = item.type or ((item.alias or item.name or ''):find('[.[]') and 'member' or 'variable')
            correct_subitem_tag(item)
            override_item_tag(item, 'name')
            override_item_tag(item, 'alias')
            override_item_tag(item, 'summary')
            override_item_tag(item, 'description')
            override_item_tag(item, 'class', 'type')
        end

        -- Item sorting for documentation.
        table.sort(documentation.items, function(item1, item2)
            local inaccessible1 = item1.tags['local'] or item1.tags['private']
            local inaccessible2 = item2.tags['local'] or item2.tags['private']
            -- Send package items to the top.
            if item1.export and not item2.export then
                return true
            elseif item2.export and not item1.export then
                return false
            -- Send private items to the bottom.
            elseif inaccessible1 and not inaccessible2 then
                return false
            elseif inaccessible2 and not inaccessible1 then
                return true
            -- Sort via source code order by default.
            else
                return item1.lineno < item2.lineno
            end
        end)
    end)

    return documentation
end