Module:Str find word
From WWII Archives
Documentation for this module may be created at Module:Str find word/doc
require('strict')

local p = {}

local getArgs = require('Module:Arguments').getArgs
local str = require('Module:String')
local yesno = require('Module:Yesno')

local defaultSep = ','                 -- word separator used when |sep= is absent or unusable
local iMaxWords = 16                   -- cap on the number of words read from one word list
local warningIMaxWordsReached = nil    -- warning text, set when a word list hits the cap
local xpLitWordCount = 0               -- number of "literal" words read (passed to the report)
local report -- to be initiated when explain is needed

-- Initialise the /report subpage.
-- Only invoked when 'explain' is asked for.
local function initReport()
	report = require('Module:Str find word/report')
end

-- Decode HTML entities (through mw.text.decode) as early as possible,
-- and reduce every run of whitespace to a single plain space.
-- Returns exactly one value: the cleaned-up string.
local function decodeUnicode(sInput)
	-- mw.ustring.gsub returns (string, count); the outer parentheses drop the
	-- count so it can never leak into a caller's argument or return list.
	-- The parameter is not called 'str', to avoid shadowing Module:String above.
	return (mw.ustring.gsub(mw.text.decode(sInput), '%s+', ' '))
end

-- %-Escape any word (character string) before feeding it into a string pattern function
-- all punctuation (%p) will be %-escaped
local function escape_word(word)
	return str._escapePattern(word)
end

-- Reads and parses a word list and returns a table with words (simple array)
-- words list can be: source, andwords-to-check, orwords-to-check
-- step 1: when case-insensitive, turn string into lowercase
-- step 2: read & remove Literals ("..")
-- step 3: read comma-separated words
-- step 4: when booleans=T, change boolean words into true/false (module:yesno rules)
-- all words returned are trimmed, TODO and all ws into single-plainspace?
-- only T/F words are edited, other words remain, untouched
-- return the table (a straight array)
--
-- tArgs:     parsed argument table; reads .case, .sep, .explain, .booleans
-- sWordlist: the raw word list (string); '' yields an empty table
--
-- At most iMaxWords separator-delimited segments are read. When more are
-- present, they are ignored and warningIMaxWordsReached is set.
local function buildWordTable(tArgs, sWordlist)
	local wordTable = {}

	if sWordlist == '' then
		return wordTable
	end

	-- Step 1: case-insensitive => compare everything in lowercase.
	-- mw.ustring.lower is used so that non-ASCII letters are folded as well
	-- (string.lower works per byte and leaves them untouched).
	if yesno(tArgs.case, true) == false then
		sWordlist = mw.ustring.lower(sWordlist)
	end

	-- Step 2: read "literals",
	-- then remove them from the string:
	-- replaced by single comma; idle & keeps word separation
	-- NOTE: this step is switched off in the source ('if false'); kept as is.
	--- if yesno(tArgs.literals, false) then
	if false then
		local _, sCount
		_, sCount = mw.ustring.gsub(sWordlist, '"', '')
		if sCount > 1 then
			local litWord = ''
			local i, j
			while sCount > 1 do
				-- could do here: only when even?
				i = string.find(sWordlist, '%"', 1, false)
				j = string.find(sWordlist, '%"', i+1, false)
				litWord = mw.text.trim(string.sub(sWordlist, i+1, j-1))
				if #litWord > 0 then
					-- not an empty string or spaces only
					xpLitWordCount = xpLitWordCount + 1
					table.insert(wordTable, litWord)
				end
				-- remove from source, and do next gsub search:
				sWordlist = string.gsub(sWordlist, '%"%s*' .. escape_word(litWord) .. '%s*%"', ',')
				_, sCount = mw.ustring.gsub(sWordlist, '"', '')
			end
		end
	end

	-- Step 3: parse separator-delimited words.
	-- The list is wrapped in separators so that every word sits between two of them.
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep
	local eSep = escape_word(tArgs.sep)
	local patstring = '%f[^' .. eSep .. '][^' .. eSep .. ']+%f[' .. eSep .. ']'
	if yesno(tArgs.explain, true) then
		report.xpMessage('1.eSep: ' .. eSep) -- dev
		report.xpMessage('2.pattern: ' .. patstring) -- dev
	end

	-- Returns the n-th segment, trimmed. The separator itself is passed as the
	-- 'no match' value, so getting it back means the list has no n-th segment.
	local function nthSegment(n)
		return mw.text.trim(str._match(sWordlist, patstring, 1, n, false, tArgs.sep))
	end

	-- Read at most iMaxWords segments. (The former loop condition
	-- 'hitCount <= iMaxWords' read one segment too many, and also raised the
	-- warning for a list of exactly iMaxWords words.)
	local listExhausted = false
	for n = 1, iMaxWords do
		local hitWord = nthSegment(n)
		if hitWord == tArgs.sep then
			-- no more words found in the string
			listExhausted = true
			break
		elseif hitWord ~= '' then
			-- whitespace-only segments are skipped (but do count towards the cap)
			table.insert(wordTable, hitWord)
		end
	end
	-- Warn only when there really is a segment beyond the cap.
	if not listExhausted and nthSegment(iMaxWords + 1) ~= tArgs.sep then
		warningIMaxWordsReached = 'Max number of words (' .. tostring(iMaxWords)
			.. ') reached. Extra words are ignored.'
			.. ' (' .. mw.ustring.sub(mw.text.trim(sWordlist), 1, 90) .. ' ...). '
	end

	-- Step 4: when reading booleans, convert words to true/false
	-- todo: check parameter here not elsewhere
	if tArgs.booleans then -- TODO if Yesno(tArgs.booleans) ...
		for i, v in ipairs(wordTable) do
			local sBool = yesno(v)
			if sBool ~= nil then
				wordTable[i] = tostring(sBool)
			end
		end
	end

	return wordTable
end

-- Check whether a single word is in a table (a simple array of words)
-- returns hitword or nil
local function findWordInTable(sourceWordTable, word)
	for _, candidate in ipairs(sourceWordTable) do
		if candidate == word then
			return word
		end
	end
	return nil
end

-- AND-logic with andWordTable words: ALL words must be found
-- returns {T/F, hittable}
--    T when *all* AND words are found
--    hittable with all hit words
-- note 1: when F, the hittable still contains the words that were found
-- note 2: empty AND-wordlist => True by logic (because: not falsified)
local function checkANDwords(sourceWordTable, andWordTable)
	local bAND = true
	local tHits = {}

	for _, word in ipairs(andWordTable) do
		local hit = findWordInTable(sourceWordTable, word)
		if hit == nil then
			-- Falsified! Could stop here logically, but the loop continues
			-- to complete the hit table (bAND remains false).
			bAND = false
		else
			table.insert(tHits, hit)
		end
	end

	return bAND, tHits
end

-- OR-logic with orWordTable words: at least one word must be found
-- returns {T/F, hittable}
--    True when at least one OR word is found
--    hittable has all hit words
-- note 1: empty OR-wordlist => True by logic (because: not falsified)
-- note 2: while just one hitword is a True result, the hittable contains all words found
local function checkORwords(sourceWordTable, orWordTable)
	local bOR = (#orWordTable == 0) -- an empty list is True by definition
	local tHits = {}

	for _, word in ipairs(orWordTable) do
		local hit = findWordInTable(sourceWordTable, word)
		if hit ~= nil then
			-- Confirmed! Could stop here logically, but complete the check.
			bOR = true
			table.insert(tHits, hit)
		end
	end

	return bOR, tHits
end

-- Determine the requested return value (string).
-- sYeslist is the logically defined result of _main: '' stands for False,
-- anything else (the list of hit words) stands for True.
-- On False: return |no= (or '' when that is nil).
-- On True:  return |yes= when it was entered (even when it is ''),
--           otherwise (yes == nil) return sYeslist itself.
local function yesnoReturnstring(tArgs, sYeslist)
	if sYeslist == '' then
		return tArgs.no or ''
	end
	if tArgs.yes ~= nil then
		-- some |yes= value is entered, could be ''
		return tArgs.yes
	end
	return sYeslist
end

-- True when Module:If preview hands back a non-empty warning string.
local function isPreview()
	local sWarning = require('Module:If preview')._warning( {'is_preview'} )
	return sWarning ~= ''
end

-- Explain options (= report info); interprets parameter explain=
-- returns true / false / 'testcases'
--   explain=true      => true, but only while isPreview() holds
--   explain=testcases => 'testcases', but only on a subpage named 'testcases'
--                        in namespace template: or user:
--   anything else     => false
local function checkExplain(tArgs)
	local sExplain = tArgs.explain

	if not yesno(sExplain, true) then
		return false
	end

	if yesno(sExplain, false) == true then
		-- explicit True, so show in preview only
		return isPreview() == true
	end

	if sExplain ~= 'testcases' then
		return false
	end

	local titleObj = mw.title.getCurrentTitle()
	if titleObj:inNamespaces('template', 'user')
		and titleObj.subpageText == 'testcases'
		and titleObj.isSubpage
	then
		return 'testcases'
	end
	return false
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main function: check for presence of words in source string
-- Checks and returns:
--    when T: the string of all hitwords (default), or the |yes=... input
--    when F: empty string '' (default), or the |no=... input
-- steps:
-- 1. input word strings are prepared (parsed into an array of words)
-- 2. words checks are made (applying AND-logic, OR-logic)
-- 3. final conclusion drawn (T/F)
-- 4. optionally, the preview report is prepared (debug, feedback)
-- 5. based on T or F status, the return value (string) is established and returned
-- note 1: each return value (yes=.., no=..)
--         can be '' (nulstring)
--
-- tArgs is the parsed argument table as produced by parseArgs; read here are
-- .source, .andString, .orString, .sep, .yes, .no and .explain.
function p._main(tArgs)
	local tANDhits
	local tORhits
	-- logical finding:
	local bANDresult = false
	local bORresult = false
	local resultALL = false
	local sYeslist = ''
	local bExplain = yesno(tArgs.explain, true)

	-- Start every call with clean module-level state, so that a warning from
	-- an earlier call in the same Lua session is not reported again.
	warningIMaxWordsReached = nil
	xpLitWordCount = 0
	-- When _main is called directly (not through p.main) with explain set,
	-- the report module is not loaded yet.
	if bExplain and report == nil then
		initReport()
	end

	local sourceWordTable = buildWordTable(tArgs, tArgs.source)
	local andWordTable = buildWordTable(tArgs, tArgs.andString)
	local orWordTable = buildWordTable(tArgs, tArgs.orString)

	if (#sourceWordTable == 0) or (#andWordTable + #orWordTable == 0) then
		-- No words to check
		resultALL = false
		if bExplain then
			report.xpNoWords(tArgs, sourceWordTable, andWordTable, orWordTable)
		end
	else
		bANDresult, tANDhits = checkANDwords(sourceWordTable, andWordTable)
		bORresult, tORhits = checkORwords(sourceWordTable, orWordTable)
		resultALL = (bANDresult) and (bORresult)
	end

	if resultALL then
		-- concat the sYeslist (= all hit words; AND hits first, then OR hits)
		local tAllHits = {}
		for _, word in ipairs(tANDhits) do
			table.insert(tAllHits, word)
		end
		for _, word in ipairs(tORhits) do
			table.insert(tAllHits, word)
		end
		sYeslist = table.concat(tAllHits, tArgs.sep)
	end

	if bExplain then
		if tArgs.yes ~= nil then
			if (tArgs.yes == '') and (tArgs.no == '') then
				report.xpYesNoBothBlank()
			end
		end
		if warningIMaxWordsReached ~= nil then
			report.xpMessage(warningIMaxWordsReached)
		end
		report.xpBuildReport(tArgs, sourceWordTable,
			bANDresult, andWordTable, tANDhits,
			bORresult, orWordTable, tORhits,
			sYeslist, xpLitWordCount)
	end

	return yesnoReturnstring(tArgs, sYeslist)
end

-- Set the word separator.
-- Returns a single character: the first character of |sep= (after decoding),
-- or defaultSep when |sep= is absent, empty, or starts with a whitespace or
-- alphanumeric character (such a character cannot work as a separator).
-- todo what with {{!}}
local function setSep(sSep)
	if sSep == nil then
		return defaultSep
	end

	local sDecoded = decodeUnicode(sSep)
	-- mw.ustring.sub: take one whole character, not one byte
	local newSep = mw.ustring.sub(sDecoded, 1, 1)
	if newSep == '' or mw.ustring.match(newSep, '[%s%w]') ~= nil then
		-- not ok: fall back (formerly this fallback was overwritten right away)
		return defaultSep
	end
	return newSep
end

-- Join the two AND word lists into one string, using newSep in between.
-- Either list may be nil; nil lists are left out. Both nil gives ''.
local function concatAndLists(s1, s2, newSep)
	local tLists = {} -- working table: both s1 and s2 to concat
	if s1 ~= nil then
		table.insert(tLists, s1)
	end
	if s2 ~= nil then
		table.insert(tLists, s2)
	end
	return table.concat(tLists, newSep)
end

-- Turn the raw template arguments into the argument table used by _main.
-- Handles the parameter aliases, decodes the word lists, and normalises the
-- boolean options.
local function parseArgs(origArgs)
	local newArgs = {}

	newArgs['sep'] = setSep(origArgs['sep']) -- do first, needed below

	newArgs['source'] = decodeUnicode(origArgs['s'] or origArgs['source'] or '')
	-- The separator is stored under 'sep'; the former 'newArgs.sSep' was
	-- always nil, so the two lists were glued together without a separator.
	newArgs['andString'] = decodeUnicode(concatAndLists(
		origArgs['w'] or origArgs['word'] or nil,
		origArgs['andw'] or origArgs['andwords'] or nil,
		newArgs.sep)
	)
	newArgs['orString'] = decodeUnicode(origArgs['orw'] or origArgs['orwords'] or '')

	-- boolean options: catch both parameters, also handle nil & nonsense input values:
	newArgs['case'] = yesno(origArgs['case'] or origArgs['casesensitive'] or true, true) -- defaults to True
	newArgs['booleans'] = yesno(origArgs['bool'] or origArgs['booleans'] or false, false) -- defaults to False
	newArgs['literals'] = yesno(origArgs['literals'] or origArgs['lit'] or true, true) -- defaults to True

	newArgs['yes'] = origArgs['yes'] or nil -- nil; default so return sYeslist; keep '' as legal input & return value
	newArgs['no'] = origArgs['no'] or ''

	newArgs['explain'] = origArgs['explain'] or false
	newArgs.explain = checkExplain(newArgs)

	return newArgs
end

-- Entry point for {{#invoke:}}: reads the frame arguments, runs _main, and
-- appends the max-words warning and the explain report when applicable.
function p.main(frame)
	local origArgs = getArgs(frame)
	local tArgs = parseArgs(origArgs)
	local bExplain = yesno(tArgs.explain, true)

	if bExplain then
		initReport()
		report.xpListArguments(origArgs)
	end

	local sReturn = p._main(tArgs)

	if warningIMaxWordsReached ~= nil then
		local preview = require('Module:If preview')
		sReturn = sReturn .. preview._warning({warningIMaxWordsReached})
	end

	if bExplain then
		return sReturn .. report.xpPresent(tArgs.explain)
	else
		return sReturn
	end
end

return p