Module:Authority control

From WWII Archives

Revision as of 22:02, 2 January 2024 by wc>RP88 (comment out ISNI grouping code (see talk page))
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Authority control/doc

--[[  
  __  __           _       _           _         _   _                _ _                           _             _ 
 |  \/  | ___   __| |_   _| | ___ _   / \  _   _| |_| |__   ___  _ __(_) |_ _   _    ___ ___  _ __ | |_ _ __ ___ | |
 | |\/| |/ _ \ / _` | | | | |/ _ (_) / _ \| | | | __| '_ \ / _ \| '__| | __| | | |  / __/ _ \| '_ \| __| '__/ _ \| |
 | |  | | (_) | (_| | |_| | |  __/_ / ___ \ |_| | |_| | | | (_) | |  | | |_| |_| | | (_| (_) | | | | |_| | | (_) | |
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)_/   \_\__,_|\__|_| |_|\___/|_|  |_|\__|\__, |  \___\___/|_| |_|\__|_|  \___/|_|
                                                                            |___/                                    
This module is intended to be the engine behind "Template:Authority control".

Please do not modify this code without applying the changes first at "Module:Authority control/sandbox" and testing 
at "Module:Authority control/testcases".

Authors and maintainers:
* User:Jarekt - original version 

]]

local properties = require('Module:Authority control/conf')
local core       = require('Module:Core')

-- ==================================================
-- === Internal functions ===========================
-- ==================================================

local function getSitelink(item, lang)
	-- Find a Wikipedia sitelink for `item`: `lang` is tried first, then each of
	-- its fallback languages in the order reported by mw.language.getFallbacksFor.
	-- Returns an interwiki target of the form "w:<language>:<title>", or nil when
	-- none of the tried wikis has a sitelink.
	local candidates = mw.language.getFallbacksFor(lang)
	table.insert(candidates, 1, lang) -- requested language has top priority
	for _, code in ipairs(candidates) do
		local title = mw.wikibase.sitelink( item, code .. 'wiki' )
		if title then
			return 'w:' .. code .. ':' .. title
		end
	end
	return nil
end

-- ==================================================
local function getIdentifierNameLink( lang, item1, item2, label )
-- Identifier names, like VIAF, LCCN, ISNI, need to be linked to the articles about them if possible.
-- Alternatively they can be linked to the articles for the institutions that issue them.
-- INPUTS:
--  * lang  - language code used to pick the Wikipedia edition
--  * item1 - q-code of the item describing the identifier itself (may be nil or '')
--  * item2 - q-code of the item describing the issuing institution (may be nil or '')
--  * label - text to display for the link
-- OUTPUT:
--  * wikitext link showing `label`; plain `label` when neither q-code is available
	local hasItem1 = item1 ~= nil and item1 ~= ''
	local hasItem2 = item2 ~= nil and item2 ~= ''
	local id_name_URL = nil
	-- 1) try wikipedia sitelink for the identifier in the user's language and its fallbacks
	if hasItem1 then
		id_name_URL = getSitelink(item1, lang)
	end
	-- 2) try wikipedia sitelink for the issuing institution in the same languages
	if id_name_URL == nil and hasItem2 then
		id_name_URL = getSitelink(item2, lang)
	end
	if id_name_URL then
		return string.format("[[%s|%s]]", id_name_URL, label) -- link to wikipedia
	end
	-- 3) no wikipedia article: link to wikidata, preferring the identifier's own item.
	-- Passing nil to "%s" raises an error in Lua 5.1 and an empty q-code yields a
	-- broken "[[d:|...]]" link, so only q-codes that are actually present are used.
	local fallbackItem = nil
	if hasItem1 then
		fallbackItem = item1
	elseif hasItem2 then
		fallbackItem = item2
	end
	if fallbackItem then
		return string.format("[[d:%s|%s]]", fallbackItem, label) -- link to wikidata
	end
	return label -- nothing to link to
end

-- ==================================================
-- Create link to a single identifier
-- INPUTS:
--  * val - value of the identifier
--  * P - property record, with URL_format and optional interwiki_format strings to create links
--  * params - additional parameters related to this type of identifiers. Single item from "conf"
--  * color - color of the link; "blue" leaves the value unstyled
-- OUTPUT:
--  * wikitext for the linked value, followed by a red error message if the value fails
--    the params.regexp pattern or the params.verify check; '' when val is nil or empty
local function getIdentifierValLink(val, P, params, color)
	if not val or val=='' then
		return ''
	end
	-- check if identifier is in the right format
	local mismatchStr = ''
	local val_ = val:gsub( ' ', '' ) -- remove spaces
	if (params.regexp and not val:match( params.regexp )) then
		mismatchStr  = string.format("<span style=\"color:red\">[does not match %s pattern]</span>", params.regexp)
	elseif (params.verify) then -- check if special "Verify" function is present
		mismatchStr = params.verify(val_) or '' -- add error message if any
	end
	-- identifier_value_URL
	if color~="blue" then
		val = string.format('<span style=\"color:%s\">%s</span>', color, val)
	end
	local val_link
	if P.interwiki_format and P.interwiki_format ~= '' then
		val_link = string.format("[[%s:%s|%s]]", P.interwiki_format, val_, val)
	elseif P.URL_format and P.URL_format ~= '' then
		-- URL part of the link for the identifier value. The value is supplied through a
		-- function so that any "%" it contains is inserted literally instead of being
		-- interpreted as a capture reference by gsub.
		local val_URL = P.URL_format:gsub('%$1', function() return val_ end)
		val_link = string.format("[%s %s]", val_URL, val)
	else
		val_link = val -- no link format known for this property: show the bare value
	end
	return string.format("<span class=\"plainlinks\">%s</span>%s", val_link, mismatchStr) -- link to the identifier's website
end 

-- ==================================================
-- Normalize an LCCN from the slash-separated form to the compact form:
--   "n/79/63767" -> "n79063767"
-- The slash-separated form was used as input by {{Authority Control}} templates;
-- the compact form is the one used by wikidata.
-- Values without two slashes (and nil) are returned unchanged.
local function fixLCCN(id)
	if not id then
		return id
	end
	local prefix, year, serial = string.match(id, "([%a%d]*)/([%a%d]*)/([%a%d]*)")
	if not serial then
		return id -- not in the slash-separated form
	end
	-- left-pad the serial number with zeros to at least 6 characters
	local missing = 6 - string.len(serial)
	if missing > 0 then
		serial = string.rep("0", missing) .. serial
	end
	return prefix .. year .. serial
end -- fixLCCN

-- ==================================================
-- Check that the final character of an identifier is the expected check character.
-- ISNI and several other identifiers use their last character for verification.
-- The leading characters are treated as decimal digits: each is added to a running
-- total which is then doubled; the expected check value is (12 - total mod 11) mod 11,
-- written as "X" when it equals 10.
-- Returns '' when the check character matches, otherwise a red error message
-- naming the expected character.
local function verifyLastDigit( id )
	local acc = 0
	local pos = 1
	while pos < #id do -- every character except the last one
		acc = (acc + (string.byte( id, pos ) - 48)) * 2 -- 48 is the byte value of "0"
		pos = pos + 1
	end

	local expected = tostring((12 - acc % 11) % 11)
	if expected == '10' then
		expected = "X"
	end

	if expected ~= string.sub( id, -1) then
		return "<span style=\"color:red\">[last digit should be " .. expected .. "]</span>"
	end
	return ''
end

-- ==================================================
-- === Settings =====================================
-- ==================================================
-- In order to add a new identifier associated with Wikidata property do the following 
-- 1) go to [[Template:Authority control/IdentifierList]] and verify that the property number is on the list; if not, then edit the page to add it
-- 2) copy code generated at [[Template:Authority control/IdentifierList]] to protected [[Module:Authority control/conf]]
-- 3) add the property to the "conf" list below

-- 'Module:Authority control/conf' (loaded at the top of this file as "properties") holds hardwired data 
-- derived from Wikidata's properties of properties

--conf  holds list of identifiers to be displayed, in display order. Fields of each record:
--  * label    - name shown for the identifier; its lower-cased form is the key looked up in "args"
--  * property - Wikidata property holding the identifier
--  * lang     - MediaWiki language code the identifier is most relevant to; identifiers whose lang
--               equals the requested language, or is '', go to the high priority list
--  * regexp   - optional Lua pattern the value is expected to match ('' matches everything)
--  * verify   - optional function returning an error message ('' if the value is fine)
local conf = {
    -- people 
	{label='VIAF'        , property='P214' , lang=''  , regexp='^%d+$' },
	{label='ISNI'        , property='P213' , lang=''  , regexp='^%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d[%dX]$', verify=verifyLastDigit }, 
	{label='ORCID'       , property='P496' , lang=''  , regexp='^0000%-000[1-3]%-%d%d%d%d%-%d%d%d[%dX]$' },
	{label='ULAN'        , property='P245' , lang=''  , regexp='^500%d%d%d%d%d%d$' }, -- 'Union List of Artist Names' by Getty Research Institute
	{label='ResearcherID', property='P1053', lang=''  , regexp='^[A-Z]+%-%d%d%d%d%-[12][90]%d%d$' }, 
	{label='LCCN'        , property='P244' , lang='en', regexp='^[ns][broshj]?%d%d%d%d%d%d%d%d%d?%d?$' }, -- Library of Congress Authorities
	{label='GND'         , property='P227' , lang='de', regexp='^[%dX%-]+$'},
	{label='SELIBR'      , property='P906' , lang='sv', regexp='^%d+$' }, -- National Library of Sweden
	{label='SUDOC'       , property='P269' , lang='fr', regexp='^%d%d%d%d%d%d%d%d[%dxX]$' },    
	{label='BNF'         , property='P268' , lang='fr', regexp='^%d+%w?$' }, -- Bibliothèque nationale de France
	{label='BPN'         , property='P651' , lang='nl', regexp='^%d%d%d%d%d%d%d%d$' }, -- Biografisch Portaal number
	{label='NAID'        , property='P1225', lang='en', regexp='^%d+$' }, -- NARA ID (redirect for US National Archives Identifier (P1225))
	{label='Museofile'   , property='P539' , lang='fr', regexp='^M%d%d%d%d%-?%d?%d?$' }, --Ministry of Culture (France)
	{label='NDL'         , property='P349' , lang='ja', regexp='^0?%d%d%d%d%d%d%d%d$' }, -- National Diet Library (of Japan)
	{label='NLA'         , property='P409' , lang='en', regexp='^[1-9]%d*$' }, -- National Library of Australia
	{label='BIBSYS'      , property='P1015', lang='no', regexp='^%d+$' }, -- Norwegian information system BIBSYS
	{label='HDS'         , property='P902' , lang='de', regexp='^%d%d%d?%d?%d?%d?$' },  -- Historical Dictionary of Switzerland
	{label='MusicBrainz' , property='P434' , lang='en', regexp='^[-%x]+$' }, 
	{label='MGP'         , property='P549' , lang='en', regexp='^%d%d?%d?%d?%d?%d?$' },  -- Mathematics Genealogy Project
	{label='NCL'         , property='P1048', lang='zh', regexp='^%d+$' },  --National Central Library (Taiwan)
	{label='NKC'         , property='P691' , lang='cs', regexp='^%l%l%l?%l?%d%d%d?%d?%d?%d?%d?%d?%d?%d?%d?%d?%d?%d?$' },  --National Library of the Czech Republic
	{label='Léonore'     , property='P640' , lang='fr', regexp='^[LHC%/%d]+$' }, 
	{label='SBN'         , property='P396' , lang='it'},  -- Istituto Centrale per il Catalogo Unico /  National Library Service (SBN) of Italy
	{label='RSL'         , property='P947' , lang='ru', regexp='^%d%d%d%d%d%d%d%d%d$' },  --Russian State Library
	{label='Botanist'    , property='P428' , lang='en' }, 
	{label='US Congress' , property='P1157', lang='en', regexp='^%u00[01]%d%d%d' }, 
	{label='BNE'         , property='P950' , lang='es', regexp='' }, --Biblioteca Nacional de España
	{label='CALIS'       , property='P270' , lang='zh'}, --China Academic Library and Information 
	{label='CiNii'       , property='P271' , lang='ja', regexp='^DA%d%d%d%d%d%d%d[%dX]$' }, 
	{label='TLS'         , property='P1362', lang='de', regexp='' }, -- Theaterlexikon der Schweiz
	{label='SIKART'      , property='P781' , lang='de', regexp='^%d%d%d%d%d%d%d%d?%d?%d?$' }, -- Swiss
	{label='NLP'         , property='P1695', lang='pl', regexp='' }, -- National Library of Poland
	{label='WGA'         , property='P1882', lang='en', regexp='' }, -- Web Gallery of Art
	{label='KulturNav'   , property='P1248', lang='no', regexp='' }, 
	{label='RKD'         , property='P650' , lang='nl', regexp='^[1-9]%d%d?%d?%d?%d?$' }, --Netherlands Institute for Art History#Online artist pages
	{label='autores.uy'  , property='P2558', lang='es', regexp='^[1-9]%d?%d?%d?%d?$' },   --autores.uy
	{label='J9U'         , property='P8189', lang='he', regexp='' },  --National Library of Israel J9U ID

	{label='FIDE'        , property='P1440', lang='en', regexp='' }, -- FIDE database for chess players
	{label='Chess Games' , property='P1665', lang='en', regexp='' }, -- Chess Games

	{label='ISSN'        , property='P236',  lang='', regexp='' }, -- P1629: International Standard Serial Number
	{label='OSM'         , property='P402',  lang='', regexp='' },  -- P1629: OpenStreetMap
	{label='Joconde'     , property='P347',  lang='fr', regexp='' }, -- Joconde ID
	{label='Rijksmonument',property='P359',  lang='nl', regexp='' }, -- Rijksmonument ID
	{label='IMO'         , property='P458',  lang='', regexp='' }, --IMO ship number
	{label='BNCF'        , property='P508',  lang='it', regexp='' }, -- BNCF Thesaurus ID
	{label='MMSI'        , property='P587',  lang='', regexp='' }, -- P1629: Maritime Mobile Service Identity
	{label='Open Library', property='P648',  lang='', regexp='' }, -- P1629: Open Library
	{label='NRHP'        , property='P649',  lang='en', regexp='' }, -- NRHP reference number
	{label='DBNL'        , property='P723',  lang='', regexp='' }, -- DBNL author ID
	{label='UNESCO'      , property='P757',  lang='', regexp='' }, -- World Heritage Site ID
	{label='BIC'         , property='P808',  lang='', regexp='' }, -- Bien de Interés Cultural (BIC) code
	{label='LIR'         , property='P886',  lang='', regexp='' }, -- LIR
	{label='BNR'         , property='P1003', lang='ro', regexp='' }, -- NLR (Romania) ID
	{label='Koninklijke' , property='P1006', lang='nl', regexp='' }, -- National Thesaurus for Author Names ID
	{label='Louvre'      , property='P9394', lang='', regexp='' }, -- Louvre ID
	
	{label='OCLC'        , property='P243',  lang='', regexp='' }, -- OCLC
	{label='ISBN-13'     , property='P212',  lang='', regexp='' }, -- ISBN-13
	{label='ISBN-10'     , property='P957',  lang='', regexp='' }, -- ISBN-10
	{label='Historic England', property='P1216', lang='en', regexp='' }, -- National Heritage List for England number

	{label='Oxford Dict.', property='P1415', lang='en', regexp='' }, -- Oxford Dictionary of National Biography ID
	{label='kulturnoe-nasledie', property='P1483', lang='ru', regexp='' },  -- kulturnoe-nasledie.ru ID
	{label='Catalunya'   , property='P1600', lang='ca', regexp='' }, -- Inventari del Patrimoni Arquitectònic de Catalunya code
	{label='COAM'        , property='P2917', lang='es', regexp='' }, -- COAM structure ID
	{label='SIMBAD'      , property='P3083', lang='fr', regexp='' }, -- SIMBAD ID
	{label='JCyL'        , property='P3177', lang='es', regexp='' }, -- Patrimonio Web JCyL ID
	{label='Zaragoza'    , property='P3178', lang='es', regexp='' },  -- Zaragoza monument ID
	{label='BDI'         , property='P3318', lang='es', regexp='' }, -- Patrimonio Inmueble de Andalucía ID
	{label='SIPCA'       , property='P3580', lang='es', regexp='' }, -- SIPCA code
	{label='DOCOMOMO'    , property='P3758', lang='', regexp='' }, -- DOCOMOMO Ibérico ID
	{label='Czech Monument', property='P4075', lang='cs', regexp='' }, -- Czech Monument Catalogue Number
	{label='MEG'         , property='P4157', lang='fr', regexp='' }, -- P1629: Musée d'ethnographie de Genève
	{label='Enciclopédia Itaú Cultural' , property='P4399', lang='pt-br', regexp='' }, -- Enciclopédia Itaú Cultural ID
	{label='Monumentos de São Paulo'    , property='P4360', lang='pt-br', regexp='' }, -- Monumentos de São Paulo ID
	{label='Infopatrimônio'             , property='P4372', lang='pt-br', regexp='' }, -- Infopatrimônio ID
	{label="Musée d'Orsay"              , property='P4659', lang='fr'   , regexp='' }, -- Musée d'Orsay artwork ID
	{label='MuBE'                       , property='P4721', lang='pt-br', regexp='' }, -- MuBE Virtual ID
	{label='Hispania Nostra'            , property='P4868', lang='es'   , regexp='' }, -- Hispania Nostra Red List ID
	{label='NLK'            , property='P5034', lang='ko'   , regexp='' }, -- National Library of Korea ID
	}

-- ==================================================
-- === External functions ===========================
-- ==================================================
local p = {}

function p.getAuthorityControlTag( lang )
-- Return the text used as the name of the "Authority control" field.
-- English uses a hardwired interwiki link to the help page; every other
-- language uses whatever core.getLabel returns for item Q36524.
	if lang == 'en' then
		return "[[w:en:Help:Authority control|Authority control]]"
	end
	-- parentheses keep only the first value returned by core.getLabel
	return (core.getLabel("Q36524", lang))
end

-- ==================================================
function p._authorityControl(entity, args, lang, length)
-- Build the list of authority control identifiers for a single item.
-- INPUTS:
--  * entity - wikidata entity if already created or nil. Even when it is provided, args.wikidata
--             should still hold the q-code: it is used for the Wikidata link and for the
--             QuickStatements "(+)" links
--  * args   - structure with identifier fields keyed by the lower-cased labels from "conf"
--             (args.viaf, args.lccn, ...), plus args.wikidata and optional args.worldcatid.
--             NOTE: args.worldcatid is filled in by this function when an LCCN is available
--  * lang   - language code
--  * length - maximum number of identifiers to display (number or numeric string); nil shows all
-- OUTPUTS:
--  * results - wikicode string equivalent to {{Authority control|...|bare=1 }} call
--  * cats    - wikicode with maintenance categories

	-- count custom parameters (not pulled from Wikidata); the keys are the same
	-- lower-cased labels that are used below to read the values
	local nCustomParam = 0 
	for _,params in ipairs( conf ) do
		if (args[string.lower(params.label)]~=nil) then
			nCustomParam = nCustomParam + 1
		end
	end
	
	-- Get entity - record of wikidata related to a single item
	local q = args.wikidata
	if not entity and q then
		entity = mw.wikibase.getEntity(q)
	end
	
	-- Check if this is category item
	local cats = '' -- categories (mismatching and missing)
	if entity and entity.claims and entity.claims.P31 then 
		for _, statement in pairs( entity.claims.P31) do
			if (statement.mainsnak.snaktype == "value") and (statement.mainsnak.datavalue.value.id == 'Q4167836')  then -- P31 == Wikimedia category 
				cats = '[[Category:Wrong Wikidata ID in authority control data: category item]]'
			end
			if (statement.mainsnak.snaktype == "value") and (statement.mainsnak.datavalue.value.id == 'Q4167410')  then -- P31 == Wikimedia disambiguation page 
				cats = '[[Category:Wrong Wikidata ID in authority control data: disambiguation item]]'
			end
		end
	end

	--compare provided arguments with Wikidata identifiers
	local data = {} -- structure similar to "args" but filled with wikidata data
	for _,params in ipairs( conf ) do
		local label = string.lower(params.label)
		data[label] = nil
		if entity and entity.claims and params.property and entity.claims[params.property] then -- if we have wikidata item and item has the property
			-- capture all Wikidata values for the identifier
			for _, statement in pairs( entity:getBestStatements( params.property )) do
				if (statement.mainsnak.snaktype == "value") then
					local v = statement.mainsnak.datavalue.value
					if data[label]==nil then
						data[label] = v       -- save the first value
					end
					if args[label] == v then  -- match between template and wikidata identifiers
						data[label] = ''      -- ignore identifier from wikidata
						break  
					end
				end
			end
		end
	end

	--Create string with all the identifiers listed
	local results1 = {} -- high priority list
	local results2 = {} -- low  priority list
	if properties.P214 then
		properties.P214.item = 'Q54919';   -- hardwire link to VIAF
	end
	local today = '+' .. os.date('!%F') .. 'T00:00:00Z/11'
	local TransStr = 'https://quickstatements.toolforge.org/#/v1=%s|%s|%%22%s%%22|S143|Q565|S813|'.. today -- QuickStatements URL
	TransStr = '<span class=\"plainlinks\" title=\"Click (+) to copy to wikidata\">['.. TransStr .. ' (+)]</span>'
	for _,params in ipairs( conf ) do
		local label = string.lower(params.label)
		local val1 = args[label] -- identifier value provided to the template
		local val2 = data[label] -- identifier value pulled from wikidata
		local P = properties[params.property] -- properties of wikidata identifier property
		-- identifiers whose property record is missing from "properties" cannot be linked and are skipped
		if (val1 or val2) and P then
			-- name_link - link for the identifier name
			local name_link = getIdentifierNameLink( lang, P.item, P.issuedBy, params.label )
			
			-- val_link - identifier value or values
			local transfer = ''
			local val3 = (string.gsub(val1 or '', ' ', '' )) -- remove spaces
			local val_link
			if not val1 then
				val_link = getIdentifierValLink(val2, P, params, 'blue') -- wikidata only no local identifier
			elseif val2=='' then
				val_link = getIdentifierValLink(val1, P, params, 'magenta') -- match was found
			elseif val2 then
				val_link = getIdentifierValLink(val1, P, params, 'darkgreen') .. "/"..getIdentifierValLink(val2, P, params, 'blue')
				cats  = string.format("%s[[Category:Pages using authority control with identifiers mismatching Wikidata]]\n", cats)
				if q then -- the QuickStatements link needs the q-code of the item
					transfer  = string.format(TransStr, q, params.property, val3)
				end
			elseif entity then -- val2 is nil here: identifier is missing from wikidata
				val_link = getIdentifierValLink(val1, P, params, 'darkgreen')
				cats  = string.format("%s[[Category:Pages using authority control with identifiers missing from Wikidata]]\n", cats)
				if q then
					transfer  = string.format(TransStr, q, params.property, val3)
				end
			else
				val_link = getIdentifierValLink(val1, P, params, 'blue') -- local identifier and no wikidata q-code
			end

			-- combine them all
			local lineStr = string.format("\n*%s:&thinsp;<span class=\"uid\">%s</span>%s", name_link, val_link, transfer)
			if (params.lang==lang) or (params.lang=='') then
				table.insert(results1, lineStr) -- add to high priority list
			else
				table.insert(results2, lineStr) -- add to low priority list
			end
		end
	end -- for all sources
	
	-- merge high and low priority lists (ipairs keeps the "conf" order of the low
	-- priority list), trim them if needed and convert to string
	for _,v in ipairs(results2) do table.insert(results1, v) end
	local limit = tonumber(length) -- template arguments arrive as strings; non-numeric values mean "no limit"
	if limit then
		limit = math.min(#results1, math.floor(limit))
	else
		limit = #results1
	end
	local results = table.concat(results1, "", 1, limit) 
	
	-- Add Link to wikidata 
	if q then
		results = string.format("\n*[[File:Wikidata-logo.svg|20px|wikidata:%s|link=wikidata:%s]]: [[d:%s|%s]]%s",q,q,q,q,results)
	end
	
	-- Add link to Worldcat
	if (args.worldcatid==nil and (args.lccn or data.lccn))  then
		args.worldcatid = 'lccn-' .. (args.lccn or data.lccn)
	end
	if args.worldcatid  then
		results = string.format("%s\n*<span class=\"uid\">[//www.worldcat.org/identities/%s WorldCat]</span>", results, args.worldcatid)
	end
	
	-- Add maintenance categories
	if q == nil then
		cats = string.format("%s[[Category:Pages using authority control without Wikidata link]]\n", cats)
	end
	if nCustomParam>0 then
		if cats=='' and entity ~= nil then
			cats = string.format("%s[[Category:Pages using authority control with all identifiers matching Wikidata]]\n", cats)
		end
		-- red spans are the error messages produced while linking identifier values
		if string.find(results, "<span style=\"color:red\">", 1, true) then 
			cats = string.format("%s[[Category:Pages using authority control with badly formated identifier]]\n", cats)
		end
	end

	-- return results
	if results~='' then -- if there are any results than wrap them in <div> tag
		results  = string.format('<div class="hlist">%s\n</div>', results)
	end
	return results, cats
end


-- ===========================================================================
-- === Version of the function to be called from template namespace
-- ===========================================================================
function p.authorityControl(frame)
	-- Entry point for {{Authority control}}: normalizes the template arguments,
	-- calls p._authorityControl and wraps the result in an infobox-style table
	-- unless the "bare" argument is set.
	-- Returns the wikitext followed by the maintenance categories.

	-- prepare arguments
	local args = core.getArgs(frame)
	local bare = core.yesno(args.bare,false)         
	
	-- Convert template arguments to the same format as used on wikidata
	if args.bnf and string.find(args.bnf, '^[cC][bB]') then
		-- trim the leading "cb"; values given without that prefix are left untouched
		args.bnf = string.sub(args.bnf, 3)
	end
--	if args.isni then -- group in sets of 4
--		args.isni = string.sub(args.isni, 1, 4).." "..string.sub(args.isni, 5, 8)
--		     .." "..string.sub(args.isni, 9,12).." "..string.sub(args.isni,13,16)
--	end
	if args.isbn then
		-- decide between ISBN-10 and ISBN-13 by counting characters, ignoring
		-- the spaces and hyphens used to group them
		local isbn = string.gsub( args.isbn, '[%s%-]', '' )
		if #isbn==10 then
			args['isbn-10'] = args.isbn
		elseif #isbn==13 then
			args['isbn-13'] = args.isbn
		end
		args.isbn = nil
	end
	args.gnd  = args.gnd or args.pnd --redirect PND to GND
	args.lccn = fixLCCN(args.lccn)
	args.wikidata = args.wikidata or args.q or nil
	
	-- call the inner "core" function
	local results, cats = p._authorityControl(nil, args, args.lang, args.length)
	local namespace = mw.title.getCurrentTitle().namespace
	local LUT = {[2]='user', [6]='file', [10]='template', [828]='module'}
	if (LUT[namespace] or math.fmod(namespace,2)==1) then
		-- do not add categories in the namespaces listed in LUT or in
		-- odd-numbered namespaces (talk pages)
		cats = '' 
	end
	
	--package results as a infobox if not "bare"
	if not bare then
		-- Get field name for authority control
		local field_name = p.getAuthorityControlTag(args.lang)

		-- build table
		results = string.format('<tr><td class="type fileinfo-paramfield">%s</td><td>\n%s\n</td></tr>', field_name, results)
		local dir   = mw.language.new( args.lang ):getDir()    -- get text direction
		local style = 'class="toccolours mw-content-%s layouttemplate commons-file-information-table" style="width: 100%%;" dir="%s" lang="%s"' 
		style = string.format(style, dir, dir, args.lang)
		results = string.format('<table %s>\n%s\n</table>\n', style, results)
	else
		results = string.format('\n%s\n', results)
	end
	return results..cats
end

return p