Module:Custom functions
Jump to navigation
Jump to search
Documentation for this module may be created at Module:Custom functions/doc
local cargo = mw.ext.cargo -- for cargo queries if needed
local cf = {}
-- Utility function to escape single quotes (apostrophes) so a value can be embedded
-- inside a single-quoted SQL string literal.
-- Parameters:
--   input_string (string) - the raw text to embed in a query.
-- Returns:
--   (string) the text with every ' doubled to '' (the standard SQL escape). Exactly one value is returned.
-- NOTE(review): only single quotes are handled; whether backslashes also need escaping
-- depends on the database behind Cargo - confirm before relying on this for untrusted input.
function cf.escape_string_for_sql(input_string)
    -- string.gsub returns (result, match_count). The extra parentheses truncate that to the
    -- string alone, so the count cannot leak into a caller's argument list
    -- (e.g. table.insert(t, cf.escape_string_for_sql(s)) would otherwise receive three arguments).
    return (input_string:gsub("'", "''"))
end
--[[
Function: extract_top_cargo_row
Purpose:
This function takes a Cargo query result and a comma-separated list of fields,
and assigns the values from the first row of the result to a new table.
If no fields parameter is provided, the function copies every field of the
first row of the Cargo results.
Parameters:
cargo_results (table) - The table containing the results of a Cargo query.
The first row (cargo_results[1]) is used to extract the data.
A nil or empty table yields an empty result.
fields (string) - An optional comma-separated string of field names to be extracted
from the Cargo result. Whitespace around each name is ignored.
If omitted or nil, all fields from the first row are used.
Returns:
args (table) - A new table containing the extracted values from the first row of
the Cargo results, where each key corresponds to a field name
and the value is the data from that field.
Usage:
- If you want to extract specific fields:
local args = cf.extract_top_cargo_row(cargo_results, "Field1, Field2, Field3")
- If you want to extract all fields from the first row of cargo_results:
local args = cf.extract_top_cargo_row(cargo_results)
--]]
function cf.extract_top_cargo_row(cargo_results, fields)
    -- Initialize a new table to store cargo results
    local cargo_args = {}
    local first_row = cargo_results and cargo_results[1]
    -- Nothing to extract when there is no first row
    if not first_row then
        return cargo_args
    end
    if not fields then
        -- Copy the row directly instead of building a comma-separated key list and
        -- re-parsing it: that round trip mis-split keys containing commas.
        for key, value in pairs(first_row) do
            cargo_args[key] = value
        end
        return cargo_args
    end
    -- Loop through the requested fields and assign each value to cargo_args
    for field in fields:gmatch("[^,]+") do
        local trimmed_field = field:match("^%s*(.-)%s*$") -- Trim spaces
        cargo_args[trimmed_field] = first_row[trimmed_field]
    end
    return cargo_args
end
-- Strips one leading "File:" from a file name. Names that do not start with exactly
-- "File:" (the check is case-sensitive) are returned unchanged.
function cf.remove_file_prefix(file_name)
    local marker = "File:"
    local marker_length = string.len(marker)
    local starts_with_marker = string.sub(file_name, 1, marker_length) == marker
    if not starts_with_marker then
        return file_name
    end
    -- Keep everything after the prefix
    return string.sub(file_name, marker_length + 1)
end
-- Guarantees a leading "File:" on a file name: a name that already starts with
-- "File:" (case-sensitive) is returned as is, anything else gets the prefix prepended.
local function add_file_prefix_gentle(file_name)
    local already_prefixed = file_name:sub(1, 5) == "File:"
    if already_prefixed then
        return file_name
    end
    return "File:" .. file_name
end
-- Also exposed on the module table so other modules can call it
cf.add_file_prefix_gentle = add_file_prefix_gentle
--[[This function creates a gallery of images from an already processed object containing Cargo query results. Its intent is to separate the logic of querying
cargo results, which can be nuanced depending on the task at hand, from the creation of the forward-facing html wiki gallery.
Every row contributes one gallery entry, so a name that appears in several rows shows all of its images.
@param cargo_results: A table of rows retrieved from a Cargo query, each row containing data for one item.
@param focal_field: The key in each row representing the name or title to be displayed in the gallery.
@param file_field: The key in each row representing the filename of the image to be shown in the gallery.
@param placeholder_image: A default image to use if no valid image is found in the row (file field missing or empty).
If the placeholder is nil or empty as well, the row is skipped.
@param name_array: An optional array of specific names to include in the gallery, used to order the images.
Names without a matching row produce no entry. When omitted, names appear in first-seen row order.
@param gallery_mode: The display mode for the gallery (e.g., traditional, slideshow). Defaults to "traditional" when nil.
@return: Returns a string of wikitext that represents the gallery, or an error message if no images are found.
--]]
function cf.create_cargo_gallery(cargo_results, focal_field, file_field, placeholder_image, name_array, gallery_mode)
    local file_name_map = {}
    local ordered_names = {}
    for _, row in ipairs(cargo_results) do
        local name = row[focal_field]
        local image = row[file_field]
        -- An empty string is as unusable as a missing value: fall back to the placeholder
        if image == nil or image == "" then
            image = placeholder_image
        end
        -- Rows with no name, or with nothing at all to display, are skipped instead of crashing
        if name and image ~= nil and image ~= "" then
            -- If the name doesn't exist in the map, initialize an empty table for it
            if not file_name_map[name] then
                file_name_map[name] = {}
                if not name_array then
                    ordered_names[#ordered_names + 1] = name
                end
            end
            -- Use the add_file_prefix_gentle function to ensure the "File:" prefix
            local images = file_name_map[name]
            images[#images + 1] = add_file_prefix_gentle(image)
        end
    end
    if next(file_name_map) == nil then
        return '<span class="gallery-error">No images found for the provided names!</span>'
    end
    -- Collect the lines in a table and join once rather than re-concatenating a growing string
    local lines = { '<gallery heights=200 mode="' .. (gallery_mode or "traditional") .. '">' }
    for _, name in ipairs(name_array or ordered_names) do
        -- Iterate over all images associated with this name
        for _, image in ipairs(file_name_map[name] or {}) do
            lines[#lines + 1] = image .. "|link=" .. name .. "|<center>[[" .. name .. "|" .. name .. "]]</center>"
        end
    end
    lines[#lines + 1] = "</gallery>"
    return table.concat(lines, "\n")
end
-- Earlier variant of cf.create_cargo_gallery that keeps a single image per name:
-- when several rows share a name, the image from the last such row is the one used.
-- Parameters mirror cf.create_cargo_gallery (rows, name key, file key, fallback image,
-- optional ordered list of names, gallery mode).
-- Returns the <gallery> wikitext, or an error <span> when no row carried a name.
function cf.create_cargo_gallery_old(cargo_results, focal_field, file_field, placeholder_image, name_array, gallery_mode)
    local image_for_name = {}
    local names_in_row_order = {}
    for _, row in ipairs(cargo_results) do
        local name = row[focal_field]
        if name then
            -- Fall back to the placeholder, then make sure the "File:" prefix is present
            image_for_name[name] = add_file_prefix_gentle(row[file_field] or placeholder_image)
            if not name_array then
                names_in_row_order[#names_in_row_order + 1] = name
            end
        end
    end
    if next(image_for_name) == nil then
        return '<span class="gallery-error">No images found for the provided names!</span>'
    end
    local pieces = { '<gallery heights=200 mode="' .. gallery_mode .. '">\n' }
    for _, name in ipairs(name_array or names_in_row_order) do
        -- Names without a stored image use the placeholder exactly as passed in
        local image = image_for_name[name] or placeholder_image
        pieces[#pieces + 1] = image .. '|' .. "link=" .. name .. '|' .. "<center>[[" .. name .. "|" .. name .. "]]</center>" .. '\n'
    end
    pieces[#pieces + 1] = '</gallery>'
    return table.concat(pieces)
end
--[[
Splits a comma-delimited string into an array of trimmed values.
Each run of non-comma characters becomes one value, so empty segments
(as in "a,,b") are dropped; leading and trailing whitespace is removed
from every value with mw.text.trim.
Parameters:
csv_string (string|nil): The comma-delimited string to parse.
Returns:
table: The trimmed values in input order. An empty table when csv_string is nil.
Raises:
An error when csv_string is neither nil nor a string.
Example usage:
local result = cf.parse_csv_to_table(" value1 , value2 ,value3 ")
-- result: {"value1", "value2", "value3"}
local empty_result = cf.parse_csv_to_table(nil)
-- empty_result: {}
--]]
function cf.parse_csv_to_table(csv_string)
    -- nil is treated as "no values"
    if csv_string == nil then
        return {}
    end
    -- Anything else must be a string
    if type(csv_string) ~= "string" then
        error("Invalid argument: csv_string must be a string")
    end
    local values = {}
    for segment in csv_string:gmatch("[^,]+") do
        values[#values + 1] = mw.text.trim(segment)
    end
    return values
end
-- Wraps each occurrence of the listed words/phrases in wiki italics markup ('' ... '').
-- Parameters:
--   text (string) - the text to process.
--   words_to_italicize (table) - flat array of words or phrases.
-- Returns:
--   (string) the text with every matched occurrence surrounded by two apostrophes on each side.
function cf.word_italicizer(text, words_to_italicize)
    for _, phrase in ipairs(words_to_italicize) do
        -- Prefix every non-alphanumeric character with % so the phrase is matched literally
        local literal_phrase = phrase:gsub("(%W)", "%%%1")
        -- %f[%a] only matches where a letter follows a non-letter, and %f[%A] where a
        -- non-letter follows a letter, so the phrase is not matched inside a longer word
        local whole_word_pattern = "(%f[%a]" .. literal_phrase .. "%f[%A])"
        text = text:gsub(whole_word_pattern, "''%1''")
    end
    return text
end
-- Fetches a single column of a Cargo table as a flat array of values.
-- Parameters:
--   table_name (string) - Cargo table(s) to query.
--   field_name (string) - the field to fetch; it is also the key read from each result row.
--   limit (number, optional) - maximum number of rows to request. Defaults to 5000.
-- Returns:
--   (table) array of the field's values, in result order; rows without a value are skipped.
-- The function keeps its original global name for existing callers and is additionally
-- exported as cf.fetch_cargo_column so it is reachable through the table this module returns.
function fetch_cargo_column(table_name, field_name, limit)
    local cargo_args = {
        limit = limit or 5000 -- Adjust based on your expected number of rows
    }
    -- Execute the Cargo query
    local cargo_results = mw.ext.cargo.query(table_name, field_name, cargo_args)
    -- Extract the specified field from each row as simple values
    local values_array = {}
    for _, row in ipairs(cargo_results or {}) do
        local value = row[field_name]
        if value ~= nil then
            values_array[#values_array + 1] = value
        end
    end
    return values_array
end
cf.fetch_cargo_column = fetch_cargo_column
-- Finds the rank of a geography page name, i.e. which column of the "Geography"
-- Cargo table holds the given title.
-- Parameters:
--   title (string) - the page name to look up.
-- Returns:
--   (string) "Country", "Intermediate_region", "Subregion" or "Region" for the first
--   returned row in which one of those columns equals the title exactly (columns are
--   checked in that order); "Administrative_unit" when no row matches.
function cf.geo_field_finder(title)
    local rank_fields = { "Country", "Intermediate_region", "Subregion", "Region" }
    -- Titles can contain apostrophes (e.g. "Côte d'Ivoire"); escape them so the
    -- generated WHERE clause stays a valid, single string literal.
    local escaped_title = cf.escape_string_for_sql(title)
    local conditions = {}
    for i, field in ipairs(rank_fields) do
        conditions[i] = field .. " = '" .. escaped_title .. "'"
    end
    local cargo_args = { where = table.concat(conditions, " OR ") }
    local result = cargo.query("Geography", table.concat(rank_fields, ", "), cargo_args)
    for _, record in ipairs(result) do
        for _, field in ipairs(rank_fields) do
            -- Compare against the raw (unescaped) title: result values are not SQL-escaped
            if record[field] == title then
                return field
            end
        end
    end
    return "Administrative_unit"
end
--[[ Function that trims rows from a cargo results object based on a field and value. All rows whose
field contains the specified substring are removed; rows that lack the field are kept. The input is
not modified - a new array is returned. This is not as useful as just creating an SQL query with a
NOT statement, so it may not be needed in the future.
Parameters:
cargo_results (table) - array of result rows.
field (string) - the key to inspect in every row.
value (string) - the text to look for. It is matched literally, so characters such as
"(", "-" or "." have no special meaning.
use_pattern (boolean, optional) - pass true to have value interpreted as a Lua pattern instead.
Returns:
(table) a new array holding the rows that were kept, in their original order.
--]]
function cf.cargo_row_remover(cargo_results, field, value, use_pattern)
    local modified_results = {}
    -- string.find's fourth argument switches pattern matching off ("plain" search)
    local plain = not use_pattern
    for _, row in ipairs(cargo_results) do
        local cell = row[field]
        -- Keep the row unless its field value contains the specified value
        if not cell or not string.find(cell, value, 1, plain) then
            modified_results[#modified_results + 1] = row
        end
    end
    return modified_results
end
-- Surrounds every element of a flat array of strings with the given start and end text.
-- Positions 1..#array are visited; a position holding nil or false is rendered
-- as "No value" between the same delimiters.
-- Returns a new array; the input array is left untouched.
function cf.element_sandwicher(array, start_char, end_char)
    local wrapped = {}
    for position = 1, #array do
        local inner = array[position] or "No value" -- or some default string
        wrapped[position] = start_char .. inner .. end_char
    end
    return wrapped
end
--[[ Italicizes the strings of an array with <i> tags. A string that contains a balanced
pair of parentheses gets only the text inside the first such pair italicized; any other
string is italicized as a whole. It was made to format species page titles.
Returns a new array of the formatted strings. ]]--
function cf.species_italicizer(array)
    local formatted = {}
    for position, title in ipairs(array) do
        -- %b() matches the first balanced "( ... )" group
        local open_at, close_at = title:find("%b()")
        if open_at == nil then
            formatted[position] = "<i>" .. title .. "</i>"
        else
            formatted[position] = table.concat({
                title:sub(1, open_at - 1),
                "(<i>",
                title:sub(open_at + 1, close_at - 1),
                "</i>)",
                title:sub(close_at + 1),
            })
        end
    end
    return formatted
end
-- Takes an array of strings and italicizes (with <i> tags) only the text inside the first
-- balanced pair of parentheses of each string; strings without parentheses pass through
-- unchanged. (Maybe get rid of this function in favour of the more useful species_italicizer.)
-- Returns a new array of the formatted strings.
function cf.parentheses_italicizer(array)
    local formatted = {}
    for _, text in ipairs(array) do
        local first, last = string.find(text, "%b()")
        if first then
            local before = string.sub(text, 1, first - 1)
            local inside = string.sub(text, first + 1, last - 1)
            local after = string.sub(text, last + 1)
            text = before .. "(<i>" .. inside .. "</i>)" .. after
        end
        formatted[#formatted + 1] = text
    end
    return formatted
end
-- Removes duplicate values from a flat array IN PLACE, keeping the first occurrence of
-- each value and preserving order.
-- Parameters:
--   flat_table (table) - array of values; it is modified.
-- Returns:
--   (table) a set of the distinct values, i.e. a table mapping each value to true.
--   This (rather than a list) is what the function has always returned, so it is kept
--   for existing callers; use flat_table itself when an ordered list is needed.
function cf.remove_dupes_from_flat_list(flat_table)
    local seen = {}
    local kept = 0
    local read = 1
    local item = flat_table[1]
    -- Compact the array with a separate write index. Calling table.remove while
    -- iterating shifts the remaining elements and makes the loop skip the element
    -- that follows every removal, which left runs of duplicates behind.
    while item ~= nil do
        if not seen[item] then
            seen[item] = true
            kept = kept + 1
            flat_table[kept] = item
        end
        read = read + 1
        item = flat_table[read]
    end
    -- Clear the now-unused tail
    for i = kept + 1, read - 1 do
        flat_table[i] = nil
    end
    return seen
end
-- Filters table_a down to the elements that also occur in table_b.
-- Parameters:
--   table_a (table) - array to filter; its order and duplicates are preserved in the result.
--   table_b (table) - array of allowed values.
-- Returns:
--   (table) a new array with the elements of table_a that are present in table_b.
function cf.filter_by_intersection(table_a, table_b)
    -- Build a lookup set once instead of rescanning table_b for every element of table_a
    local allowed = {}
    for _, value in ipairs(table_b) do
        allowed[value] = true
    end
    local filtered_table = {}
    for _, value in ipairs(table_a) do
        if allowed[value] then
            filtered_table[#filtered_table + 1] = value
        end
    end
    return filtered_table
end
--[[ This function finds the things that are in the first array but not in the second.
It was first used for custom pages that display orphaned Data namespace pages which
can't find their forward-facing partner.
Returns a new array with the elements of a (order and duplicates preserved) that do not occur in b.
--]]
function cf.in_a_not_b(a, b)
    -- Index b as a set so each membership test is a single table lookup
    local excluded = {}
    for _, item in ipairs(b) do
        excluded[item] = true
    end
    local only_in_a = {}
    for _, item in ipairs(a) do
        if excluded[item] == nil then
            only_in_a[#only_in_a + 1] = item
        end
    end
    return only_in_a
end
--[[ This parses a single text string of comma-separated items and returns them re-joined with ", "
after optional formatting. This function may be overly complex and is a candidate for deprecation.
Parameters:
text (string|nil) - the comma-separated input.
format (string, optional) - "page" wraps every item in double square brackets (a wiki link);
"ext_link" wraps every item as an external link labelled with link_text;
any other value leaves the items as plain text.
link_text (string, optional) - label used by the "ext_link" format. When nil or empty the
link is emitted without a label.
Returns:
(string|nil) the formatted items joined by ", ", or nil when text is nil or false.
--]]
function cf.parse_arguments(text, format, link_text)
    if not text then
        return nil
    end
    local text_table = {} -- Table to store parsed text elements
    -- Iterate over the delimited string and extract each text element
    for raw_item in text:gmatch("[^,]+") do
        -- Trim each item: input such as "a, b" would otherwise produce " b", giving
        -- doubled spaces in the output and a leading space inside link brackets.
        local item = raw_item:match("^%s*(.-)%s*$")
        -- Whitespace-only segments carry no content
        if item ~= "" then
            if format == "page" then
                item = "[[" .. item .. "]]" -- Surround item with double brackets
            elseif format == "ext_link" then
                if link_text and link_text ~= "" then
                    item = "[" .. item .. " " .. link_text .. "]"
                else
                    item = "[" .. item .. "]"
                end
            end
            text_table[#text_table + 1] = item
        end
    end
    -- Concatenate the formatted items into a single string
    return table.concat(text_table, ", ")
end
-- Determines the taxonomic rank of a page title by looking it up in the "Species" Cargo table.
-- Parameters:
--   title (string) - a page title. When it contains parentheses, e.g.
--     "Desert Spiny Lizard (Sceleporus magister)", the text inside the first pair is used as
--     the scientific name; otherwise the whole title is used.
-- Returns:
--   (table) on a match: { lowercase, title, plural, cargo_field, error = false } describing the
--   rank whose column equals the name (for example lowercase = "genus", title = "Genus",
--   plural = "genera", cargo_field = "Genus"). Ranks are checked from Family down to Species.
--   When nothing matches: { error = true, message = "No matching record found" }.
function cf.rank_finder(title)
    -- Extract the scientific name; only needed on wikis whose titles hold more than the sci name
    local sci_name = title:match("%((.-)%)") or title
    -- Ordered from highest to lowest rank. Keeping this as an array (instead of iterating a
    -- keyed table with pairs) makes both the queried field order and the match priority
    -- deterministic. Plurals are spelled out because appending "s" produced "familys".
    local ranks = {
        { title = "Family", plural = "families" },
        { title = "Subfamily", plural = "subfamilies" },
        { title = "Genus", plural = "genera" },
        { title = "Species", plural = "species" },
    }
    -- Escape apostrophes so the name cannot break out of the SQL string literal
    local escaped_name = cf.escape_string_for_sql(sci_name)
    local field_names = {}
    local where_clauses = {}
    for i, rank in ipairs(ranks) do
        field_names[i] = rank.title
        where_clauses[i] = rank.title .. " = '" .. escaped_name .. "'"
    end
    local cargo_args = { where = table.concat(where_clauses, " OR ") }
    local cargo_result = cargo.query("Species", table.concat(field_names, ", "), cargo_args)
    for _, row in ipairs(cargo_result) do
        for _, rank in ipairs(ranks) do
            -- Compare with the raw name: result values are not SQL-escaped
            if row[rank.title] == sci_name then
                return {
                    lowercase = string.lower(rank.title),
                    title = rank.title,
                    plural = rank.plural,
                    cargo_field = rank.title,
                    error = false,
                }
            end
        end
    end
    return { error = true, message = "No matching record found" }
end
-- Helper function that collects every key of a table into a new array.
-- The order of the returned keys follows pairs() and is therefore unspecified.
-- Declared without `local`, so it is a global: cf.rank_finder, which is defined
-- earlier in this file, calls it by that global name.
function keys(t)
    local keyset = {}
    for key in pairs(t) do
        keyset[#keyset + 1] = key
    end
    return keyset
end
-- Returns the ranks that sit below a focal rank in the chain family > subfamily > genus > species.
-- Parameters:
--   rank (string) - the lowercase rank name, e.g. "genus".
-- Returns:
--   (table) with the keys "lowercase", "plural", "cargo_field" and "title", each an array of
--   the corresponding spellings of every rank below the focal one (empty arrays for "species").
--   An empty table when the rank is not part of the chain.
function cf.get_child_ranks(rank)
    local lowercase_ranks = {"family", "subfamily", "genus", "species"}
    local spellings = {
        lowercase = lowercase_ranks,
        plural = {"families", "subfamilies", "genera", "species"},
        cargo_field = {"Family", "Subfamily", "Genus", "Species"},
        title = {"Family", "Subfamily", "Genus", "Species"},
    }
    -- Locate the focal rank within the chain
    local position
    for i, candidate in ipairs(lowercase_ranks) do
        if candidate == rank then
            position = i
            break
        end
    end
    if position == nil then
        return {} -- Unknown rank
    end
    -- For every spelling, keep only the entries after the focal rank
    local children = {}
    for label, names in pairs(spellings) do
        local below = {}
        for i = position + 1, #lowercase_ranks do
            below[#below + 1] = names[i]
        end
        children[label] = below
    end
    return children
end
--[[
Function to take a dictionary and one focal key/field and output a single string that is a
sorted, de-duplicated list of that field's values separated by commas.
Usage:
The function cf.comma_separated_list takes three arguments:
1. dict (table): An array of entries, each entry being a table.
2. field_name (string): The key/field to extract unique values from.
3. format (string, optional): The format of the output ("string" or "page"). Default is "string".
With "page" every value becomes a wiki link; for the fields "species" and "genus"
(case-insensitive) the link is additionally italicized.
Trailing asterisks are removed from every value before duplicates are detected, so
"Homo sapiens" and "Homo sapiens*" count as the same value.
Entries that are missing or lack the field are reported through mw.log and skipped.
Returns:
(string|nil) the comma-separated list, or nil when no value was found.
Example:
local dict = {
{species = "Homo sapiens"},
{species = "Pan troglodytes"},
{species = "Gorilla gorilla"},
{species = "Homo sapiens"}, -- Duplicate entry
{species = "Pan paniscus"},
}
local result = cf.comma_separated_list(dict, "species", "page")
--]]
function cf.comma_separated_list(dict, field_name, format)
    -- Set default value for format if not provided
    format = format or "string"
    -- Set of the unique (normalized) values of the specified field
    local uniques_of_rank = {}
    for i = 1, #dict do
        local entry = dict[i]
        -- Check if entry exists and has the specified field
        if entry and entry[field_name] then
            -- Strip trailing asterisks BEFORE de-duplicating; doing it afterwards let
            -- "X" and "X*" through as two identical items. tostring guards non-string values.
            local item = tostring(entry[field_name]):gsub("%**$", "")
            uniques_of_rank[item] = true
        else
            -- Log an error if the entry is invalid
            mw.log("Invalid entry at index " .. i .. " in dict")
        end
    end
    -- Collect and sort the unique values
    local sorted_rank = {}
    for item in pairs(uniques_of_rank) do
        sorted_rank[#sorted_rank + 1] = item
    end
    -- Return nil if there is nothing to list
    if #sorted_rank == 0 then
        return nil
    end
    table.sort(sorted_rank)
    if format == "page" then
        local lowered_field = string.lower(field_name)
        local italic = lowered_field == "species" or lowered_field == "genus"
        for i, item in ipairs(sorted_rank) do
            if italic then
                -- Species and genus names get italicized links
                sorted_rank[i] = "''[[" .. item .. "]]''"
            else
                sorted_rank[i] = "[[" .. item .. "]]"
            end
        end
    end
    return table.concat(sorted_rank, ", ")
end
--[[
Function to take a string delimited by a punctuation mark and format it
to ensure there is exactly one space after each punctuation mark.
Usage:
The function cf.format_delimited_string takes two arguments:
1. input_string (string): The input string to be formatted.
2. delimiter (string, optional): The punctuation mark used as the delimiter. Default is comma (",").
It is matched literally, so marks such as "." or "|" or "%" are safe to use.
Returns:
(string) the input with every delimiter (and any whitespace following it) replaced by the
delimiter plus a single space. An empty delimiter returns the input unchanged.
Example:
local input_string = "apple,banana,carrot,date,egg"
local result = cf.format_delimited_string(input_string)
print(result)
-- Output: "apple, banana, carrot, date, egg"
local input_string_with_semicolon = "apple;banana;carrot;date;egg"
local delimiter = ";"
local result_with_semicolon = cf.format_delimited_string(input_string_with_semicolon, delimiter)
print(result_with_semicolon)
-- Output: "apple; banana; carrot; date; egg"
--]]
function cf.format_delimited_string(input_string, delimiter)
    -- Set default delimiter to comma if not provided
    delimiter = delimiter or ","
    -- An empty pattern would match between every character
    if delimiter == "" then
        return input_string
    end
    -- Escape every non-alphanumeric character so the delimiter is matched literally
    -- rather than interpreted as a Lua pattern ("." would otherwise match any character)
    local delimiter_pattern = delimiter:gsub("%W", "%%%0")
    -- A function replacement is used so "%" inside the delimiter is not read as a
    -- capture reference in the replacement text
    local formatted_string = input_string:gsub(delimiter_pattern .. "%s*", function()
        return delimiter .. " "
    end)
    return formatted_string
end
--[[This function takes a cargo results array and a field name and produces a bulleted list of all the values in that field.
Each field value is split on the delimiter and every non-empty, trimmed piece becomes one bullet.
Parameters:
cargo_results (table) - array of result rows.
field_name (string) - the key to read from every row. Rows without a value for it are skipped.
sort_alphabetically (boolean, optional) - pass true to sort the bullets. Items that consist
entirely of one wiki link are compared by the text between the double square brackets.
delimiter (string, optional) - separator inside a field value, default ",". It is handed to
mw.text.split as is, which treats it as a pattern.
Returns:
(string) the HTML of a <ul> element containing one <li> per item.
--]]
function cf.display_field_in_bulleted_list(cargo_results, field_name, sort_alphabetically, delimiter)
    -- Default the delimiter to a comma if not provided
    delimiter = delimiter or ","
    -- Create a table to collect all items
    local items = {}
    for _, result in ipairs(cargo_results) do
        -- Extract the value of the specified field from the current result
        local field_value = result[field_name]
        -- A row may simply have no value for this field; splitting nil would raise an error
        if field_value ~= nil then
            for _, item in ipairs(mw.text.split(tostring(field_value), delimiter)) do
                local trimmed = mw.text.trim(item)
                -- Skip empty pieces (e.g. from "a,,b") so no empty bullets are rendered
                if trimmed ~= "" then
                    items[#items + 1] = trimmed
                end
            end
        end
    end
    -- Sort the items alphabetically if the sort_alphabetically argument is true
    if sort_alphabetically then
        -- Compute each item's comparison key once instead of on every comparison
        local sort_key = {}
        for _, item in ipairs(items) do
            if sort_key[item] == nil then
                -- Remove wiki link formatting for sorting comparison
                sort_key[item] = (mw.ustring.gsub(item, "^%[%[(.-)%]%]$", "%1"))
            end
        end
        table.sort(items, function(a, b)
            return sort_key[a] < sort_key[b]
        end)
    end
    -- Create a <ul> element using mw.html
    local ul = mw.html.create('ul')
    -- Add each item as a <li> element to the <ul>
    for _, item in ipairs(items) do
        ul:tag('li'):wikitext(item)
    end
    -- Return the HTML code of the entire list
    return tostring(ul)
end
--[[Function that takes a dictionary and one focal key/field and displays a sorted
bulleted list of the unique values for that key/field. This function is slightly awkward for historical reasons
in that it defaults to rendering all the elements as page links and the user must supply "true" to the text_only
parameter if links are not wanted.
Parameters:
dict (table) - array of entries, each entry being a table.
field_name (string) - the key/field to extract unique values from. For "species" and "genus"
(case-insensitive) the links are italicized.
text_only (boolean, optional) - pass true to output plain text instead of links.
Trailing asterisks are removed from every value before duplicates are detected, so
"X" and "X*" produce a single bullet. Entries that are missing or lack the field are
reported through mw.log and skipped.
Returns:
(string|nil) the HTML of a <ul> element, or nil when no value was found.
--]]
function cf.bulleted_list(dict, field_name, text_only)
    local uniques_of_rank = {}
    for i = 1, #dict do
        local entry = dict[i]
        if entry and entry[field_name] then
            -- Normalize first, then de-duplicate; the reverse order let "X" and "X*"
            -- through as two identical bullets. tostring guards non-string values.
            local item = tostring(entry[field_name]):gsub("%**$", "") -- Remove trailing asterisks
            uniques_of_rank[item] = true
        else
            mw.log("Invalid entry at index " .. i .. " in dict")
        end
    end
    local sorted_rank = {}
    for item in pairs(uniques_of_rank) do
        sorted_rank[#sorted_rank + 1] = item
    end
    if #sorted_rank == 0 then
        return nil
    end
    table.sort(sorted_rank)
    -- Decide once whether links need italics
    local italic = false
    if not text_only then
        local lowered_field = string.lower(field_name)
        italic = lowered_field == "species" or lowered_field == "genus"
    end
    local resultList = mw.html.create('ul')
    for _, item in ipairs(sorted_rank) do
        local wikitext = item
        if not text_only then
            wikitext = "[[" .. item .. "]]"
            if italic then
                wikitext = "''" .. wikitext .. "''"
            end
        end
        resultList:tag('li'):wikitext(wikitext)
    end
    return tostring(resultList)
end
-- Takes a wiki page title and returns the scientific name inside it: the text between
-- the first balanced pair of parentheses. Titles without parentheses are returned unchanged.
function cf.title_to_sci(str)
    -- %b() matches a balanced "( ... )" group, parentheses included
    local parenthesized = string.match(str, "%b()")
    if parenthesized == nil then
        return str
    end
    -- Drop the surrounding parentheses
    return string.sub(parenthesized, 2, -2)
end
-- Counts the non-empty, comma-separated elements of a string.
-- Returns 0 for nil/false input; empty segments (as in "a,,b") are not counted.
function cf.count_comma_elements(input_string)
    if not input_string then
        return 0
    end
    -- gsub's second result is the number of matches, i.e. the number of runs of non-comma characters
    local _, element_count = string.gsub(input_string, "[^,]+", "")
    return element_count
end
return cf