Module:Custom functions

From HopperWiki
Jump to navigation Jump to search

Documentation for this module may be created at Module:Custom functions/doc

-- Handle to the Cargo extension's Lua interface; the query helpers below call cargo.query through it.
local cargo = mw.ext.cargo -- for cargo queries if needed

-- Module table: the public helpers in this file are attached to it, and it is returned on the last line.
local cf = {}


-- Escapes a string so it can be embedded inside a single-quoted SQL literal.
-- Each apostrophe (') is doubled (''), the standard SQL escape for single quotes.
--
-- Parameters:
--     input_string (string) - raw text to escape.
-- Returns:
--     (string) the escaped text, and only that one value.
function cf.escape_string_for_sql(input_string)
    -- string.gsub returns two values (result, number of substitutions). The
    -- surrounding parentheses truncate the call to the result alone, so the
    -- count cannot leak into a caller that forwards this return value straight
    -- into another argument list or table constructor.
    return (input_string:gsub("'", "''"))
end







--[[
    Function: cf.extract_top_cargo_row
    Purpose:
        Copies values from the first row of a Cargo query result into a new table.
        With a comma-separated field list only those fields are copied; without
        one, every key/value pair of the first row is copied.

    Parameters:
        cargo_results (table|nil) - Result of a Cargo query. Only the first row
                                    (cargo_results[1]) is read. A nil or empty
                                    result yields an empty table.
        fields (string|nil)       - Optional comma-separated field names. Spaces
                                    around each name are ignored. If omitted,
                                    all fields of the first row are used.

    Returns:
        (table) A new table mapping field name to the value found in the first
                row. Requested fields that are absent from the row are simply
                missing from the returned table.

    Usage:
        local some = cf.extract_top_cargo_row(cargo_results, "Field1, Field2, Field3")
        local all  = cf.extract_top_cargo_row(cargo_results)
--]]
function cf.extract_top_cargo_row(cargo_results, fields)
    local cargo_args = {}

    -- Nothing to extract when the query returned no rows (or no result at all).
    local top_row = cargo_results and cargo_results[1]
    if not top_row then
        return cargo_args
    end

    if not fields then
        -- Copy the row directly instead of joining the keys into a
        -- comma-separated string and splitting it again: that round trip lost
        -- any key containing a comma or leading/trailing whitespace.
        for key, value in pairs(top_row) do
            cargo_args[key] = value
        end
        return cargo_args
    end

    for field in fields:gmatch("[^,]+") do
        local trimmed_field = field:match("^%s*(.-)%s*$") -- strip surrounding spaces
        cargo_args[trimmed_field] = top_row[trimmed_field]
    end

    return cargo_args
end








-- Strips a leading "File:" from a file name.
-- Returns the text after the prefix when the name starts with "File:" (exact
-- case), otherwise returns the name unchanged.
function cf.remove_file_prefix(file_name)
    local prefix = "File:"
    local prefix_length = string.len(prefix)

    -- Guard clause: names without the prefix pass through untouched.
    if string.sub(file_name, 1, prefix_length) ~= prefix then
        return file_name
    end

    return string.sub(file_name, prefix_length + 1)
end




-- Ensures a file name carries the "File:" prefix.
-- A name whose first five characters are already "File:" is returned as is;
-- anything else gets "File:" prepended. Kept as a local so other functions in
-- this module can call it directly, and also exported on the module table.
local function add_file_prefix_gentle(file_name)
    local already_prefixed = file_name:sub(1, 5) == "File:"
    if already_prefixed then
        return file_name
    end
    return "File:" .. file_name
end
cf.add_file_prefix_gentle = add_file_prefix_gentle











--[[
    Function: cf.create_cargo_gallery
    Purpose:
        Builds gallery wikitext from already-fetched Cargo rows. Querying Cargo
        is deliberately left to the caller so that this function only deals with
        turning rows into the reader-facing gallery markup.

    Parameters:
        cargo_results (table|nil) - Rows from a Cargo query, one item per row.
        focal_field (string)      - Row key holding the name shown (and linked)
                                    for each image. Rows without it are skipped.
        file_field (string)       - Row key holding the image file name.
        placeholder_image (string|nil) - Image used when a row has no image
                                    (nil or empty string). If this is missing
                                    too, the row is skipped.
        name_array (table|nil)    - Optional list of names; when given, only
                                    these names are shown, in this order.
                                    Otherwise names appear in first-seen order.
        gallery_mode (string|nil) - Value for the gallery tag's mode attribute;
                                    defaults to "traditional" when nil.

    Returns:
        (string) The gallery wikitext, or an error span when no row produced an
                 image. A name that occurs in several rows gets one gallery
                 entry per image.
--]]
function cf.create_cargo_gallery(cargo_results, focal_field, file_field, placeholder_image, name_array, gallery_mode)
    -- A nil mode used to abort with a concatenation error; fall back to
    -- MediaWiki's default gallery mode instead.
    gallery_mode = gallery_mode or "traditional"

    local file_name_map = {} -- name -> list of prefixed image titles
    local ordered_names = {} -- first-seen order, only filled when name_array is absent

    for _, row in ipairs(cargo_results or {}) do
        local name = row[focal_field]
        if name then
            -- Treat an empty string like a missing value so that it falls back
            -- to the placeholder instead of producing a bare "File:" entry.
            local image = row[file_field]
            if image == nil or image == "" then
                image = placeholder_image
            end

            -- With neither an image nor a placeholder there is nothing to show.
            if image ~= nil and image ~= "" then
                if not file_name_map[name] then
                    file_name_map[name] = {}
                    if not name_array then
                        ordered_names[#ordered_names + 1] = name
                    end
                end
                table.insert(file_name_map[name], add_file_prefix_gentle(image))
            end
        end
    end

    if next(file_name_map) == nil then
        return '<span class="gallery-error">No images found for the provided names!</span>'
    end

    -- Collect the lines and join once rather than growing one string in a loop.
    local lines = { '<gallery heights=200 mode="' .. gallery_mode .. '">' }
    for _, name in ipairs(name_array or ordered_names) do
        -- Names requested through name_array but absent from the rows add nothing.
        for _, image in ipairs(file_name_map[name] or {}) do
            local photo_link = "link=" .. name
            local caption_with_link = "<center>[[" .. name .. "|" .. name .. "]]</center>"
            lines[#lines + 1] = image .. '|' .. photo_link .. '|' .. caption_with_link
        end
    end
    lines[#lines + 1] = '</gallery>'

    return table.concat(lines, '\n')
end






-- Older gallery builder, kept next to cf.create_cargo_gallery.
-- Takes the same arguments: Cargo rows, the row key holding the display name,
-- the row key holding the image file name, a placeholder image, an optional
-- ordered list of names, and the gallery mode.
-- Differences visible in the code below:
--   * each name keeps a single image; a later row with the same name overwrites it;
--   * without name_array, a name is listed once per row it appears in;
--   * a name from name_array that no row supplied falls back to placeholder_image
--     exactly as passed in (no "File:" prefix is added on that path).
-- Returns the gallery wikitext, or an error span when no row had a name.
function cf.create_cargo_gallery_old(cargo_results, focal_field, file_field, placeholder_image, name_array, gallery_mode)
    local image_by_name = {}
    local names_in_row_order = {}

    for _, row in ipairs(cargo_results) do
        local name = row[focal_field]
        if name then
            -- Missing image -> placeholder; either way make sure "File:" is present.
            image_by_name[name] = add_file_prefix_gentle(row[file_field] or placeholder_image)
            if not name_array then
                names_in_row_order[#names_in_row_order + 1] = name
            end
        end
    end

    if next(image_by_name) == nil then
        return '<span class="gallery-error">No images found for the provided names!</span>'
    end

    local pieces = { '<gallery heights=200 mode="' .. gallery_mode .. '">\n' }

    for _, name in ipairs(name_array or names_in_row_order) do
        local image = image_by_name[name] or placeholder_image
        local photo_link = "link=" .. name
        local caption_with_link = "<center>[[" .. name .. "|" .. name .."]]</center>"
        pieces[#pieces + 1] = image .. '|' .. photo_link .. '|' .. caption_with_link .. '\n'
    end

    pieces[#pieces + 1] = '</gallery>'

    return table.concat(pieces)
end




















--[[
    Splits a comma-delimited string into a list of trimmed values.

    The input is cut at every comma; empty pieces (nothing between two commas)
    are not produced by the split. Each remaining piece is passed through
    mw.text.trim before being stored.

    Parameters:
    csv_string (string|nil): The comma-delimited text to split.

    Returns:
    table: The trimmed values in their original order. An empty table when
           csv_string is nil.

    Raises:
    An error when csv_string is neither nil nor a string.

    Example usage:
    local result = cf.parse_csv_to_table(" value1 , value2 ,value3 ")
    -- result: {"value1", "value2", "value3"}

    local empty_result = cf.parse_csv_to_table(nil)
    -- empty_result: {}
--]]
function cf.parse_csv_to_table(csv_string)
    -- nil means "no values"
    if csv_string == nil then
        return {}
    end

    -- Anything else must be a string
    if type(csv_string) ~= "string" then
        error("Invalid argument: csv_string must be a string")
    end

    local values = {}
    for piece in string.gmatch(csv_string, "[^,]+") do
        values[#values + 1] = mw.text.trim(piece)
    end
    return values
end








-- Wraps whole-word occurrences of the given words in wiki italics ('' ... '').
--
-- Parameters:
--     text (string)              - the text to process.
--     words_to_italicize (table) - flat list of words/phrases to italicize.
-- Returns:
--     (string) the text with every matched occurrence wrapped in ''.
--
-- A match must begin where a letter follows a non-letter and must end where a
-- non-letter (or the end of the text) follows, so a word embedded inside a
-- longer run of letters is left alone.
function cf.word_italicizer(text, words_to_italicize)
    local italicized = text

    for _, word in ipairs(words_to_italicize) do
        -- Prefix every non-alphanumeric character with % so the word is matched literally
        local literal_word = word:gsub("(%W)", "%%%1")
        local whole_word_pattern = "(%f[%a]" .. literal_word .. "%f[%A])"

        italicized = italicized:gsub(whole_word_pattern, "''%1''")
    end

    return italicized
end




--[[
    Function: cf.fetch_cargo_column
    Purpose:
        Queries one field of one Cargo table and returns its values as a flat list.

    Parameters:
        table_name (string) - Cargo table to query.
        field_name (string) - Field to fetch; also the key read from each row.
        limit (number|nil)  - Optional maximum number of rows to request.
                              Defaults to 5000.

    Returns:
        (table) A list with the field's value from each returned row, in row
                order. Rows where the field is nil contribute nothing.
--]]
function cf.fetch_cargo_column(table_name, field_name, limit)
    local cargo_args = {
        limit = limit or 5000
    }

    -- "or {}" is purely defensive in case the query hands back nothing.
    local cargo_results = mw.ext.cargo.query(table_name, field_name, cargo_args) or {}

    local values_array = {}
    for _, row in ipairs(cargo_results) do
        local value = row[field_name]
        if value ~= nil then
            values_array[#values_array + 1] = value
        end
    end

    return values_array
end

-- This helper used to exist only as an implicit global and was not reachable
-- through the module table. The global name is kept, deliberately, so existing
-- code that calls fetch_cargo_column(...) directly keeps working.
fetch_cargo_column = cf.fetch_cargo_column














-- Finds which column of the "Geography" Cargo table holds a given page title.
--
-- Parameters:
--     title (string) - the geography page name to look up.
-- Returns:
--     (string) "Country", "Intermediate_region", "Subregion" or "Region" for
--     the first returned row in which one of those columns equals the title
--     (columns are checked in that order); "Administrative_unit" otherwise.
function cf.geo_field_finder(title)
    local tables = "Geography"
    local rank_fields = { "Country", "Intermediate_region", "Subregion", "Region" }

    -- The title ends up inside single-quoted SQL literals, so apostrophes
    -- (e.g. "Côte d'Ivoire") must be doubled or the WHERE clause is malformed.
    local escaped_title = cf.escape_string_for_sql(title)

    local where_clauses = {}
    for i, field in ipairs(rank_fields) do
        where_clauses[i] = field .. " = '" .. escaped_title .. "'"
    end

    local cargo_args = {
        where = table.concat(where_clauses, " OR ")
    }

    local result = cargo.query(tables, table.concat(rank_fields, ", "), cargo_args)

    -- Compare against the raw (unescaped) title: the escaping only matters to SQL.
    for _, record in ipairs(result or {}) do
        for _, field in ipairs(rank_fields) do
            if record[field] == title then
                return field
            end
        end
    end

    -- Nothing matched at any of the ranks above.
    return "Administrative_unit"
end




















--[[
    Function: cf.cargo_row_remover
    Purpose:
        Returns a copy of a Cargo result list without the rows whose given field
        contains a given substring. Usually a NOT condition in the Cargo query
        itself is the better tool; this is for results that are already fetched.

    Parameters:
        cargo_results (table) - list of Cargo rows.
        field (string)        - row key to inspect.
        value (string)        - substring to look for, matched literally.

    Returns:
        (table) A new list holding the rows that were kept, in their original
                order. Rows lacking the field are always kept. The input list is
                not modified.
--]]
function cf.cargo_row_remover(cargo_results, field, value)
    local modified_results = {}
    for _, row in ipairs(cargo_results) do
        local field_value = row[field]
        -- The fourth argument (plain = true) turns off pattern matching, so
        -- characters such as "-", "(", "." or "%" in value are taken literally,
        -- as the substring contract promises.
        if not field_value or not string.find(field_value, value, 1, true) then
            modified_results[#modified_results + 1] = row
        end
    end
    return modified_results
end





-- Surrounds every element of a flat list with a leading and a trailing string.
--
-- Parameters:
--     array (table)       - flat list of strings.
--     start_char (string) - text placed before each element.
--     end_char (string)   - text placed after each element.
-- Returns:
--     (table) a new list of the same length; a nil or false element is
--     rendered as "No value" between the two strings.
function cf.element_sandwicher(array, start_char, end_char)
    local wrapped = {}

    for index = 1, #array do
        local element = array[index] or "No value"  -- default for missing entries
        wrapped[index] = start_char .. element .. end_char
    end

    return wrapped
end




-- Italicizes each string of a list, written for formatting species page titles.
-- If a string contains a balanced pair of parentheses, only the text inside the
-- first such pair is wrapped in <i>...</i>; the rest stays as it is.
-- A string without parentheses is wrapped in <i>...</i> as a whole.
-- Returns a new list; the input list is not modified.
function cf.species_italicizer(array)
    local italicized = {}

    for _, value in ipairs(array) do
        -- Rewrite the first balanced "( ... )" group, keeping the parentheses
        -- themselves outside the italics.
        local rewritten, replacements = value:gsub("%b()", function(group)
            return "(" .. "<i>" .. group:sub(2, -2) .. "</i>" .. ")"
        end, 1)

        -- No parentheses at all: italicize the entire string instead.
        if replacements == 0 then
            rewritten = "<i>" .. value .. "</i>"
        end

        italicized[#italicized + 1] = rewritten
    end

    return italicized
end


-- Italicizes only the text inside parentheses for each string of a list.
-- The content of the first balanced "( ... )" group is wrapped in <i>...</i>;
-- strings without parentheses are copied unchanged. Returns a new list.
-- (cf.species_italicizer does the same but also italicizes strings that have
-- no parentheses.)
function cf.parentheses_italicizer(array)
    local italicized = {}

    for _, value in ipairs(array) do
        -- Only the first replacement is needed; the substitution count is dropped.
        local rewritten = value:gsub("%b()", function(group)
            return "(" .. "<i>" .. group:sub(2, -2) .. "</i>" .. ")"
        end, 1)

        italicized[#italicized + 1] = rewritten
    end

    return italicized
end



-- Removes duplicate values from a flat list, in place.
--
-- Parameters:
--     flat_table (table) - flat list; modified so that only the first
--                          occurrence of each value remains, order preserved.
-- Returns:
--     (table) a set of the distinct values (value -> true). This is the same
--     return shape as before; the de-duplicated list itself is flat_table.
function cf.remove_dupes_from_flat_list(flat_table)
    local seen = {}

    -- Compact the list with separate read and write positions. Calling
    -- table.remove while walking forward shifts the following element into the
    -- slot just visited, so the element right after a removed duplicate was
    -- never examined and adjacent duplicates survived.
    local write_index = 1
    local read_index = 1
    local item = flat_table[read_index]
    while item ~= nil do
        if not seen[item] then
            seen[item] = true
            flat_table[write_index] = item
            write_index = write_index + 1
        end
        read_index = read_index + 1
        item = flat_table[read_index]
    end

    -- Clear the now-unused tail left behind by the compaction.
    for index = write_index, read_index - 1 do
        flat_table[index] = nil
    end

    return seen
end




-- Filters table_a down to the elements that also occur in table_b.
--
-- Parameters:
--   table_a (table) - flat list to filter.
--   table_b (table) - flat list of allowed values.
-- Returns:
--   (table) a new list with the elements of table_a found in table_b, in
--   table_a's order (duplicates in table_a are kept).
function cf.filter_by_intersection(table_a, table_b)
  -- Index table_b once as a set so each membership test is a single lookup
  -- instead of a scan over all of table_b (same approach as cf.in_a_not_b).
  local allowed = {}
  for _, value in ipairs(table_b) do
    allowed[value] = true
  end

  local filtered_table = {}
  for _, value in ipairs(table_a) do
    if allowed[value] then
      filtered_table[#filtered_table + 1] = value
    end
  end

  return filtered_table
end




--[[ Returns the elements of the first list that do not occur in the second.
First used for custom pages that list orphaned Data namespace pages, i.e. ones
that cannot find their forward-facing partner page.

Parameters:
    a (table) - flat list to filter.
    b (table) - flat list of values to exclude.
Returns:
    (table) a new list with the elements of a that are absent from b, in a's order.
--]]
function cf.in_a_not_b(a, b)
    -- Turn b into a set so membership is a single table lookup
    local present_in_b = {}
    for _, item in ipairs(b) do
        present_in_b[item] = true
    end

    local only_in_a = {}
    for _, item in ipairs(a) do
        if not present_in_b[item] then
            only_in_a[#only_in_a + 1] = item
        end
    end

    return only_in_a
end






-- Reformats a comma-separated string, optionally turning each item into a link.
-- (Possibly more complex than needed; a candidate for deprecation.)
--
-- Parameters:
--     text (string|nil)      - comma-separated items.
--     format (string|nil)    - "page": wrap each item in double square brackets
--                              (internal link); "ext_link": wrap each item in
--                              single square brackets followed by link_text
--                              (external link); anything else: plain text.
--     link_text (string|nil) - label for "ext_link" items. When nil the item
--                              is bracketed without a label.
-- Returns:
--     (string|nil) the items joined with ", ", or nil when text is nil/false.
function cf.parse_arguments(text, format, link_text)
    if not text then
        -- The old code returned an undeclared variable here; return nil explicitly.
        return nil
    end

    local text_table = {}
    for raw_item in text:gmatch("[^,]+") do
        -- Trim each item: input such as "a, b" otherwise yields " b", which
        -- doubles the space after the comma and, for external links, places a
        -- blank directly after the opening bracket.
        local item = raw_item:match("^%s*(.-)%s*$")

        -- Skip pieces that were only whitespace; they would become empty links.
        if item ~= "" then
            if format == "page" then
                item = "[[" .. item .. "]]"
            elseif format == "ext_link" then
                if link_text then
                    item = "[" .. item .. " " .. link_text .. "]"
                else
                    item = "[" .. item .. "]"
                end
            end
            text_table[#text_table + 1] = item
        end
    end

    return table.concat(text_table, ", ")
end











-- Determines the taxonomic rank of a page title by looking it up in the
-- "Species" Cargo table.
--
-- Parameters:
--     title (string) - page title. If it contains "( ... )", the text inside
--                      the parentheses is used as the name to look up, e.g.
--                      "Desert Spiny Lizard (Sceloporus magister)".
-- Returns:
--     (table) On a match:
--         { lowercase = "genus", title = "Genus", plural = "genera",
--           cargo_field = "Genus", error = false }
--     with the values for whichever of Family, Subfamily, Genus, Species
--     equals the name (checked in that order, first returned row first).
--     Without a match:
--         { error = true, message = "No matching record found" }
function cf.rank_finder(title)

    -- Extracts the scientific name from titles that carry more than the name itself.
    local function process_string(input_string)
        return input_string:match("%((.-)%)") or input_string
    end

    title = process_string(title)

    local tables = "Species"
    local taxon_fields = { "Family", "Subfamily", "Genus", "Species" }

    -- Spelled out explicitly: appending "s" produced "familys" and
    -- "subfamilys". These values agree with cf.get_child_ranks.
    local plurals = {
        Family = "families",
        Subfamily = "subfamilies",
        Genus = "genera",
        Species = "species",
    }

    -- The name is embedded in single-quoted SQL literals; double any apostrophes.
    local escaped_title = cf.escape_string_for_sql(title)

    local where_clauses = {}
    for i, field in ipairs(taxon_fields) do
        where_clauses[i] = field .. " = '" .. escaped_title .. "'"
    end

    local cargo_args = { where = table.concat(where_clauses, " OR ") }
    local cargo_result = cargo.query(tables, table.concat(taxon_fields, ", "), cargo_args)

    -- Walk the ranks in their fixed order so that the outcome does not depend
    -- on the unspecified iteration order of pairs().
    for _, row in ipairs(cargo_result or {}) do
        for _, field in ipairs(taxon_fields) do
            if row[field] == title then
                return {
                    lowercase = string.lower(field),
                    title = field,
                    plural = plurals[field],
                    cargo_field = field,
                    error = false,
                }
            end
        end
    end

    return {
        error = true,
        message = "No matching record found",
    }
end







-- Collects the keys of a table into a list.
-- The order of the returned keys follows pairs() and is therefore unspecified.
-- NOTE: defined as a global on purpose of compatibility: cf.rank_finder, which
-- appears earlier in this file, refers to it by its global name.
function keys(t)
    local collected = {}
    for key in pairs(t) do
        collected[#collected + 1] = key
    end
    return collected
end








-- Returns the ranks that sit below a given rank in the chain
-- family > subfamily > genus > species.
--
-- Parameters:
--     rank (string) - lowercase rank name ("family", "subfamily", "genus", "species").
-- Returns:
--     (table) A table with four lists, keyed "lowercase", "plural",
--     "cargo_field" and "title", each holding the respective spelling of every
--     rank after the given one (so all four are empty for "species").
--     An empty table when the rank is not part of the chain.
function cf.get_child_ranks(rank)
    local rank_chain = {"family", "subfamily", "genus", "species"}
    local variable_lookup = {
        lowercase = rank_chain,
        plural = {"families", "subfamilies", "genera", "species"},
        cargo_field = {"Family", "Subfamily", "Genus", "Species"},
        title = {"Family", "Subfamily", "Genus", "Species"},
    }

    -- Locate the requested rank within the chain
    local rank_position
    for position, candidate in ipairs(rank_chain) do
        if candidate == rank then
            rank_position = position
            break
        end
    end

    if rank_position == nil then
        return {}  -- unknown rank
    end

    -- For every spelling variant, keep only the entries after the requested rank
    local child_ranks = {}
    for variant, spellings in pairs(variable_lookup) do
        local below = {}
        for position = rank_position + 1, #rank_chain do
            below[#below + 1] = spellings[position]
        end
        child_ranks[variant] = below
    end

    return child_ranks
end








--[[
Builds one comma-separated string from the unique values of a single field
across a list of rows.

Usage:
cf.comma_separated_list takes three arguments:
1. dict (table): A list of rows, each row being a table.
2. field_name (string): The key whose values are collected.
3. format (string, optional): "page" to turn every value into a wiki link
   (italicized when field_name is "species" or "genus", ignoring case);
   anything else yields plain text. Default is "string".

Behaviour:
- Values are de-duplicated and sorted with table.sort.
- Trailing asterisks are removed from every value after sorting.
- Rows that are missing, or lack the field, are reported through mw.log and skipped.
- Returns nil when no value was found.

Example:
local dict = {
    {species = "Homo sapiens"},
    {species = "Pan troglodytes"},
    {species = "Gorilla gorilla"},
    {species = "Homo sapiens"}, -- Duplicate entry
    {species = "Pan paniscus"},
}

local result = cf.comma_separated_list(dict, "species", "page")

--]]

function cf.comma_separated_list(dict, field_name, format)
    format = format or "string"

    -- Collect the distinct values as keys of a set
    local unique_values = {}
    for index = 1, #dict do
        local entry = dict[index]
        local value = entry and entry[field_name]
        if value then
            unique_values[value] = true
        else
            mw.log("Invalid entry at index " .. index .. " in dict")
        end
    end

    -- Turn the set back into a list and sort it
    local sorted_values = {}
    for value in pairs(unique_values) do
        sorted_values[#sorted_values + 1] = value
    end
    table.sort(sorted_values)

    -- Nothing collected: signal that with nil rather than an empty string
    if #sorted_values == 0 then
        return nil
    end

    -- Decide once which markup surrounds each item
    local opening, closing = "", ""
    if format == "page" then
        local lowered_field = string.lower(field_name)
        if lowered_field == "species" or lowered_field == "genus" then
            opening, closing = "''[[", "]]''"   -- italicized link
        else
            opening, closing = "[[", "]]"       -- regular link
        end
    end

    local list = {}
    for position, value in ipairs(sorted_values) do
        local item = value:gsub("%**$", "")  -- drop trailing asterisks
        list[position] = opening .. item .. closing
    end

    return table.concat(list, ", ")
end









--[[
Normalizes a delimited string so that every delimiter is followed by exactly
one space.

Usage:
cf.format_delimited_string takes two arguments:
1. input_string (string): The string to format.
2. delimiter (string, optional): The delimiter, taken literally. Default is comma (",").

Any whitespace directly after a delimiter is replaced by a single space; a
delimiter at the very end of the string also gets a space appended.

Example:
local result = cf.format_delimited_string("apple,banana,carrot,date,egg")
-- result: "apple, banana, carrot, date, egg"

local result_with_semicolon = cf.format_delimited_string("apple;banana;carrot", ";")
-- result_with_semicolon: "apple; banana; carrot"

local result_with_dot = cf.format_delimited_string("a.b.c", ".")
-- result_with_dot: "a. b. c"
--]]

function cf.format_delimited_string(input_string, delimiter)
    delimiter = delimiter or ","

    -- The delimiter is meant literally, but gsub reads its second argument as a
    -- pattern: "." would match every character and "-", "?", "*", "+" act as
    -- quantifiers. Prefixing each punctuation character with % makes it literal.
    local delimiter_pattern = delimiter:gsub("%p", "%%%0")

    -- In the replacement string % is special as well, so double it there.
    local replacement = (delimiter .. " "):gsub("%%", "%%%%")

    -- Assigning to a single local keeps only the string, not gsub's match count.
    local formatted_string = input_string:gsub(delimiter_pattern .. "%s*", replacement)
    return formatted_string
end





-- Renders the values of one field of a Cargo result as an HTML bulleted list.
--
-- Parameters:
--     cargo_results (table)        - list of Cargo rows.
--     field_name (string)          - row key whose value is listed.
--     sort_alphabetically (boolean, optional) - pass true to sort the items;
--                                    a value that is entirely one wiki link is
--                                    sorted by the text between its brackets.
--     delimiter (string, optional) - separator used to split a field value
--                                    into several items. Default is ",". It is
--                                    handed to mw.text.split unchanged, which
--                                    interprets it as a pattern.
-- Returns:
--     (string) the HTML of a <ul> element with one <li> per item. Rows that do
--     not have the field, and empty items, are skipped.
function cf.display_field_in_bulleted_list(cargo_results, field_name, sort_alphabetically, delimiter)
    delimiter = delimiter or ","

    local items = {}

    for _, result in ipairs(cargo_results) do
        local field_value = result[field_name]

        -- A row without this field used to pass nil to mw.text.split and abort
        -- the whole list; skip such rows instead.
        if field_value ~= nil then
            local item_list = mw.text.split(tostring(field_value), delimiter)
            for _, item in ipairs(item_list) do
                local trimmed_item = mw.text.trim(item)
                -- Empty fragments (blank value, doubled delimiter) would render
                -- as empty bullets.
                if trimmed_item ~= "" then
                    items[#items + 1] = trimmed_item
                end
            end
        end
    end

    if sort_alphabetically then
        table.sort(items, function(a, b)
            -- Compare by link target/text rather than by the leading brackets
            local clean_a = mw.ustring.gsub(a, "^%[%[(.-)%]%]$", "%1")
            local clean_b = mw.ustring.gsub(b, "^%[%[(.-)%]%]$", "%1")
            return clean_a < clean_b
        end)
    end

    local ul = mw.html.create('ul')

    for _, item in ipairs(items) do
        ul:tag('li'):wikitext(item):done()
    end

    return tostring(ul)
end











-- Renders the unique values of one field across a list of rows as a sorted
-- HTML bulleted list.
--
-- Parameters:
--     dict (table)        - list of rows, each row being a table.
--     field_name (string) - key whose values are collected.
--     text_only (boolean, optional) - for historical reasons every item is
--                           turned into a page link by default (italicized
--                           when field_name is "species" or "genus", ignoring
--                           case); pass true to get plain text instead.
-- Returns:
--     (string|nil) the HTML of the <ul> element, or nil when no value was
--     found. Trailing asterisks are removed from each value. Rows that are
--     missing or lack the field are reported through mw.log and skipped.
function cf.bulleted_list(dict, field_name, text_only)
    -- Collect the distinct values as keys of a set
    local unique_values = {}
    for index = 1, #dict do
        local entry = dict[index]
        local value = entry and entry[field_name]
        if value then
            unique_values[value] = true
        else
            mw.log("Invalid entry at index " .. index .. " in dict")
        end
    end

    local sorted_values = {}
    for value in pairs(unique_values) do
        sorted_values[#sorted_values + 1] = value
    end
    table.sort(sorted_values)

    -- No values, no list
    if #sorted_values == 0 then
        return nil
    end

    local result_list = mw.html.create('ul')
    for _, value in ipairs(sorted_values) do
        local item = value:gsub("%**$", "") -- Remove trailing asterisks

        local markup
        if text_only then
            markup = item
        elseif string.lower(field_name) == "species" or string.lower(field_name) == "genus" then
            markup = "''[[" .. item .. "]]''"
        else
            markup = "[[" .. item .. "]]"
        end

        local list_item = mw.html.create('li')
        list_item:wikitext(markup)
        result_list:node(list_item)
    end

    return tostring(result_list)
end










-- Extracts the scientific name from a page title.
-- If the title contains a balanced pair of parentheses, the text inside the
-- first such pair is returned; otherwise the title is returned unchanged.
function cf.title_to_sci(str)
  local open_at, close_at = string.find(str, "%b()")

  -- No parenthesized part: the title is taken to be the name itself
  if not (open_at and close_at) then
    return str
  end

  return string.sub(str, open_at + 1, close_at - 1)
end




-- Counts the non-empty, comma-separated elements of a string.
-- Returns 0 when input_string is nil or false. Consecutive commas do not add
-- empty elements to the count.
function cf.count_comma_elements(input_string)
    if not input_string then
        return 0
    end

    -- gsub's second result is the number of matches, i.e. the number of
    -- runs of non-comma characters; the rewritten string itself is discarded.
    local _, count = string.gsub(input_string, "[^,]+", "")
    return count
end








-- Hand the table of helper functions back to whatever loads this module.
return cf