Finding specific strings in an array using R Finding specific strings in an array using R arrays arrays

Finding specific strings in an array using R


this will first check if there are any characters in your word that do not exist within the array and then will check if the number of characters in the array are sufficient to meet repeat letters in your word

word <- strsplit("APPLE", "")pool <- c("A", "Q", "A", "Q",          "Q", "A", "Q", "P",          "Q", "Q", "Q", "Q",          "P", "L", "Q", "Q",          "Q", "Q", "E", "Q")t.word <- table(word)t.pool <- table(pool)length(setdiff(names(t.word), names(t.pool))) == 0min(t.pool[names(t.word)] - t.word) >= 0

the last two functions will both output TRUE to show that all the letters from word exist in pool and that the count of a single letter in word is not greater than that of pool

in function form that will output TRUE if found, otherwise FALSE

word.find <- function(word, pool) {  t.word <- table(strsplit(word, ""))  t.pool <- table(pool)  length(setdiff(names(t.word), names(t.pool))) == 0 & min(t.pool[names(t.word)] - t.word) >= 0}word.find("APPLE", pool)[1] TRUEword.find("APPLES", pool)[1] FALSEword.find("APPLEE", pool)[1] FALSE


This function works using only base R

THE FUNCTION

search_string = function(matrix_array, word_to_search){    position = data.frame(NA,NA,NA) #Create empty dataframe    word_to_search_inv = sapply(lapply(strsplit(word_to_search, NULL), rev), paste, collapse="") #Reverse word_to_search    for (i in 1:nrow(matrix_array)){        str_row = paste((matrix_array[i,]),collapse = "") #Collapse entire row into a string        if (grepl(word_to_search,str_row)) { #Check if the word_to_search is in the string towards right            position = rbind(position,c(i,paste(gregexpr(word_to_search, str_row)[[1]], collapse = ', '),"RIGHT")) #Get position and add it to the dataframe              }        if (grepl(word_to_search_inv,str_row)) {#Check if the word_to_search is in the string towards left (by checking for reverse of word_to_search)            position = rbind(position,c(i,paste(gregexpr(word_to_search_inv, str_row)[[1]], collapse = ', '),"LEFT"))               }    }    for (j in 1:ncol(matrix_array)){                str_column = paste((matrix_array[,j]),collapse = "")        if (grepl(word_to_search, str_column)) { #Check if the word_to_search is in the string towards down            position = rbind(position, c(paste(gregexpr(word_to_search, str_column)[[1]], collapse = ', '),j,"DOWN"))        }        if (grepl(word_to_search_inv, str_column)) { #Check if the word_to_search is in the string towards up            position = rbind(position, c(paste(gregexpr(word_to_search_inv, str_column)[[1]], collapse = ', '),j,"UP"))        }    }    colnames(position) = c("ROW","COLUMN","DIRECTION")    position = position[c(2:nrow(position)),]    rownames(position) = NULL    return(position) #Return the datafram containing row, columnm, and direction where word_to_match is found}

USAGE

#Datamydata = structure(c("A", "A", "Q", "Q", "D", "Q", "Q", "Q", "Q", "B",                      "A", "P", "P", "L", "E", "Q", "Q", "L", "E", "S", "Q", "Q", "Q",                      "Q", "T", "A", "P", "P", "L", "E"), .Dim = c(5L, 6L), .Dimnames = list(NULL, c("V1", "V2",                                                                            "V3", "V4", "V5", "V6")))key = "APPLE"#Run the functionpos = search_string(mydata,key)


Adding another approach, having:

board = structure(c("A", "A", "Q", "Q", "Q", "Q", "Q", "Q", "A", "P", "P", "Q", "Q", "Q", "L", "E", "Q", "Q", "Q", "Q"), .Dim = 4:5, .Dimnames = list(    NULL, NULL))word = "APPLE"

we start with:

matches = lapply(strsplit(word, NULL)[[1]], function(x) which(x == board, arr.ind = TRUE))

which is a simple -probably unavoidable- search of indices of "board" that match each letter of the word. It's a "list" containing the row/col indices like:

#[[1]]#     row col#[1,]   1   1#[2,]   2   1#[3,]   1   3##[[2]]#     row col#[1,]   2   3#[2,]   3   3###.....

Having that, we need to find out, progressively, whether an index in each element has a neighbour (i.e. the right/left/up/down cell) in the next element. E.g. we need something like:

as.matrix(find_neighbours(matches[[1]], matches[[2]], dim(board)))#      [,1]  [,2]#[1,] FALSE FALSE#[2,] FALSE FALSE#[3,]  TRUE FALSE

which informs us, that the row 3 of matches[[1]] is a neighbour of row 1 of matches[[2]], i.e. [1, 3] and [2, 3] are, indeed, neighbouring cells. We need this for each successive element in "matches":

are_neighs = Map(function(x, y) which(find_neighbours(x, y, dim(board)), TRUE),                  matches[-length(matches)], matches[-1])are_neighs#[[1]]#     [,1] [,2]#[1,]    3    1##[[2]]#     [,1] [,2]#[1,]    2    1#[2,]    1    2##[[3]]#     [,1] [,2]#[1,]    2    1##[[4]]#     [,1] [,2]#[1,]    1    1

Now that we have the pairwise ("i" with "i + 1") neighbour matches we need to complete the chain. For this example we'd like to have a vector like c(1, 2, 1, 1) which contains the info that the row 1 of are_neighs[[1]] is chained with the row 2 of are_neighs[[2]] which is chained with row 1 of are_neighs[[3]] which is chained with row 1 of are_neighs[[4]]. This smells like an "igraph" problem, but I'm not so familiar with it (hopefully someone has a better idea), so here's a naive approach to get that chaining:

row_connections = matrix(NA_integer_, nrow(are_neighs[[1]]), length(are_neighs))row_connections[, 1] = 1:nrow(are_neighs[[1]])cur = are_neighs[[1]][, 2]for(i in 1:(length(are_neighs) - 1)) {    im = match(cur, are_neighs[[i + 1]][, 1]) cur = are_neighs[[i + 1]][, 2][im]row_connections[, i + 1] = im}row_connections = row_connections[complete.cases(row_connections), , drop = FALSE]

Which returns:

row_connections#     [,1] [,2] [,3] [,4]#[1,]    1    2    1    1

Having this vector, now, we can extract the respective chain from "are_neighs":

Map(function(x, i) x[i, ], are_neighs, row_connections[1, ])#[[1]]#[1] 3 1##[[2]]#[1] 1 2##[[3]]#[1] 2 1##[[4]]#[1] 1 1

which can be used to extract the appropriate row/col chain of indices from "matches":

ans = vector("list", nrow(row_connections))for(i in 1:nrow(row_connections)) {     connect = Map(function(x, i) x[i, ], are_neighs, row_connections[i, ])     ans[[i]] = do.call(rbind, Map(function(x, i) x[i, ], matches, c(connect[[1]][1], sapply(connect, "[", 2))))}ans#[[1]]#     row col#[1,]   1   3#[2,]   2   3#[3,]   3   3#[4,]   3   4#[5,]   4   4

Wrapping it all in a function (find_neighbours is defined inside):

library(Matrix)ff = function(word, board){    matches = lapply(strsplit(word, NULL)[[1]], function(x) which(x == board, arr.ind = TRUE))    find_neighbours = function(x, y, d)    {        neighbours = function(i, j, d = d)         {            ij = rbind(cbind(i, j + c(-1L, 1L)), cbind(i + c(-1L, 1L), j))            ijr = ij[, 1]; ijc = ij[, 2]            ij = ij[((ijr > 0L) & (ijr <= d[1])) & ((ijc > 0L) & (ijc <= d[2])), ]            ij[, 1] + (ij[, 2] - 1L) * d[1]        }        x.neighs = lapply(1:nrow(x), function(i) neighbours(x[i, 1], x[i, 2], dim(board)))        y = y[, 1] + (y[, 2] - 1L) * d[1]        x.sparse = sparseMatrix(i = unlist(x.neighs),                                 j = rep(seq_along(x.neighs), lengths(x.neighs)),                                 x = 1L, dims = c(prod(d), length(x.neighs)))        y.sparse = sparseMatrix(i = y, j = seq_along(y), x = 1L, dims = c(prod(d), length(y)))                                 ans = crossprod(x.sparse, y.sparse, boolArith = TRUE)        ans    }          are_neighs = Map(function(x, y) which(find_neighbours(x, y, dim(board)), TRUE), matches[-length(matches)], matches[-1])    row_connections = matrix(NA_integer_, nrow(are_neighs[[1]]), length(are_neighs))    row_connections[, 1] = 1:nrow(are_neighs[[1]])    cur = are_neighs[[1]][, 2]    for(i in 1:(length(are_neighs) - 1)) {        im = match(cur, are_neighs[[i + 1]][, 1])         cur = are_neighs[[i + 1]][, 2][im]        row_connections[, i + 1] = im    }    row_connections = row_connections[complete.cases(row_connections), , drop = FALSE]    ans = vector("list", nrow(row_connections))    for(i in 1:nrow(row_connections)) {        connect = Map(function(x, i) x[i, ], are_neighs, row_connections[i, ])        ans[[i]] = do.call(rbind, Map(function(x, i) x[i, ], matches, c(connect[[1]][1], sapply(connect, "[", 2))))    }    ans}

We can try it:

ff("APPLE", board)#[[1]]#     row col#[1,]   1   3#[2,]   2   3#[3,]   3   3#[4,]   3   4#[5,]   4   4

And with more than one matches:

ff("AQQP", board)#[[1]]#     row col#[1,]   1   1#[2,]   1   2#[3,]   2   2#[4,]   2   3##[[2]]#     row col#[1,]   1   3#[2,]   1   2#[3,]   2   2#[4,]   2   3##[[3]]#     row col#[1,]   1   3#[2,]   1   4#[3,]   2   4#[4,]   2   3

Although, it's flexible in returning multiple matches, it does not return all possible matches and, in a nutshell, that's because of the use of match when building the chain of neighbours -- a linear search could be used instead, but -at the moment- adds significant code complexity.