1

This might be more easily explained with a sample of code. I have the following two functions. The first creates all sets of strings of a given length (size) from a given list of characters (group):

/// <summary>
/// Builds every combination of <paramref name="size"/> elements taken from
/// <paramref name="group"/>, each rendered as the concatenation of the chosen elements.
/// </summary>
/// <param name="size">Number of elements per combination; expected to be non-negative.</param>
/// <param name="group">The elements to choose from. Elements are picked by position, in ascending position order.</param>
/// <returns>
/// The combinations in lexicographic order of the chosen positions. A <paramref name="size"/> of 0
/// yields a single empty string; a <paramref name="size"/> larger than the group yields an empty list.
/// </returns>
private List<string> generateSets(int size, IList<string> group)
{
    List<string> ret = new List<string>();

    // There is no way to pick more distinct positions than the group has.
    if (size > group.Count) return (ret);

    // indices[i] is the position in group of the i-th element of the current combination.
    int[] indices = new int[size];
    for (int i = 0; i < size; i++) indices[i] = i;

    // Reused buffer so the combination can be joined for any size, not just up to five elements.
    string[] parts = new string[size];
    while (true)
    {
        for (int i = 0; i < size; i++) parts[i] = group[indices[i]];
        ret.Add(string.Concat(parts));

        // Find the rightmost position that can still move forward without
        // pushing the positions after it past the end of the group.
        int pos = size - 1;
        while (pos >= 0 && indices[pos] >= (group.Count - (size - pos))) pos--;

        // Every position is at its maximum: the last combination has been emitted.
        if (pos < 0) break;

        indices[pos]++;
        // Pack the positions to the right directly behind the one that moved.
        for (int j = pos + 1; j < size; j++)
        {
            indices[j] = indices[j - 1] + 1;
        }
    }
    return (ret);
}

The second function compresses a set of sets based on all possible matches:

/// <summary>
/// Searches for a smallest selection of strings from <paramref name="possible"/> such that every
/// string in <paramref name="sets"/> has all of its characters contained in at least one selected string.
/// </summary>
/// <param name="sets">The strings that must each be contained in some selected candidate.</param>
/// <param name="possible">The candidate strings the selection is drawn from.</param>
/// <returns>
/// The first covering selection found when candidates are tried by increasing selection size and,
/// within one size, in lexicographic order of their positions in <paramref name="possible"/>.
/// Returns an empty list when <paramref name="sets"/> is empty or when no selection covers every set.
/// </returns>
/// <remarks>
/// This is an exhaustive search: every combination of every size may be visited, including the
/// last combination of each size and the selection of all candidates.
/// </remarks>
private List<string> compressSets(List<string> sets, List<string> possible)
{
    List<string> ret = new List<string>();

    // Nothing to cover, so the smallest valid selection is the empty one.
    if (sets.Count < 1) return (ret);

    // Growing the selection size one step at a time guarantees the first hit is a smallest cover.
    for (int size = 1; size <= possible.Count; size++)
    {
        // indices holds the positions (strictly ascending) of the currently selected candidates.
        int[] indices = new int[size];
        for (int i = 0; i < size; i++) indices[i] = i;

        while (true)
        {
            if (this.isCoveredBy(sets, possible, indices))
            {
                for (int i = 0; i < size; i++)
                {
                    ret.Add(possible[indices[i]]);
                }
                return (ret);
            }

            // Find the rightmost position that can still advance; the test happens
            // before advancing, so the final combination of each size is checked too.
            int pos = size - 1;
            while (pos >= 0 && indices[pos] >= (possible.Count - (size - pos))) pos--;

            // All combinations of this size are exhausted; move on to the next size.
            if (pos < 0) break;

            indices[pos]++;
            for (int j = pos + 1; j < size; j++)
            {
                indices[j] = indices[j - 1] + 1;
            }
        }
    }
    return (ret);
}

/// <summary>
/// Checks whether every string in <paramref name="sets"/> is contained (per ContainsAll) in at
/// least one of the candidates of <paramref name="possible"/> selected by <paramref name="indices"/>.
/// </summary>
private bool isCoveredBy(List<string> sets, List<string> possible, int[] indices)
{
    for (int s = 0; s < sets.Count; s++)
    {
        bool covered = false;
        for (int i = 0; i < indices.Length && !covered; i++)
        {
            covered = this.ContainsAll(possible[indices[i]], sets[s]);
        }
        // One uncovered set is enough to reject this selection.
        if (!covered) return (false);
    }
    return (true);
}
/// <summary>
/// Tells whether each character of <paramref name="subset"/> occurs somewhere in <paramref name="set"/>.
/// </summary>
/// <param name="set">The string that is searched.</param>
/// <param name="subset">The string whose characters are looked up one by one.</param>
/// <returns>
/// <c>false</c> as soon as a character of <paramref name="subset"/> is not found in
/// <paramref name="set"/>; <c>true</c> otherwise, including when <paramref name="subset"/> is empty.
/// </returns>
public bool ContainsAll(string set, string subset)
{
    foreach (char wanted in subset)
    {
        bool found = set.IndexOf(wanted) >= 0;
        if (!found)
        {
            return false;
        }
    }

    return true;
}

For instance:

// The six letters the combinations are drawn from.
List<string> group = new List<string> { "A", "B", "C", "D", "E", "F" };

// All 3-letter combinations (to be covered) and all 4-letter combinations (the candidates).
List<string> sets3 = this.generateSets(3, group);
List<string> sets4 = this.generateSets(4, group);

// Pick 4-letter strings that together contain every 3-letter combination.
List<string> sets = this.compressSets(sets3, sets4);
foreach (string chosen in sets)
{
    Debug.WriteLine(chosen);
}

Will output:

ABCD
ABCE
ABCF
ADEF
BDEF
CDEF

Which is a minimal set of 4-character-length strings containing every 3-character-length combination of letters A-F without regard to the order in which they occur. This works well and seems to scale up correctly with one major caveat: it takes exponentially longer for every increase in initial set size, target set size and the required number of matching characters in resultant sets. Is there a way to make this faster or a more optimal algorithm out there to achieve this task?

CoryG
  • 2,223
  • 3
  • 21
  • 49

1 Answers1

1

Hmmm... This is a combinations problem: N items taken X at a time.

In your example, 6 items (A-F) taken 4 at a time

So... See the Answers here ...

Algorithm to return all combinations of k elements from n

Phillip Williams
  • 396
  • 1
  • 10