This might be more easily explained with a sample of code. I have the following two functions. The first creates all sets of strings of a given length (`size`) from a given list of characters (`group`):
/// <summary>
/// Builds every combination of <paramref name="size"/> entries taken from
/// <paramref name="group"/>, each combination rendered as the concatenation of
/// its entries. Combinations are produced in lexicographic order of the
/// entries' positions in <paramref name="group"/>.
/// </summary>
/// <param name="size">Number of entries per combination; must not be negative.</param>
/// <param name="group">The entries to choose from.</param>
/// <returns>
/// One string per combination. A <paramref name="size"/> of zero yields a single
/// empty string; a <paramref name="size"/> larger than the group yields an empty list.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="group"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="size"/> is negative.</exception>
private List<string> generateSets(int size, IList<string> group)
{
    if (group == null)
    {
        throw new System.ArgumentNullException("group");
    }
    if (size < 0)
    {
        throw new System.ArgumentOutOfRangeException("size", "size must not be negative.");
    }

    List<string> ret = new List<string>();

    // There is no way to pick more entries than the group holds.
    if (size > group.Count)
    {
        return ret;
    }

    // indices[i] is the position in group of the i-th entry of the current
    // combination; start with the first combination 0, 1, ..., size - 1.
    int[] indices = new int[size];
    for (int i = 0; i < size; i++)
    {
        indices[i] = i;
    }

    // Reused buffer so the combination can be of any length, not just up to five.
    string[] parts = new string[size];

    while (true)
    {
        for (int i = 0; i < size; i++)
        {
            parts[i] = group[indices[i]];
        }
        ret.Add(string.Concat(parts));

        // Find the right-most index that has not yet reached its final position.
        int pos = size - 1;
        while (pos >= 0 && indices[pos] >= group.Count - (size - pos))
        {
            pos--;
        }

        // Every index is at its final position: that was the last combination.
        if (pos < 0)
        {
            break;
        }

        // Advance it and pack all later indices directly behind it.
        indices[pos]++;
        for (int j = pos + 1; j < size; j++)
        {
            indices[j] = indices[j - 1] + 1;
        }
    }

    return ret;
}
The second function compresses a set of sets (`sets`) into the fewest entries of `possible` that, between them, contain every one of the original sets:
/// <summary>
/// Searches for the smallest selection of entries from <paramref name="possible"/>
/// such that every entry of <paramref name="sets"/> is contained (per
/// <c>ContainsAll</c>) in at least one selected entry.
/// </summary>
/// <remarks>
/// Selections are tried by increasing size and, within one size, in lexicographic
/// order of positions in <paramref name="possible"/>; the first covering selection
/// found is returned. Every selection of every size is tested, including the last
/// one of each size and the selection made of all of <paramref name="possible"/>.
/// This is an exhaustive search, so the running time grows combinatorially with
/// the number of candidates.
/// </remarks>
/// <param name="sets">The strings that must all be covered.</param>
/// <param name="possible">The candidate strings a cover may be built from.</param>
/// <returns>
/// The chosen entries of <paramref name="possible"/>; an empty list when
/// <paramref name="sets"/> is empty or when no selection covers every set.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="sets"/> or <paramref name="possible"/> is null.
/// </exception>
private List<string> compressSets(List<string> sets, List<string> possible)
{
    if (sets == null)
    {
        throw new System.ArgumentNullException("sets");
    }
    if (possible == null)
    {
        throw new System.ArgumentNullException("possible");
    }

    List<string> ret = new List<string>();

    // Nothing to cover: the empty selection is already the minimal answer.
    if (sets.Count == 0)
    {
        return ret;
    }

    List<int> indices = new List<int>();

    // Try selections of 1, 2, ... candidates so the first hit is a smallest one.
    for (int size = 1; size <= possible.Count; size++)
    {
        // First selection of this size: positions 0, 1, ..., size - 1.
        indices.Clear();
        for (int i = 0; i < size; i++)
        {
            indices.Add(i);
        }

        while (true)
        {
            // The selection is a cover when each set lies inside at least one
            // selected candidate; stop checking at the first uncovered set.
            bool allCovered = true;
            for (int s = 0; s < sets.Count && allCovered; s++)
            {
                bool covered = false;
                for (int i = 0; i < size && !covered; i++)
                {
                    covered = this.ContainsAll(possible[indices[i]], sets[s]);
                }
                allCovered = covered;
            }

            if (allCovered)
            {
                for (int i = 0; i < size; i++)
                {
                    ret.Add(possible[indices[i]]);
                }
                return ret;
            }

            // Find the right-most position that can still move forward.
            int pos = size - 1;
            while (pos >= 0 && indices[pos] >= possible.Count - (size - pos))
            {
                pos--;
            }

            // No position can move: every selection of this size has been tested.
            if (pos < 0)
            {
                break;
            }

            // Advance it and pack all later positions directly behind it.
            indices[pos]++;
            for (int j = pos + 1; j < size; j++)
            {
                indices[j] = indices[j - 1] + 1;
            }
        }
    }

    // No selection of any size covers every set.
    return ret;
}
/// <summary>
/// Tells whether every character of <paramref name="subset"/> also occurs
/// somewhere in <paramref name="set"/>. Position and repetition are ignored.
/// </summary>
/// <param name="set">The string that is searched.</param>
/// <param name="subset">The string whose characters must all be found.</param>
/// <returns>
/// <c>true</c> when no character of <paramref name="subset"/> is missing from
/// <paramref name="set"/> (always the case for an empty <paramref name="subset"/>);
/// otherwise <c>false</c>.
/// </returns>
public bool ContainsAll(string set, string subset)
{
    foreach (char wanted in subset)
    {
        bool missing = set.IndexOf(wanted) == -1;
        if (missing)
        {
            return false;
        }
    }

    return true;
}
For instance:
// The six letters every combination is drawn from.
List<string> alphabet = new List<string>() { "A", "B", "C", "D", "E", "F" };

// All 3-letter combinations (to be covered) and all 4-letter combinations (the candidates).
List<string> triples = this.generateSets(3, alphabet);
List<string> quads = this.generateSets(4, alphabet);

// Pick the 4-letter strings that together contain every 3-letter combination.
List<string> cover = this.compressSets(triples, quads);
foreach (string entry in cover)
{
    Debug.WriteLine(entry);
}
Will output:
ABCD
ABCE
ABCF
ADEF
BDEF
CDEF
This is a minimal set of 4-character strings that contains every 3-character combination of the letters A-F, without regard to the order in which the characters occur. This works well and seems to scale up correctly, with one major caveat: the running time grows exponentially with every increase in the initial set size, the target set size, and the required number of matching characters in the resulting sets. Is there a way to make this faster, or is there a more efficient algorithm for this task?