Wednesday, November 30, 2016

#codingexercise 
Generate Lexicographically ordered combinations of a string  
        public static void Combine(ref String numbers, ref StringBuilder candidate, ref SortedList<String, String> combinations, int level, int start) 
        { 
            for (int i = start; i < numbers.Length; i++) 
            { 
                if (candidate.ToString().IndexOf(numbers[i]) == -1) // character not already used 
                { 
                    candidate[level] = numbers[i]; 
                    combinations.Add(candidate.ToString(), candidate.ToString()); 
                    if (i < numbers.Length - 1) 
                        Combine(ref numbers, ref candidate, ref combinations, level + 1, i + 1); // recurse past i so characters stay in input order 
                    candidate[level] = '\0'; 
                } 
            } 
        } 
        static void ToPrint(SortedList<String, String> combinations, string prefix) 
        { 
            Console.WriteLine("---------------------------------------"); 
            for (int i = 0; i < combinations.Count; i++) 
                Console.WriteLine(prefix + combinations.ElementAt(i).Key); 
            Console.WriteLine("---------------------------------------"); 
        } 
        static void Main(string[] args) 
        { 
            var a = "321"; 
            var b = new StringBuilder(); 
            for (int i = 0; i < a.Length; i++) 
                b.Append('\0'); 
            int start = 0; 
            int level = 0; 
            var combinations = new SortedList<String, String>(); 
            Combine(ref a, ref b, ref combinations, level, start); 
            var prefix = String.Empty; 
            ToPrint(combinations, prefix); 
        } 
--------------------------------------- 
1 
2 
21 
3 
31 
32 
321 
--------------------------------------- 
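The same enumeration can be sketched in runnable form; here is a Python version (a sketch, with `combine` as a hypothetical name, not the post's C# code). Recursing past the current index keeps each candidate's characters in input order, and a final sort yields the listing above.

```python
def combine(s: str) -> list[str]:
    """Collect all non-empty combinations of s, then sort lexicographically."""
    results = []

    def recurse(start: int, candidate: str) -> None:
        for i in range(start, len(s)):
            results.append(candidate + s[i])
            recurse(i + 1, candidate + s[i])  # advance past i to avoid reuse

    recurse(0, "")
    return sorted(results)

print(combine("321"))  # ['1', '2', '21', '3', '31', '32', '321']
```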
If the starting input string is already sorted from left to right, then the following permutations do not require a SortedList: the additions to a plain Dictionary already arrive in sorted order, and the results are the same for each prefix.
        static void Main(string[] args)
        {
            var a = "123"; // already sorted, so insertion order is lexicographic
            var b = new StringBuilder();
            var used = new bool[a.Length];
            int start = 0;
            int end = a.Length - 1;
            // assumes Permute and ToPrint overloads that accept a Dictionary
            var permutations = new Dictionary<String, String>();
            Permute(a, b, used, start, end, ref permutations);
            ToPrint(permutations, String.Empty);
        }
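The claim can be checked with a short Python sketch (`permute_sorted` is a hypothetical helper; Python dicts preserve insertion order, analogous to the Dictionary here): with an already sorted input, the keys arrive in lexicographic order without any sorted container.

```python
def permute_sorted(s: str) -> list[str]:
    """Backtracking permutations of a sorted string; insertion order comes out lexicographic."""
    seen: dict[str, str] = {}
    used = [False] * len(s)

    def recurse(candidate: str) -> None:
        if len(candidate) == len(s):
            seen[candidate] = candidate
            return
        for i in range(len(s)):  # left-to-right scan tries smaller characters first
            if not used[i]:
                used[i] = True
                recurse(candidate + s[i])
                used[i] = False

    recurse("")
    return list(seen)

result = permute_sorted("123")
assert result == sorted(result)  # insertion order was already sorted
print(result)  # ['123', '132', '213', '231', '312', '321']
```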



Tuesday, November 29, 2016


#codingexercise
Generate Lexicographically ordered permutations of a string
        static void Permute(String a, StringBuilder b, bool[] used, int start, int end, ref SortedList<String, String> permutations)
        {
              if ( b.Length == end-start+1)  { permutations.Add(b.ToString(),b.ToString()); return;}
              for (int i = start; i <= end ; i++)
              {
                 if (used[i]) continue;
                 used[i] = true;
                 b  = b.Append(a[i]);
                 Permute(a, b, used, start, end, ref permutations);
                 b.Length--;
                 used[i] = false;
              }
        }
        static void ToPrint(SortedList<String, String> permutations, string prefix)
        {
            Console.WriteLine("---------------------------------------");
            for (int i = 0; i < permutations.Count; i++)
                Console.WriteLine(prefix + permutations.ElementAt(i).Key);
            Console.WriteLine("---------------------------------------");
        }
        static void Main(string[] args)
        {
            var a = "321";
            var b = new StringBuilder();
            var used = new bool[a.Length];
            int start = 0;
            int end = a.Length - 1;
            var permutations = new SortedList<String, String>();
            Permute(a, b, used, start, end, ref permutations);
            var prefix = String.Empty;
            ToPrint(permutations, prefix);
        }
---------------------------------------
123
132
213
231
312
321
---------------------------------------

 AVL tree delete  
We described AVL tree insertion here. We now look at the AVL tree delete. 
Node Remove(Node root, int k) 
{ 
    if (root == null) return null; 
    if (k < root.data) 
        root.left = Remove(root.left, k); 
    else if (k > root.data) 
        root.right = Remove(root.right, k); 
    else 
    { 
        Node left = root.left; 
        Node right = root.right; 
        // the root node itself is discarded 
        if (right == null) return left; 
        Node min = FindMin(right); 
        min.right = RemoveMin(right); 
        min.left = left; 
        return Balance(min); 
    } 
    return Balance(root); 
}

Node RemoveMin(Node root) 
{ 
    if (root.left == null) 
        return root.right; 
    root.left = RemoveMin(root.left); 
    return Balance(root); 
} 

Node FindMin(Node root) 
{ 
    return root.left != null ? FindMin(root.left) : root; 
}
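The pseudocode above translates to a runnable Python sketch (the `Node` class, `balance`, and the rotation helpers are assumptions mirroring the insertion post below):

```python
class Node:
    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None
        self.height = 1

def height(n):
    return n.height if n else 0

def fix_height(n):
    n.height = 1 + max(height(n.left), height(n.right))

def bfactor(n):  # h(right) - h(left)
    return height(n.right) - height(n.left)

def left_rotate(p):  # p.right becomes the subtree root
    q = p.right
    p.right, q.left = q.left, p
    fix_height(p)
    fix_height(q)
    return q

def right_rotate(p):  # p.left becomes the subtree root
    q = p.left
    p.left, q.right = q.right, p
    fix_height(p)
    fix_height(q)
    return q

def balance(n):
    fix_height(n)
    if bfactor(n) == 2:
        if bfactor(n.right) < 0:
            n.right = right_rotate(n.right)
        return left_rotate(n)
    if bfactor(n) == -2:
        if bfactor(n.left) > 0:
            n.left = left_rotate(n.left)
        return right_rotate(n)
    return n

def insert(root, k):
    if root is None:
        return Node(k)
    if k < root.data:
        root.left = insert(root.left, k)
    else:
        root.right = insert(root.right, k)
    return balance(root)

def find_min(n):
    return find_min(n.left) if n.left else n

def remove_min(n):
    if n.left is None:
        return n.right
    n.left = remove_min(n.left)
    return balance(n)

def remove(root, k):
    if root is None:
        return None
    if k < root.data:
        root.left = remove(root.left, k)
    elif k > root.data:
        root.right = remove(root.right, k)
    else:
        left, right = root.left, root.right
        if right is None:
            return left
        m = find_min(right)       # in-order successor replaces the deleted node
        m.right = remove_min(right)
        m.left = left
        return balance(m)
    return balance(root)

def inorder(n):
    return inorder(n.left) + [n.data] + inorder(n.right) if n else []

root = None
for k in [5, 3, 8, 1, 4, 7, 9]:
    root = insert(root, k)
root = remove(root, 5)
print(inorder(root))  # [1, 3, 4, 7, 8, 9]
```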

Monday, November 28, 2016

#codingexercise
Implement AVL tree insertion
Node Insert(Node root, Node z)
{
assert(z != null);
z.height = 1; // a new node starts out as a leaf
if (root == null)
     return z;
if (z.data < root.data)
     root.left = Insert(root.left, z);
else
     root.right = Insert(root.right, z);

root.height = 1 + max(height(root.left), height(root.right));
return Balance(root);
}

Node Balance(Node root)
{
FixHeight(root); // 1 more than left or right
if (Bfactor(root) == 2) // Bfactor is h(right)-h(left)
{
      if (Bfactor(root.right) < 0)
           root.right = RightRotate(root.right); // returns root.right.left
      return LeftRotate(root);
}
if (Bfactor(root) == -2)
{
      if (Bfactor(root.left) > 0)
           root.left = LeftRotate(root.left);
      return RightRotate(root);
}
return root;
}
Courtesy: https://kukuruku.co/hub/cpp/avl-trees, together with my fixes to an incorrect implementation on GeeksforGeeks.

// BST Tree insert iterative
TREE-INSERT(ref Node root, Node z)
{
Node y = null;
Node x = root;
while (x != null)
{
     y = x;
     if (z.data < x.data)
        x = x.left;
     else
        x = x.right;
}
if (y == null)
    root = z;
else if (z.data < y.data)
    y.left = z;
else
    y.right = z;
}
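The same iterative insert as a runnable Python sketch (the `Node` class and `inorder` helper are assumptions for illustration); y trails x and ends up as the parent of z:

```python
class Node:
    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None

def tree_insert(root, z):
    """Iterative BST insert; returns the (possibly new) root."""
    y = None                 # y trails x and becomes z's parent
    x = root
    while x is not None:
        y = x
        x = x.left if z.data < x.data else x.right
    if y is None:
        return z             # tree was empty
    if z.data < y.data:
        y.left = z
    else:
        y.right = z
    return root

def inorder(n):
    return inorder(n.left) + [n.data] + inorder(n.right) if n else []

root = None
for k in [5, 2, 8, 1, 3]:
    root = tree_insert(root, Node(k))
print(inorder(root))  # [1, 2, 3, 5, 8]
```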

string reversal, recursive:
string Reverse(string input)
{
    if (String.IsNullOrEmpty(input))
        return input;
    // move the first character behind the reversed remainder
    return Reverse(input.Substring(1)) + input[0];
}
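The same idea written as genuine recursion, in a small Python sketch:

```python
def reverse(s: str) -> str:
    # move the first character behind the reversed remainder
    return reverse(s[1:]) + s[0] if s else s

print(reverse("hello"))  # olleh
```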


Sunday, November 27, 2016

We continue discussing the paper "Nested sampling for general Bayesian computation" by Skilling.
We were looking at the transformation of computing the evidence based on the prior mass instead of the parameters. We looked at the integration performed. This paper essentially says don't navigate the parameter space. It is sufficient to explore a likelihood-weighted space.
The method replaces the point of the lowest likelihood by a new one drawn from within the likelihood contour.
It increments the evidence by filling in the missing band with weight w = Xj/N for each surviving point. Random values in the range (0,1) suffice to draw the samples within the constrained likelihood contour. The method also calculates the information H during the iterations as a by-product.
The method terminates when a certain number of iterations have been performed. This could be modified to stop when the largest current likelihood would not increase the current evidence by more than a small fraction f. This threshold implies that the accumulation of the evidence is tailing off and the sum is nearly complete.
The termination condition determines the total area found. The areas of the strips start by rising, with the likelihood increasing faster than the widths decrease. This means more important regions are being found. When the likelihood flattens off, the areas pass across a maximum and start to fall away. Most of the total area is usually found in the region of this maximum, where the prior mass is about e raised to the power -H. Since Xi is about e raised to the power -i/N, the method is set to continue iterating until the count i significantly exceeds NH.
#codingexercise

Find if two strings are interleaved in a third string. (The greedy scan below prefers A whenever both strings match; it can fail when A and B share characters at the same position, a case the recursive version further down handles.)
        static bool isInterleaved(String A, String B, String C)
        {
            int ia = 0;
            int ib = 0;
            for (int i = 0; i < C.Length; i++)
            {
                if (ia < A.Length && C[i] == A[ia])
                {
                    ia++;
                }
                else if (ib < B.Length && C[i] == B[ib])
                {
                    ib++;
                }
                else
                return false;
             
            }
            if (ia != A.Length || ib != B.Length)
                return false;
            return true;
        }
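For inputs where A and B overlap, a standard dynamic-programming check avoids the greedy commitment (a Python sketch, not from the post; `is_interleaved` is a hypothetical name):

```python
def is_interleaved(a: str, b: str, c: str) -> bool:
    if len(a) + len(b) != len(c):
        return False
    # dp[i][j]: can a[:i] and b[:j] interleave to form c[:i+j]?
    dp = [[False] * (len(b) + 1) for _ in range(len(a) + 1)]
    dp[0][0] = True
    for i in range(len(a) + 1):
        for j in range(len(b) + 1):
            if i > 0 and a[i - 1] == c[i + j - 1]:
                dp[i][j] = dp[i][j] or dp[i - 1][j]
            if j > 0 and b[j - 1] == c[i + j - 1]:
                dp[i][j] = dp[i][j] or dp[i][j - 1]
    return dp[len(a)][len(b)]

print(is_interleaved("XXY", "XXZ", "XXZXXY"))  # True, though a greedy scan says False
```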

Print numbers vertically
void PrintVertically(int n)
{
var digits = n.ToDigits();
digits.ForEach(x => Console.WriteLine(x));
}
static List<int> ToDigits(this int n)
{
var ret = new List<int>();
if (n == 0)
    ret.Add(0);
while (n > 0)
{
    ret.Add(n % 10); // least significant digit first
    n = n / 10;
}
ret.Reverse(); // most significant digit first for printing
return ret;
}
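A Python sketch of the same digit extraction (`to_digits` mirrors the extension method above):

```python
def to_digits(n: int) -> list[int]:
    """Most-significant-first digits of a non-negative integer."""
    if n == 0:
        return [0]
    digits = []
    while n > 0:
        digits.append(n % 10)  # peel off the least significant digit
        n //= 10
    digits.reverse()           # restore most-significant-first order
    return digits

for d in to_digits(1234):
    print(d)  # 1, 2, 3, 4 on separate lines
```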

Saturday, November 26, 2016

We continue discussing the paper "Nested sampling for general Bayesian computation" by Skilling.
We were looking at the transformation of computing the evidence based on the prior mass instead of the parameters. We looked at the integration performed. This paper essentially says don't navigate the parameter space. It is sufficient to explore a likelihood weighted space.
The nested sampling procedure is therefore :
Choose the number of classes as N and the number of iterations as j
Record the lowest of the current likelihood values
In each iteration
    set the initial prior to the exponential of a value that depends on iteration sequence
    set the weight to be the difference in previous and the current prior
    increment the evidence based on the strip area
   then replace the point of the lowest likelihood by new one drawn from within the likelihood
Increment the evidence by filling in the missing band with weight w = Xj/N  for each surviving point
There are some salient points to consider in this simplification.
First, only one point is replaced with a new candidate for acceptance, and this is the one with the worst likelihood value, i.e. the one enclosing the largest prior mass.
Second, random values in the range (0,1) suffice to draw the samples within the constrained likelihood contour.
Third, the likelihood contours shrink by factors exp(-1/N) in area and are roughly followed by successive sampling points.
Fourth, we can calculate the information H during the iterations as a by-product. It is the fraction of prior mass that contains most of the posterior mass.
Fifth, the procedure takes about NH ± sqrt(NH) steps, where H is the information. The bulk of the posterior mass is reached in the first component and crossed in the second component. This follows from the premise that the individual log t are independent; the mean of log t is -1/N and its standard deviation is 1/N. After i steps the prior mass is expected to shrink to log Xi, which is roughly -(i ± sqrt(i))/N.
Since the nested sampling procedure works on a weighted-sum basis, we could consider combining evidence in batches: follow the procedure for the current batch, then merge the evidence from the next batch with that of the current batch once the same log prior mass scale is aligned.
 #codingexercise
Find if a string is the interleaving of two other strings
        static bool IsInterleaved( StringBuilder A, StringBuilder B, StringBuilder C, int ia, int ib, int ic)
        {
            if (ia == A.Length && ib == B.Length && ic == C.Length) return true;
            if (ic == C.Length) return false;
            if (ia < A.Length && ib < B.Length &&  C[ic] != A[ia] && C[ic] != B[ib]) return false;
            if (ia < A.Length && ib == B.Length && C[ic] != A[ia]) return false;
            if (ia == A.Length && ib < B.Length && C[ic] != B[ib]) return false;
            return (((ia < A.Length && ic < C.Length && C[ic] == A[ia])  && IsInterleaved(A, B, C, ia+1, ib, ic+1)) ||
                    ((ib < B.Length && ic < C.Length && C[ic] == B[ib] && IsInterleaved(A,B,C, ia, ib+1, ic+1))));
        }
 

Friday, November 25, 2016

We continue discussing the paper "Nested sampling for general Bayesian computation" by Skilling.
We were looking at the transformation of computing the evidence based on the prior mass instead of the parameters. Today we look at the integration performed.
The integration is straightforward online because it is performed by computing the area in strips under the curve. This area is the weighted sum of likelihood values for the strips. We know the curve is non-increasing between 0 and 1. Therefore each strip is bounded below by any value larger than the prior mass X, where the strips are arranged from right to left in m strips. Also, the lower bound is given by Xm+1 = 0, and the area of the curve has to be greater than or equal to the sum of weighted likelihood values, where the weights are the X intervals. There is also an upper bound, with the integration not exceeding the calculated area (weighted sum) plus the last prior mass times the maximum likelihood value. This follows from the trapezoidal rule, where the differences between the area covered by the strips and the area under the curve are triangular regions whose sum cannot exceed the last strip with Lmax likelihood.
We already know the weighted sum is computed online. When more likelihood values are encountered, we add the corresponding weighted sum in the proportion of their classes, thereby extending the current evidence to the next. But the more interesting question is whether the likelihood values need to be sorted for the computation of the evidence. The answer is no, as we will shortly see. Before that, let us discuss the sampling. The integral for the evidence is dominated by wherever the bulk of the posterior mass is found, and that bulk occupies a small fraction of the prior. To see the width of the posterior in terms of X, just as we did with the prior, let us say the likelihood function is a multivariate in rank C where the coefficients are normalized. The likelihood can then be written as proportional to an exponential in terms of radius squared. The radius is in C dimensions, and with the likelihood in terms of the exponential of r squared, the area of the curve is proportional to r raised to the power C, because the C dimensions are taken together and the integral translates to a log. In other words, the posterior is distributed over a range in log X. In order to cover such a range, the sampling should be linear in log X rather than in X, and then we set X1 = t1, X2 = t1t2, ..., Xi = t1t2...ti, ..., Xm = t1t2t3...tm, where each ti lies between 0 and 1. By setting values of successive t, we can then write the evidence as a weighted sum again, with the weights represented in terms of t and the likelihood values for each interval.
In practice finding the exact value of t is difficult but we can certainly do with a statistical value. We could do this by finding a new point Xi from the prior subject to the condition that it is smaller than the previous prior and the initial prior being equal to 1. Then our knowledge of the new point would be specified by Xi = ti. Xi-1. To find such a point, we could sample uniformly from the points in the restricted range (0,Xi-1) and then investigate from the original sorted likelihood values what its parameter would have been. But in that parameter space, we would be guided by the likelihood value subject to the constraint that the likelihood value for that chosen parameter is greater than the likelihood of the previous interval with the initial likelihood being equal to zero and in proportion to the prior density. Either way we find a random point and with distribution just the same. Therefore we need not choose the latter method which requires sorting based on the likelihood value and instead perform the sampling based on a random value of t. Thus we avoid sorting. And if we set t to be 0.99, then we can reach the bulk of the posterior in 100 steps of width that is proportional to the fraction of prior mass that contains it instead of taking thousand such steps as is generally the case.
Successive data points in sampling the prior within the box defined by likelihood greater than the current value are found by some Markov chain Monte Carlo approximation, starting at some candidate that obeys the constraints if available, or at worst at the point from the previous iteration that lies on the previous likelihood contour in the parameter space. This paper essentially says don't navigate the parameter space. It is sufficient to explore a likelihood-weighted space.
The nested sampling procedure is therefore :
Choose the number of classes as N and the number of iterations as j
Record the lowest of the current likelihood values
In each iteration
    set the initial prior to the exponential of a value that depends on iteration sequence
    set the weight to be the difference in previous and the current prior
    increment the evidence based on the strip area
   then replace the point of the lowest likelihood by new one drawn from within the likelihood
Increment the evidence by filling in the missing band with weight w using the chosen point

Notice that this greatly simplified procedure involves only exponentials and weighted sums.
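The procedure can be sketched in Python on a toy problem (a sketch under loud assumptions: a uniform prior on (0,1), a unimodal likelihood, and constrained draws by naive rejection sampling instead of MCMC; `nested_sampling` is a hypothetical name):

```python
import math
import random

def nested_sampling(loglike, n_points=100, iterations=600):
    """Toy nested sampling with a uniform prior on (0, 1)."""
    random.seed(0)
    thetas = [random.random() for _ in range(n_points)]
    logls = [loglike(t) for t in thetas]
    evidence = 0.0
    x_prev = 1.0                        # total prior mass starts at 1
    for i in range(1, iterations + 1):
        x_i = math.exp(-i / n_points)   # expected shrinkage of the prior mass
        w = x_prev - x_i                # width of the strip
        worst = min(range(n_points), key=lambda k: logls[k])
        evidence += w * math.exp(logls[worst])
        lstar = logls[worst]
        while True:                     # replace the worst point by a new draw
            t = random.random()         # drawn inside the likelihood contour
            if loglike(t) > lstar:
                thetas[worst], logls[worst] = t, loglike(t)
                break
        x_prev = x_i
    # fill in the final band with the surviving points, weight x_prev/N each
    evidence += x_prev * sum(math.exp(l) for l in logls) / n_points
    return evidence

# likelihood exp(-50 (t - 0.5)^2); the true evidence is about sqrt(pi/50) ~ 0.25
Z = nested_sampling(lambda t: -50 * (t - 0.5) ** 2)
print(Z)
```

The estimate carries an uncertainty of roughly a factor exp(±sqrt(H/N)) in the evidence, so the printed value lands near, not exactly at, the analytic answer.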
#codingexercise
Yesterday we discussed
int F(List<int> V, int i, int j)
{
if (i > j || i >= V.Count || j >= V.Count) return 0;
if (i == j) return V[i];
if (i + 1 == j) return Math.Max(V[i], V[j]);
return Math.Max(V[i] + Math.Min(F(V, i+2, j), F(V, i+1, j-1)),
                V[j] + Math.Min(F(V, i+1, j-1), F(V, i, j-2)));
}

Today we write down a memoized way
static int FMemoized(List<int> V, int n)
{
var table = new int[n,n];
for (int k = 0; k < n; k++)
{
   for (int i = 0, j = k; j < n; i++, j++)
   // i and j advance together so that the k-th pass fills the k-th diagonal of the table;
   // shorter intervals lie on earlier diagonals, so x, y and z below are already computed.
   // The lower-left of the matrix stays untouched while the upper-right fills in.
   {
            int x = ((i+2) <= j) ? table[i+2,j] : 0;
            int y = ((i+1) <= (j - 1)) ? table[i+1, j-1] : 0;
            int z = (i <= (j-2)) ? table[i,j-2] : 0;

            table[i,j] = Math.Max(V[i] + Math.Min(x,y), V[j] + Math.Min(y,z));
   }
}
return table[0,n-1];
}
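The same diagonal-order table fill in a Python sketch (`optimal_take` is a hypothetical name), checked against the classic coin-row example:

```python
def optimal_take(v: list[int]) -> int:
    """Max total the first player can guarantee picking from either end of v."""
    n = len(v)
    table = [[0] * n for _ in range(n)]
    for k in range(n):            # k = j - i, so pass k fills the k-th diagonal
        for i in range(n - k):
            j = i + k
            x = table[i + 2][j] if i + 2 <= j else 0
            y = table[i + 1][j - 1] if i + 1 <= j - 1 else 0
            z = table[i][j - 2] if i <= j - 2 else 0
            # after our pick, the opponent leaves us the worse of the two remainders
            table[i][j] = max(v[i] + min(x, y), v[j] + min(y, z))
    return table[0][n - 1]

print(optimal_take([8, 15, 3, 7]))  # 22: take 7, opponent takes 8, take 15
```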