Cluster computing

Wednesday, September 17, 2014

int memcmp(char* pSrc, char* pDest, size_ len)

{
// assuming parameter validation

while (len--)

{

if (*pSrc != *pDest)

{

return -1;

}

pSrc++;

pDest++;

}

return 0;

}
------------------------------------------------------------------------------------------------------------------
template<class C>
typename C::reverse_iterator find_last(const C& c, typename C::value_type v)
{
     typename C::reverse_iterator p = c.rbegin();
     while ( p != c.rend())
     {
         if (*p == v) return p;
         ++p;
      }
      return p;
}
------------------------------------------------------------------------------------------------------------------
using System;
using System.Collections.Generic;

class Program
{
static void Main()
{
List<Tuple<string,int, int>> list = new List<Tuple<string, int, int>>();
list.Add(new Tuple<string, int, int>("a", 1, 5));
list.Add(new Tuple<string, int, int>("b", 2, 4));
list.Add(new Tuple<string, int, int>("c", 3, 6));

List<Tuple<string, int>> pairs = new List<Tuple<string, int>>();
list.ForEach(x =>
{
pairs.Add(new Tuple<string, int> (x.Item1, x.Item2));
pairs.Add(new Tuple<string, int> (x.Item1, x.Item3));
});
pairs.Sort((a, b) => a.Item2.CompareTo(b.Item2));

foreach (var element in pairs)
{
Console.WriteLine(element);
}
}
}
------------------------------------------------------------------------------------------------------------------
// This solution was taken from another blog.
public static void place8Queens(int row, int ld, int rd, int lim, ref int ans)
{
if (row == lim)
{
ans++;
return;
}
int pos = lim & (~(row | ld | rd));
while (pos != 0)
{
int p = pos & (-pos);
pos -= p;
place8Queens(row + p, (ld + p) << 1, (rd + p) >> 1, lim, ref ans);

}
}
public static int solve(int n)
{
int ans = 0;
int lim = (1 << 8) - 1;
place8Queens(0, 0, 0, lim, ref ans);
return ans;
}

Another approach and same answer of 92 possible solutions (needs to be verified):
public static int place8QueensRecursive(int rstart, int rend, int cstart, int cend, ref int[,] Board)
{
if (cstart > cend)
{
if (CheckBoardState(ref Board))
{
//PrintBoard(ref Board);
return 1; // success
}
else
return 0;
}

int countOfPossibilities = 0;
for (int initialr = rstart; initialr < 8; initialr++)
{
int initialc = cstart;
if (Board[initialr, initialc] != 0) continue;

// each Queen has to be in a row by herself and a column by herself
Board[initialr, initialc] = 2; // marks the position of the queen

// MarkUnavailable(ref Board) or inline it here ;
for (int i = 0; i < 8; i++)
if (Board[i, initialc] == 0) Board[i, initialc] = 1; // unavailable
for (int j = cstart; j < 8; j++)
if (Board[initialr, j] == 0) Board[initialr, j] = 1; // unavailable
for (int k = -8; k < 8; k++)
if ((initialr + k) >= 0 && (initialc + k) >= cstart &&
(initialr + k) < 8 && (initialc + k) < 8 &&
Board[initialr + k, initialc + k] == 0) Board[initialr + k, initialc + k] = 1;
for (int k = -8; k < 8; k++)
if ((initialr + k) >= 0 && (initialc - k) >= cstart &&
(initialr + k) < 8 && (initialc - k) < 8 &&
Board[initialr + k, initialc - k] == 0) Board[initialr + k, initialc - k] = 1;

countOfPossibilities += place8QueensRecursive(rstart, rend, cstart + 1, cend, ref Board);

// MarkAvailable or inline it here
Board[initialr, initialc] = 0;
var queenOccupiedRows = new List<int>();
var queenOccupiedCols = new List<int>();
for (int l = 0; l < 8; l++)
for (int m = 0; m < cstart; m++)
{
if (Board[m, l] == 2) { queenOccupiedRows.Add(m); queenOccupiedCols.Add(l); }
}
for (int i = 0; i < 8; i++)
{
if (Board[i, initialc] == 1
&& queenOccupiedRows.Any(x => x == i) == false
) Board[i, initialc] = 0; // available
}
for (int j = cstart; j < 8; j++)
if (Board[initialr, j] == 1
&& queenOccupiedCols.Any(x => x == j) == false) Board[initialr, j] = 0; // available
for (int k = -8; k < 8; k++)
if ((initialr + k) >= 0 && (initialc + k) >= cstart &&
(initialr + k) < 8 && (initialc + k) < 8 &&
Board[initialr + k, initialc + k] == 1
&& queenOccupiedRows.Any(x => x == (initialr + k)) == false
&& queenOccupiedCols.Any(x => x == (initialc + k)) == false
) Board[initialr + k, initialc + k] = 0;
for (int k = -8; k < 8; k++)
if ((initialr + k) >= 0 && (initialc - k) >= cstart &&
(initialr + k) < 8 && (initialc - k) < 8 &&
Board[initialr + k, initialc - k] == 1
&& queenOccupiedRows.Any(x => x == (initialr + k)) == false
&& queenOccupiedCols.Any(x => x == (initialc - k)) == false
) Board[initialr + k, initialc - k] = 0;
}

return countOfPossibilities;

}
public static void Reset(int rstart, int rend, int cstart, int cend, ref int[,] Board)
{
for (int i = rstart; i < rend + 1; i++)
for (int j = cstart; j < cend + 1; j++)
Board[i,j] = 0;
}

public static bool CheckBoardState(ref int[,] Board)
{
int numQueens = 0;
for (int i = 0; i < 8; i++)
for (int j = 0; j < 8; j++)
{
switch(Board[i,j])
{
case 0: throw new InvalidOperationException();
case 1: break;
case 2: numQueens++;
break;
default:
throw new InvalidOperationException();
}
}

if (numQueens != 8)
return false;

// no row has two queens
for (int i = 0; i < 8; i++)
{
int queensInARow = 0;
for (int j = 0; j < 8; j++)
{
if (Board[i, j] == 2)
{
queensInARow++;
if (queensInARow > 1)
return false;
}
}
}

// no column has two queens
for (int j = 0; j < 8; j++)
{
int queensInACol = 0;
for (int i = 0; i < 8; i++)
{
if (Board[i, j] == 2)
{
queensInACol++;
if (queensInACol > 1)
return false;
}
}
}

// no topleft-to-rightbottom diagonal has two queens
for (int i = 0; i < 8; i++)
{
int j = 0;
int queensInLRDDiagonal = 0;
for (int k = -8; k < 8; k++)
{
if (i + k >= 0 && j + k >= 0 && i + k < 8 && j + k < 8 && Board[i + k, j + k] == 2)
{
queensInLRDDiagonal++;
if (queensInLRDDiagonal > 1)
return false;
}
}
}

for (int j = 0; j < 8; j++)
{
int i = 0;
int queensInLRDDiagonal = 0;
for (int k = -8; k < 8; k++)
{
if (i + k >= 0 && j + k >= 0 && i + k < 8 && j + k < 8 && Board[i + k, j + k] == 2)
{
queensInLRDDiagonal++;
if (queensInLRDDiagonal > 1)
return false;
}
}
}

// no topright-to-bottomleft diagonal has two queens
for (int j = 0; j < 8; j++)
{
int i = 0;
int queensInRLDiagonal = 0;
for (int k = -8; k < 8; k++)
{
if (i + k >= 0 && j - k >= 0 && i + k < 8 && j - k < 8 && Board[i + k, j - k] == 2)
{
queensInRLDiagonal++;
if (queensInRLDiagonal > 1)
return false;
}
}
}

for (int i = 0; i < 8; i++)
{
int j = 7;
int queensInRLDiagonal = 0;
for (int k = -8; k < 8; k++)
{
if (i + k >= 0 && j - k >= 0 && i + k < 8 && j - k < 8 && Board[i + k, j - k] == 2)
{
queensInRLDiagonal++;
if (queensInRLDiagonal > 1)
return false;
}
}
}

return true;
}

public static void PrintBoard(ref int[,] Board)
{
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 8; j++)
{
Console.Write("{0} ", Board[i, j]);
}
Console.WriteLine();
}
Console.WriteLine();
}

gives output as :
Count=92 and sample as
1 1 2 1 1 1 1 1
1 1 1 1 1 2 1 1
1 1 1 2 1 1 1 1
1 2 1 1 1 1 1 1
1 1 1 1 1 1 1 2
1 1 1 1 2 1 1 1
1 1 1 1 1 1 2 1
2 1 1 1 1 1 1 1

------------------------------------------------------------------------------------------------------------------

Given n points on a 2D plane, find the maximum number of points that lie on the same straight line
Approach 1) // Least square method ?
m = (Sum(xy) - (Sum(x)Sum(y))/n) / (Sum(x^2) - Sum(x^2)/n)
b = Avg(y) - mAvg(x)
or
Approach 2)
pair-wise slope in n ^ 2 iterations.

int pointsInALine(List<Point> points)
{
double [,] slopes = new double[points.Length, points.Length] {};
for (int i = 0; i < points.Length; i++)
{
for (int j = 0; j < points.Length; j++)
{
      if ( i != j)
      {
        var ydiff = (points[i].y - points[j].y);
        var xdiff = (points[i].x - points[j].x);
         var slope = xdiff != 0 ? ydiff/xdiff : infinity;
         points[i,j] = slope;
       }
  }
}
var slopeFrequency = new  Dictionary<double, int>();
for (int i = 0; i < points.Length; i++)
{
for (int j = 0; j < points.Length; j++)
{
      if ( i != j)
      {
           if (slopeFrequency.Keys.Contains(slopes[i,j]))
             slopeFrequency[slopes[i,j]]++;
          else
             slopeFrequency.Add(slopes[i,j], 1);
       }
  }
}
var maxSlopeFrequency = slopeFrequency.Values.max();
int maxPoints = 0;
var pointsInALine = new List<Point>();
for (int i =0; i < Points.Length; i++)
   for (int j = 0; j < Points.Length; j++)
       if (i != j && slopes[i,j] == maxSlopeFrequency)
       {
            if(pointsInALine.Contains(Points[i]) == false) pointsInALine.Add(Points[i]);
            if(pointsInALine.Contains(Points[j]) == false) pointsInALine.Add(Points[j]);
        }
return pointsInALine.Count;
}
------------------------------------------------------------------------------------------------------------------
LRU

public class LRUCache{
private List<int> keys {get; set;}
private Hashtable<int,int> keyValues {get;set;}
    public LRUCache(int capacity) {
        entries = new List<int>(capacity);
        keyValues = new List<int, int>(capacity);
    }

    int get(int key) {
        int index = keys.IndexOf(key);
        if (index != -1)
        {
               keys.MoveToFront(index); // extension method
               return keyValues[key];
        }
        else
              throw NotFoundException();
    }

    void set(int key, int value) {
        int index = keys.IndexOf(key);
        if (index == -1)
        {
             keyValues.Add(key, value);
             if (keys.Count == keys.Capacity){
var item = keys.RemoveLast();
                 keyValues.Remove(item);
             }
             keys.Insert(0,key); // add front
        }
        else
              throw InvalidOperationException();
    }
};

void memmove(char* pSrc, char* pDest, size_t len)
{
if (pDest < pSrc || pSrc + len < pDest)
{
while (len--)
*pDest++ = *pSrc++;
}
}

A list of graph algorithms :

Minimum spanning trees: The generic minimum spanning tree algorithm grows a minimum spanning tree from an undirected graph G= (V,E) with a weight function w by using a greedy approach to the problem. It grows the MST by adding one edge at a time that is safe for the MST. At each step of the iteration the the set of edges maintained, say A, is a subset of the minimum spanning tree. If (S, V-S) is a cut in the graph respecting A and there is a light edge (u,v) crossing the cut then that light edge is safe for A.

Kruskal and Prim algorithm: These algorithms are elaborations of the greedy generic algorithm mentioned above. In Kruskal’s algorithm the set A is a forest. An edge that connects any two trees in the forest and has least weight is added as a safe edge. Kruskal’s algorithm sorts the edges of E into a non-decreasing order by weight w. For each edge (u,v) belonging to E, taken in non-decreasing order by weight, if the set that u belongs to is different from the set v belongs to then the edge connecting (u,v) is added to the graph.

In Prim’s algorithm, the set A forms a single tree. A light edge crossing the cut that is safe for the tree is added. The algorithm maintains a min priority queue Q to contain all the vertices. For each vertex extracted from this queue, it adds the edge connecting vertices adjacent to the extracted vertex whose weight is minimum and updates the parent and the key.

Bellman Ford algorithm: This algorithm solves the single source shortest path problem even for edges with negative weights. It checks whether there is a negative weight cycle that is reachable from the source and returns false otherwise it returns the shortest path and their weights. The algorithm uses relaxation progressively decreasing an estimate d[v] on the weight of the shortest path s to each vertex v until it achieves the actual shortest path weight.

Dijkstra’s algorithm: This algorithm solves the single source shortest path problem for edges that have non-negative weights. It maintains a min priority queue of vertices, keyed by their d-values and it extracts each vertex from the queue, it relaxes the edges connecting to the adjacent vertices.

Floyd-Warshall algorithm: This algorithm computes the shortest path weights in a bottom-up manner. It exploits the relationship between a pair of intermediary vertices and the shortest paths that pass through them. If there is no intermediary vertex, then such a path has at most one edge and the weight of the edge is the minimum. Otherwise, the minimum weight is the minimum of the path from I to j or the path from I to k and k to j. Thus this algorithm iterates for each of the intermediary vertices for each of the given input of an N*N matrix to compute the shortest path weight.

Johnson’s algorithm: This algorithm finds the shortest path between all pairs in shortest and it has better time complexity than Floyd Warshall even for sparse graphs. It uses both subroutines from both Bellman-Ford algorithm and Dijkstra’s algorithms to report that there is a negative weight cycle or a matrix of the shortest paths weights for all pairs of vertices. It uses the technique of reweighting which works as follows: If all the edges are non-negative, it uses Dijkstra’s algorithm iteratively from each vertex. If the edges have negative weights but no negative weight cycle, it transforms the graph into a one with non-negative weights and then applies the previous step.

Ford Fulkerson method: This method is based on residual networks (one that can admit more flows), augmenting paths, and cuts. The method is iterative : It starts with f(u,v) = 0 for all (u,v) belongs to V giving an initial flow of value 0 from the source s to the sink t along which we can send more flow, and then augmenting the flow along this path.

Tuesday, September 16, 2014

In today's post we quickly review the Longest Common Subsequence and a hashing function
Let c[i, j] be the length of the longest common subsequence of the prefix of X[1..i] and Y[1..j]. Recursion:
c[i, j] = { 0 if i = 0 or j = 0
          = { c[i-1, j-1] + 1 if i,j > 0 and xi = yj
          = { max( c[i,j-1], c[i-1,j] ) otherwise
This way we utilize the solutions we computed. We compute the table c[i,j] bottom-up and also store the value b[i,j] that captures whether c[i,j-1], c[i-1,j] or c[i-1,j-1] + 1is optimum. We therefore find the longest common subsequence by walking backwards on the path from the last cell of b[i,j] to the first.

Hashing function
template<class T> size_t Hash<T>::operator()(const T& obj)
{
size_t len = sizeof(obj);
size_t res = 0;
const char* ptr = reinterpret_cast<const char*>(&obj);
while (len--) res = (res << 1) ^ *ptr++;
return res;
}

Void memcpy ( char* src, char* dest, size_t len)
{
While (len--){
*dest++ = *src++;
}
}

void GaussianAverage(char* pSrc, char* pDest, int i, int j, int row, int col)
{
// parameter validation
int val = pSrc[i * col + j];
int numNeighbors = 0;
for (int m = i - 2; m <= i+2; m++)
        for (int n = j- 2; n <= j+2; n++)
           if ( m >= 0 && n >= 0 && m < row && n < col )
           {
                     val +=  pSrc[m * col + n];
                     numNeighbors++;
            }
val = val / (numNeighbors + 1);
pDest[m * col + n]   = val;
}

Monday, September 15, 2014

In Today's post, we will be reviewing OpenStack. This is an opensource software stack for Cloud that works on Linux and Debian. It has the following components:
Nova for the computing fabric over the commodity hardware. It's a IaaS offering which can work with virtual servers, servers, storage, load balancers, and other infrastructure. It facilitates concurrent programming, database access and messaging
Swift for object storage. It promotes redundancy by storing objects and files on different computers.
This is a replacement for CloudFiles.
Cinder is one layer below. It is a block storage system that can work with different storage platform from vendors.
Neutron is a networking stack that features network configurations by user and app groups.
Horizon provides a GUI dashboard to automate cloud based resources.
Keystone provides an identity service for authentication and integrates with LDAP. AD integration is provided by some storage platform vendors.
Ceilometer provides all the instrumentation or telemetry for billing.
Heat provides an app management framework using REST and Query API.
Trove works with traditional databases.
Sahara provides an elastic map reduce framework that can work with Hadoop.
The OpenStack shared services consist of the Compute Networking and Storage stacks and these can operate on the Hypervisor and Standardized Hardware. One of the benefits of the OpenStack is that you need not know which stack is comprised of what and what vendor is providing it. The APIs abstract those away and provide a reliable set to work with. They consist of the following:
Block Storage Service API In the OpenStack Swift architecture, the storage consists of three components the Object Storage Service, the container storage service and the account storage service. The account layer process handles requests regarding metadata and the individual account The container server processes handles requests regarding container metadata or object list. The object server process is responsible for the actual storage of objects on the drives of it's node. A proxy is used to compute a hash of the storage location. An object ring is a modified consistent hashing ring that enables an account/object/container path to be mapped to partitions.
Compute API A compute worker manages computing instances on host machines.and includes commands to run, terminate, reboot instances, attach/detach volumes etc.
Identity Service API enables provisioning certificates for PKI, integrating with LDAP, token binding, user CRUD with logging and monitoring etc.
Image Service API Images are created small and are untouched. Image and runtime state are used to create instances. cinder-volume is mapped separately.
Networking API: networking services can run on different hosts and includes a cloud controller host, a network gateway host, and a number of hypervisors for hosting virtual machines.
Object Storage API such as https://swift.example.com/v1/account/container/object

Sunday, September 14, 2014

class Program
{
static void Main(string[] args)
{
string numbers = "1,2,3,4,5,6,7,8,9";
Console.WriteLine("Sum = {0}", ToNumbers(numbers, -1).Sum());
List<int> numerals = Reverse(numbers);
Console.WriteLine(ReverseString(numbers));
}

public static List<int> ToNumbers(string commaSeparatedNumbers, int start)
{
if (commaSeparatedNumbers == null) return null;
var candidates = commaSeparatedNumbers.Split(new char[] { ',' }).ToList();
candidates.RemoveRange(0, start - 1 > 0 && start < candidates.Count() ? start - 1 : 0);
Converter<string, Int32> converter = s => { Int32 result; return Int32.TryParse(s, out result) ? result : 0; };
return candidates.ConvertAll<Int32>(converter).ToList();
}

public static string ToString(List<int> numbers, int start)
{
string result = String.Empty;
numbers.ForEach(x => result += x.ToString() + ", ");
result = result.TrimEnd(new char[] {',', ' '});
return result;
}

public static List<int> Reverse(string commaSeparated)
{
var numbers = ToNumbers(commaSeparated, 0);
numbers.Reverse();
Console.WriteLine(ToString(numbers, 0));
return numbers;
}

public static string ReverseString(string commaSeparated)
{
var words = commaSeparated.Split(new char[] { ',', ' ' });
var reversed = words.Aggregate((sent, next) => next + ", " + sent);
return reversed;
}
}

Sum = 45
9, 8, 7, 6, 5, 4, 3, 2, 1
9, 8, 7, 6, 5, 4, 3, 2, 1

Saturday, September 13, 2014

In the previous post, we discussed the time series algorithm. We saw how the ART model is constructed and how we apply it to predict the next one step transition. Note that the ART tree is created on the values of the random variable. To fit the linear regression for a restricted data set, we determine the values of the random variable from the length p transformations of the time series data set.
For a given time series data set, a corresponding nine data sets for length p transformations is created. The p varies from zero to eight for the nine data sets. Each of these transformed datasets is centered and standardized before modeling; that is for each variable we subtract the mean value and divide by the standard deviation. Then we divided the data set into a training set used as input to the learning method and a holdout set to evaluate the model. The holdout set contains the cases corresponding to the last five observations in the sequence.

Central to the step of fitting the linear regression, is the notion of covariance stationarity.
By that we mean
the mean is not dependent on t
the standard deviation is not dependent on t
the covariance (Yt, Yt-j) exists and is finite and does not depend on t
This last factor is called jth order autocovariance
The jth order auto-correlation is described as autocovariance divided by the square of standard deviation

The autocovariance measures the direction of the linear dependence between Yt and Yt-j.
while the autocorrelation measures both the direction and the strength of the linear dependence between the Yt and Yt-j.
An autoregressive process is defined as one in which the time dependence in the process decays to zero as the random variables in the process get farther and farther apart. It has the following properties:
E(Yt) = mean
Var(Yt) = sigma squared
Cov(Yt, Yt-1) = sigma squared . phi
Cor(Yt, Yt-1) = phi

Courtesy: Time Series Concepts UW.
There are other curve fitting techniques notably the non-linear curve fitting that could be helpful in predicting the one-step transition. Such techniques could involve finding the R-value and/or the Chi-square value. Perhaps a numerical approach to predicting the time series could be to use a Fast Fourier Transform.

By the way, somebody surprised me with a question to print Fibonacci series. I hadn't heard it in a long while and yes there is a recursive solution but this works too:

int Fib(int index)
{
if (index < 1) throw new Exception();
int lastToLast = 1;
int last = 1;
if (index == 1) return lastToLast;
if (index == 2 ) return last;
int sum = 0;
for (int i = 2; i <= index; i++)
{
sum = last + lastToLast;
lastToLast = last;
last = sum;
}
return sum;
}

int Fib(int index)
{
Assert (index > 0);
if (index == 1) return 1;
if (index == 2) return 1;
return Fib(index-1) + Fib(index-2);
}
we will next discuss openstack and try out a sample with Rackspace

Wednesday, September 10, 2014

we will be resuming the discussion on Microsoft time series algorithm. We discussed the decision tree algorithm in the previous post but we will cover how its applied here. The decision tree algorithm could predict discrete data. For continuous data, the decision tree algorithm builds a tree where each node contains a regression formula. At the point where a regression formula shows non-linearity, the decision tree algorithm adds a split. For example, if the scatter plot of the data could be fitted with trends represented by two lines joining at a vertex, the corresponding decision tree generated would have the equations for the two lines as the leaf nodes from a split of all the data.

The time series algorithm on the other hand uses the random variables of a Markov chain to predict the immediate next given the history. It's called an auto regressive model because the probability for a value at a time has mean autoregressively dependent on the last p values in the series.
An autoregressive tree model has AR model at every leaf. This model represents the time periods in a series.
Today we will continue our study of the decision tree and time series algorithm.
When we fit a regressor line to the scatter plot of the data, we can use it to describe the trend for the upcoming events. However this is often too general. Instead if we split the data into regions and fit the regressor lines, we get better approximation. This is what the splitting does. To learn the decision tree for the time series, we apply a split-leaf operator. This operator takes a predictor variable and a value. The resulting left and right nodes are for the two resulting regions.
To apply the split for a leaf, we use the seven values of each predictor variable that come from the boundaries of eight equiprobable contiguous regions of a normal distribution estimated from the restricted data set at the leaf for the predictor variable. We choose the split that results in the largest increase in model score. We stop splitting the tree when no split yields a higher score.
If we do not know the set of potential split variables, we iteratively apply the above method to get an ART for each and choose the one with the highest Bayesian score.
The Decision tree building is thus a recursive approach.

We will now see how to use this to predict the next step data. Note that the leaves of the decision tree for a piece wise trend predictor. To forecast, we can do a one step forecasting or a multi step forecasting. The former is more accurate than the latter. We are only interested in the former. To predict the distribution of the random variable for the next step, we only need one of the leaf nodes. Each leaf for the tree has a conditional time distribution for this variable because the path from the root to the leaf we are interested in has a conjunction of all the formulas. Remember that our generic auto regressive model of length p is a linear regression modeled using a normal distribution with model parameters as the mean, variance and the coefficients for each of the p observations. The autoregressive model tree is merely a piecewise linear AR model. This structure is chosen based on the highest posterior conditional probability p(s|d) for the structure s given the data d. And this probability can be expanded as the combination of that for the structure and the marginal likelihood where the likelihood of the data is averaged over the uncertainty in the model parameters. To predict the data, we use this normal distribution at the maximum a posteriori value.

The likelihood could be measured with t-distribution but we use the normal distribution and for the leaf we are interested in, we find the transition probability to the next one-step and we find this given its a stationary random Markov chain.

By that I mean we use the equation for the leaf that gives us the one step transition probability from the distribution we have. To find the equation, we find the the coefficients, mean and variance from the last p observations using a normal distribution estimated from the dataset. We transform the dataset into the length p transformation of the time series data set where each of the transformation x = (x1, x2, ..., xp+1) and x is calculated for each i in 1 to T-p. The elements within the bracket denoted by xj is the original event (i+j-1). For example, if we have events y = (1, 3, 2, 4) then the length-2 transformation is x1 = (1,3), x2 = (3,2) and x3 = (2,4) and the length-3 transformation is (1,3,2) and (3,2,4). This is called windowing and with the transformations that we get with this approach, we can now apply the decision tree technique mentioned earlier. We create a single leaf with the normal distribution from this transformation dataset which fits a line to the transformations with a target as the p+1. Then we decide to split it at one of the seven values corresponding to the eight equiprobable continuous regions (each of standard deviation length) of the normal distribution for the predictor variable. We continue to split until it does not improve the model score. The model score is a Bayesian score with the first term as the probability of the structure and the second term is the marginal likelihood. The second term balances the fit of the model to the complexity of the data. The second term can be approximated as a fit penalized by complexity to ease the calculation of the score for the model.

I noticed today that the PageRank algorithm is also a stationary Markov chain.