Monday, July 11, 2016

#codingexercise
Generate a 2D distance matrix for the nearest distance from each cell as city location to  a parking spot that appears as one or more occupied cells.
 static int[][] getParkingDistanceGrid(int cityLength, int cityWidth, int[] parkingXCoordinates, int[] parkingYCoordinates) {
        var d = new int[cityWidth][];
        for (int i = 0; i < cityWidth; i++)
            d[i] = new int[cityLength];
        for (int i = 0; i < cityWidth; i++)
        {
            for (int j = 0; j < cityLength; j++)
                {
                    var parkingdistances = new List<int>();
                    for (int count = 0; count < parkingXCoordinates.Length; count++)
                    {
                        int distance = GetDistance(i+1, j+1, parkingXCoordinates[count], parkingYCoordinates[count]);
                        parkingdistances.Add(distance);
                    }
                    int min = INT_MAX;
                    for (int k = 0; k < parkingXCoordinates.Length; k++)
                        if (parkingdistances[k] < min)
                           min  = parkingdistances[k];
                    d[i][j] = min; 
                }
        }
        return d;
    }

    static int GetDistance(int r1, int c1, int r2, int c2)
    {
        int deltax = r1 - r2;
        deltax = deltax > 0 ? deltax : 0-deltax;
        int deltay = c1 - c2;
        deltay = deltay > 0 ? deltay : 0-deltay;
        return deltax + deltay;        

    }

Generate a recommendation list of courses taken by the social circle of a user. The recommendation must include courses taken by the members of her social circle but not by her. It must be ranked by the number of attendees to the course. A social circle consists only of the persons direct friends and their direct friends.
public List<string> getRankedCourses(string user) {
    var coursePopularity = new Dictionary<string, int>();
    var circle = new List<string>();
    
    var direct = getDirectFriendsForUser(user);
    circle.AddRange(direct);
    foreach (var friend in direct){
        if (friend != user){
        var directForFriend = getDirectFriendsForUser(friend);
        directForFriend.Remove(user);
        circle.AddRange(directForFriend);
        }
    }
    
    foreach (var person in circle)
    {
        var attended = getAttendedCoursesForUser(person);
        foreach(var course in attended)
        {
            if (!coursePopularity.ContainsKey(course))
            {
                coursePopularity.Add(course,1);   
            }
            else
            {
                coursePopularity[course] += 1;   
            }
        }
    }
    
    foreach(var attending in getAttendedCoursesForUser(user))
    {
     coursePopularity.Remove(attending);   
    }
    List<KeyValuePair<string, int>> ranked = coursePopularity.ToList();
    ranked.Sort(
    delegate(KeyValuePair<string, int> pair1,
    KeyValuePair<string, int> pair2)
    {
        return pair1.Value.CompareTo(pair2.Value);
    }
    );
    
    var recommendations = new List<string>();
    foreach (var kvp in ranked)
        recommendations.Add(kvp.Key);
    recommendations.Reverse();
    return recommendations;

}

Sunday, July 10, 2016

#codingexercise
Given n ropes of different length, combine them into a single rope,such that total cost is minimum. You can tie two ropes at a time,and cost of tying is sum of length of ropes.
Int getmin (list <int> Len)
{
Int cost = 0
Heap h = build_heap(len)
While (h.size () >1)
{
First = extract_min (h)
Second = extract_min (h)
Cost += first + second
H.insert (first+second );
}
Return cost;
}
Given a “2 x n” board and tiles of size “2 x 1″, count the number of ways to tile the given board using the 2 x 1 tiles. A tile can either be placed horizontally i.e., as a 1 x 2 tile or vertically i.e., as 2 x 1 tile.
Let the number of ways be GetCount(n)
If the tile is placed vertically, the problem reduces to GetCount(n-1)
If the tile is placed horizontally, the problem reduces to GetCount(n-2)
This is Fibonacci series
int GetCount(int n)
{
if (n <=1) return n;
return GetCount(n-1) + GetCount(n-2);
}
A car factory has two assembly lines, each with n stations.
Each station is dedicated to some sort of work. Cars may proceed from one station to other on the same line or transfer across another line. There is a cost to transfer as well as cost for entry and exit on each line in terms of minutes. Compute the minimum time it will take to build a car.

int GetCarTime(int [,] A, int [,] t, int numStations,  ref int[] entry, ref int[] exit) // A, entry and exit have two rows each and correspond to the static costs
{
int T1[numStations];
int T2[numStations];
T1[0] = entry[0] + A[0,0];
T2[0] = entry[1] + A[1,0];
for (i = 1; i < numStations; ++i)
{
T1[i] = mint(T1[i-1] + A[0,i], T2[i-1] +t[1,i] + A[0,i]);
T2[i] = min(T2[i-1] + A[1,i], T1[i-1] +t[0,i] +A[1,i]);
}
return min(T1[numStations-1] + exit[0], T2[numStations-1] + exit[1]);
}
Given an array of random numbers, Push all the zero’s of a given array to the right end of the array in minimum possible swaps. Order of appearance doesn’t matter. Print the minimum swaps needed to do so.

input : {1, 9, 8, 0, 0, -2, 0, 1, 6}.output :swaps : 2 (-2 as it is and swap 1 and 6 from first two zeros. )
Int GetMinShifts(List<int>A, int N)
{
int count = 0;
Int swaps = 0;
for (int i =0; i < N; i++)
    for ( int j = N-1; j > i; j--){
    if (A[i] == 0 && A[j] != 0){
          int temp = A[j];
          A[j] = A[i];
          A[i] = temp;
          swaps++;
         break;
     } 
}
return swaps;
}

convert big indian to little indian
List<byte> ToLittleIndian(List<byte> num)
{
var ret = new List<byte>();
for (int i = num.Count-1; i >=0; i--)
      ret.Add(num[i]) // at initial position
return ret;
}

Saturday, July 9, 2016

#codingexercise
Print right justified # in stair case format
    static void StairCase(int n) {
        for (int i = 0; i < n; i++){
            string s = "";
            for (int k = 0; k < n-i; k++)
                s += " ";
            for (int k = 0; k < i; k++)
                s += "#";
            Console.WriteLine(s);
        }
    }
There are some glasses with equal capacity as 1 litre. The glasses are kept as follows:
           1
         2   3
       4   5   6
    7    8  9   10   
You can put water to only top glass. If you put more than 1 litre water to 1st glass, water overflows and fills equally in both 2nd and 3rd glasses. Glass 5 will get water from both 2nd glass and 3rd glass and so on.
If you have X litre of water and you put that water in top glass, how much water will be contained by jth glass in ith row?
This is like Pascal's triangle where the inner values are formed from the sum of the two values above it.


double GetWater( int i, int j, double X)
{
assert (j <= i);
int glasscount = i * (i+1) / 2; 
var d = new double[glasscount]();
int index = 0;
glass[index] = X;
for (int row = 1; row  <= i && X != 0; ++row)
{
for (int col = 0; col < row; col++, index++)
{
 X = glass[index];
 glass[index] = (X  >= 1.0) ? 1.0 : X;
  X= (X >= 1.0f) ? (X-1) : 0;
glass[index + row] += X/2;
glass[index + row + 1] += X/2;
}
}
return glass[i*(i-1)/2 + j - 1];
}

# print pascal's triangle
void PrintPascal(int n)
{
int A[n, n];
for (int row = 0; row < n; row++){
   for (int col = 0; col < row; col++){
     if (row == col || col == 0)
       A[row,col]  = 1;
     else
      A[row, col]  = A[row-1][col-1] + A[row-1][col]
     Console.Write(A[row,col]);
   }
   Console.WriteLine();
}
}
 Given a binary search tree. Find two numbers in the tree whose sum is k
Node getNode(node root, int val)
{
if (root == null) return null;
if (root.val == val) return root;
if (root.val < val)
     return getNode(root.right, val);
else
     return getNode(root.left, val);
}
void getNodes(node root, int sum, ref Tuple<Node, Node> ret)
{
if (root == null) return;
node pair = getNode(root, sum-root.val);
if (pair != null) {ret.first = root; ret.second = pair; return;}
if (ret.first == null && ret.second == null) getNodes(root.left, sum, ref ret);
if (ret.first == null && ret.second == null) getNodes(root.right, sum, ref ret);
}
var ret = new Tuple<Node, Node> ();
ret.first = null;
ret.second = null;
getNodes(root, sum ref ret);
if (ret.first && ret.second)
Console.Write("{0}:{1}", ret.first, ret.second);

convert little indian to big indian
List<byte> ToBigIndian(List<byte> num)
{
var ret = new List<byte>();
for (int i = 0; i < num.Count; i++)
      ret.Insert(num[i],0) // at initial position
return ret;
}
A topic for a talk : https://1drv.ms/w/s!Ashlm-Nw-wnWk2xYkvf4o1W0hEJk

Friday, July 8, 2016

#codingexercise

We discussed the method to remove all nodes which don't lie in any path with sum  >= k

Node removeNodes(ref Node root, int k, ref int sum)
{
if (root == null) return null;
int lsum = sum + root.data;
int rsum = lsum;
root.left = removeNodes(ref root.left, k, ref lsum);
root.right = removeNodes(ref root.right, k, ref rsum);
sum = max(lsum, rsum);
if (sum < k)
{
delete(root);
root = null;
}
return root;
}
But this assumes that the sum is strictly increasing down the tree. A better solution might be to delete the leaves in a bottom up manner.

Node removeLeaves(ref Node root, ref int sum)
{
if (root = null) return null;
root.left = removeLeaves(ref root.left, sum - root.data);
root.right = removeLeaves(ref root.right, sum - root.data);
if (root.left == null && root.right == null && root.data < sum)
{
delete(root);
return null;
}
return root;
}

Check if two binary trees are equal
bool Compare( node first, node second)
{
if (first == null && second == null) return true;
if (first == null || second == null) return false;
if (first.data != second.data) return false;
return Compare(first.left, second.left) && Compare(first.right, second.right);
}

Given an array of integers, can it be partitioned into two groups such that the sum of the groups are equal ?

If the sum of all elements is odd, then there is no solution
If the sum of the elements is even, then we recur using sum/2
The recursion considers the last element as included or excluded.

bool isSumEqual(List<int> A, int n, int sum)
{
if (sum == 0) return true;
if (n == 0 && sum != 0) return false;
if (A[n-1] > sum)
   return isSumEqual(A, n-1, sum);
return isSumEqual(A, n-1, sum) || isSumEqual(A, n-1, sum-A[n-1]);
}

bool getPartition(List<int> A, int n)
{
int sum = A.sum();
if (sum%2 != 0) return false;
return isSumEqual(A,n, sum/2);
}

#codingexercise

Given two sets of strings A and B. Find the
 (A-B) U (B-A) ( U = union ). The answer should be in lexicographical order and A’s elements should appear before B’s.


List<T> nonoverlapping(list<T> A, list<T> B)
{
Var infirst = A.Except(B);
Var insecond = B.Except(A);
Return infirst.union(insecond);
}

List<int> Except<List<int> A, List<int> B)
{
var ret = new List<int> ();
for (int i = 0; i < A.Count; i++)
  if (B.Contains(A[i]) == false)
     ret.Add(A[i]);
return ret;
}

Thursday, July 7, 2016

Backup services in a private cloud 
A private cloud has several valuable assets in the form of virtual machine instances. A backup of these VMs helps with restore to point of time, disaster recovery and such other benefits. Backups are different from snapshots because they may include storage volumes and application data. Both backup and snapshots involve large files to the tune of several hundred Gigabytes. 
Many cloud providers offer their own backup products. However, a private cloud may be made up with more than one cloud provider. Moreover the private cloud may shift weight on its cloud vendors. It doesn't seem flexible to use one or the other of these cloud specific products. A combination of these products could work but each would introduce a new variable to the clients. Some commercial backup products successfully claim to provide a central solution against heterogeneous clouds but they may impose their own restrictions on the backup activities. Consequently a Do-it-yourself backup product that can do all that we want in a private cloud becomes the only satisfactory solution. 
As is conventional with offerings from the private cloud, a service seems better than a product. In addition if this is written in a micro service model, it becomes much more manageable and tested given that this is a dedicated services 
Moreover clients can now call via command line, user interface or direct api calls. Each client offers two kinds of backup service: frequent backups via scheduled jobs and infrequent on-demand-push-button backups.  They differentiate the workloads from a customer and make it easier customer to realize their backups. A frequent schedule alleviates the onus on the customer to take repeated actions and provides differentiation to the workloads that can be siphoned off to automation. 
By nature the backup service does not take into consideration the source of the data and performs the same kinds of operations when data is made available to it. The data is differenced, then compressed and encrypted and stored on destination file storage. The data in this case happens to be virtual machine files. The type of Virtual machine, its flavor of operating system or the cloud provider from which the virtual machine is provisioned, doesn’t matter. The virtual machine is exported as a set of files.  
Consequently backup service can be used with all kinds of data and the architecture for the service takes this into consideration. If the source of the data is not the virtual machines but files and folders within a virtual machine, it performs the same set of operations. The data is made available to the backup service by agents within the virtual machine or some delegate for the cloud provider that exports the virtual machines. There is a decoupling between the data packager which in this case is the one that is tasked with exporting the Virtual machines from the cloud provider and the backup activities performed once the data is made available.  
That said, the task of packaging the virtual machines is complex. Different cloud providers have different nuances. These involve tasks such as interacting with the cloud center to take a snapshot of the virtual machine. Often a backup can be taken only when the virtual machine is powered off. This is sometimes an unacceptable solution as virtual machines may be required to be active 24x7. The virtual machines in such cases may require to be cloned by ‘quiescing’ the file system.  Different cloud providers involved different techniques or mechanisms to do it. Some merely allow only a snapshot to be taken which works only with their originating datacenter. Others allow the virtual machine to be exported in a more portable format that can be used to restore the virtual machines to other datacenters. Moreover, the set of tasks to export a VM may also differ based on the flavor of the virtual machine and the tasks associated with the virtual machine. The tasks of packaging vary based on where it is performed. For example, there are differences in what is packaged by a request to the operating system or by a request to the cloud provider such as a VMWare vCenter. In comparison to public clouds that offer snapshots of virtual machines as well as an export of the storage volumes associated with the virtual machine, different cloud providers may have different ways to request the export of all files relevant to a virtual machine.  It just so happens that the primary disk of a virtual machine in a public cloud has all the information for the virtual machine to be launched. Therefore virtual machines can be exported as a set of virtual disk files.  
The task of packaging does not culminate with the availability of disk files. Sometimes descriptors need to be added or the files need to be packaged into an archive all of which requires a download of the relevant files to a local disk where the operation can be performed.  Such copying can involve large files and requite a lot of time and bandwidth.  
If a download is involved, there must be enough space on the local disk or a remote share. If it’s a remote share, there will be contention from different copiers. Consequently, it is better to separate each copier's destination to a different file share. For example, file in the source may be categorized by the vCenters they belong to. This gives some distribution of source files to destination and results in reduced contention at any single file share.   
Moreover, each file copy may span several minutes. This means the task will require its own states. Fortunately the state does not need to be updated very often. Task parallel libraries like Celery maintain this state on their backend. Typically a message broker is involved. If each file copy and transfer may be considered a processor, and this processor has a corresponding does bookkeeping with a message broker, an enterprise grade message broker would be required so that all tasks can share the same endpoint for message broker. Message brokers can scale to form a cluster just like databases and do not need to be partitioned. However, if the source destination loads are independent and can be partitioned to different processors, databases and message brokers, then there can be more than one such server to do the backups. This has a tradeoff in terms of how many different units of such servers are maintained. Each unit adds more complexity, maintenance and additional points of failure. On the other hand they are designed to scale as a central resource. Having one server that targets different regions for their processing is simpler and gives one endpoint to the client while managing the distribution internally. Additionally only one script with the prepopulated instance names will be required and the same script can be used in parallel by different processors running for different regions. The suggestion here is to leverage the virtualization for different regions but provide one co-ordinator for all clients. The co-ordinator itself is no more than a distributor and the virtualization is as deep as cloned processors for each copy-transfer with all its associated resources being independent for each region. Since processors are cloned there is no change to the script for different workers. This enables tasks to be parallelized with a task parallel library. The processor is invoked with appropriate parameters at the time the user makes the request to backup. The job id issued by the processor is maintained along with the backup request until the state of the request is changed by the processor from pending to completed. The processor resides on machines earmarked one for each region so as to distribute the network and disk i/o of virtual machines in the same region.  The database and message broker for use by the processors do NOT need to be virtualized by the processor for each region because they hold only the metadata and do not affect the processing or data operations  
An enterprise grade message broker will be required for any such task parallelization and asynchronous processing. This message broker maintains a job id for each job in its queue. It can handle fair queuing, retries and queue chaining. Each job will have states that can be queried for the completion status. The job enqueuers can file and forget about the job until later and then check if the job is done. The job is looked up by its id which was issued at the time the job was queued. This is how task parallel libraries enable asynchronous processing. Since each job can be completed by a separate worker, it enables parallelization.   
Progress can be described if the transfer is done in chunks. A simple stdout from the script during large transfers of chunks can be sufficient to provide progress information. Consequently script output may be piped for job progress indication. 
We alleviated bottlenecks and improved partitions and distribution of loads along with asynchronous processing. Therefore this system can scale to 1000 virtual machine backups in fifteen minute windows each from the time the lease is acquired to the state being update as completed. 
The task of backup does not merely involve copying but also differencing the source data, storing the full or incremental data set at a storage different from the source and encrypting it. Consequently data differencing, compression and encrypting tools are required.  Moreover since thousands of Virtual machines may be backed up at any given time, the activities performed on each must be transparent and readily diagnosable via detailed logs for up to the current time processing logs. 
Together, the task of packaging the virtual machines and their backup to remote file storage complete the backup of individual virtual machines. The rest is about partitioning and scalability to handle workloads that are specified in a database where state can be maintained for each operation. 
  
#codingquestion

  
  1. Given an unsorted integer (positive values only) array of size ‘n’, we can form a group of two or three, the group should be such that the sum of all elements in that group is a multiple of 3. Find the maximum number of groups that can be generated in this way.

Void Combine (List<int> a, List<int> b,int start, int level)
{
For (int I = start ; I < a.Length; i++)
{
b[level] = a[i];
if ((b.length == 2 || b.length == 3) && b.sum() % 3 == 0) print(b);
If (I < a.Length)
Combine(a,b,start+1,level+1)
B[level] = 0;


}