Sunday, August 11, 2024

 Find minimum in a rotated sorted array:

class Solution {

    public int findMin(int[] A) {

        if (A == null || A.length == 0) { return Integer.MIN_VALUE; }

        int start = 0;

        int end = A.length -1;

        while (start < end) {

            int mid = (start + end) / 2;


            // check monotonically increasing series

            if (A[start] <= A[end] && A[start] <= A[mid] && A[mid] <= A[end]) { return A[start]; }


            // check if only [start, end]

            if (mid == start || mid == end) { return Math.min(A[start], A[end]); }


            // detect rotation point 

            if (A[start] > A[mid]){

                end = mid;

            } else {

                if (A[mid] > A[mid+1]) return A[mid+1]; 

                start = mid + 1;

            }

        }

        return A[0];

    }   

}

Works for:

[0 1 4 4 5 6 7]

[7 0 1 4 4 5 6]

[6 7 0 1 4 4 5]

[5 6 7 0 1 4 4]

[4 5 6 7 0 1 4]

[4 4 5 6 7 0 1]

[1 4 4 5 6 7 0]

[1 0 0 0 0 0 1]
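
As a quick cross-check of the cases above, here is a small Python sketch of the same binary-search logic (a hypothetical test driver, not part of the original Java solution) that asserts each listed array yields its true minimum:

def find_min(a):
    # Same approach as the Java above: binary search for the rotation point.
    if not a:
        raise ValueError("empty array")
    start, end = 0, len(a) - 1
    while start < end:
        mid = (start + end) // 2
        # monotonically increasing between start and end: first element is the minimum
        if a[start] <= a[end] and a[start] <= a[mid] <= a[end]:
            return a[start]
        # window reduced to two candidates
        if mid in (start, end):
            return min(a[start], a[end])
        # rotation point lies in the left half
        if a[start] > a[mid]:
            end = mid
        else:
            if a[mid] > a[mid + 1]:
                return a[mid + 1]
            start = mid + 1
    return a[0]

cases = [
    [0, 1, 4, 4, 5, 6, 7],
    [7, 0, 1, 4, 4, 5, 6],
    [6, 7, 0, 1, 4, 4, 5],
    [5, 6, 7, 0, 1, 4, 4],
    [4, 5, 6, 7, 0, 1, 4],
    [4, 4, 5, 6, 7, 0, 1],
    [1, 4, 4, 5, 6, 7, 0],
    [1, 0, 0, 0, 0, 0, 1],
]
for c in cases:
    assert find_min(c) == min(c), c
print("all cases pass")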



Saturday, August 10, 2024

A self-organizing map (SOM) algorithm for scheduling meeting times from availabilities and bookings. A map is a low-dimensional representation of a training sample comprising elements e, and it is represented by nodes n. The map is transformed by a regression operation that adjusts the nodes' positions one element of the model (e) at a time. With preferences translating to nodes and availabilities to elements, the map comes closer to matching the sample space with each epoch/iteration.

from sys import argv


import numpy as np


from io_helper import read_xyz, normalize

from neuron import generate_network, get_neighborhood, get_boundary

from distance import select_closest, euclidean_distance, boundary_distance

from plot import plot_network, plot_boundary


def main():

    if len(argv) != 2:

        print("Correct use: python src/main.py <filename>.xyz")

        return -1


    problem = read_xyz(argv[1])


    boundary = som(problem, 100000)


    problem = problem.reindex(boundary)


    distance = boundary_distance(problem)


    print('Boundary found of length {}'.format(distance))



def som(problem, iterations, learning_rate=0.8):

    """Solve the xyz using a Self-Organizing Map."""


    # Obtain the normalized set of timeslots (w/ coord in [0,1])

    timeslots = problem.copy()

    # print(timeslots)

    #timeslots[['X', 'Y', 'Z']] = normalize(timeslots[['X', 'Y', 'Z']])


    # The population size is 8 times the number of timeslots

    n = timeslots.shape[0] * 8


    # Generate an adequate network of neurons:

    network = generate_network(n)

    print('Network of {} neurons created. Starting the iterations:'.format(n))


    for i in range(iterations):

        if not i % 100:

            print('\t> Iteration {}/{}'.format(i, iterations), end="\r")

        # Choose a random timeslot

        timeslot = timeslots.sample(1)[['X', 'Y', 'Z']].values

        winner_idx = select_closest(network, timeslot)

        # Generate a filter that applies changes to the winner's gaussian

        gaussian = get_neighborhood(winner_idx, n//10, network.shape[0])

        # Update the network's weights (closer to the timeslot)

        network += gaussian[:,np.newaxis] * learning_rate * (timeslot - network)

        # Decay the variables

        learning_rate = learning_rate * 0.99997

        n = n * 0.9997


        # Check for plotting interval

        if not i % 1000:

            plot_network(timeslots, network, name='diagrams/{:05d}.png'.format(i))


        # Check if any parameter has completely decayed.

        if n < 1:

            print('Radius has completely decayed, finishing execution',

            'at {} iterations'.format(i))

            break

        if learning_rate < 0.001:

            print('Learning rate has completely decayed, finishing execution',

            'at {} iterations'.format(i))

            break

    else:

        print('Completed {} iterations.'.format(iterations))


    # plot_network(timeslots, network, name='diagrams/final.png')


    boundary = get_boundary(timeslots, network)

    plot_boundary(timeslots, boundary, 'diagrams/boundary.png')

    return boundary


if __name__ == '__main__':

    main()
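
The helper modules imported above (io_helper, neuron, distance, plot) are not shown here. As a rough sketch only, the two helpers at the heart of the update loop might look like the following, assuming the network is an (n, 3) NumPy array of neuron weights and each sampled timeslot is a (1, 3) row; the actual functions in the referenced repository may differ:

import numpy as np

def generate_network(size):
    # Start the neurons at random positions in the unit cube.
    return np.random.rand(size, 3)

def select_closest(network, timeslot):
    # Index of the neuron with the smallest Euclidean distance to the sample.
    return np.linalg.norm(network - timeslot, axis=1).argmin()

def get_neighborhood(center, radius, domain):
    # Gaussian bump of influence around the winning neuron; distances wrap
    # around because the map is a closed ring of `domain` neurons.
    radius = max(radius, 1)  # avoid dividing by zero late in the decay
    deltas = np.absolute(center - np.arange(domain))
    distances = np.minimum(deltas, domain - deltas)
    return np.exp(-(distances * distances) / (2 * (radius * radius)))

With these definitions, the update line network += gaussian[:, np.newaxis] * learning_rate * (timeslot - network) pulls every neuron toward the sampled timeslot, weighted by how close it sits to the winner on the ring.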


Reference: 

https://github.com/raja0034/som4drones


#codingexercise

https://1drv.ms/w/s!Ashlm-Nw-wnWhPBaE87l8j0YBv5OFQ?e=uCIAp9


Thursday, August 8, 2024

 This is the Knuth-Morris-Pratt method of string matching

public void kmpMatcher(String text, String pattern) {

    int n = text.length();
    int m = pattern.length();
    int[] prefixes = computePrefixFunction(pattern);
    int matched = 0; // number of pattern characters matched so far

    for (int i = 0; i < n; i++) {
        // fall back to the longest proper border until the next character fits
        while (matched > 0 && pattern.charAt(matched) != text.charAt(i)) {
            matched = prefixes[matched - 1];
        }
        if (pattern.charAt(matched) == text.charAt(i)) {
            matched++;
        }
        if (matched == m) {
            System.out.println("Pattern occurs at index " + (i - m + 1));
            matched = prefixes[matched - 1]; // keep scanning for further occurrences
        }
    }
}

public int[] computePrefixFunction(String pattern) {

    int m = pattern.length();
    int[] prefixes = new int[m]; // prefixes[q] = length of the longest proper border of pattern[0..q]
    int k = 0;

    for (int q = 1; q < m; q++) {
        while (k > 0 && pattern.charAt(k) != pattern.charAt(q)) {
            k = prefixes[k - 1];
        }
        if (pattern.charAt(k) == pattern.charAt(q)) {
            k++;
        }
        prefixes[q] = k;
    }
    return prefixes;
}
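
To make the prefix table concrete, here is a small Python sketch of the same computation (0-indexed, mirroring the Java above; the sample strings are illustrative only):

def prefix_function(pattern):
    # pi[q] = length of the longest proper prefix of pattern[:q+1] that is also a suffix
    pi = [0] * len(pattern)
    k = 0
    for q in range(1, len(pattern)):
        while k > 0 and pattern[k] != pattern[q]:
            k = pi[k - 1]
        if pattern[k] == pattern[q]:
            k += 1
        pi[q] = k
    return pi

def kmp_search(text, pattern):
    pi = prefix_function(pattern)
    matches, k = [], 0
    for i, ch in enumerate(text):
        while k > 0 and pattern[k] != ch:
            k = pi[k - 1]
        if pattern[k] == ch:
            k += 1
        if k == len(pattern):
            matches.append(i - len(pattern) + 1)  # start index of the match
            k = pi[k - 1]
    return matches

print(prefix_function("ababaca"))   # [0, 0, 1, 2, 3, 0, 1]
print(kmp_search("ababa", "aba"))   # [0, 2]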


Reference for drone data: https://1drv.ms/w/s!Ashlm-Nw-wnWhPFoQ0k-mnjii2Gs3Q?e=cbET9N 


Tuesday, August 6, 2024

 -- Demonstrate dynamic tagging for drone data vectors


USE master;

GO


IF NOT EXISTS (SELECT 1 FROM sys.server_principals WHERE name = N'DroneFleetUser')

BEGIN

CREATE LOGIN DroneFleetUser

WITH PASSWORD = N'LuvDr0ne!',

     CHECK_POLICY = OFF,

CHECK_EXPIRATION = OFF,

DEFAULT_DATABASE = DroneCatalog;

END;

GO


IF NOT EXISTS (SELECT 1 FROM sys.server_principals WHERE name = N'DroneFleetAdmin')

BEGIN

CREATE LOGIN DroneFleetAdmin

WITH PASSWORD = N'LuvDr0neFl@@t!',

     CHECK_POLICY = OFF,

CHECK_EXPIRATION = OFF,

DEFAULT_DATABASE = DroneCatalog;

END;

GO


USE DroneCatalog;

GO


CREATE USER DroneFleetUser FOR LOGIN DroneFleetUser;

GO


CREATE USER DroneFleetAdmin FOR LOGIN DroneFleetAdmin;

GO


ALTER ROLE [Drone Operators] ADD MEMBER DroneFleetUser;

GO


-- Ensure that the policy has been applied

EXEC [Application].Configuration_ApplyDynamicTagging;

GO


-- The function that has been applied is as follows:

--

-- CREATE FUNCTION [Application].DetermineDroneUserAccess(@TeamID int)

-- RETURNS TABLE

-- WITH SCHEMABINDING

-- AS

-- RETURN (SELECT 1 AS AccessResult

--         WHERE IS_ROLEMEMBER(N'db_owner') <> 0

--         OR IS_ROLEMEMBER((SELECT sp.FlightsTerritory

--                           FROM [Application].Teams AS c

--                           INNER JOIN [Application].Fleets AS sp

--                           ON c.FleetID = sp.FleetID

--                           WHERE c.TeamID = @TeamID) + N' Flights') <> 0

--     OR (ORIGINAL_LOGIN() = N'DroneFleetAdmin'

--     AND EXISTS (SELECT 1

--                 FROM [Application].Teams AS c

--         INNER JOIN [Application].Fleets AS sp

--         ON c.FleetID = sp.FleetID

--         WHERE c.TeamID = @TeamID

--         AND sp.FlightsTerritory = SESSION_CONTEXT(N'FlightsTerritory'))));

-- GO


-- The security policy that has been applied is as follows:

--

-- CREATE SECURITY POLICY [Application].FilterDroneUsersByFlightsTerritoryRole

-- ADD FILTER PREDICATE [Application].DetermineDroneUserAccess(DeliveryTeamID)

-- ON Flights.DroneUsers,

-- ADD BLOCK PREDICATE [Application].DetermineDroneUserAccess(DeliveryTeamID)

-- ON Flights.DroneUsers AFTER UPDATE;

-- GO


SELECT * FROM sys.database_principals; -- note the role for Pacific and the user for Pacific

GO


SELECT * FROM Flights.DroneUsers; -- and note count

GO


GRANT SELECT, UPDATE ON Flights.DroneUsers TO [Drone Operators];

GRANT SELECT ON [Application].Teams TO [Drone Operators];

GRANT SELECT ON [Application].Fleets TO [Drone Operators];

GRANT SELECT ON [Application].Inventories TO [Drone Operators];

GO


-- impersonate the user DroneFleetUser

EXECUTE AS USER = 'DroneFleetUser';

GO


-- Now note the count and which rows are returned

-- even though we have not changed the command


SELECT * FROM Flights.DroneUsers;

GO


-- where are those drones?

-- note the spatial results tab


SELECT c.Border

FROM [Application].Inventories AS c

WHERE c.InventoryName = N'Northwest'

UNION ALL

SELECT c.DeliveryLocation

FROM Flights.DroneUsers AS c

GO


-----------------------------------------------------------------------

-- updating rows that are accessible to a non-accessible row is blocked

-----------------------------------------------------------------------

DECLARE @DroneFleetDroneUserID INT

DECLARE @NonDroneFleetTeamID INT


-- pick a drone in the Pacific flights territory

SELECT TOP 1 @DroneFleetDroneUserID=c.DroneUserID

FROM Flights.DroneUsers c JOIN Application.Teams ci ON c.DeliveryTeamID=ci.TeamID

JOIN Application.Fleets sp ON ci.FleetID=sp.FleetID

WHERE sp.FlightsTerritory=N'Pacific'


-- pick a Team outside of the Pacific flights territory

SELECT @NonDroneFleetTeamID=c.TeamID

FROM Application.Teams c JOIN Application.Fleets sp ON c.FleetID=sp.FleetID

WHERE TeamName=N'Seattle' AND sp.FleetCode=N'WA'


UPDATE Flights.DroneUsers                    -- Attempt to update

SET DeliveryTeamID = @NonDroneFleetTeamID -- to a team that is not in the Drone Operators Territory

WHERE DroneUserID = @DroneFleetDroneUserID; -- for a drone that is in the Drone Operators Territory

GO


-- revert the impersonation

REVERT;

GO


-- Remove the user from the role

ALTER ROLE [Drone Operators] DROP MEMBER DroneFleetUser;

GO


-- Instead of permission for a role, let's give permissions to the website user

GRANT SELECT, UPDATE ON Flights.DroneUsers TO [DroneFleetAdmin];

GRANT SELECT ON [Application].Teams TO [DroneFleetAdmin];

GRANT SELECT ON [Application].Inventories TO [DroneFleetAdmin];

GO



-- Finally, tidy up (optional)

/*

REVOKE SELECT, UPDATE ON Flights.DroneUsers FROM [Drone Operators];

REVOKE SELECT ON [Application].Teams FROM [Drone Operators];

REVOKE SELECT ON [Application].Inventories FROM [Drone Operators];

REVOKE SELECT, UPDATE ON Flights.DroneUsers FROM [DroneFleetAdmin];

REVOKE SELECT ON [Application].Teams FROM [DroneFleetAdmin];

REVOKE SELECT ON [Application].Inventories FROM [DroneFleetAdmin];

GO


DROP USER DroneFleetUser;

GO


DROP USER DroneFleetAdmin;

GO


USE master;

GO


DROP LOGIN DroneFleetUser;

GO


DROP LOGIN DroneFleetAdmin;

GO


-- Reference: DroneData: https://1drv.ms/w/s!Ashlm-Nw-wnWhPJAFzVxJMWI2f_eKw?e=BDtnPM 

#codingexercise 

https://1drv.ms/w/s!Ashlm-Nw-wnWhM0bmlY_ggTBTNTYxQ?e=K8GuKL



Sunday, August 4, 2024

This is a summary of the book “Why Not Better and Cheaper?”, written by James and Robert Rebitzer on healthcare and innovation and published by Oxford University Press in June 2023. The brothers examine how research incentives, social norms, and market competition combine to leave patients and society with results that fall short. Opening with the contrasting poster-child story of better and cheaper innovation in residential lighting, the authors take us through the landscape and history of healthcare and its current state. The healthcare system is at once profusely innovative and yet remarkably ineffective at discovering ways to deliver more value at lower cost.

Innovations in treating heart conditions, for example, illustrate the significance and interrelation of these two factors of value addition and cost reduction. In many nations this disease is one of the leading contributors to death, and research in the field advanced tremendously in recent decades rather than earlier, partly because earlier work relied on indirect markers and indicators. LDL, or bad cholesterol, can now be reduced by a newer class of recently discovered drugs, including evolocumab and alirocumab, which have been approved for a wider audience. The initial list price for these drugs is over fourteen thousand dollars per year, an excessive cost considering the drug has to be taken for life. This example shows that patients want improved outcomes in mortality and quality of life, but innovations and their delivery are shaped by the pharmacies, physicians, and insurers that influence purchasing decisions. High-cost innovations can continue to gain market share in healthcare and can coexist with low-cost innovations, while low-value innovations can achieve great market penetration and high-value ones may not. In fact, a campaign called “Choosing Wisely” grew to discourage low-value care, calling out over five hundred tests that patients should avoid and that contribute about a hundred million dollars in waste annually. The cost-reduction problem, on the other hand, arises from a failure to adopt processes, technologies, and skills that remove inefficiencies and reduce the resources used. Innovators are unable to focus on cost reduction because skilled intervention becomes necessary, which has limited the potential for pilot projects to reach the mainstream. Taken together, this causes innovation in healthcare to underperform. Financial incentives, norms, and competition help explain these two symptoms.

Patents often fail to provide economic gains for innovators. They stimulate innovation by granting time-limited monopolies, but innovators can only profit when there is demand for the product. Saving lives by developing new antibiotics to overcome antibiotic-resistant strains is an example where patents and other financial incentives have fallen short: inventing new antibiotics is a money-losing endeavor, so companies steer toward drugs along predecessor lines that may offer no more benefit than existing ones. Similarly, vaccine development has different value for those at high and low risk. The price of a vaccine depends on the individual and fails to account for the benefit to others, so the economic value of vaccines is miscalculated; drug makers can earn more from the treatment of a disease than from its vaccine, and the distribution of risk is seldom considered. Another example of bias is the larger number of treatment options for late-stage cancer than for early-stage cancer, even though the latter has higher value across a broader population. Out-of-pocket costs to end users tend to be quite close to the marginal cost of manufacturing the drug.

Previous book summary: 

1. https://1drv.ms/w/s!Ashlm-Nw-wnWhPI80x8PN6ekOh1GlQ?e=B7hJe0

2. SummarizerCodeSnippets.docx

3. https://coursera.org/share/89dd61377bad7e93402c5bb3440414af    

4. https://coursera.org/share/b1e019fd7028b96f54a057db4d11ea85   


Saturday, August 3, 2024

When describing the Azure Machine Learning Workspace deployments via IaC and their shortcomings and corresponding resolutions, it was hinted that the workspace and all its infrastructure concerns can be resolved at deployment time so that the data scientists are free to focus on business use cases. Part of this setup involves kernel creation, which can be done via scripts during the creation and assignment of compute to the data scientists. Two scripts are required: one run at creation time and another at the start of the compute. Some commands require the terminal to be restarted, so splitting the scripts makes it easy to assign them to the right stage. For example, to provision a Python 3.11 and Spark 3.5 based custom kernel, the following scripts are useful:


#!/bin/bash

  

set -e


curl https://repo.anaconda.com/archive/Anaconda3-2024.02-1-Linux-x86_64.sh --output Anaconda3-2024.02-1-Linux-x86_64.sh

chmod 755 Anaconda3-2024.02-1-Linux-x86_64.sh

./Anaconda3-2024.02-1-Linux-x86_64.sh -b

# This script creates a custom conda environment and kernel based on a sample yml file.

echo "installation complete"

cat <<EOF > env.yaml

name: python3.11_spark3.5

channels:

  - conda-forge

  - defaults

dependencies:

  - python=3.11

  - numpy

  - pyspark

  - pip

  - pip:

    - azureml-core

    - ipython

    - ipykernel

    - pyspark==3.5

EOF

echo "env.yaml written"

/anaconda/condabin/conda env create -f env.yaml

echo "Initializing new conda environment"

/anaconda/condabin/conda init bash


#!/bin/bash


set -e

echo "Activating new conda environment"

/anaconda/envs/azureml_py38/bin/conda init --all

/anaconda/envs/azureml_py38/bin/conda init bash

export PATH="/anaconda/condabin:$PATH"

export name="python3.11_spark3.5"

conda install -p "/anaconda/envs/$name" -y ipykernel anaconda::pyspark anaconda::conda

conda -v activate "$name" && true

echo "Installing kernel"

sudo -u azureuser -i <<'EOF'

export name="python3.11_spark3.5"

export pathToPython3="/anaconda/envs/$name/bin/python3"

$pathToPython3 -m pip install pip --upgrade

$pathToPython3 -m pip install pyopenssl --upgrade

$pathToPython3 -m pip install pyspark==3.5

$pathToPython3 -m pip install snowflake-snowpark-python==1.20.0

$pathToPython3 -m pip install snowflake-connector-python==3.11.0

$pathToPython3 -m pip install azure-keyvault

$pathToPython3 -m pip install azure-identity

$pathToPython3 -m pip install ipykernel==v6.29.5

$pathToPython3 -m ipykernel install --user --name "$name" --display-name "Python 3.11 - Spark 3.5 (DSS)"

echo "Conda environment setup successfully."

EOF