Cluster computing

Saturday, August 31, 2024

A self-organizing map (SOM) algorithm for scheduling meeting times, expressed as availabilities and bookings. A map is a low-dimensional representation of a training sample comprising elements e. It is represented by nodes n. The map is transformed by a regression operation that adjusts the nodes' positions using one element (e) of the sample at a time. With preferences translating to nodes and availabilities to elements, the map becomes a closer match to the sample space with each epoch/iteration.

from sys import argv

import numpy as np

from io_helper import read_xyz, normalize

from neuron import generate_network, get_neighborhood, get_boundary

from distance import select_closest, euclidean_distance, boundary_distance

from plot import plot_network, plot_boundary

def main():
    """Command-line entry point: run the SOM on an .xyz file and print the boundary length.

    Exactly one command-line argument (the input file path) is expected.

    Returns:
        -1 after printing a usage hint when the argument count is wrong;
        otherwise None, after the boundary length has been printed.
    """
    # argv[0] is the script itself, so one user-supplied argument means 2 entries.
    if len(argv) != 2:
        print("Correct use: python src/main.py <filename>.xyz")
        return -1

    source_path = argv[1]
    problem = read_xyz(source_path)

    # Train the map, then reindex the problem by the boundary it produced.
    visit_order = som(problem, 100000)
    problem = problem.reindex(visit_order)

    length = boundary_distance(problem)
    print('Boundary found of length {}'.format(length))

def som(problem, iterations, learning_rate=0.8):
    """Train a Self-Organizing Map on the timeslots and return the boundary.

    Args:
        problem: table of timeslots providing ``copy()``, ``shape``,
            ``sample()`` and the columns 'X', 'Y', 'Z'
            (presumably a pandas DataFrame -- confirm against read_xyz).
        iterations: maximum number of training steps.
        learning_rate: initial update strength; multiplied by 0.99997
            after every step.

    Returns:
        The value produced by ``get_boundary`` for the trained network.
    """
    # Work on a copy of the input. NOTE: the coordinates are used as given;
    # the normalization step that used to follow is currently disabled.
    timeslots = problem.copy()

    # The network holds eight neurons per timeslot.
    population = timeslots.shape[0] * 8
    network = generate_network(population)
    print('Network of {} neurons created. Starting the iterations:'.format(population))

    # The neighborhood radius starts at the population size and shrinks
    # every step; the network itself keeps its original size.
    radius = population

    for step in range(iterations):
        if step % 100 == 0:
            print('\t> Iteration {}/{}'.format(step, iterations), end="\r")

        # Pick one random timeslot and find the neuron closest to it.
        sample = timeslots.sample(1)[['X', 'Y', 'Z']].values
        winner_idx = select_closest(network, sample)

        # Weighting centered on the winner, one weight per neuron.
        gaussian = get_neighborhood(winner_idx, radius // 10, network.shape[0])

        # Pull the neurons towards the sampled timeslot (in-place update).
        network += gaussian[:, np.newaxis] * learning_rate * (sample - network)

        # Decay the learning rate and the radius.
        learning_rate = learning_rate * 0.99997
        radius = radius * 0.9997

        # Periodic snapshot of the network.
        if step % 1000 == 0:
            plot_network(timeslots, network, name='diagrams/{:05d}.png'.format(step))

        # Stop early once either parameter has completely decayed.
        if radius < 1:
            print('Radius has completely decayed, finishing execution',
                  'at {} iterations'.format(step))
            break
        if learning_rate < 0.001:
            print('Learning rate has completely decayed, finishing execution',
                  'at {} iterations'.format(step))
            break
    else:
        # Reached only when the loop ran to completion without a break.
        print('Completed {} iterations.'.format(iterations))

    boundary = get_boundary(timeslots, network)
    plot_boundary(timeslots, boundary, 'diagrams/boundary.png')
    return boundary

if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status:
    # None (success) exits with 0, while the -1 returned on a usage error
    # becomes a non-zero status instead of being silently discarded.
    raise SystemExit(main())

Reference:

https://github.com/raja0034/som4drones

#codingexercise

https://1drv.ms/w/s!Ashlm-Nw-wnWhPBaE87l8j0YBv5OFQ?e=uCIAp9

Cluster computing

Saturday, August 31, 2024

No comments:

Post a Comment