Problem statement:
Assume you have two matrices A and B in a sparse matrix format, where each record is of the form i, j, value. Design a MapReduce algorithm to compute the matrix multiplication A x B
Map Input
The input to the map function will be a row of a matrix represented as a list. Each list will be of the form [matrix, i, j, value] where matrix is a string and i, j, and value are integers.
The first item, matrix, is a string that identifies which matrix the record originates from. This field has two possible values: "a" indicates that the record is from matrix A and "b" indicates that the record is from matrix B.
Reduce Output
The output from the reduce function will also be a row of the result matrix represented as a tuple. Each tuple will be of the form (i, j, value) where each element is an integer.
Answer:
#!/usr/bin/python
import MapReduce
import json
import sys
# Part 1
mr = MapReduce.MapReduce()
# A has dimensions L,M
# B has dimensions M,N
L = 5
M = 5
N = 5
# Part 2
def mapper(record):
print(f"record={record} \t + {record[0]} + \t + {record[1]} + \t + {record[2]} + \t + {record[3]}")
matrix_index = record[0]
row = record[1]
col = record[2]
value = record[3]
if matrix_index == "a":
for i in range(0, N):
key = f"{row},{i}"
mr.emit_intermediate(key, ("a", row, col, value))
if matrix_index == "b":
for j in range(0, L):
key = f"{j},{col}"
mr.emit_intermediate(key, ("b", row, col, value))
# Part 3
def reducer(key, list_of_values):
# one reducer per output cell of destination matrix
# print(f"{key},{list_of_values}")
total = 0
line = ""
for k in range(0,M):
left = getcolumn(list_of_values, k, "a")
right = getrow(list_of_values, k, "b")
total += left*right
line += f"{left}*{right}={left*right} +"
line += f"= {total}"
print(line)
mr.emit((int(key.split(',')[0]), int(key.split(',')[1]), total))
def getcolumn(values, k, matrix_type):
result = 0
for item in values:
mtype = item[0]
row = item[1]
col = item[2]
value = item[3]
if mtype == matrix_type and col == k:
result = value
break
return result
def getrow(values, k, matrix_type):
result = 0
for item in values:
mtype = item[0]
row = item[1]
col = item[2]
value = item[3]
if matrix_type == mtype and row == k:
result = value
break
return result
# Part 4
inputdata = open(sys.argv[1])
mr.execute(inputdata, mapper, reducer)
Output:
python3 multiply1.py matrix.json
record=['a', 0, 0, 63] + a + + 0 + + 0 + + 63
record=['a', 0, 1, 45] + a + + 0 + + 1 + + 45
record=['a', 0, 2, 93] + a + + 0 + + 2 + + 93
record=['a', 0, 3, 32] + a + + 0 + + 3 + + 32
record=['a', 0, 4, 49] + a + + 0 + + 4 + + 49
record=['a', 1, 0, 33] + a + + 1 + + 0 + + 33
record=['a', 1, 3, 26] + a + + 1 + + 3 + + 26
record=['a', 1, 4, 95] + a + + 1 + + 4 + + 95
record=['a', 2, 0, 25] + a + + 2 + + 0 + + 25
record=['a', 2, 1, 11] + a + + 2 + + 1 + + 11
record=['a', 2, 3, 60] + a + + 2 + + 3 + + 60
record=['a', 2, 4, 89] + a + + 2 + + 4 + + 89
record=['a', 3, 0, 24] + a + + 3 + + 0 + + 24
record=['a', 3, 1, 79] + a + + 3 + + 1 + + 79
record=['a', 3, 2, 24] + a + + 3 + + 2 + + 24
record=['a', 3, 3, 47] + a + + 3 + + 3 + + 47
record=['a', 3, 4, 18] + a + + 3 + + 4 + + 18
record=['a', 4, 0, 7] + a + + 4 + + 0 + + 7
record=['a', 4, 1, 98] + a + + 4 + + 1 + + 98
record=['a', 4, 2, 96] + a + + 4 + + 2 + + 96
record=['a', 4, 3, 27] + a + + 4 + + 3 + + 27
record=['b', 0, 0, 63] + b + + 0 + + 0 + + 63
record=['b', 0, 1, 18] + b + + 0 + + 1 + + 18
record=['b', 0, 2, 89] + b + + 0 + + 2 + + 89
record=['b', 0, 3, 28] + b + + 0 + + 3 + + 28
record=['b', 0, 4, 39] + b + + 0 + + 4 + + 39
record=['b', 1, 0, 59] + b + + 1 + + 0 + + 59
record=['b', 1, 1, 76] + b + + 1 + + 1 + + 76
record=['b', 1, 2, 34] + b + + 1 + + 2 + + 34
record=['b', 1, 3, 12] + b + + 1 + + 3 + + 12
record=['b', 1, 4, 6] + b + + 1 + + 4 + + 6
record=['b', 2, 0, 30] + b + + 2 + + 0 + + 30
record=['b', 2, 1, 52] + b + + 2 + + 1 + + 52
record=['b', 2, 2, 49] + b + + 2 + + 2 + + 49
record=['b', 2, 3, 3] + b + + 2 + + 3 + + 3
record=['b', 2, 4, 95] + b + + 2 + + 4 + + 95
record=['b', 3, 0, 77] + b + + 3 + + 0 + + 77
record=['b', 3, 1, 75] + b + + 3 + + 1 + + 75
record=['b', 3, 2, 85] + b + + 3 + + 2 + + 85
record=['b', 4, 1, 46] + b + + 4 + + 1 + + 46
record=['b', 4, 2, 33] + b + + 4 + + 2 + + 33
record=['b', 4, 3, 69] + b + + 4 + + 3 + + 69
record=['b', 4, 4, 88] + b + + 4 + + 4 + + 88
63*63=3969 +45*59=2655 +93*30=2790 +32*77=2464 +49*0=0 += 11878
63*18=1134 +45*76=3420 +93*52=4836 +32*75=2400 +49*46=2254 += 14044
63*89=5607 +45*34=1530 +93*49=4557 +32*85=2720 +49*33=1617 += 16031
63*28=1764 +45*12=540 +93*3=279 +32*0=0 +49*69=3381 += 5964
63*39=2457 +45*6=270 +93*95=8835 +32*0=0 +49*88=4312 += 15874
33*63=2079 +0*59=0 +0*30=0 +26*77=2002 +95*0=0 += 4081
33*18=594 +0*76=0 +0*52=0 +26*75=1950 +95*46=4370 += 6914
33*89=2937 +0*34=0 +0*49=0 +26*85=2210 +95*33=3135 += 8282
33*28=924 +0*12=0 +0*3=0 +26*0=0 +95*69=6555 += 7479
33*39=1287 +0*6=0 +0*95=0 +26*0=0 +95*88=8360 += 9647
25*63=1575 +11*59=649 +0*30=0 +60*77=4620 +89*0=0 += 6844
25*18=450 +11*76=836 +0*52=0 +60*75=4500 +89*46=4094 += 9880
25*89=2225 +11*34=374 +0*49=0 +60*85=5100 +89*33=2937 += 10636
25*28=700 +11*12=132 +0*3=0 +60*0=0 +89*69=6141 += 6973
25*39=975 +11*6=66 +0*95=0 +60*0=0 +89*88=7832 += 8873
24*63=1512 +79*59=4661 +24*30=720 +47*77=3619 +18*0=0 += 10512
24*18=432 +79*76=6004 +24*52=1248 +47*75=3525 +18*46=828 += 12037
24*89=2136 +79*34=2686 +24*49=1176 +47*85=3995 +18*33=594 += 10587
24*28=672 +79*12=948 +24*3=72 +47*0=0 +18*69=1242 += 2934
24*39=936 +79*6=474 +24*95=2280 +47*0=0 +18*88=1584 += 5274
7*63=441 +98*59=5782 +96*30=2880 +27*77=2079 +0*0=0 += 11182
7*18=126 +98*76=7448 +96*52=4992 +27*75=2025 +0*46=0 += 14591
7*89=623 +98*34=3332 +96*49=4704 +27*85=2295 +0*33=0 += 10954
7*28=196 +98*12=1176 +96*3=288 +27*0=0 +0*69=0 += 1660
7*39=273 +98*6=588 +96*95=9120 +27*0=0 +0*88=0 += 9981
[0, 0, 11878]
[0, 1, 14044]
[0, 2, 16031]
[0, 3, 5964]
[0, 4, 15874]
[1, 0, 4081]
[1, 1, 6914]
[1, 2, 8282]
[1, 3, 7479]
[1, 4, 9647]
[2, 0, 6844]
[2, 1, 9880]
[2, 2, 10636]
[2, 3, 6973]
[2, 4, 8873]
[3, 0, 10512]
[3, 1, 12037]
[3, 2, 10587]
[3, 3, 2934]
[3, 4, 5274]
[4, 0, 11182]
[4, 1, 14591]
[4, 2, 10954]
[4, 3, 1660]
[4, 4, 9981]
#codingexercise: https://1drv.ms/w/s!Ashlm-Nw-wnWhM0bmlY_ggTBTNTYxQ?e=s7hP7W