HOMEWORK 2 SOLUTIONS: --------------------- 5.4) MESI1) 1Rd(90) + 1Wr(1) + 1Rd(1) + 1Wr(1) + 2Rd(90) + 2Wr(60) + 2Rd(1) + 2Wr(1) + 3Rd(90) + 3Wr(60) + 3Rd(1) + 3Wr(1) = 397 Dragon1) 1Rd(90) + 1Wr(1) + 1Rd(1) + 1Wr(1) + 2Rd(90) + 2Wr(60) + 2Rd(1) + 2Wr(60) + 3Rd(90) + 3Wr(60) + 3Rd(1) + 3Wr(60) = 515 MESI2) 1Rd(90) + 2Rd(90) + 3Rd(90) + 1Wr(60) + 2Wr(90) + 3Wr(90) + 1Rd(90) + 2Rd(90) + 3Rd(1) + 3Wr(60) + 1Wr(90) = 841 Dragonb2) 1Rd(90) + 2Rd(90) + 3Rd(90) + 1Wr(60) + 2Wr(60) + 3Wr(60) + 1Rd(1) + 2Rd(1) + 3Rd(1) + 3Wr(60) + 1Wr(60) = 573 MESI3) 1Rd(90) + 2Rd(90) + 3Rd(90) + 3Rd(1) + 1Wr(60) + 1Wr(1) + 1Wr(1) + 1Wr(1) + 2Wr(90) + 3Wr(90) = 514 Dragona3) 1Rd(90) + 2Rd(90) + 3Rd(90) + 3Rd(1) + 1Wr(60) + 1Wr(60) + 1Wr(60) + 1Wr(60) + 2Wr(60) + 3Wr(60) = 631 5.6) NOTE: given order of assignments are not the only ones possible a) A B u v w order ---------------------------------------------- 1 1 1 1 1 AuBvw 1 1 1 1 0 none 1 1 1 0 1 vAuBw 1 1 1 0 0 vwAuB 1 1 0 1 1 uBAvw 1 1 0 1 0 uBvwA 1 1 0 0 1 uvABw 1 1 0 0 0 uvBwA b) A B u v w x order ----------------------------------------------------- 1 1 1 1 1 1 ABuvwx 1 1 1 1 1 0 BwxAuv 1 1 1 1 0 1 wABuvx 1 1 1 1 0 0 wxABuv 1 1 1 0 1 1 AuvBwx 1 1 1 0 1 0 none 1 1 1 0 0 1 AuvwxB 1 1 1 0 0 0 wxAuvB 1 1 0 1 1 1 uABvwx 1 1 0 1 1 0 BuvwxA 1 1 0 1 0 1 wuABvx 1 1 0 1 0 0 wxuABv 1 1 0 0 1 1 uvABwx 1 1 0 0 1 0 uvBwxA 1 1 0 0 0 1 uvwABx 1 1 0 0 0 0 uvwxAB c) single instructions: A u v order ----------------------------- 2 0 1 uv 2 1 0 vu separate instructions: A u v order ----------------------------- 2 0 1 uA(u)vA(v) 2 1 0 vA(v)uA(u) 1 0 0 uvA(u)A(v) 5.10) Note: depending on your assumptions, various other solutions are possible. Min Max MESI 7 inf. Dragon 7 inf. In every case, each processor will need a bus transaction to release the lock and processors 2-4 will need a transaction to acquire the lock. So in the best case, each will require 7 transactions. In the worst case, since locks are uncache-able, if all remaining processors go for the lock repeatedly while one has it, bus transactions go to infinity. 5.20) The Nth process enters the barrier, sets it to zero, and continues executing the next iteration of the loop. If another process is stalled or swapped out for something, it is possible for the Nth process (or another that got through the barrier as soon as it was reset) to get to the bottom of the next iteration of the loop before the stalled process wakes up and notices that it can go. Now the barrier is no longer zero because another process has reached it again. So the process in the old iteration of the loop will wait forever to get out of this iteration, and other processes will wait forever for it to catch up. Fixed code: BARRIER (Var B: BarVariable, N: integer, Var sense: integer) { localSense: integer; localSense := !sense; if (fetch-and-add(B,1)=N-1) then { B:=0; sense := !sense; } else while (localSense != sense) do {}; } 5.29) program nbody; type molecule = record x_pos: double; y_pos: double; z_pos: double; x_vel: double; y_vel: double; z_vel: double; x_f: double; y_f: double; z_f: double; end; config var numMols: integer=4096; region R = [1..numMols]; Rflood = [1..numMols,1..numMols]; var mol: [R] molecule; procedure nbody(); ...declarations... begin for time := 0 to endTime do [*,] col := >>[R]mol; -- flood-broadcast along one dimension O(sqrt(P)) [,*] row := >>[R]mol; -- flood-broadcast along one dimension O(sqrt(P)) [R] x_fi = +<<[1..n,*]x_fn(row,col); -- local computation and reduction along one dimension