@@ -141,4 +141,23 @@ for (i = 0; i < n; i++)
141141 * <img src="https://tex.s2cms.ru/svg/%5Cbegin%7Balign*%7D%0AComm%20%26%3D%20%5Cleft%5B%20(q-1)q%20%5Ctimes%20%5Cfrac%7BM%20%5Ctimes%20K%7D%7Bq%5E%7B2%7D%7D%20%5Ctimes%20q%20%5Cright%5D%20%5C%5C%0A%26%2B%20%5Cleft%5B%20(q%20%5Ctimes%20q)%20%5Ctimes%20%5Cfrac%7BK%20%5Ctimes%20N%7D%7Bq%5E2%7D%20%5Ctimes(q-1)%20%5Cright%5D%20%5C%5C%0A%26%3D%20%5Cleft(%20M%20%5Ctimes%20K%20%2B%20K%20%5Ctimes%20N%20%5Cright)%20%5Ctimes%20(q-1)%20%0A%5Cend%7Balign*%7D">
142142
1431433. **Computing in total**
144- * <img src="https://tex.s2cms.ru/svg/%5Cbegin%7Balign*%7D%0AComput%20%26%3D%20%20%5Cleft%5B%20%5Cleft(%20%5Cfrac%7BM%7D%7Bq%7D%20%5Ctimes%20%5Cfrac%7BK%7D%7Bq%7D%20%5Ctimes%20%5Cfrac%7BN%7D%7Bq%7D%20%5Cright)%20%5Ctimes%20%5Cleft(%20q%20%5Ctimes%20q%5Cright)%20%5Cright%5D%20%5Ctimes%20q%20%5C%5C%0A%26%3D%20M%20%5Ctimes%20K%20%5Ctimes%20N%0A%5Cend%7Balign*%7D">
144+ * <img src="https://tex.s2cms.ru/svg/%5Cbegin%7Balign*%7D%0AComput%20%26%3D%20%20%5Cleft%5B%20%5Cleft(%20%5Cfrac%7BM%7D%7Bq%7D%20%5Ctimes%20%5Cfrac%7BK%7D%7Bq%7D%20%5Ctimes%20%5Cfrac%7BN%7D%7Bq%7D%20%5Cright)%20%5Ctimes%20%5Cleft(%20q%20%5Ctimes%20q%5Cright)%20%5Cright%5D%20%5Ctimes%20q%20%5C%5C%0A%26%3D%20M%20%5Ctimes%20K%20%5Ctimes%20N%0A%5Cend%7Balign*%7D">
145+
146+ 4. **FOX Kernel in the Parallel Program**
147+
148+ * ``` c
149+ for (stage = 0; stage < grid->q; stage++) { /* one pass per stage; grid->q stages in total */
150+ bcast_root = (grid->my_row + stage) % grid->q; /* root of this stage's row broadcast: advances by one per stage, wrapping modulo grid->q */
151+ if (bcast_root == grid->my_col) { /* root column matches ours (presumably my_col is our rank in row_comm - confirm): our own local_A is the broadcast buffer */
152+ MPI_Bcast(local_A, 1, local_matrix_mpi_t,
153+ bcast_root, grid->row_comm);
154+ Local_matrix_multiply(local_A, local_B,local_C); /* presumably accumulates local_A * local_B into local_C; helper is defined outside this excerpt */
155+ } else { /* otherwise the broadcast block lands in temp_A; local_A is not touched in this branch */
156+ MPI_Bcast(temp_A, 1, local_matrix_mpi_t,
157+ bcast_root, grid->row_comm);
158+ Local_matrix_multiply(temp_A, local_B,local_C); /* same multiply, using the received block in place of local_A */
159+ }
160+ MPI_Sendrecv_replace(local_B, 1, local_matrix_mpi_t,
161+ dest, 0, source, 0, grid->col_comm, &status); /* send local_B to dest and overwrite it in place with the block received from source over col_comm; dest/source are set outside this excerpt */
162+ }
163+ ```
0 commit comments