I have a blog where I compare performance between Python, Cython, Numba, and Julia. I've noticed that the Mojo performance is not great (still much better than Python, but not beating Julia or Numba), and I'm not sure if this is user error or just not the designed use case of Mojo. I am a Swift fanboy and really want to jump on the Mojo train, so I'd like to know if anyone sees any obvious mistakes I am making:

```mojo
from tensor import Tensor, TensorSpec, TensorShape
from utils.index import Index
from random import rand
from math import exp
from benchmark import Report
import benchmark

alias data_type = DType.float32


# Initialize an N x M field of spins, each -1 or +1.
fn random_spin_field(N: Int, M: Int) -> Tensor[data_type]:
    var t = rand[data_type](N, M)
    for i in range(N):
        for j in range(M):
            if t[i, j] < 0.5:
                t[Index(i, j)] = -1
            else:
                t[Index(i, j)] = 1
    return t


# Metropolis update of the spin at (n, m) with periodic boundaries.
fn _ising_update(inout field: Tensor[data_type], n: Int, m: Int, beta: Float32) -> None:
    var total = SIMD[data_type, 1]()
    var shape = field.shape()
    var N = shape[0]
    var M = shape[1]
    # Sum the spins of the surrounding cells, wrapping at the edges.
    for i in range(n - 1, n + 2):
        for j in range(m - 1, m + 2):
            if i == n and j == m:
                continue
            total += field[i % N, j % M]
    var dE = 2 * field[n, m] * total
    if dE <= 0:
        field[Index(n, m)] *= -1
    elif exp(-dE * beta) > rand[data_type](1)[0]:
        field[Index(n, m)] *= -1


# One full sweep over the field in a checkerboard pattern, so that
# no cell is updated in the same pass as its neighbors.
fn ising_step(inout field: Tensor[data_type], beta: Float32 = 0.4) -> None:
    var shape = field.shape()
    var N = shape[0]
    var M = shape[1]
    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _ising_update(field=field, n=n, m=m, beta=beta)


@always_inline
fn bench() -> Report:
    var N = 200
    var M = 200
    var field = random_spin_field(N, M)

    @always_inline
    @parameter
    fn ising_step_fn():
        ising_step(field=field)

    return benchmark.run[ising_step_fn](max_runtime_secs=10)


fn main():
    var report = bench()
    # Print a report in milliseconds.
    report.print("ms")
```
Things I tried:
I understand …
Replies: 2 comments
-
Hey, @jakee417, thank you for the blog! I didn't run the profiler, but I think this line is allocating like crazy: `elif exp(-dE * beta) > rand[data_type](1)[0]:`. You created a `Tensor` only to throw it away. Maybe you could try using `random_float64().cast[field.dtype]()` instead. It gives an easy 2x speedup on my machine.
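For concreteness, here is a minimal sketch of that change applied to `_ising_update` (assuming `random_float64` is imported from Mojo's `random` module; everything else is unchanged from the original post):

```mojo
from random import random_float64

fn _ising_update(inout field: Tensor[data_type], n: Int, m: Int, beta: Float32) -> None:
    var total = SIMD[data_type, 1]()
    var shape = field.shape()
    var N = shape[0]
    var M = shape[1]
    for i in range(n - 1, n + 2):
        for j in range(m - 1, m + 2):
            if i == n and j == m:
                continue
            total += field[i % N, j % M]
    var dE = 2 * field[n, m] * total
    if dE <= 0:
        field[Index(n, m)] *= -1
    # Draw a scalar directly instead of allocating (and discarding)
    # a one-element Tensor on every call.
    elif exp(-dE * beta) > random_float64().cast[field.dtype]():
        field[Index(n, m)] *= -1
```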
-
Thanks! That seems to increase speed from 2.9 ms to 1.27 ms, as you said. I think the next tier of speed increases involves incorporating …