-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathmatmul_benchmark.py
37 lines (30 loc) · 1.13 KB
/
matmul_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# On Titan X (Pascal)
# 8192 x 8192 matmul took: 0.10 sec, 11304.59 G ops/sec
# http://stackoverflow.com/questions/41804380/testing-gpu-with-tensorflow-matrix-multiplication
import os
import sys
import tensorflow as tf
import time
# Benchmark: time repeated n x n matrix multiplications placed on "/gpu:0"
# and print the average time per multiply plus the achieved G ops/sec.
n = 8192
dtype = tf.float32

# Build the graph: two n x n matrices of ones and their product, all created
# under the GPU 0 device scope.
with tf.device("/gpu:0"):
    matrix1 = tf.Variable(tf.ones((n, n), dtype=dtype))
    matrix2 = tf.Variable(tf.ones((n, n), dtype=dtype))
    product = tf.matmul(matrix1, matrix2)

# avoid optimizing away redundant nodes
config = tf.ConfigProto(
    graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))

iters = 10

# n^2*(n-1) additions, n^3 multiplications
ops = n**3 + (n - 1) * n**2

# Use the session as a context manager so it is closed (and its resources
# released) even if one of the run() calls raises.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())

    # pre-warming: one untimed run, so any one-time setup cost is excluded
    # from the timed loop below
    sess.run(product.op)

    start = time.time()
    for _ in range(iters):
        # Fetch the Operation rather than the Tensor: run() then returns None
        # instead of handing the n x n result back to Python.
        sess.run(product.op)
    end = time.time()

elapsed = end - start
rate = iters * ops / elapsed / 10**9
print('\n %d x %d matmul took: %.2f sec, %.2f G ops/sec' % (
    n, n, elapsed / iters, rate))