Implementing Non-negative Matrix Factorization (NMF) with TensorFlow
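As a quick refresher: NMF approximates a non-negative data matrix A by the product of two non-negative factor matrices, where r is the chosen rank (r = 3 in the example below):

A \approx W H, \qquad W \in \mathbb{R}_{\ge 0}^{m \times r}, \; H \in \mathbb{R}_{\ge 0}^{r \times n}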
1. Import libraries
import tensorflow as tf
import numpy as np
import pandas as pd

np.random.seed(0)
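A note on versions: this post uses the TensorFlow 1.x graph/Session API (tf.Session, tf.train.GradientDescentOptimizer). If you are on TensorFlow 2.x, one option, as an assumption on my part rather than something from the original post, is to swap the import above for the v1 compatibility layer:

import tensorflow.compat.v1 as tf   # TF1-style API shipped with TensorFlow 2.x
tf.disable_eager_execution()        # the rest of the code expects graph/Session mode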
2. Create the matrix to be factorized
A_orig = np.array([[3, 4, 5, 2], [4, 4, 3, 3], [5, 5, 4, 4]], dtype=np.float32).T
A_orig_df = pd.DataFrame(A_orig)
3. Mark the observed (non-missing) entries
A_df_masked = A_orig_df.copy()
A_df_masked.iloc[0, 0] = np.NAN      # hide one entry to simulate missing data
np_mask = A_df_masked.notnull()      # True where an entry is observed
4. Basic TensorFlow setup
# Boolean mask for computing cost only on valid (not missing) entries
tf_mask = tf.Variable(np_mask.values)

A = tf.constant(A_df_masked.values)
shape = A_df_masked.values.shape

# latent factors
rank = 3

# Initializing random H and W
temp_H = np.random.randn(rank, shape[1]).astype(np.float32)
temp_H = np.divide(temp_H, temp_H.max())

temp_W = np.random.randn(shape[0], rank).astype(np.float32)
temp_W = np.divide(temp_W, temp_W.max())

H = tf.Variable(temp_H)
W = tf.Variable(temp_W)
WH = tf.matmul(W, H)
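For orientation (a quick check I added, not part of the original post): after the transpose, A is 4×3, so temp_W is (4, rank) and temp_H is (rank, 3), and their product matches A's shape:

# Hypothetical sanity check: W (4, 3) times H (3, 3) gives a (4, 3) reconstruction
print(temp_W.shape, temp_H.shape, np.dot(temp_W, temp_H).shape)
# (4, 3) (3, 3) (4, 3)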
5. Loss function
# Squared Frobenius norm, computed over the observed entries only
cost = tf.reduce_sum(tf.pow(tf.boolean_mask(A, tf_mask) - tf.boolean_mask(WH, tf_mask), 2))
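Written out, this cost is the squared Frobenius norm restricted to the set Ω of observed entries (the positions where np_mask is True):

\text{cost} = \sum_{(i,j) \in \Omega} \left( A_{ij} - (WH)_{ij} \right)^2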
6. Training parameters
# Learning rate
lr = 0.001
# Number of steps
steps = 1000

train_step = tf.train.GradientDescentOptimizer(lr).minimize(cost)
init = tf.global_variables_initializer()
7. Keep the two factor matrices non-negative
# Clipping operation. This ensures that the learnt W and H are non-negative
clip_W = W.assign(tf.maximum(tf.zeros_like(W), W))
clip_H = H.assign(tf.maximum(tf.zeros_like(H), H))
clip = tf.group(clip_W, clip_H)
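Together with the gradient step defined in step 6, each training iteration is effectively projected gradient descent onto the non-negative orthant, with learning rate η = lr = 0.001:

W \leftarrow \max\left( W - \eta \, \nabla_W \text{cost}, \; 0 \right), \qquad H \leftarrow \max\left( H - \eta \, \nabla_H \text{cost}, \; 0 \right)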
8. Run the training loop
with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        sess.run(train_step)   # one gradient-descent step
        sess.run(clip)         # project W and H back to non-negative values
        if i % 100 == 0:
            print("\nCost: %f" % sess.run(cost))
            print("*" * 40)
    learnt_W = sess.run(W)
    learnt_H = sess.run(H)
Output:
Cost: 148.859848
****************************************
Cost: 3.930173
****************************************
Cost: 2.068569
****************************************
Cost: 1.418309
****************************************
Cost: 0.819721
****************************************
Cost: 0.399934
****************************************
Cost: 0.176080
****************************************
Cost: 0.079007
****************************************
Cost: 0.041353
****************************************
Cost: 0.027042
****************************************
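For readers on TensorFlow 2.x, here is a minimal, self-contained eager-mode sketch of the same projected-gradient loop. This is my adaptation (the names A_np and mask_np and the use of tf.GradientTape and tf.keras.optimizers.SGD are mine, not from the original post), and the random initialization is simplified, so the exact cost values will differ:

import numpy as np
import tensorflow as tf

np.random.seed(0)
A_np = np.array([[3, 4, 5, 2], [4, 4, 3, 3], [5, 5, 4, 4]], dtype=np.float32).T
mask_np = np.ones_like(A_np, dtype=bool)
mask_np[0, 0] = False                      # hide the same entry as above

A = tf.constant(A_np)
mask = tf.constant(mask_np)
rank = 3
W = tf.Variable(np.random.randn(A_np.shape[0], rank).astype(np.float32))
H = tf.Variable(np.random.randn(rank, A_np.shape[1]).astype(np.float32))
opt = tf.keras.optimizers.SGD(learning_rate=0.001)

for step in range(1000):
    with tf.GradientTape() as tape:
        WH = tf.matmul(W, H)
        cost = tf.reduce_sum(tf.square(tf.boolean_mask(A, mask) - tf.boolean_mask(WH, mask)))
    grads = tape.gradient(cost, [W, H])
    opt.apply_gradients(zip(grads, [W, H]))
    # Projection step: clip W and H back to the non-negative orthant
    W.assign(tf.maximum(W, 0.0))
    H.assign(tf.maximum(H, 0.0))

print(np.round(tf.matmul(W, H).numpy()))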
9. Inspect the reconstructed matrix
pred = np.dot(learnt_W, learnt_H)
pred_df = pd.DataFrame(pred)
print(pred_df.round())
print("*" * 40)
print(A_orig_df)
Output:
     0    1    2
0  3.0  4.0  5.0
1  4.0  4.0  5.0
2  5.0  3.0  4.0
3  2.0  3.0  4.0
****************************************
     0    1    2
0  3.0  4.0  5.0
1  4.0  4.0  5.0
2  5.0  3.0  4.0
3  2.0  3.0  4.0