@inproceedings{deleonSPP2019,
  author    = {de Leon, Kryzz Joshua G. and Paraan, Francis N. C.},
  title     = {Parallel Acceleration of Density Matrix Renormalization Group Calculations with {TensorFlow}},
  booktitle = {Proceedings of the 37th Samahang Pisika ng Pilipinas Physics Conference},
  year      = {2019},
  month     = may,
  eventdate = {2019-05-29},
  pages     = {SPP-2019-PB-14},
  address   = {Tagbilaran City, Philippines},
  url       = {https://paperview.spp-online.org/proceedings/article/view/SPP-2019-PB-14},
  abstract  = {We parallelize singular value decomposition in a matrix product state formulation of the density matrix renormalization group using the TensorFlow library to find use cases in which consumer-grade GPU hardware can reduce run times. Specifically, we tested the performance of the implementation on a 20-site spin chain for a variable number of kept states. We were able to acquire a speedup of up to 6.4\% when using TensorFlow GPU libraries and a speedup of up to 5.4\% with TensorFlow multicore CPU libraries. This speedup is observed when the number of kept states exceeds a threshold value so that the dimensions of the matrices in the calculation are large enough that the gains in parallelization exceed computational overhead costs.},
}

@comment{The key "455" below is an auto-generated, non-descriptive key; a scheme-consistent
key would be deleonSPP2018, but the original key is kept so existing \cite{455} calls
elsewhere do not break. Rename only after updating all citing documents.}

@inproceedings{455,
  author    = {de Leon, Kryzz Joshua G. and Paraan, Francis N. C.},
  title     = {{GPU} Implementation of Singular Value Decomposition for High Rank Tensors},
  booktitle = {Proceedings of the 36th Samahang Pisika ng Pilipinas Physics Conference},
  year      = {2018},
  month     = jun,
  eventdate = {2018-06-06/2018-06-09},
  pages     = {SPP-2018-PB-50},
  address   = {Puerto Princesa City, Philippines},
  url       = {https://paperview.spp-online.org/proceedings/article/view/SPP-2018-PB-50},
  abstract  = {Programming using the Python API (application programming interface) offers some advantages over using compiled languages. Here we implement a high rank tensor decomposition routine using the TensorFlow library which has native support for utilizing multi-core CPU, GPU, and TPU hardware. Specifically, a singular value decomposition algorithm was performed on a rank-5 tensor. The performance of this Python implementation was compared with a known C++ based library written specifically for tensor manipulations but without native GPU support. We report some use cases where the implementation on a consumer grade GPU was empirically faster than the C++ based library when the rank-5 tensor has more than $2\times 10^{6}$ elements. With the acceptable performance of the implementation, it may be beneficial to have a native implementation of tensor network operations on TensorFlow.},
}