@inproceedings{deleonSPP2019,
  author    = {de Leon, Kryzz Joshua G. and Paraan, Francis N. C.},
  title     = {Parallel Acceleration of Density Matrix Renormalization Group Calculations with {TensorFlow}},
  booktitle = {Proceedings of the 37th Samahang Pisika ng Pilipinas Physics Conference},
  year      = {2019},
  month     = may,
  eventdate = {2019-05-29},
  pages     = {SPP-2019-PB-14},
  address   = {Tagbilaran City, Philippines},
  url       = {https://paperview.spp-online.org/proceedings/article/view/SPP-2019-PB-14},
  abstract  = {We parallelize singular value decomposition in a matrix product state formulation of the density matrix renormalization group using the TensorFlow library to find use cases in which consumer-grade GPU hardware can reduce run times. Specifically, we tested the performance of the implementation on a 20-site spin chain for a variable number of kept states. We were able to acquire a speedup of up to 6.4\% when using TensorFlow GPU libraries and a speedup of up to 5.4\% with TensorFlow multicore CPU libraries. This speedup is observed when the number of kept states exceeds a threshold value so that the dimensions of the matrices in the calculation are large enough that the gains in parallelization exceed computational overhead costs.},
}

@comment{The key "455" below is an auto-generated, non-descriptive key; a scheme-consistent
key would be deleonSPP2018, but the original key is kept so existing \cite{455} calls
elsewhere do not break. Rename only after updating all citing documents.}

@inproceedings{455,
  author    = {de Leon, Kryzz Joshua G. and Paraan, Francis N. C.},
  title     = {{GPU} Implementation of Singular Value Decomposition for High Rank Tensors},
  booktitle = {Proceedings of the 36th Samahang Pisika ng Pilipinas Physics Conference},
  year      = {2018},
  month     = jun,
  eventdate = {2018-06-06/2018-06-09},
  pages     = {SPP-2018-PB-50},
  address   = {Puerto Princesa City, Philippines},
  url       = {https://paperview.spp-online.org/proceedings/article/view/SPP-2018-PB-50},
  abstract  = {Programming using the Python API (application programming interface) offers some advantages over using compiled languages. Here we implement a high rank tensor decomposition routine using the TensorFlow library which has native support for utilizing multi-core CPU, GPU, and TPU hardware. Specifically, a singular value decomposition algorithm was performed on a rank-5 tensor. The performance of this Python implementation was compared with a known C++ based library written specifically for tensor manipulations but without native GPU support. We report some use cases where the implementation on a consumer grade GPU was empirically faster than the C++ based library when the rank-5 tensor has more than $2\times 10^{6}$ elements. With the acceptable performance of the implementation, it may be beneficial to have a native implementation of tensor network operations on TensorFlow.},
}