% Bibliography for Mincut-Gated Transformer
%
% Layout: one entry per block, one field per line, citation keys unchanged.
% arXiv-only preprints are stored as misc entries with the identifier in
% eprint/archiveprefix (plus the abstract-page url) rather than inside a
% journal field, so eprint-aware styles can format and link them.
% Words in titles that must keep their capitalisation under sentence-casing
% styles (coined names, acronyms, version tags) are brace-protected.
% Lines marked NOTE(review) flag data that should be checked against the
% publisher or arXiv record before the entry is cited.

% ---------------------------------------------------------------------------
% Dynamic compute allocation / early exit / sparse attention
% ---------------------------------------------------------------------------

@misc{raposo2024mixture,
  author        = {Raposo, David and Ritter, Sam and Richards, Blake A and Lillicrap, Timothy P and Humphreys, Peter Conway and Santoro, Adam},
  title         = {{Mixture-of-Depths}: Dynamically allocating compute in transformer-based language models},
  year          = {2024},
  eprint        = {2404.02258},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2404.02258},
}

% NOTE(review): this author list does not appear to match the arXiv record
% for 2404.16710 -- verify every name before citing.
@misc{elhoushi2024layerskip,
  author        = {Elhoushi, Mostafa and Diana, Akshat and Xu, Zhongwei and Choi, Yuxiong and Zhang, Yuchen and Keutzer, Kurt},
  title         = {{LayerSkip}: Enabling Early Exit Inference and Self-Speculative Decoding},
  year          = {2024},
  eprint        = {2404.16710},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2404.16710},
}

% NOTE(review): this author list does not appear to match the published
% NeurIPS 2024 paper -- verify every name before citing.
@inproceedings{jiang2024minference,
  author    = {Jiang, Huiqiang and Wu, Qianhui and Zheng, Haoyang and Li, Yue and Yang, Hongsheng},
  title     = {{MInference} 1.0: Accelerating Pre-filling for Long-Context {LLMs} via Dynamic Sparse Attention},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {37},
  year      = {2024},
}

% ---------------------------------------------------------------------------
% Energy-based and spiking transformers
% ---------------------------------------------------------------------------

% NOTE(review): this author list does not appear to match the arXiv record
% for 2507.02092 -- verify every name before citing.
@misc{gladstone2025energy,
  author        = {Gladstone, Aram and Shankar, Shishir and Belanger, David and Likhomanenko, Tatiana and Faust, Aleksandra},
  title         = {Energy-Based Transformers are Scalable Learners and Thinkers},
  year          = {2025},
  eprint        = {2507.02092},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2507.02092},
}

% NOTE(review): the author list and the page range 56--78 both look
% inconsistent with the NeurIPS 2023 (vol. 36) proceedings -- verify.
@inproceedings{yao2023spike,
  author    = {Yao, Man and Zhao, Guangshe and Zhang, Hengyu and Hu, Yifan and Deng, Lei and Tian, Yonghong and Xu, Bo and Li, Guoqi},
  title     = {Spike-driven Transformer},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  pages     = {56--78},
  year      = {2023},
}

% NOTE(review): this author list does not appear to match the published
% ICLR 2024 paper -- verify every name before citing.
@inproceedings{yao2024spike2,
  author    = {Yao, Man and Zhang, Hengyu and Zhao, Guangshe and Wang, Jiechen and Hu, Yifan and Deng, Lei and Li, Guoqi},
  title     = {Spike-driven Transformer {V2}: Meta Spiking Neural Network Architecture Inspiring Integrated Artificial Intelligence},
  booktitle = {International Conference on Learning Representations},
  year      = {2024},
}

% ---------------------------------------------------------------------------
% Graph transformers, graph partitioning and community detection
% ---------------------------------------------------------------------------

@inproceedings{kreuzer2021spectral,
  author    = {Kreuzer, Devin and Beaini, Dominique and Hamilton, Will and L{\'e}tourneau, Vincent and Tossou, Prudencio},
  title     = {Rethinking Graph Transformers with Spectral Attention},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {34},
  pages     = {21618--21629},
  year      = {2021},
}

@article{kernighan1970efficient,
  author    = {Kernighan, Brian W and Lin, Shen},
  title     = {An efficient heuristic procedure for partitioning graphs},
  journal   = {Bell System Technical Journal},
  volume    = {49},
  number    = {2},
  pages     = {291--307},
  year      = {1970},
  publisher = {Wiley Online Library},
}

@article{blondel2008fast,
  author    = {Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
  title     = {Fast unfolding of communities in large networks},
  journal   = {Journal of Statistical Mechanics: Theory and Experiment},
  volume    = {2008},
  number    = {10},
  pages     = {P10008},
  year      = {2008},
  publisher = {IOP Publishing},
}

% ---------------------------------------------------------------------------
% Foundations
% ---------------------------------------------------------------------------

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  year      = {2017},
}