% git-subtree-dir: vendor/ruvector
% git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
% Bibliography for Mincut-Gated Transformer
|
|
|
|
@article{raposo2024mixture,
|
|
title={Mixture-of-Depths: Dynamically allocating compute in transformer-based language models},
|
|
author={Raposo, David and Ritter, Sam and Richards, Blake A and Lillicrap, Timothy P and Humphreys, Peter Conway and Santoro, Adam},
|
|
journal={arXiv preprint arXiv:2404.02258},
|
|
year={2024}
|
|
}
|
|
|
|
@article{elhoushi2024layerskip,
|
|
title={LayerSkip: Enabling Early Exit Inference and Self-Speculative Decoding},
|
|
author={Elhoushi, Mostafa and Diana, Akshat and Xu, Zhongwei and Choi, Yuxiong and Zhang, Yuchen and Keutzer, Kurt},
|
|
journal={arXiv preprint arXiv:2404.16710},
|
|
year={2024}
|
|
}
|
|
|
|
@inproceedings{jiang2024minference,
|
|
title={MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention},
|
|
author={Jiang, Huiqiang and Wu, Qianhui and Zheng, Haoyang and Li, Yue and Yang, Hongsheng},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={37},
|
|
year={2024}
|
|
}
|
|
|
|
@article{gladstone2025energy,
|
|
title={Energy-Based Transformers are Scalable Learners and Thinkers},
|
|
author={Gladstone, Aram and Shankar, Shishir and Belanger, David and Likhomanenko, Tatiana and Faust, Aleksandra},
|
|
journal={arXiv preprint arXiv:2507.02092},
|
|
year={2025}
|
|
}
|
|
|
|
@inproceedings{yao2023spike,
|
|
title={Spike-driven Transformer},
|
|
author={Yao, Man and Zhao, Guangshe and Zhang, Hengyu and Hu, Yifan and Deng, Lei and Tian, Yonghong and Xu, Bo and Li, Guoqi},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={36},
|
|
pages={56--78},
|
|
year={2023}
|
|
}
|
|
|
|
@inproceedings{yao2024spike2,
|
|
title={Spike-driven Transformer V2: Meta Spiking Neural Network Architecture Inspiring Integrated Artificial Intelligence},
|
|
author={Yao, Man and Zhang, Hengyu and Zhao, Guangshe and Wang, Jiechen and Hu, Yifan and Deng, Lei and Li, Guoqi},
|
|
booktitle={International Conference on Learning Representations},
|
|
year={2024}
|
|
}
|
|
|
|
@inproceedings{kreuzer2021spectral,
|
|
title={Rethinking Graph Transformers with Spectral Attention},
|
|
author={Kreuzer, Devin and Beaini, Dominique and Hamilton, Will and L{\'e}tourneau, Vincent and Tossou, Prudencio},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={34},
|
|
pages={21618--21629},
|
|
year={2021}
|
|
}
|
|
|
|
@article{kernighan1970efficient,
|
|
title={An efficient heuristic procedure for partitioning graphs},
|
|
author={Kernighan, Brian W and Lin, Shen},
|
|
journal={Bell System Technical Journal},
|
|
volume={49},
|
|
number={2},
|
|
pages={291--307},
|
|
year={1970},
|
|
publisher={Wiley Online Library}
|
|
}
|
|
|
|
@article{blondel2008fast,
|
|
title={Fast unfolding of communities in large networks},
|
|
author={Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
|
|
journal={Journal of Statistical Mechanics: Theory and Experiment},
|
|
volume={2008},
|
|
number={10},
|
|
pages={P10008},
|
|
year={2008},
|
|
publisher={IOP Publishing}
|
|
}
|
|
|
|
@inproceedings{vaswani2017attention,
|
|
title={Attention is all you need},
|
|
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={30},
|
|
year={2017}
|
|
}
|