Instructions to use Lowin/chinese-bigbird-base-4096 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Lowin/chinese-bigbird-base-4096 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("fill-mask", model="Lowin/chinese-bigbird-base-4096")
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM
tokenizer = AutoTokenizer.from_pretrained("Lowin/chinese-bigbird-base-4096")
model = AutoModelForMaskedLM.from_pretrained("Lowin/chinese-bigbird-base-4096")
- Notebooks
- Google Colab
- Kaggle
import jieba_fast
from transformers import BertTokenizer
from transformers import BigBirdModel
class JiebaTokenizer(BertTokenizer):
    """BertTokenizer variant that segments text into words before tokenizing.

    The text is first split by ``pre_tokenizer``. A segment that is already
    an entry of the vocabulary is emitted as a single token; any other
    segment is handed to the parent class's ``_tokenize``.
    """

    def __init__(
        self, pre_tokenizer=lambda x: jieba_fast.cut(x, HMM=False), *args, **kwargs
    ):
        """Build the tokenizer.

        Args:
            pre_tokenizer: Callable taking a string and returning an iterable
                of word strings. Defaults to ``jieba_fast.cut`` with
                ``HMM=False``.
            *args: Forwarded unchanged to ``BertTokenizer.__init__``.
            **kwargs: Forwarded unchanged to ``BertTokenizer.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.pre_tokenizer = pre_tokenizer

    def _tokenize(self, text, *arg, **kwargs):
        """Return the list of tokens for ``text``.

        Extra positional and keyword arguments are accepted but not used.
        """
        pieces = []
        for segment in self.pre_tokenizer(text):
            if segment not in self.vocab:
                # Unknown word: let the parent tokenizer break it down.
                pieces.extend(super()._tokenize(segment))
                continue
            # Whole word is a vocabulary entry: keep it as one token.
            pieces.append(segment)
        return pieces
# The model weights and the tokenizer are loaded from the same checkpoint id.
_CHECKPOINT = "Lowin/chinese-bigbird-base-4096"
model = BigBirdModel.from_pretrained(_CHECKPOINT)
tokenizer = JiebaTokenizer.from_pretrained(_CHECKPOINT)
- Downloads last month
- 20