Quick NER training with Flair [Python]
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.embeddings import TokenEmbeddings, StackedEmbeddings, BytePairEmbeddings
from pathlib import Path
from typing import List


def loadCorpus(data_folder):
    # define columns
    columns = {0: "text", 1: "ner"}
    # init a corpus using column format, data folder and the names of the train, dev and test files
    corpus: Corpus = ColumnCorpus(
        data_folder,
        columns,
        train_file="train.txt",
        test_file="test.txt",
        dev_file="dev.txt",
    )
    return corpus


if __name__ == "__main__":
    biopath = "/home/ubuntu/bio"
    resultpath = "/home/ubuntu/result"

    corpus: Corpus = loadCorpus(biopath)
    tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")

    # initialize embeddings
    embedding_objects: List[TokenEmbeddings] = []
    # embedding_objects.append(CharacterEmbeddings())
    embedding_objects.append(BytePairEmbeddings("en"))
    # embedding_objects.append(fetchMaterialElmoEmbeddings())
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=512,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=True,
    )

    # train as always
    trainer = ModelTrainer(tagger, corpus)
    resultpath = Path(resultpath) / "tagger_results" / "bpe"
    trainer.train(
        str(resultpath),
        learning_rate=0.1,
        mini_batch_size=64,
        max_epochs=200,
        patience=10,
        embeddings_storage_mode="cpu",
    )