# FlairでELMoをEmbeddingとして使う (Using ELMo as an embedding in Flair)

from flair.data import Corpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.embeddings import (
WordEmbeddings,
CharacterEmbeddings,
FlairEmbeddings,
BertEmbeddings,
)
from flair.data import Corpus
from flair.datasets import ColumnCorpus
import flair.datasets

def ElmoEmbeddings(
    options_file="/path/to/elmo/elmo_options.json",
    weight_file="/path/to/elmo/elmo_weights.hdf5",
):
    """Build a Flair ELMo embedding from an options/weights file pair.

    Args:
        options_file: Path to the ELMo options JSON file. Defaults to the
            placeholder path that used to be hard-coded in this function.
        weight_file: Path to the ELMo weights HDF5 file. Defaults to the
            placeholder path that used to be hard-coded in this function.

    Returns:
        The ``flair.embeddings.ELMoEmbeddings`` instance constructed with the
        model name ``'custom'`` and the two given files.
    """
    # Kept function-local, as in the original, so that importing this module
    # does not require the ELMo class to be importable.
    from flair.embeddings import ELMoEmbeddings

    return ELMoEmbeddings(
        "custom",
        options_file=options_file,
        weight_file=weight_file,
    )

def main(datapath="/path/to/BIO", resultpath="/path/to/result"):
    """Train a CRF sequence tagger for the "ner" tag type on ELMo embeddings.

    Args:
        datapath: Folder containing train.tsv, test.tsv and devel.tsv.
        resultpath: Path handed (as a string) to ``trainer.train`` as its
            first argument.
    """
    # Local imports: ``List`` was used without being imported, and
    # ``ModelTrainer`` was imported mid-script in the original.
    from typing import List

    from flair.trainers import ModelTrainer

    # NOTE(review): loadCorpus is not defined in the visible part of this
    # file; it must be provided elsewhere (presumably a thin wrapper around
    # ColumnCorpus for the files in ``datapath`` -- confirm).
    corpus: Corpus = loadCorpus(datapath)
    tag_type = "ner"
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # ELMo is the only embedding, but it is still wrapped in a
    # StackedEmbeddings so more embeddings can be appended to the list.
    embedding_objects: List[TokenEmbeddings] = [ElmoEmbeddings()]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(
        str(resultpath),
        learning_rate=0.1,
        mini_batch_size=128,
        max_epochs=3,
        patience=5,
        embeddings_storage_mode="gpu",
    )


if __name__ == "__main__":
    main()

# Flair

# Posted by vastee