from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.embeddings import TokenEmbeddings
from flair.embeddings import StackedEmbeddings
from flair.embeddings import BytePairEmbeddings
# Helper that starts building a flair `ColumnCorpus`; presumably the corpus is
# read from `data_folder`, but the call's arguments are not visible here.
# NOTE(review): in this view the body carries no indentation and the
# `ColumnCorpus(` call below has neither arguments nor a closing parenthesis,
# so the rest of the function (including whatever it returns) cannot be seen.
# Confirm against the complete file before relying on this description.
def loadCorpus(data_folder):
# Mapping with keys 0 and 1 naming the "text" and "ner" fields; presumably the
# column-index -> annotation-name layout handed to ColumnCorpus — TODO confirm.
columns = {0: "text", 1: "ner"}
# init a corpus using column format, data folder and the names of the train, dev and test files
corpus: Corpus = ColumnCorpus(
# Script entry point: loads the corpus, assembles the embeddings, constructs a
# SequenceTagger and wraps it in a ModelTrainer.
# NOTE(review): indentation is missing in this view and several multi-line
# calls are cut off, so the notes below describe only what is visible.
if __name__ == "__main__":
# Hard-coded input (corpus) and output (results) locations.
biopath = "/home/ubuntu/bio"
resultpath = "/home/ubuntu/result"
corpus: Corpus = loadCorpus(biopath)
# Tag dictionary for the 'ner' tag type, derived from the loaded corpus.
tag_dictionary = corpus.make_tag_dictionary(tag_type='ner')
# NOTE(review): `List` is not among the import lines visible at the top of
# this file — confirm that `from typing import List` exists elsewhere.
embedding_objects: List = []
# embedding_objects.append(CharacterEmbeddings())
# Byte-pair embeddings for "en" are the only embedding actually appended; the
# other two candidates are commented out.
embedding_objects.append(BytePairEmbeddings("en"))
# embedding_objects.append(fetchMaterialElmoEmbeddings())
# Combine the collected embedding objects into one stacked embedding.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_objects)
# NOTE(review): the SequenceTagger call is truncated here — only the
# `tag_dictionary` keyword is visible and the closing parenthesis is missing.
tagger: SequenceTagger = SequenceTagger(
tag_dictionary=tag_dictionary,
trainer = ModelTrainer(tagger, corpus)
# Rebinds `resultpath` from the string above to the path
# <resultpath>/tagger_results/bpe.
# NOTE(review): `Path` is not among the visible imports — confirm that
# `from pathlib import Path` exists elsewhere.
resultpath = Path(resultpath) / "tagger_results" / "bpe"
# NOTE(review): this looks like a keyword argument of a call whose opening
# line is not visible (presumably the trainer's training call) — TODO confirm.
embeddings_storage_mode="cpu"