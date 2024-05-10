So I decided to go for another route, to fine tune my hugging face model, so I went to this site:
and it all works well, until I met this error:
Traceback (most recent call last):
File "c:\Users\Philip Chen\Documents\AICrowd\llm-practive\qna-bot5.py", line 48, in <module>
model.compile(
File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py", line 1563, in compile
super().compile(
File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\tf_keras\src\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\tf_keras\src\optimizers\__init__.py", line 335, in get
raise ValueError(
ValueError: Could not interpret optimizer identifier: <keras.src.optimizers.adam.Adam object at 0x0000028B61A7DA50>
And it comes from the offending line marked in the code below":
# Load a data set
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizerFast
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.optimizers as tk
import tensorflow.keras.losses as tl
from transformers import TFDistilBertForSequenceClassification
# Preparation
model_name="google-bert/bert-base-cased"
fileName="data.json"
# dataset = load_dataset("json", data_files=fileName, split="train")
# model_save_path = './model'
# questions=dataset["input"]
df = pd.read_json(fileName, lines=True)
df=df[['input']]
print(df)
#Work
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
is_train = np.random.uniform(size = len(df))<0.8
train_raw = (tf.data.Dataset.
from_tensor_slices((dict(tokenizer(list(df['input'][is_train]), padding = True, truncation = True)),np.array(df['input'])[is_train ])).
shuffle(len(df)).
batch(64,drop_remainder = True)
)
train_raw.prefetch(1)
test_raw = (tf.data.Dataset.
from_tensor_slices((dict(tokenizer(list(df['input'][~is_train]), padding = True, truncation = True)),np.array(df['input'])[~is_train ])).
shuffle(len(df)).
batch(64, drop_remainder = True)
)
test_raw.prefetch(1)
num_epochs = 3
adam = tk.Adam()
model.compile( #<<---- line where error is raised
optimizer=tk.Adam(5e-5),
metrics=["accuracy"],
loss = tl.SparseCategoricalCrossentropy(from_logits = True)
)
model.fit(
train_raw,
validation_data=test_raw,
epochs = num_epochs
)
How exactly should I have imported Adam?