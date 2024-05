Traceback:

raceback (most recent call last): File "c:\Users\Philip Chen\Documents\AICrowd\amazon-kdd-cup-2024-starter-kit\models\QnAModel.py", line 47, in <module> trainer.train() File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\trainer.py", line 1859, in train return inner_training_loop( File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\trainer.py", line 2165, in _inner_training_loop for step, inputs in enumerate(epoch_iterator): File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\accelerate\data_loader.py", line 454, in __iter__ current_batch = next(dataloader_iter) File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 631, in __next__ data = self._next_data() File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 675, in _next_data data = self._dataset_fetcher.fetch(index) # may raise StopIteration File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\fetch.py", line 54, in fetch return self.collate_fn(data) File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\trainer_utils.py", line 808, in __call__ return self.data_collator(features) File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\data\data_collator.py", line 92, in default_data_collator return torch_default_data_collator(features) File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\data\data_collator.py", line 131, in torch_default_data_collator features = [vars(f) for f in features] File "C:\Users\Philip Chen\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\data\data_collator.py", line 131, in <listcomp> features = [vars(f) for f in features] TypeError: vars() argument must have __dict__ attribute 0%| | 0/36 [00:00<?, ?it/s]

Offending line:

trainer.train()

Code:

import datasets as ds import pandas as pd import transformers as tf myData=pd.read_json("./models/labels.json", lines=True) inputField=myData["input_field"] taskType=myData["task_type"] outputField=myData["output_field"] isMCQ=myData["is_multiple_choice"] qArr=[] aArr=[] trainDF = pd.DataFrame(inputField) testDF = pd.DataFrame(inputField) trainDS = ds.Dataset.from_pandas(trainDF) testDS = ds.Dataset.from_pandas(testDF) for i in range(len(myData)): qArr.append(inputField[i]) aArr.append(outputField[i]) myDataSet=trainDS.add_column('test',testDS) myDataSet=myDataSet.rename_column('input_field', 'train') tokenizer = tf.AutoTokenizer.from_pretrained("google-bert/bert-base-cased") def tokenize_function(examples): return tokenizer(examples["train"], padding="max_length", truncation=True) tokenizedDatasets = myDataSet.map(tokenize_function, batched=True) model = tf.AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-cased", num_labels=5) training_args = tf.TrainingArguments(output_dir="./") print(trainDS) print(testDS) trainer = tf.Trainer( model=model, args=training_args, train_dataset=tokenizedDatasets["train"], eval_dataset=tokenizedDatasets["test"] ) trainer.train() # THE OFFENDING LINE

Any solutions on how to fix this? Thanks.