I wish I could send you all of the code, but it belongs to my company, so I cannot share it in full. However, if you can help me clear the warning, it would be much appreciated. Perhaps it is a KNIME bug?
def execute(self, exec_context: knext.ExecutionContext, context_table: knext.Table, input_port: ISK_GenericPortObject) -> list:
    """Merge the context table with the port's DataFrame, embed it, and emit both outputs.

    The pickled payload carried by ``input_port`` is restored and must be a
    pandas DataFrame. The rows of ``context_table`` are placed in front of it,
    the combined frame is passed to ``self.semantic_search`` together with
    ``self.model_path``, and the result is packaged twice: once as a custom
    port object (pickled DataFrame plus a JSON-serialized schema) and once as
    a regular KNIME table.

    Args:
        exec_context: KNIME execution context (not used by this method).
        context_table: Input table; must provide the "Document",
            "Chunk Name" and "Text" columns.
        input_port: Custom port object whose ``_model`` attribute holds a
            pickled pandas DataFrame.

    Returns:
        A two-element list: the ``ISK_GenericPortObject`` wrapping the
        output DataFrame, followed by the same data as a ``knext.Table``.

    Raises:
        ValueError: If the unpickled payload is not a pandas DataFrame, or
            if ``context_table`` lacks one of the required columns.
    """
    # SECURITY: pickle.loads can execute arbitrary code while unpickling.
    # This is only safe as long as the port payload always originates from a
    # trusted upstream node of this same extension.
    deserialized_data = pickle.loads(input_port._model)
    if not isinstance(deserialized_data, pd.DataFrame):
        raise ValueError(f"Expected deserialized data to be a pandas DataFrame, but got {type(deserialized_data)}")

    context_df = context_table.to_pandas()

    # Fail early, naming the first required column that is absent.
    for col in ("Document", "Chunk Name", "Text"):
        if col not in context_df.columns:
            raise ValueError(f"The '{col}' column is missing in the context data.")

    # Context rows go first; the index is rebuilt so it is unique and sequential.
    # NOTE(review): only the context table is validated above; the port's
    # DataFrame is assumed to carry the same columns -- confirm upstream.
    combined_df = pd.concat([context_df, deserialized_data], ignore_index=True)

    # NOTE(review): semantic_search is assumed to return one embedding per row
    # as an array-like exposing .tolist() (e.g. a NumPy array) -- confirm.
    embeddings = self.semantic_search(combined_df, self.model_path)

    output_df = pd.DataFrame({
        "Document": combined_df["Document"],
        "Chunk Name": combined_df["Chunk Name"],
        "Embedded Text": embeddings.tolist(),
    })

    # Describe the output columns for the custom port's spec.
    types = [knext.string(), knext.string(), knext.list_(knext.double())]
    names = ["Document", "Chunk Name", "Embedded Text"]
    schema = knext.Schema(types, names)
    serialized_schema = json.dumps(schema.serialize())
    spec = ISK_GenericPortObjectSpec(spec_data=serialized_schema, schema=schema)

    output_port_object = ISK_GenericPortObject(spec, pickle.dumps(output_df))

    # Return both the custom port object and the KNIME table.
    return [output_port_object, knext.Table.from_pandas(output_df)]