>>> from snowflake.snowpark.testing import assert_dataframe_equal
>>> from snowflake.snowpark.types import StructType, StructField, IntegerType, StringType, DoubleType
>>> schema1 = StructType([
... StructField("id", IntegerType()),
... StructField("name", StringType()),
... StructField("value", DoubleType())
... ])
>>> data1 = [[1, "Rice", 1.0], [2, "Saka", 2.0], [3, "White", 3.0]]
>>> df1 = session.create_dataframe(data1, schema1)
>>> df2 = session.create_dataframe(data1, schema1)
>>> assert_dataframe_equal(df2, df1) # pass, DataFrames are identical
>>> data2 = [[2, "Saka", 2.0], [1, "Rice", 1.0], [3, "White", 3.0]] # change the order
>>> df3 = session.create_dataframe(data2, schema1)
>>> assert_dataframe_equal(df3, df1) # pass, same rows in a different order; row order is ignored
>>> data3 = [[1, "Rice", 1.0], [2, "Saka", 2.0], [4, "Rowe", 4.0]]
>>> df4 = session.create_dataframe(data3, schema1)
>>> assert_dataframe_equal(df4, df1)
Traceback (most recent call last):
AssertionError: Value mismatch on row 2 at column 0: actual 4, expected 3
Different row:
--- actual ---
+++ expected +++
- Row(ID=4, NAME='Rowe', VALUE=4.0)
? ^ ^^^ ^
+ Row(ID=3, NAME='White', VALUE=3.0)
? ^ ^^^^ ^
>>> data4 = [[1, "Rice", 1.0], [2, "Saka", 2.0], [3, "White", 3.0001]]
>>> df5 = session.create_dataframe(data4, schema1)
>>> assert_dataframe_equal(df5, df1, atol=1e-3) # pass, the 0.0001 difference in "value" is within atol=1e-3
>>> assert_dataframe_equal(df5, df1, atol=1e-5)
Traceback (most recent call last):
AssertionError: Value mismatch on row 2 at column 2: actual 3.0001, expected 3.0
Different row:
--- actual ---
+++ expected +++
- Row(ID=3, NAME='White', VALUE=3.0001)
? ---
+ Row(ID=3, NAME='White', VALUE=3.0)
>>> schema2 = StructType([
... StructField("id", IntegerType()),
... StructField("key", StringType()),
... StructField("value", DoubleType())
... ])
>>> df6 = session.create_dataframe(data1, schema2)
>>> assert_dataframe_equal(df6, df1)
Traceback (most recent call last):
AssertionError: Column name mismatch at column 1: actual KEY, expected NAME
Different schema:
--- actual ---
+++ expected +++
- StructType([StructField('ID', LongType(), nullable=True), StructField('KEY', StringType(), nullable=True), StructField('VALUE', DoubleType(), nullable=True)])
? ^ -
+ StructType([StructField('ID', LongType(), nullable=True), StructField('NAME', StringType(), nullable=True), StructField('VALUE', DoubleType(), nullable=True)])
?
>>> schema3 = StructType([
... StructField("id", IntegerType()),
... StructField("name", StringType()),
... StructField("value", IntegerType())
... ])
>>> df7 = session.create_dataframe(data1, schema3)
>>> assert_dataframe_equal(df7, df1)
Traceback (most recent call last):
AssertionError: Column data type mismatch at column 2: actual LongType(), expected DoubleType()
Different schema:
--- actual ---
+++ expected +++
- StructType([StructField('ID', LongType(), nullable=True), StructField('NAME', StringType(), nullable=True), StructField('VALUE', LongType(), nullable=True)])
? ^ ^^
+ StructType([StructField('ID', LongType(), nullable=True), StructField('NAME', StringType(), nullable=True), StructField('VALUE', DoubleType(), nullable=True)])
?