@@ -918,6 +918,87 @@ def generate_snapshot(
918918 "refs" : {"test" : {"snapshot-id" : 3051729675574597004 , "type" : "tag" , "max-ref-age-ms" : 10000000 }},
919919}
920920
# Iceberg format-version 2 table metadata used as a test payload: a single
# schema, two "append" snapshots, and one "statistics" entry per snapshot.
# NOTE(review): the second snapshot declares "schema-id": 1 although "schemas"
# only defines schema-id 0 -- confirm this is intentional.
TABLE_METADATA_V2_WITH_STATISTICS = {
    "format-version": 2,
    "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
    "location": "s3://bucket/test/location",
    "last-sequence-number": 34,
    "last-updated-ms": 1602638573590,
    "last-column-id": 3,
    "current-schema-id": 0,
    "schemas": [
        {
            "type": "struct",
            "schema-id": 0,
            "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}],
        }
    ],
    "default-spec-id": 0,
    "partition-specs": [{"spec-id": 0, "fields": []}],
    "last-partition-id": 1000,
    "default-sort-order-id": 0,
    "sort-orders": [{"order-id": 0, "fields": []}],
    "properties": {},
    # The current snapshot is the second (child) snapshot listed below.
    "current-snapshot-id": 3055729675574597004,
    "snapshots": [
        {
            "snapshot-id": 3051729675574597004,
            "timestamp-ms": 1515100955770,
            "sequence-number": 0,
            "summary": {"operation": "append"},
            "manifest-list": "s3://a/b/1.avro",
        },
        {
            "snapshot-id": 3055729675574597004,
            "parent-snapshot-id": 3051729675574597004,
            "timestamp-ms": 1555100955770,
            "sequence-number": 1,
            "summary": {"operation": "append"},
            "manifest-list": "s3://a/b/2.avro",
            "schema-id": 1,
        },
    ],
    "statistics": [
        {
            "snapshot-id": stats_snapshot_id,
            "statistics-path": "s3://a/b/stats.puffin",
            "file-size-in-bytes": 413,
            "file-footer-size-in-bytes": 42,
            "blob-metadata": [
                {
                    "type": "ndv",
                    "snapshot-id": stats_snapshot_id,
                    "sequence-number": 1,
                    "fields": [1],
                }
            ],
        }
        # The two entries are identical except for the snapshot they describe
        # (same path, sizes and blob layout), so they are generated per id.
        for stats_snapshot_id in (3051729675574597004, 3055729675574597004)
    ],
    "snapshot-log": [],
    "metadata-log": [],
}
1001+
9211002
9221003@pytest .fixture
9231004def example_table_metadata_v2 () -> Dict [str , Any ]:
@@ -929,6 +1010,11 @@ def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
9291010 return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES
9301011
9311012
@pytest.fixture
def table_metadata_v2_with_statistics() -> Dict[str, Any]:
    """Provide the raw v2 table metadata dict that includes a "statistics" section."""
    # The module-level dict itself is handed out; no copy is made.
    metadata_with_statistics: Dict[str, Any] = TABLE_METADATA_V2_WITH_STATISTICS
    return metadata_with_statistics
1016+
1017+
9321018@pytest .fixture (scope = "session" )
9331019def metadata_location (tmp_path_factory : pytest .TempPathFactory ) -> str :
9341020 from pyiceberg .io .pyarrow import PyArrowFileIO
@@ -2170,6 +2256,18 @@ def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_s
21702256 )
21712257
21722258
@pytest.fixture
def table_v2_with_statistics(table_metadata_v2_with_statistics: Dict[str, Any]) -> Table:
    """Build a ``Table`` from the v2 metadata dict that includes statistics.

    The dict is unpacked into ``TableMetadataV2``. The resulting table is
    identified as ``("database", "table")``, gets its IO from
    ``load_file_io()``, and is attached to a ``NoopCatalog``.
    """
    parsed_metadata = TableMetadataV2(**table_metadata_v2_with_statistics)
    # The metadata file path is derived from the table's own location.
    metadata_file = f"{parsed_metadata.location}/uuid.metadata.json"
    file_io = load_file_io()
    noop_catalog = NoopCatalog("NoopCatalog")
    return Table(
        identifier=("database", "table"),
        metadata=parsed_metadata,
        metadata_location=metadata_file,
        io=file_io,
        catalog=noop_catalog,
    )
2269+
2270+
@pytest.fixture
def bound_reference_str() -> BoundReference[str]:
    """Provide a ``BoundReference`` to a non-required string field (id 1, named "field") read at position 0."""
    string_field = NestedField(1, "field", StringType(), required=False)
    position_accessor = Accessor(position=0, inner=None)
    return BoundReference(field=string_field, accessor=position_accessor)
0 commit comments