Schema#

class swordfish.connection.Schema(impl)#

Manages catalog schemas.

Parameters:: impl (SchemaImpl)

property handle: Handle#

Obtains a schema handle.

Returns:: The schema handle.
Return type:: Handle

create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None) → Table#

create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None) → Table

create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None, sort_cols: List[str] = None, keep_duplicates: Literal['ALL', 'LAST', 'FIRST'] | EnumInt = 'ALL', sort_key_mapping_function: List[FunctionDef] = None, soft_delete: bool = False, indexes: List[str] = None) → Table

create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None, sort_cols: List[str] = None, keep_duplicates: Literal['ALL', 'LAST', 'FIRST'] | EnumInt = 'ALL', soft_delete: bool = False, indexes: List[str] = None) → Table

create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None, primary_key_cols: List[str] = None, indexes: List[str] = None) → Table

create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None, primary_key_cols: List[str] = None, indexes: List[str] = None) → Table

Creates a table using a specific storage engine with optional partitioning.

This method supports creating tables with three different storage engines: OLAP, TSDB, and PKEY. Each engine supports creating either of the following:

Partitioned tables: Require partition_cols for data partitioning
Dimension tables: Do not use partitioning but retain other engine capabilities

Engine Compatibility Chart:

Feature	OLAP Engine	TSDB Engine	PKEY Engine
partition_cols	✓ (required) [1]	✓ (required) [1]	✓ (required) [1]
compress_methods	✓	✓	✓
sort_cols		✓
primary_key_cols			✓
keep_duplicates		✓
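sort_key_mapping_function [2]		✓
soft_delete		✓
indexes		✓	✓

[1] Required for partitioned tables only; omit it to create a dimension table.
[2] Applicable to partitioned tables only.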

Parameters:

name (str) – The name of the table.
table_schema (Any) – The schema definition of the table, a mapping of column names to data types.
partition_cols (list of str, optional) – The partitioning column(s). Required for a partitioned table; omit it to create a dimension table. Defaults to None.
compress_methods (dict of str to str, optional) – Compression methods for specific columns, where the key is the column name and the value is the compression method. Defaults to None.
sort_cols (list of str, optional) – Columns used for sorting to optimize query performance, applicable for the TSDB engine. Defaults to None.
primary_key_cols (list of str, optional) – Columns that act as primary keys, applicable for the PKEY engine. Defaults to None.
keep_duplicates ({"ALL", "LAST", "FIRST"}, EnumInt, optional) – Deduplication strategy, applicable for the TSDB engine. Defaults to "ALL".
- “ALL”: Allows all duplicate values.
- “LAST”: Retains only the latest value.
- “FIRST”: Retains only the earliest value.
sort_key_mapping_function (list of FunctionDef, optional) – A list of functions for defining sorting key mappings, applicable for the TSDB engine (partitioned table). Defaults to None.
soft_delete (bool, optional) – Enables soft delete functionality, applicable for the TSDB engine. Defaults to False.
indexes (list of str, optional) – A list of columns to create indexes on, used for query optimization. Applicable for the TSDB and PKEY engines. Defaults to None.

Returns:

The created table instance.

Return type:

Table

Examples

Creating a partitioned table with the OLAP engine

>>> schema.create_table("quote", table_schema={'id': "INT",
... 'date': "DATE", 'value': "DOUBLE"}, partition_cols=["id"],
... compress_methods={"id": "lz4"})

Creating a dimension table with the OLAP engine

>>> schema.create_table("quote", table_schema={'id': "INT",
... 'date': "DATE", 'value': "DOUBLE"}, compress_methods={"id":
... "lz4"})

Creating a partitioned table with the TSDB engine

>>> import swordfish as sf
>>> import swordfish.function as F
>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     partition_cols=["id"],
...     compress_methods={"id": "lz4"},
...     sort_cols=["date", "id"],
...     keep_duplicates="LAST",
...     sort_key_mapping_function=[sf.partial(F.hashBucket,
...     buckets=5)],
...     soft_delete=True,
...     indexes=["id", "date"],
... )

Creating a dimension table with the TSDB engine

>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     compress_methods={"id": "lz4"},
...     sort_cols=["date", "id"],
...     keep_duplicates="LAST",
...     soft_delete=True,
...     indexes=["id", "date"],
... )

Creating a partitioned table with the PKEY engine

>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     partition_cols=["id"],
...     compress_methods={"id": "lz4"},
...     primary_key_cols=["id"],
...     indexes=["date"],
... )

Creating a dimension table with the PKEY engine

>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     compress_methods={"value": "lz4"},
...     primary_key_cols=["id"],
...     indexes=["date"],
... )

list_tables()#

Retrieves the names of all tables in the schema.

Returns:: A list of table names.
Return type:: list of str

Examples

>>> schema.list_tables()

exists_table(name)#

Parameters:: name (str)
Return type:: bool

drop_table(name)#

Drops a table from the schema.

Parameters:: name (str) – The name of the table to be dropped.

Examples

>>> schema.drop_table("table_name")

truncate_table(name)#

Truncates a table in the schema.

Parameters:: name (str) – The name of the table to truncate.

Examples

>>> schema.truncate_table("table_name")

table(name)#

Retrieves a table by name.

Parameters:: name (str) – The name of the table to retrieve.
Returns:: The Table corresponding to the specified name.
Return type:: Table

Examples

>>> schema.table("table_name")

property engine_type: StorageType#

Retrieves the storage engine type.

Returns:: The type of storage engine.
Return type:: StorageType