sqlite in sqlite

python
ibis
sqlite
Author

Cody

Published

November 28, 2024

Storing SQLite databases in SQLite databases.


In this post, we’ll look at storing SQLite databases in SQLite databases. Since a SQLite database is represented by a file on disk, we can simply store the bytes of that file in a binary blob in another SQLite database.

We’ll demonstrate this with Ibis and some stateful dkdc-* tools:

import os
import ibis
import ibis.expr.datatypes as dt

from dkdc_util import now, uuid
from dkdc_todo import Todo
from dkdc_lake import Lake

ibis.options.interactive = True

Let’s define the schema of our meta-database:

schema = ibis.schema(
    {
        "idx": dt.timestamp,
        "filename": str,
        "data": dt.binary,
    }
)
schema
ibis.Schema {
  idx       timestamp
  filename  string
  data      binary
}

And create it:

# for idempotency
if os.path.exists("db.db"):
    os.remove("db.db")

dbs = ibis.sqlite.connect("db.db")
dbs.create_table("dbs", schema=schema)
dbs.list_tables()
['dbs']

The table is empty:

t = dbs.table("dbs")
t
┏━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┓
┃ idx        filename  data   ┃
┡━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━┩
│ timestampstringbinary │
└───────────┴──────────┴────────┘

creating some example SQLite databases

Let’s use our previously demonstrated dkdc-todo and dkdc-lake packages to quickly create a couple SQLite databases with some tables in them:

# for idempotency
if os.path.exists("todo.db"):
    os.remove("todo.db")

todo = Todo(dbpath="todo.db")
for i in range(10):
    todo.append_todo(
        id=uuid(),
        user_id=None,
        subject=f"subject {i}",
        body=f"body {i}",
    )

We can confirm there’s some data:

todo.t()
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓
┃ idx                         id                        user_id  subject    body    priority  status  description  labels ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩
│ timestamp(6)stringstringstringstringint64stringstringarray… │
├────────────────────────────┼──────────────────────────┼─────────┼───────────┼────────┼──────────┼────────┼─────────────┼────────┤
│ 2024-11-27 22:59:35.373461173274837536515-1a8d263eNULLsubject 9body 9100NULLNULLNULL   │
│ 2024-11-27 22:59:35.354856173274837534667-d2415d59NULLsubject 8body 8100NULLNULLNULL   │
│ 2024-11-27 22:59:35.336503173274837532835-b76e629dNULLsubject 7body 7100NULLNULLNULL   │
│ 2024-11-27 22:59:35.317993173274837530991-f7d38852NULLsubject 6body 6100NULLNULLNULL   │
│ 2024-11-27 22:59:35.299746173274837529152-b606fbb9NULLsubject 5body 5100NULLNULLNULL   │
│ 2024-11-27 22:59:35.280810173274837527255-88483831NULLsubject 4body 4100NULLNULLNULL   │
│ 2024-11-27 22:59:35.262296173274837525379-392182c1NULLsubject 3body 3100NULLNULLNULL   │
│ 2024-11-27 22:59:35.243579173274837523532-cde646e7NULLsubject 2body 2100NULLNULLNULL   │
│ 2024-11-27 22:59:35.224873173274837521645-42ef320dNULLsubject 1body 1100NULLNULLNULL   │
│ 2024-11-27 22:59:35.205032173274837519609-bfd8f4fdNULLsubject 0body 0100NULLNULLNULL   │
└────────────────────────────┴──────────────────────────┴─────────┴───────────┴────────┴──────────┴────────┴─────────────┴────────┘

Repeat for the lake database:

# for idempotency
if os.path.exists("lake.db"):
    os.remove("lake.db")

lake = Lake(dbpath="lake.db")
for i in range(10):
    lake.append_file(
        user_id=None,
        path=None,
        filename=f"file_{i}.txt",
        filetype="txt",
        data=b"hi",
    )

And again confirm we have data:

lake.t()
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓
┃ idx                         id                        user_id  path    filename    filetype  data    version  status  description  labels ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩
│ timestamp(6)stringstringstringstringstringbinaryint64stringstringarray… │
├────────────────────────────┼──────────────────────────┼─────────┼────────┼────────────┼──────────┼────────┼─────────┼────────┼─────────────┼────────┤
│ 2024-11-27 22:59:35.711658173274837571166-f05b485bNULLNULLfile_9.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.679843173274837567984-f06163bcNULLNULLfile_8.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.648301173274837564830-7170a0cfNULLNULLfile_7.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.617413173274837561741-cf59e8d2NULLNULLfile_6.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.586614173274837558661-ff3a4162NULLNULLfile_5.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.556501173274837555650-d90ec35dNULLNULLfile_4.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.525899173274837552590-741e8642NULLNULLfile_3.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.494471173274837549447-311b5d47NULLNULLfile_2.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.464211173274837546421-ff9b623cNULLNULLfile_1.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.432711173274837543271-c8d04496NULLNULLfile_0.txttxt     b'hi'NULLNULLNULLNULL   │
└────────────────────────────┴──────────────────────────┴─────────┴────────┴────────────┴──────────┴────────┴─────────┴────────┴─────────────┴────────┘

file to database

Let’s check the current databases we have:

!ls | grep .db
db.db
lake.db
todo.db

And define some functions to read a file into bytes and insert it into the meta-database:

def file_to_bytes(filename: str) -> bytes:
    with open(filename, "rb") as f:
        return f.read()


def insert_file_to_db(filename: str) -> None:
    data = {
        "idx": [now()],
        "filename": [filename],
        "data": [file_to_bytes(filename)],
    }
    dbs.insert("dbs", data)

We can run this over our todo and lake databases, removing the files after copying the data:

for filename in ["todo.db", "lake.db"]:
    insert_file_to_db(filename)
    os.remove(filename)

And see that our meta-database now contains the data:

t
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ idx                         filename  data                                                                                                                                                                                                                                                                               ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ timestampstringbinary                                                                                                                                                                                                                                                                             │
├────────────────────────────┼──────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ 2024-11-27 22:59:36.013540todo.db b'SQLite format 3\x00\x10\x00\x01\x01\x00@  \x00\x00\x00\x0b\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'+8112 │
│ 2024-11-27 22:59:36.020486lake.db b'SQLite format 3\x00\x10\x00\x01\x01\x00@  \x00\x00\x00\x0b\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'+8112 │
└────────────────────────────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

We can also see that the original databases are gone:

!ls | grep .db
db.db

database to file

We can reverse the process to re-hydrate the todo and lake databases:

for filename in t["filename"].to_pyarrow().to_pylist():
    data = t.filter(t["filename"] == filename)["data"].to_pyarrow().to_pylist()[0]
    with open(filename, "wb") as f:
        f.write(data)

And confirm the data is back:

!ls | grep .db
db.db
lake.db
todo.db

We can then re-hydrate our Python objects:

todo = Todo(dbpath="todo.db")
todo.t()
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓
┃ idx                         id                        user_id  subject    body    priority  status  description  labels ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩
│ timestamp(6)stringstringstringstringint64stringstringarray… │
├────────────────────────────┼──────────────────────────┼─────────┼───────────┼────────┼──────────┼────────┼─────────────┼────────┤
│ 2024-11-27 22:59:35.373461173274837536515-1a8d263eNULLsubject 9body 9100NULLNULLNULL   │
│ 2024-11-27 22:59:35.354856173274837534667-d2415d59NULLsubject 8body 8100NULLNULLNULL   │
│ 2024-11-27 22:59:35.336503173274837532835-b76e629dNULLsubject 7body 7100NULLNULLNULL   │
│ 2024-11-27 22:59:35.317993173274837530991-f7d38852NULLsubject 6body 6100NULLNULLNULL   │
│ 2024-11-27 22:59:35.299746173274837529152-b606fbb9NULLsubject 5body 5100NULLNULLNULL   │
│ 2024-11-27 22:59:35.280810173274837527255-88483831NULLsubject 4body 4100NULLNULLNULL   │
│ 2024-11-27 22:59:35.262296173274837525379-392182c1NULLsubject 3body 3100NULLNULLNULL   │
│ 2024-11-27 22:59:35.243579173274837523532-cde646e7NULLsubject 2body 2100NULLNULLNULL   │
│ 2024-11-27 22:59:35.224873173274837521645-42ef320dNULLsubject 1body 1100NULLNULLNULL   │
│ 2024-11-27 22:59:35.205032173274837519609-bfd8f4fdNULLsubject 0body 0100NULLNULLNULL   │
└────────────────────────────┴──────────────────────────┴─────────┴───────────┴────────┴──────────┴────────┴─────────────┴────────┘
lake = Lake(dbpath="lake.db")
lake.t()
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┓
┃ idx                         id                        user_id  path    filename    filetype  data    version  status  description  labels ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━┩
│ timestamp(6)stringstringstringstringstringbinaryint64stringstringarray… │
├────────────────────────────┼──────────────────────────┼─────────┼────────┼────────────┼──────────┼────────┼─────────┼────────┼─────────────┼────────┤
│ 2024-11-27 22:59:35.711658173274837571166-f05b485bNULLNULLfile_9.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.679843173274837567984-f06163bcNULLNULLfile_8.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.648301173274837564830-7170a0cfNULLNULLfile_7.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.617413173274837561741-cf59e8d2NULLNULLfile_6.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.586614173274837558661-ff3a4162NULLNULLfile_5.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.556501173274837555650-d90ec35dNULLNULLfile_4.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.525899173274837552590-741e8642NULLNULLfile_3.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.494471173274837549447-311b5d47NULLNULLfile_2.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.464211173274837546421-ff9b623cNULLNULLfile_1.txttxt     b'hi'NULLNULLNULLNULL   │
│ 2024-11-27 22:59:35.432711173274837543271-c8d04496NULLNULLfile_0.txttxt     b'hi'NULLNULLNULLNULL   │
└────────────────────────────┴──────────────────────────┴─────────┴────────┴────────────┴──────────┴────────┴─────────┴────────┴─────────────┴────────┘

conclusion

Is this useful? Maybe. Packaging data into a single file can be useful. You’d probably want to add compression, encryption, and various other things for production.

Back to top