db/sql/ PythonMySqlBible


contents

This simple example takes the text of the World English Bible from gutenberg (https://www.gutenberg.org/cache/epub/8294/pg8294.txt), and puts the book names and verses into two tables.

A sample of the text is as follows:

Book 01 Genesis
001:001 In the beginning God{After "God," the Hebrew has the two letters
        "Aleph Tav" (the first and last letters of the Hebrew alphabet)
        as a grammatical marker.} created the heavens and the earth.
001:002 Now the earth was formless and empty.  Darkness was on the surface
        of the deep.  God's Spirit was hovering over the surface
        of the waters.

Before we find a line of the form

Book nn xyz...

we skip everything.

Once we see a line of this form, we start a new book. A book consists of a number, a name, and an array of chapters. A chapter is an array of verses. A verse is a string.

There are now four types of line we are interested in, and including blank lines that gives us five regular expressions:

re_ws = re.compile(r"^\s*$")                          # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$")            # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$")  # -> new_verse()
re_cont_verse = re.compile(r"^        (.*)$")         # -> cont_verse()
re_end = re.compile(r"^\*\*\* END")                   # -> finish()

The script is as follows:

#!/usr/bin/env python3

import mysql.connector
import re

re_ws = re.compile(r"^\s*$") # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse()
re_cont_verse = re.compile(r"^        (.*)$") # -> cont_verse()
re_end = re.compile(r"^\*\*\* END") # -> finish()

started = False
books = []
conn = None
cursor = None
chapter_number = -1

def open_db():
  global conn, cursor
  conn = mysql.connector.connect(
    host="localhost",
    user="mrflibble",
    password="turnip",
    database="bible"
  )
  cursor = conn.cursor()  

def close_db():
  conn.commit()
  conn.close()

class Finished(Exception):
  pass

def process_line(line):
  global started
  if not started:
    if re_book.match(line):
      started = True
    else:
      return
  if re_ws.match(line):
    return
  elif m := re_book.match(line):
    numstr,name = m.groups()
    num = int(numstr)
    new_book(num,name)    
  elif m := re_new_verse.match(line):
    ch,vs,text = m.groups()
    ch = int(ch)
    vs = int(vs)
    new_verse(ch,vs,text)
  elif m := re_cont_verse.match(line):
    (text,) = m.groups()
    cont_verse(text)
  elif re_end.match(line):
    raise Finished()

def new_book(num,name):
  global chapter_number
  print(f": {num} {name}")
  books.append({"number":num,"name":name,"chapters":[]})
  chapter_number = 0

def new_verse(chnum,vnum,text):
  global chapter_number
  if chnum > chapter_number:
    books[-1]['chapters'].append([])
    chapter_number = chnum
  books[-1]['chapters'][-1].append(text)

def cont_verse(text):
  books[-1]['chapters'][-1][-1]+=" "+text

def process_file(lines):
  try:
    for line in lines:
      process_line(line)
  except Finished:
    print("Done")
    return

def process_data():
  for book in books:
    number, name, chapters = book['number'], book['name'], book['chapters']
    cursor.execute("INSERT INTO book_names (book_number, book_name) VALUES (%s,%s)",(number,name))
    for i,chapter in enumerate(chapters):
      for j,verse in enumerate(chapter):
        cursor.execute("INSERT INTO verses (book_number, chapter_number, verse_number, verse_text) VALUES (%s,%s,%s,%s)",(number,i+1,j+1,verse))
    print(f"{name} done")

def create_tables():
  cursor.execute("DROP TABLE IF EXISTS book_names")
  cursor.execute("DROP TABLE IF EXISTS verses")

  cursor.execute("CREATE TABLE book_names (book_number integer, book_name varchar(255) )")
  cursor.execute("CREATE TABLE verses (id integer auto_increment primary key, book_number integer, chapter_number integer, verse_number integer, verse_text text)")

def main():
  open_db()
  with open("web_bible.txt") as f:
    lines = f.read().rstrip().split("\n")
    print("Read data")
    process_file(lines)
  print("Processed lines")
  create_tables()
  process_data()
  close_db()

if __name__ == "__main__":
  main()

Removing notes

Notes are contained in braces .... To remove these, we fetch all rows whose verse_text contains {{, use a regular expression to remove the notes, and update.

#!/usr/bin/env python3

import mysql.connector, re
conn = mysql.connector.connect(
  host = "localhost",
  user = "mrflibble",
  password = "turnip",
  database = "bible"
)

cursor = conn.cursor()

cursor.execute("SELECT id,verse_text FROM verses WHERE verse_text LIKE '%{%'")
rows = cursor.fetchall()
for row in rows:
    verse_id, verse_text = row
    new_verse_text = re.sub(r"\{.*?\}","",verse_text)
    cursor.execute("UPDATE verses SET verse_text = %s WHERE id = %s",(new_verse_text,verse_id))
    print(verse_id)
conn.commit()

Full Text Search

Enable when creating

CREATE TABLE (id INTEGER AUTO_INCREMENT PRIMARY KEY, text_field TEXT, FULLTEXT KEY (text_field) );

or for a pre-existing table

ALTER TABLE verses ADD FULLTEXT(verse_text);

The script:

#!/usr/bin/env python3

import mysql.connector

conn = mysql.connector.connect(
  host = "localhost",
  user = "mrflibble",
  password = "turnip",
  database = "bible"
)
cursor = conn.cursor()

cursor.execute("ALTER TABLE verses ADD FULLTEXT(verse_text)")
conn.commit()

Then search via

#!/usr/bin/env python3

import mysql.connector
conn = mysql.connector.connect(
  host = "localhost",
  user = "john",
  password = "tree",
  database = "bible"
)

cursor = conn.cursor()

cursor.execute(
"""SELECT book_name,chapter_number,verse_number,verse_text 
     FROM book_names JOIN verses ON book_names.book_number = verses.book_number 
     WHERE MATCH( verse_text ) AGAINST ('Jesus' IN NATURAL LANGUAGE MODE)""")

rows = cursor.fetchall()

for row in rows:
  book_name,chapter,verse,text = row
  print(f"{book_name} {chapter}:{verse} {text}")