db/sql/ PythonMySqlBible
contents
This simple example takes the text of the World English Bible from gutenberg (https://www.gutenberg.org/cache/epub/8294/pg8294.txt), and puts the book names and verses into two tables.
A sample of the text is as follows:
Book 01 Genesis
001:001 In the beginning God{After "God," the Hebrew has the two letters
"Aleph Tav" (the first and last letters of the Hebrew alphabet)
as a grammatical marker.} created the heavens and the earth.
001:002 Now the earth was formless and empty. Darkness was on the surface
of the deep. God's Spirit was hovering over the surface
of the waters.
Before we find a line of the form
Book nn xyz...
we skip everything.
Once we see a line of this form, we start a new book. A book consists of a number, a name, and an array of chapters. A chapter is an array of verses. A verse is a string.
There are now four types of line we are interested in, and including blank lines that gives us five regular expressions:
re_ws = re.compile(r"^\s*$") # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse()
re_cont_verse = re.compile(r"^ (.*)$") # -> cont_verse()
re_end = re.compile(r"^\*\*\* END") # -> finish()
The script is as follows:
#!/usr/bin/env python3
import mysql.connector
import re
re_ws = re.compile(r"^\s*$") # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse()
re_cont_verse = re.compile(r"^ (.*)$") # -> cont_verse()
re_end = re.compile(r"^\*\*\* END") # -> finish()
started = False
books = []
conn = None
cursor = None
chapter_number = -1
def open_db():
global conn, cursor
conn = mysql.connector.connect(
host="localhost",
user="mrflibble",
password="turnip",
database="bible"
)
cursor = conn.cursor()
def close_db():
conn.commit()
conn.close()
class Finished(Exception):
pass
def process_line(line):
global started
if not started:
if re_book.match(line):
started = True
else:
return
if re_ws.match(line):
return
elif m := re_book.match(line):
numstr,name = m.groups()
num = int(numstr)
new_book(num,name)
elif m := re_new_verse.match(line):
ch,vs,text = m.groups()
ch = int(ch)
vs = int(vs)
new_verse(ch,vs,text)
elif m := re_cont_verse.match(line):
(text,) = m.groups()
cont_verse(text)
elif re_end.match(line):
raise Finished()
def new_book(num,name):
global chapter_number
print(f": {num} {name}")
books.append({"number":num,"name":name,"chapters":[]})
chapter_number = 0
def new_verse(chnum,vnum,text):
global chapter_number
if chnum > chapter_number:
books[-1]['chapters'].append([])
chapter_number = chnum
books[-1]['chapters'][-1].append(text)
def cont_verse(text):
books[-1]['chapters'][-1][-1]+=" "+text
def process_file(lines):
try:
for line in lines:
process_line(line)
except Finished:
print("Done")
return
def process_data():
for book in books:
number, name, chapters = book['number'], book['name'], book['chapters']
cursor.execute("INSERT INTO book_names (book_number, book_name) VALUES (%s,%s)",(number,name))
for i,chapter in enumerate(chapters):
for j,verse in enumerate(chapter):
cursor.execute("INSERT INTO verses (book_number, chapter_number, verse_number, verse_text) VALUES (%s,%s,%s,%s)",(number,i+1,j+1,verse))
print(f"{name} done")
def create_tables():
cursor.execute("DROP TABLE IF EXISTS book_names")
cursor.execute("DROP TABLE IF EXISTS verses")
cursor.execute("CREATE TABLE book_names (book_number integer, book_name varchar(255) )")
cursor.execute("CREATE TABLE verses (id integer auto_increment primary key, book_number integer, chapter_number integer, verse_number integer, verse_text text)")
def main():
open_db()
with open("web_bible.txt") as f:
lines = f.read().rstrip().split("\n")
print("Read data")
process_file(lines)
print("Processed lines")
create_tables()
process_data()
close_db()
if __name__ == "__main__":
main()
Removing notes
Notes are contained in braces .... To remove these, we fetch all rows whose verse_text
contains {{
, use a regular expression to remove the notes, and update.
#!/usr/bin/env python3
import mysql.connector, re
conn = mysql.connector.connect(
host = "localhost",
user = "mrflibble",
password = "turnip",
database = "bible"
)
cursor = conn.cursor()
cursor.execute("SELECT id,verse_text FROM verses WHERE verse_text LIKE '%{%'")
rows = cursor.fetchall()
for row in rows:
verse_id, verse_text = row
new_verse_text = re.sub(r"\{.*?\}","",verse_text)
cursor.execute("UPDATE verses SET verse_text = %s WHERE id = %s",(new_verse_text,verse_id))
print(verse_id)
conn.commit()
Full Text Search
Enable when creating
CREATE TABLE (id INTEGER AUTO_INCREMENT PRIMARY KEY, text_field TEXT, FULLTEXT KEY (text_field) );
or for a pre-existing table
ALTER TABLE verses ADD FULLTEXT(verse_text);
The script:
#!/usr/bin/env python3
import mysql.connector
conn = mysql.connector.connect(
host = "localhost",
user = "mrflibble",
password = "turnip",
database = "bible"
)
cursor = conn.cursor()
cursor.execute("ALTER TABLE verses ADD FULLTEXT(verse_text)")
conn.commit()
Then search via
#!/usr/bin/env python3
import mysql.connector
conn = mysql.connector.connect(
host = "localhost",
user = "john",
password = "tree",
database = "bible"
)
cursor = conn.cursor()
cursor.execute(
"""SELECT book_name,chapter_number,verse_number,verse_text
FROM book_names JOIN verses ON book_names.book_number = verses.book_number
WHERE MATCH( verse_text ) AGAINST ('Jesus' IN NATURAL LANGUAGE MODE)""")
rows = cursor.fetchall()
for row in rows:
book_name,chapter,verse,text = row
print(f"{book_name} {chapter}:{verse} {text}")