J
jeff_d_harper
I've run into a problem with text encoding in the Sqlite3 module. I
think it may be a bug. By default sqlite3 converts strings in the
database from UTF-8 to unicode. This conversion can be controlled by
changing the connection's text_factory.
I have a database that stores strings in 8-bit ISO-8859. So, I set
the text_factory to do no conversion. In my database I use user
defined functions. I noticed that even when I set text_factory =
lambda x:x, it appears to do UTF-8 to unicode conversion on strings
that are passed to my user defined function.
I've included a small program that illustrates the problem. It
creates a database and table in memory and then populates 2 rows. One
row contains an ASCII string. The other row contains a string with
the non-ascii string, "Tést".
Then, the program does an SQL select which calls the user-defined
function, my_func(). The resulting row tuples contain 8-bit strings.
But, my_func() is passed unicode strings. Notice, my_func is called
with None instead of "Tést". I suspect this is because the binary
representation of "Tést" is not valid UTF-8.
Is there a way to turn off the UTF-8 to unicode when my_func() is
called? Is this a bug or intended behavior?
import sqlite3
def create_table(dbase):
#dbase.execute(r"""PRAGMA encoding = "UTF-16le";""")
dbase.execute(r"""CREATE TABLE `my_table` ( 'id' INTEGER, 'column'
BLOB); """)
def add_rows(dbase):
c = dbase.cursor()
string1 = "Test"
string2 = "T\xe9st"
try:
print string1
c.execute(r"""INSERT INTO `my_table` ('id', 'column') VALUES
(?,?)""", (1,string1))
print string2
c.execute(r"""INSERT INTO `my_table` ('id', 'column') VALUES
(?,?)""", (2,string2,))
finally:
c.close()
def select_rows(dbase):
c = dbase.cursor()
try:
c.execute(r"""SELECT *, my_func(`column`) FROM `my_table`""")
for row in c:
print row
finally:
c.close()
def factory(x):
print 'x =', x
return x
def my_func(p):
print 'my_func(%r) type = %s' % (p,type(p))
def my_test():
db_path = ":memory:"
try:
os.remove(db_path)
except:
pass
dbase = sqlite3.connect(db_path)
dbase.text_factory = lambda x:x
dbase.create_function('my_func', 1, my_func)
try:
create_table(dbase)
add_rows(dbase)
select_rows(dbase)
finally:
dbase.commit()
dbase.close()
my_test()
think it may be a bug. By default sqlite3 converts strings in the
database from UTF-8 to unicode. This conversion can be controlled by
changing the connection's text_factory.
I have a database that stores strings in 8-bit ISO-8859. So, I set
the text_factory to do no conversion. In my database I use user
defined functions. I noticed that even when I set text_factory =
lambda x:x, it appears to do UTF-8 to unicode conversion on strings
that are passed to my user defined function.
I've included a small program that illustrates the problem. It
creates a database and table in memory and then populates 2 rows. One
row contains an ASCII string. The other row contains a string with
the non-ascii string, "Tést".
Then, the program does an SQL select which calls the user-defined
function, my_func(). The resulting row tuples contain 8-bit strings.
But, my_func() is passed unicode strings. Notice, my_func is called
with None instead of "Tést". I suspect this is because the binary
representation of "Tést" is not valid UTF-8.
Is there a way to turn off the UTF-8 to unicode when my_func() is
called? Is this a bug or intended behavior?
import sqlite3
def create_table(dbase):
#dbase.execute(r"""PRAGMA encoding = "UTF-16le";""")
dbase.execute(r"""CREATE TABLE `my_table` ( 'id' INTEGER, 'column'
BLOB); """)
def add_rows(dbase):
c = dbase.cursor()
string1 = "Test"
string2 = "T\xe9st"
try:
print string1
c.execute(r"""INSERT INTO `my_table` ('id', 'column') VALUES
(?,?)""", (1,string1))
print string2
c.execute(r"""INSERT INTO `my_table` ('id', 'column') VALUES
(?,?)""", (2,string2,))
finally:
c.close()
def select_rows(dbase):
c = dbase.cursor()
try:
c.execute(r"""SELECT *, my_func(`column`) FROM `my_table`""")
for row in c:
print row
finally:
c.close()
def factory(x):
print 'x =', x
return x
def my_func(p):
print 'my_func(%r) type = %s' % (p,type(p))
def my_test():
db_path = ":memory:"
try:
os.remove(db_path)
except:
pass
dbase = sqlite3.connect(db_path)
dbase.text_factory = lambda x:x
dbase.create_function('my_func', 1, my_func)
try:
create_table(dbase)
add_rows(dbase)
select_rows(dbase)
finally:
dbase.commit()
dbase.close()
my_test()