1
0
mirror of https://github.com/spl0k/supysonic.git synced 2024-12-23 01:16:18 +00:00

Index folder and track path

Speeds up scanning
This commit is contained in:
spl0k 2018-03-17 23:25:26 +01:00
parent e8d0d4b6b6
commit 6ac969e2c5
8 changed files with 366 additions and 245 deletions

View File

@ -0,0 +1,13 @@
START TRANSACTION;
ALTER TABLE folder ADD COLUMN path_hash BINARY(20) NOT NULL AFTER path;
ALTER TABLE track ADD COLUMN path_hash BINARY(20) NOT NULL AFTER path;
UPDATE folder SET path_hash=UNHEX(SHA1(path));
UPDATE track SET path_hash=UNHEX(SHA1(path));
CREATE UNIQUE INDEX index_folder_path ON folder(path_hash);
CREATE UNIQUE INDEX index_track_path ON track(path_hash);
COMMIT;

View File

@ -0,0 +1,34 @@
import argparse
import hashlib
import psycopg2
import uuid
try:
bytes = buffer
except:
pass
parser = argparse.ArgumentParser()
parser.add_argument('username')
parser.add_argument('password')
parser.add_argument('database')
parser.add_argument('-H', '--host', default = 'localhost', help = 'default: localhost')
args = parser.parse_args()
def process_table(connection, table):
c = connection.cursor()
c.execute(r"ALTER TABLE {0} ADD COLUMN path_hash BYTEA NOT NULL DEFAULT E'\\0000'".format(table))
hashes = dict()
c.execute('SELECT path FROM {0}'.format(table))
for row in c.fetchall():
hashes[row[0]] = hashlib.sha1(row[0].encode('utf-8')).digest()
c.executemany('UPDATE {0} SET path_hash=%s WHERE path=%s'.format(table), [ (bytes(h), p) for p, h in hashes.items() ])
c.execute('CREATE UNIQUE INDEX index_{0}_path ON {0}(path_hash)'.format(table))
with psycopg2.connect(host = args.host, user = args.username, password = args.password, dbname = args.database) as conn:
process_table(conn, 'folder')
process_table(conn, 'track')

View File

@ -0,0 +1,31 @@
import argparse
import hashlib
import sqlite3
import uuid
try:
bytes = buffer
except:
pass
parser = argparse.ArgumentParser()
parser.add_argument('dbfile', help = 'Path to the SQLite database file')
args = parser.parse_args()
def process_table(connection, table):
c = connection.cursor()
c.execute('ALTER TABLE {0} ADD COLUMN path_hash BLOB NOT NULL DEFAULT ROWID'.format(table))
hashes = dict()
for row in c.execute('SELECT path FROM {0}'.format(table)):
hashes[row[0]] = hashlib.sha1(row[0].encode('utf-8')).digest()
c.executemany('UPDATE {0} SET path_hash=? WHERE path=?'.format(table), [ (bytes(h), p) for p, h in hashes.items() ])
c.execute('CREATE UNIQUE INDEX index_{0}_path ON {0}(path_hash)'.format(table))
with sqlite3.connect(args.dbfile) as conn:
process_table(conn, 'folder')
process_table(conn, 'track')
conn.cursor().execute('VACUUM')

View File

@ -3,12 +3,15 @@ CREATE TABLE folder (
root BOOLEAN NOT NULL, root BOOLEAN NOT NULL,
name VARCHAR(256) NOT NULL, name VARCHAR(256) NOT NULL,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BINARY(20) NOT NULL,
created DATETIME NOT NULL, created DATETIME NOT NULL,
has_cover_art BOOLEAN NOT NULL, has_cover_art BOOLEAN NOT NULL,
last_scan INTEGER NOT NULL, last_scan INTEGER NOT NULL,
parent_id BINARY(16) REFERENCES folder parent_id BINARY(16) REFERENCES folder
) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; ) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE UNIQUE INDEX index_folder_path ON folder(path_hash);
CREATE TABLE artist ( CREATE TABLE artist (
id BINARY(16) PRIMARY KEY, id BINARY(16) PRIMARY KEY,
name VARCHAR(256) NOT NULL name VARCHAR(256) NOT NULL
@ -32,6 +35,7 @@ CREATE TABLE track (
artist_id BINARY(16) NOT NULL REFERENCES artist, artist_id BINARY(16) NOT NULL REFERENCES artist,
bitrate INTEGER NOT NULL, bitrate INTEGER NOT NULL,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BINARY(20) NOT NULL,
content_type VARCHAR(32) NOT NULL, content_type VARCHAR(32) NOT NULL,
created DATETIME NOT NULL, created DATETIME NOT NULL,
last_modification INTEGER NOT NULL, last_modification INTEGER NOT NULL,
@ -41,6 +45,8 @@ CREATE TABLE track (
folder_id BINARY(16) NOT NULL REFERENCES folder folder_id BINARY(16) NOT NULL REFERENCES folder
) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; ) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE UNIQUE INDEX index_track_path ON track(path_hash);
CREATE TABLE user ( CREATE TABLE user (
id BINARY(16) PRIMARY KEY, id BINARY(16) PRIMARY KEY,
name VARCHAR(64) NOT NULL, name VARCHAR(64) NOT NULL,

View File

@ -3,12 +3,15 @@ CREATE TABLE folder (
root BOOLEAN NOT NULL, root BOOLEAN NOT NULL,
name CITEXT NOT NULL, name CITEXT NOT NULL,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BYTEA NOT NULL,
created TIMESTAMP NOT NULL, created TIMESTAMP NOT NULL,
has_cover_art BOOLEAN NOT NULL, has_cover_art BOOLEAN NOT NULL,
last_scan INTEGER NOT NULL, last_scan INTEGER NOT NULL,
parent_id UUID REFERENCES folder parent_id UUID REFERENCES folder
); );
CREATE UNIQUE INDEX index_folder_path ON folder(path_hash);
CREATE TABLE artist ( CREATE TABLE artist (
id UUID PRIMARY KEY, id UUID PRIMARY KEY,
name CITEXT NOT NULL name CITEXT NOT NULL
@ -32,6 +35,7 @@ CREATE TABLE track (
artist_id UUID NOT NULL REFERENCES artist, artist_id UUID NOT NULL REFERENCES artist,
bitrate INTEGER NOT NULL, bitrate INTEGER NOT NULL,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BYTEA NOT NULL,
content_type VARCHAR(32) NOT NULL, content_type VARCHAR(32) NOT NULL,
created TIMESTAMP NOT NULL, created TIMESTAMP NOT NULL,
last_modification INTEGER NOT NULL, last_modification INTEGER NOT NULL,
@ -41,6 +45,8 @@ CREATE TABLE track (
folder_id UUID NOT NULL REFERENCES folder folder_id UUID NOT NULL REFERENCES folder
); );
CREATE UNIQUE INDEX index_track_path ON track(path_hash);
CREATE TABLE "user" ( CREATE TABLE "user" (
id UUID PRIMARY KEY, id UUID PRIMARY KEY,
name VARCHAR(64) NOT NULL, name VARCHAR(64) NOT NULL,

View File

@ -3,12 +3,15 @@ CREATE TABLE folder (
root BOOLEAN NOT NULL, root BOOLEAN NOT NULL,
name VARCHAR(256) NOT NULL COLLATE NOCASE, name VARCHAR(256) NOT NULL COLLATE NOCASE,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BLOB NOT NULL,
created DATETIME NOT NULL, created DATETIME NOT NULL,
has_cover_art BOOLEAN NOT NULL, has_cover_art BOOLEAN NOT NULL,
last_scan INTEGER NOT NULL, last_scan INTEGER NOT NULL,
parent_id CHAR(36) REFERENCES folder parent_id CHAR(36) REFERENCES folder
); );
CREATE UNIQUE INDEX index_folder_path ON folder(path_hash);
CREATE TABLE artist ( CREATE TABLE artist (
id CHAR(36) PRIMARY KEY, id CHAR(36) PRIMARY KEY,
name VARCHAR(256) NOT NULL COLLATE NOCASE name VARCHAR(256) NOT NULL COLLATE NOCASE
@ -32,6 +35,7 @@ CREATE TABLE track (
artist_id CHAR(36) NOT NULL REFERENCES artist, artist_id CHAR(36) NOT NULL REFERENCES artist,
bitrate INTEGER NOT NULL, bitrate INTEGER NOT NULL,
path VARCHAR(4096) NOT NULL, path VARCHAR(4096) NOT NULL,
path_hash BLOB NOT NULL,
content_type VARCHAR(32) NOT NULL, content_type VARCHAR(32) NOT NULL,
created DATETIME NOT NULL, created DATETIME NOT NULL,
last_modification INTEGER NOT NULL, last_modification INTEGER NOT NULL,
@ -41,6 +45,8 @@ CREATE TABLE track (
folder_id CHAR(36) NOT NULL REFERENCES folder folder_id CHAR(36) NOT NULL REFERENCES folder
); );
CREATE UNIQUE INDEX index_track_path ON track(path_hash);
CREATE TABLE user ( CREATE TABLE user (
id CHAR(36) PRIMARY KEY, id CHAR(36) PRIMARY KEY,
name VARCHAR(64) NOT NULL, name VARCHAR(64) NOT NULL,

View File

@ -12,8 +12,10 @@ import mimetypes
import os.path import os.path
from datetime import datetime from datetime import datetime
from hashlib import sha1
from pony.orm import Database, Required, Optional, Set, PrimaryKey, LongStr from pony.orm import Database, Required, Optional, Set, PrimaryKey, LongStr
from pony.orm import ObjectNotFound from pony.orm import ObjectNotFound
from pony.orm import buffer
from pony.orm import min, max, avg, sum, exists from pony.orm import min, max, avg, sum, exists
from uuid import UUID, uuid4 from uuid import UUID, uuid4
@ -29,13 +31,33 @@ def now():
db = Database() db = Database()
class Folder(db.Entity): class PathMixin(object):
@classmethod
def get(cls, *args, **kwargs):
if kwargs:
path = kwargs.pop('path', None)
if path:
kwargs['_path_hash'] = sha1(path.encode('utf-8')).digest()
return db.Entity.get.__func__(cls, *args, **kwargs)
def __init__(self, *args, **kwargs):
path = kwargs['path']
kwargs['_path_hash'] = sha1(path.encode('utf-8')).digest()
db.Entity.__init__(self, *args, **kwargs)
def __setattr__(self, attr, value):
db.Entity.__setattr__(self, attr, value)
if attr == 'path':
db.Entity.__setattr__(self, '_path_hash', sha1(value.encode('utf-8')).digest())
class Folder(PathMixin, db.Entity):
_table_ = 'folder' _table_ = 'folder'
id = PrimaryKey(UUID, default = uuid4) id = PrimaryKey(UUID, default = uuid4)
root = Required(bool, default = False) root = Required(bool, default = False)
name = Required(str) name = Required(str)
path = Required(str, 4096) # unique path = Required(str, 4096) # unique
_path_hash = Required(buffer, column = 'path_hash')
created = Required(datetime, precision = 0, default = now) created = Required(datetime, precision = 0, default = now)
has_cover_art = Required(bool, default = False) has_cover_art = Required(bool, default = False)
last_scan = Required(int, default = 0) last_scan = Required(int, default = 0)
@ -160,7 +182,7 @@ class Album(db.Entity):
def prune(cls): def prune(cls):
return cls.select(lambda self: not exists(t for t in Track if t.album == self)).delete(bulk = True) return cls.select(lambda self: not exists(t for t in Track if t.album == self)).delete(bulk = True)
class Track(db.Entity): class Track(PathMixin, db.Entity):
_table_ = 'track' _table_ = 'track'
id = PrimaryKey(UUID, default = uuid4) id = PrimaryKey(UUID, default = uuid4)
@ -177,6 +199,7 @@ class Track(db.Entity):
bitrate = Required(int) bitrate = Required(int)
path = Required(str, 4096) # unique path = Required(str, 4096) # unique
_path_hash = Required(buffer, column = 'path_hash')
content_type = Required(str) content_type = Required(str)
created = Required(datetime, precision = 0, default = now) created = Required(datetime, precision = 0, default = now)
last_modification = Required(int) last_modification = Required(int)

View File

@ -17,6 +17,7 @@ import time
import unittest import unittest
from contextlib import contextmanager from contextlib import contextmanager
from hashlib import sha1
from pony.orm import db_session from pony.orm import db_session
from threading import Thread from threading import Thread
@ -171,6 +172,7 @@ class WatcherTestCase(WatcherTestBase):
self.assertIsNotNone(track) self.assertIsNotNone(track)
self.assertNotEqual(track.path, path) self.assertNotEqual(track.path, path)
self.assertEqual(track.path, newpath) self.assertEqual(track.path, newpath)
self.assertEqual(track._path_hash, memoryview(sha1(newpath.encode('utf-8')).digest()))
self.assertEqual(track.id, trackid) self.assertEqual(track.id, trackid)
def test_move_in(self): def test_move_in(self):