root/HTTP/Finder.lua

Revision 1465 (checked in by rsz, 21 hours ago)

cleanup

Line 
1 --------------------------------------------------------------------------------
2 -- Title:               Finder.lua
3 -- Description:         Like a square peg in a round hole
4 -- Author:              Raphaël Szwarc http://alt.textdrive.com/lua/
5 -- Creation Date:       January 30, 2007
6 -- Legal:               Copyright (C) 2007 Raphaël Szwarc
7 --                      Under the terms of the MIT License
8 --                      http://www.opensource.org/licenses/mit-license.html
9 --------------------------------------------------------------------------------
10
11 -- import dependencies
12 local Template = require( 'Template' )
13
14 local io = require( 'io' )
15 local string = require( 'string' )
16
17 local assert = assert
18 local error = error
19 local getmetatable = getmetatable
20 local ipairs = ipairs
21 local loadstring = loadstring
22 local pairs = pairs
23 local pcall = pcall
24 local require = require
25 local setmetatable = setmetatable
26 local tonumber = tonumber
27 local tostring = tostring
28 local type = type
29
30 --------------------------------------------------------------------------------
31 -- Finder
32 --------------------------------------------------------------------------------
33
module( 'Finder' )
_VERSION = '1.0'

-- 'self' is the module table itself; 'meta' is the metatable attached to it,
-- which receives the module-level metamethods defined further down.
local meta = {}
local self = setmetatable( _M, meta )
39
40 --------------------------------------------------------------------------------
41 -- DDL
42 --------------------------------------------------------------------------------
43
-- Builds the path of a resource file belonging to this module.
-- The result is the value returned by calling the 'Bundle' module
-- (presumably a directory prefix -- verify against Bundle), followed by the
-- module name, a dot and the given extension.
local function Path( anExtension )
    local aBundle = require( 'Bundle' )
    local aPrefix = aBundle()

    return string.format( '%s%s.%s', aPrefix, self._NAME, anExtension )
end
47
-- Loads the resource file with the given extension and evaluates its content
-- as the body of a Lua table constructor.
-- anExtension: file extension of the resource, e.g. 'ddl' or 'dml'.
-- Returns the table produced by evaluating 'return { <file content> }'.
-- Raises an error if the file cannot be opened or read, if its content does
-- not compile, or if the compiled chunk returns a false value.
local function Load( anExtension )
    local aPath = Path( anExtension )
    local aFile = assert( io.open( aPath, 'rb' ) )
    local aContent, anError = aFile:read( '*a' )

    -- release the handle before anything else can raise, so that a read or
    -- compile failure does not leak the open file
    aFile:close()

    aContent = assert( aContent, anError )

    -- name the chunk after the file so that syntax errors point at the file
    -- instead of echoing the source text
    local aSource = ( 'return { %s }' ):format( aContent )
    local aChunk = assert( loadstring( aSource, '@' .. aPath ) )
    local aStatement = assert( aChunk() )

    return aStatement
end
60
-- Number of partitions attached to each database; partition names are
-- derived from the numbers 1..PartitionCount.
local PartitionCount = 10

-- Statement tables loaded once, when the module is first required:
-- DDL is walked as a sequence of statement templates, DML is indexed by
-- statement name.
local DDL = Load( 'ddl' )
local DML = Load( 'dml' )
64
65 --------------------------------------------------------------------------------
66 -- DB
67 --------------------------------------------------------------------------------
68
-- Completes a protected call on behalf of Try.
-- On success, forwards every result of the wrapped function.
-- On failure, attempts a rollback and re-raises the original error.
local function TryResult( aDB, ok, ... )
    if ok then
        return ...
    end

    -- best effort: a failing rollback must not mask the original error
    pcall( function() aDB( DML[ 'Rollback' ] ) end )

    -- level 0 re-raises the error value untouched, instead of prefixing the
    -- message with the position of this function
    error( ( ... ), 0 )
end

-- Calls aFunction( aDB, ... ) in protected mode.
-- aFunction: the operation to run.
-- aDB: the database handle, passed as first argument to aFunction and used
--      to issue the 'Rollback' statement on failure.
-- Returns all the values returned by aFunction.
-- Raises the error raised by aFunction, after attempting a rollback.
local function Try( aFunction, aDB, ... )
    return TryResult( aDB, pcall( aFunction, aDB, ... ) )
end
80
81
-- Decodes a hexadecimal string: each pair of hexadecimal digits is replaced
-- by the byte it encodes. Any other character is left untouched.
-- aString: the hexadecimal text to decode.
-- Returns the decoded string, and nothing else.
local function HexDecode( aString )
    local aCoder = function( aValue )
        return string.char( tonumber( aValue, 16 ) )
    end

    -- the parentheses discard gsub's second result (the substitution count),
    -- which would otherwise leak to callers as an extra return value
    return ( aString:gsub( '(%x%x)', aCoder ) )
end
89
-- Returns the name of the partition with the given number: the letter 'P'
-- followed by the number, zero-padded to two digits.
local function PartitionName( aNumber )
    local aName = string.format( 'P%02d', aNumber )

    return aName
end
93
-- Maps a document name to the name of the partition holding it.
-- The first four bytes of the hex-decoded crypto.sha1 value of the name
-- (presumably a hexadecimal digest -- verify against the crypto module) are
-- read as a big-endian number, which is reduced to 1..PartitionCount.
local function Partition( aDocument )
    local aDigest = HexDecode( require( 'crypto' ).sha1( aDocument ) )
    local b1, b2, b3, b4 = aDigest:byte( 1, 4 )
    local aNumber = ( ( b1 * 256 + b2 ) * 256 + b3 ) * 256 + b4

    return PartitionName( aNumber % PartitionCount + 1 )
end
107
-- Returns the path component of the given URL as a string, after clearing
-- the path's 'absolute' flag.
local function PartitionPath( aURL )
    local aPath = require( 'URL' )( aURL ).path

    aPath.absolute = false

    return tostring( aPath )
end
117
-- Opens the database at the given URL and prepares its partitions.
-- For each partition number in 1..PartitionCount, the 'Attach' statement is
-- executed with the partition's path and name, followed by every DDL
-- statement, templated with the partition name.
-- Returns the database handle.
local function NewDB( aURL )
    local aDB = require( 'DB' )( aURL )
    local aPrefix = PartitionPath( aURL )

    for aNumber = 1, PartitionCount do
        local anAttach = Template( DML[ 'Attach' ] )
        local aName = PartitionName( aNumber )

        -- the partition file path is the URL path plus the two-digit number
        anAttach[ 'path' ] = ( '%s%02d' ):format( aPrefix, aNumber )
        anAttach[ 'name' ] = aName

        aDB( anAttach )

        for _, aDefinition in ipairs( DDL ) do
            local aStatement = Template( aDefinition )

            aStatement[ 'partition' ] = aName

            aDB( aStatement )
        end
    end

    return aDB
end
143
144 --------------------------------------------------------------------------------
145 -- Token
146 --------------------------------------------------------------------------------
147
-- Collects the distinct tokens produced by an iterator, in order of first
-- appearance, stopping once 16 tokens have been collected.
-- anIterator: a function usable in a generic 'for', yielding index, token.
-- Returns a sequence of at most 16 distinct tokens.
local function TokenList( anIterator )
    local aSeen = {}
    local someTokens = {}
    local aCount = 0

    for _, aToken in anIterator do
        if aSeen[ aToken ] == nil then
            aCount = aCount + 1
            someTokens[ aCount ] = aToken
            aSeen[ aToken ] = true

            if aCount == 16 then
                break
            end
        end
    end

    return someTokens
end
166
-- Counts the occurrences of each token produced by an iterator.
-- anIterator: a function usable in a generic 'for', yielding index, token.
-- Returns a table mapping each token to its number of occurrences.
local function TokenMap( anIterator )
    local someCounts = {}

    for _, aToken in anIterator do
        local aCount = someCounts[ aToken ]

        if aCount then
            someCounts[ aToken ] = aCount + 1
        else
            someCounts[ aToken ] = 1
        end
    end

    return someCounts
end
176
-- Returns the smallest string that sorts, byte-wise, after every string
-- starting with aValue: the last byte is incremented by one.
-- NOTE(review): FindDocument stores the result next to the token itself,
-- presumably as the exclusive upper bound of a prefix range -- confirm
-- against the 'InsertStage' and 'FindDocument' statements.
-- aValue: the token.
-- Returns the bounding string.
-- Raises an error if aValue is empty or made only of 0xFF bytes, as no such
-- bound exists.
local function TokenRange( aValue )
    local aLength = aValue:len()

    -- a trailing 0xFF byte cannot be incremented: drop it and carry over to
    -- the preceding byte
    while aLength > 0 and aValue:byte( aLength ) == 255 do
        aLength = aLength - 1
    end

    if aLength == 0 then
        error( ( 'No upper bound for token %q' ):format( aValue ) )
    end

    local aLastChar = string.char( aValue:byte( aLength ) + 1 )

    return aValue:sub( 1, aLength - 1 ) .. aLastChar
end
184
185 --------------------------------------------------------------------------------
186 -- DML
187 --------------------------------------------------------------------------------
188
-- Indexes a document under the tokens produced by an iterator, within a
-- single transaction.
-- aDB: the database handle.
-- aDocument: the document name; it also selects the partition.
-- anIterator: a token iterator, consumed through TokenMap.
-- The stage is emptied and refilled with one row per distinct token and its
-- count, then the partition statements run in this order: 'InsertDocument',
-- 'InsertToken', 'DeleteDocumentToken', 'InsertDocumentToken'.
local function InsertDocument( aDB, aDocument, anIterator )
    local aPartition = Partition( aDocument )

    -- executes the named statement against the document's partition
    local function Run( aName, ... )
        local aStatement = Template( DML[ aName ] )

        aStatement[ 'partition' ] = aPartition

        aDB( aStatement, ... )
    end

    aDB( DML[ 'BeginTransaction' ] )
    aDB( DML[ 'DeleteStage' ] )

    local someWeights = TokenMap( anIterator )

    for aToken, aWeight in pairs( someWeights ) do
        aDB( DML[ 'InsertStage' ], aDocument, aToken, aWeight )
    end

    Run( 'InsertDocument', aDocument )
    Run( 'InsertToken' )
    Run( 'DeleteDocumentToken', aDocument )
    Run( 'InsertDocumentToken' )

    aDB( DML[ 'EndTransaction' ] )
end
219
-- Removes a document from its partition, within a single transaction:
-- first the 'DeleteDocumentToken' statement, then 'DeleteDocument', both
-- executed with the document name.
-- aDB: the database handle.
-- aDocument: the document name; it also selects the partition.
local function DeleteDocument( aDB, aDocument )
    local aPartition = Partition( aDocument )

    aDB( DML[ 'BeginTransaction' ] )

    for _, aName in ipairs( { 'DeleteDocumentToken', 'DeleteDocument' } ) do
        local aStatement = Template( DML[ aName ] )

        aStatement[ 'partition' ] = aPartition

        aDB( aStatement, aDocument )
    end

    aDB( DML[ 'EndTransaction' ] )
end
236
-- Renames a document, within a single transaction, by executing the
-- 'UpdateDocument' statement with the new name followed by the current one.
-- aDB: the database handle.
-- aDocument: the current document name; it selects the partition.
-- aNewDocument: the new document name.
-- NOTE(review): the partition is derived from the current name only, while
-- lookups derive it from the name they are given; a renamed document may
-- therefore sit in a partition other than the one its new name maps to --
-- confirm against the 'UpdateDocument' statement.
local function UpdateDocument( aDB, aDocument, aNewDocument )
    local aPartition = Partition( aDocument )

    aDB( DML[ 'BeginTransaction' ] )

    local anUpdate = Template( DML[ 'UpdateDocument' ] )

    anUpdate[ 'partition' ] = aPartition

    aDB( anUpdate, aNewDocument, aDocument )
    aDB( DML[ 'EndTransaction' ] )
end
249
-- Looks up the tokens recorded for a document.
-- aDB: the database handle.
-- aDocument: the document name; it also selects the partition.
-- Returns an iterator function yielding the 'name' and 'weight' fields of
-- each row, and nothing once the rows are exhausted.
local function SelectDocument( aDB, aDocument )
    local aPartition = Partition( aDocument )
    local aStatement = Template( DML[ 'SelectDocumentToken' ] )

    aStatement[ 'partition' ] = aPartition

    local aCursor = aDB( aStatement, aDocument )

    local function Next()
        local aRow = aCursor()

        if not aRow then
            return
        end

        return aRow.name, aRow.weight
    end

    return Next
end
264
-- Finds the documents matching the tokens produced by an iterator.
-- aDB: the database handle.
-- anIterator: a token iterator, consumed through TokenList.
-- aLimit: optional limit handed to the statements; defaults to 999.
-- Within one transaction, the hits and the stage are emptied, each token is
-- staged together with its TokenRange value, and the 'FindDocument'
-- statement runs once per partition. The 'SelectHit' statement is executed
-- after the transaction has ended.
-- Returns an iterator function yielding the 'document' and 'score' fields
-- of each row, and nothing once the rows are exhausted.
local function FindDocument( aDB, anIterator, aLimit )
    local someTokens = TokenList( anIterator )
    local aMaximum = aLimit or 999

    aDB( DML[ 'BeginTransaction' ] )
    aDB( DML[ 'DeleteHit' ] )
    aDB( DML[ 'DeleteStage' ] )

    for anIndex = 1, #someTokens do
        local aToken = someTokens[ anIndex ]

        aDB( DML[ 'InsertStage' ], aToken, TokenRange( aToken ), 0 )
    end

    for aNumber = 1, PartitionCount do
        local aName = PartitionName( aNumber )
        local aSearch = Template( DML[ 'FindDocument' ] )

        aSearch[ 'partition' ] = aName

        aDB( aSearch, aMaximum )
    end

    aDB( DML[ 'EndTransaction' ] )

    local aCursor = aDB( DML[ 'SelectHit' ], aMaximum )

    local function Next()
        local aRow = aCursor()

        if not aRow then
            return
        end

        return aRow.document, aRow.score
    end

    return Next
end
301
302 --------------------------------------------------------------------------------
303 -- Metamethods
304 --------------------------------------------------------------------------------
305
-- Calling the module creates a new finder: a table holding the database
-- opened by NewDB for the given URL, with the called table as metatable.
function meta:__call( aURL )
    return setmetatable( { db = NewDB( aURL ) }, self )
end
314
-- Concatenation joins the string forms of both operands, in order.
function meta:__concat( aValue )
    local aLeft = tostring( self )
    local aRight = tostring( aValue )

    return aLeft .. aRight
end
318
-- The string form is the _NAME and _VERSION fields separated by a slash.
function meta:__tostring()
    return string.format( '%s/%s', self._NAME, self._VERSION )
end
322
-- Calling a finder runs FindDocument, through Try, on the finder's database
-- with the given token iterator and optional limit.
function self:__call( anIterator, aLimit )
    local aDB = self.db

    return Try( FindDocument, aDB, anIterator, aLimit )
end
326
-- Reading a key that is absent from a finder runs SelectDocument, through
-- Try, on the finder's database, with the key as document name.
function self:__index( aKey )
    local aDB = self.db

    return Try( SelectDocument, aDB, aKey )
end
330
-- Assigning to a finder key dispatches on the type of the assigned value,
-- provided the key is a string:
--   function -> InsertDocument( key, value )
--   string   -> UpdateDocument( key, value )
--   false    -> DeleteDocument( key )
-- Every operation runs through Try on the finder's database.
-- Raises an 'Invalid parameters' error for any other combination.
function self:__newindex( aKey, aValue )
    if type( aKey ) == 'string' then
        local aType = type( aValue )

        if aType == 'function' then
            return Try( InsertDocument, self.db, aKey, aValue )
        elseif aType == 'string' then
            return Try( UpdateDocument, self.db, aKey, aValue )
        elseif aValue == false then
            return Try( DeleteDocument, self.db, aKey )
        end
    end

    error( ( 'Invalid parameters: %q = %q' ):format( tostring( aKey ), tostring( aValue ) ) )
end
342
-- Concatenation joins the string forms of both operands, in order.
function self:__concat( aValue )
    local aLeft = tostring( self )
    local aRight = tostring( aValue )

    return aLeft .. aRight
end
346
-- The string form of a finder is the string form of its database.
function self:__tostring()
    local aDB = self.db

    return tostring( aDB )
end
350
Note: See TracBrowser for help on using the browser.