Changeset 1474
- Timestamp:
- 08/24/08 18:48:15
- Files:
-
- HTTP/Finder.ddl (modified) (1 diff)
- HTTP/Finder.dml (modified) (4 diffs)
- HTTP/Finder.lua (modified) (12 diffs)
- HTTP/WikiFinder.lua (modified) (3 diffs)
- HTTP/WikiSearchService.lua (modified) (5 diffs)
- HTTP/WikiSearchService.txt (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
HTTP/Finder.ddl
r1443 r1474 24 24 ]], 25 25 26 [ 6 ] = 27 [[ 28 create table if not exists [v:partition].document 29 ( 30 id integer primary key not null, 31 name text not null 32 ) 33 ]], 34 35 [ 7 ] = 36 [[ 37 create unique index if not exists [v:partition].document_name on document( name ) 38 ]], 39 40 [ 8 ] = 41 [[ 42 create table if not exists [v:partition].document_token 43 ( 44 document_id integer not null, 45 token_id integer not null, 46 weight integer not null 47 ) 48 ]], 49 50 [ 9 ] = 51 [[ 52 create index if not exists [v:partition].document_token_document_id on document_token( document_id ) 53 ]], 54 55 [ 10 ] = 56 [[ 57 create index if not exists [v:partition].document_token_token_id on document_token( token_id ) 58 ]], 59 60 [ 11 ] = 61 [[ 62 create table if not exists [v:partition].token 63 ( 64 id integer primary key not null, 65 name text not null 66 ) 67 ]], 68 69 [ 12 ] = 70 [[ 71 create unique index if not exists [v:partition].token_name on token( name ) 72 ]], 73 74 [ 13 ] = 75 [[ 76 create temporary table if not exists stage 77 ( 78 document text not null, 79 token text not null, 80 weight integer not null 81 ) 82 ]], 83 84 [ 14 ] = 26 [ 6 ] = 85 27 [[ 86 28 create temporary table if not exists hit 87 29 ( 88 documenttext not null,89 score integernot null30 name text not null, 31 extract text not null 90 32 ) 91 33 ]] HTTP/Finder.dml
r1466 r1474 19 19 ]], 20 20 21 CreateDocument = 22 [[ 23 create virtual table [v:partition].document using fts3 24 ( 25 name, 26 content, 27 tokenize porter 28 ) 29 ]], 30 21 31 DeleteDocument = 22 32 [[ … … 28 38 InsertDocument = 29 39 [[ 30 insert or ignore into [v:partition].document( name ) values( %s ) 31 ]], 32 33 UpdateDocument = 34 [[ 35 update or ignore [v:partition].document set name = %s where name = %s 36 ]], 37 38 DeleteStage = 39 [[ 40 delete from stage 41 ]], 42 43 InsertStage = 44 [[ 45 insert into stage( document, token, weight ) values( %s, %s, %s ) 46 ]], 47 48 InsertToken = 49 [[ 50 insert 51 into [v:partition].token( name ) 52 select stage.token as name 53 from stage 54 left join [v:partition].token on token.name = stage.token 55 where token.id is null 56 ]], 57 58 DeleteDocumentToken = 59 [[ 60 delete 61 from [v:partition].document_token 62 where document_id 63 in ( 64 select id 65 from [v:partition].document 66 where name = %s 67 ) 68 ]], 69 70 InsertDocumentToken = 71 [[ 72 insert 73 into [v:partition].document_token 74 ( 75 document_id, 76 token_id, 77 weight 78 ) 79 select document.id as document_id, 80 token.id as token_id, 81 stage.weight as weight 82 from stage 83 join [v:partition].document on document.name = stage.document 84 join [v:partition].token on token.name = stage.token 85 ]], 86 87 SelectDocumentToken = 88 [[ 89 select token.name as name, 90 document_token.weight as weight 91 from [v:partition].document 92 join [v:partition].document_token on document_token.document_id = document.id 93 join [v:partition].token on token.id = document_token.token_id 94 where document.name = %s 95 order by token.name, 96 document_token.weight 40 insert or ignore into [v:partition].document( name, content ) values( %s, %s ) 97 41 ]], 98 42 … … 102 46 into hit 103 47 ( 104 document,105 score48 name, 49 extract 106 50 ) 107 51 select document.name as name, 108 sum( document_token.weight ) as score 109 from stage 110 join [v:partition].token on token.name >= stage.document and token.name < stage.token 111 join [v:partition].document_token on document_token.token_id = token.id 112 join [v:partition].document on document.id = document_token.document_id 113 group by document.name 114 having count( document_token.token_id ) >= 115 ( 116 select count( stage.document ) as count 117 from stage 118 ) 119 order by 2 desc, 120 1 52 snippet( document, '<i>', '</i>', '…' ) as extract 53 from [v:partition].document 54 where document.content match %s 55 order by 1 121 56 limit %s 122 57 ]], … … 129 64 SelectHit = 130 65 [[ 131 select document,132 score66 select name, 67 extract 133 68 from hit 134 order by score desc, 135 document 69 order by name 136 70 limit %s 137 71 ]] HTTP/Finder.lua
r1465 r1474 116 116 end 117 117 118 local function CreateDocument( aDB, aPartition ) 119 local aCall = function() 120 local aStatement = Template( DML[ 'CreateDocument' ] ) 121 122 aStatement[ 'partition' ] = aPartition 123 124 aDB( aStatement ) 125 end 126 127 pcall( aCall ) 128 end 129 118 130 local function NewDB( aURL ) 119 131 local DB = require( 'DB' ) 120 132 local aDB = DB( aURL ) 121 133 local aPath = PartitionPath( aURL ) 122 134 123 135 for anIndex = 1, PartitionCount do 124 136 local aStatement = Template( DML[ 'Attach' ] ) … … 137 149 aDB( aStatement ) 138 150 end 151 152 CreateDocument( aDB, aPartition ) 139 153 end 140 154 … … 143 157 144 158 -------------------------------------------------------------------------------- 145 -- Token146 --------------------------------------------------------------------------------147 148 local function TokenList( anIterator )149 local aMap = {}150 local aList = {}151 152 for anIndex, aToken in anIterator do153 if not aMap[ aToken ] then154 aList[ #aList + 1 ] = aToken155 156 aMap[ aToken ] = true157 158 if #aList >= 16 then159 break160 end161 end162 end163 164 return aList165 end166 167 local function TokenMap( anIterator )168 local aMap = {}169 170 for anIndex, aToken in anIterator do171 aMap[ aToken ] = ( aMap[ aToken ] or 0 ) + 1172 end173 174 return aMap175 end176 177 local function TokenRange( aValue )178 local aLength = aValue:len()179 local aLastChar = string.char( aValue:byte( aLength ) + 1 )180 local aLastValue = aValue:sub( 1, aLength - 1 ) .. aLastChar181 182 return aLastValue183 end184 185 --------------------------------------------------------------------------------186 159 -- DML 187 160 -------------------------------------------------------------------------------- 188 161 189 local function InsertDocument( aDB, aDocument, a nIterator)162 local function InsertDocument( aDB, aDocument, aContent ) 190 163 local aPartition = Partition( aDocument ) 191 164 local aStatement = nil … … 193 166 aDB( DML[ 'BeginTransaction' ] ) 194 167 195 aDB( DML[ 'DeleteStage' ] ) 196 197 for aToken, aWeight in pairs( TokenMap( anIterator ) ) do 198 aDB( DML[ 'InsertStage' ], aDocument, aToken, aWeight ) 199 end 168 aStatement = Template( DML[ 'DeleteDocument' ] ) 169 aStatement[ 'partition' ] = aPartition 170 aDB( aStatement, aDocument ) 200 171 201 172 aStatement = Template( DML[ 'InsertDocument' ] ) 202 173 aStatement[ 'partition' ] = aPartition 203 aDB( aStatement, aDocument ) 204 205 aStatement = Template( DML[ 'InsertToken' ] ) 206 aStatement[ 'partition' ] = aPartition 207 aDB( aStatement ) 208 209 aStatement = Template( DML[ 'DeleteDocumentToken' ] ) 210 aStatement[ 'partition' ] = aPartition 211 aDB( aStatement, aDocument ) 212 213 aStatement = Template( DML[ 'InsertDocumentToken' ] ) 214 aStatement[ 'partition' ] = aPartition 215 aDB( aStatement ) 174 aDB( aStatement, aDocument, aContent ) 216 175 217 176 aDB( DML[ 'EndTransaction' ] ) … … 224 183 aDB( DML[ 'BeginTransaction' ] ) 225 184 226 aStatement = Template( DML[ 'DeleteDocumentToken' ] )227 aStatement[ 'partition' ] = aPartition228 aDB( aStatement, aDocument )229 230 185 aStatement = Template( DML[ 'DeleteDocument' ] ) 231 186 aStatement[ 'partition' ] = aPartition … … 235 190 end 236 191 237 local function UpdateDocument( aDB, aDocument, aNewDocument )238 local aPartition = Partition( aDocument )239 local aStatement = nil240 241 aDB( DML[ 'BeginTransaction' ] )242 243 aStatement = Template( DML[ 'UpdateDocument' ] )244 aStatement[ 'partition' ] = aPartition245 aDB( aStatement, aNewDocument, aDocument )246 247 aDB( DML[ 'EndTransaction' ] )248 end249 250 192 local function SelectDocument( aDB, aDocument ) 251 193 local aPartition = Partition( aDocument ) 252 local aStatement = Template( DML[ 'SelectDocument Token' ] )194 local aStatement = Template( DML[ 'SelectDocument' ] ) 253 195 aStatement[ 'partition' ] = aPartition 254 196 local anIterator = aDB( aStatement, aDocument ) … … 258 200 259 201 if aRow then 260 return aRow.name, aRow. weight202 return aRow.name, aRow.content 261 203 end 262 204 end 263 205 end 264 206 265 local function FindDocument( aDB, anIterator, aLimit ) 266 local aList = TokenList( anIterator ) 207 local function FindDocument( aDB, aQuery, aLimit ) 267 208 local aLimit = aLimit or 999 268 local aStatement = DML[ 'SelectHit' ]269 209 local anIterator = nil 270 210 … … 272 212 273 213 aDB( DML[ 'DeleteHit' ] ) 274 aDB( DML[ 'DeleteStage' ] )275 276 for anIndex, aToken in ipairs( aList ) do277 aDB( DML[ 'InsertStage' ], aToken, TokenRange( aToken ), 0 )278 end279 214 280 215 for anIndex = 1, PartitionCount do … … 284 219 aStatement[ 'partition' ] = aPartition 285 220 286 aDB( aStatement, a Limit )221 aDB( aStatement, aQuery, aLimit ) 287 222 end 288 223 289 224 aDB( DML[ 'EndTransaction' ] ) 290 225 291 anIterator = aDB( aStatement, aLimit )226 anIterator = aDB( DML[ 'SelectHit' ], aLimit ) 292 227 293 228 return function() … … 295 230 296 231 if aRow then 297 return aRow. document, aRow.score232 return aRow.name, aRow.extract 298 233 end 299 234 end … … 321 256 end 322 257 323 function self:__call( a nIterator, aLimit )324 return Try( FindDocument, self.db, a nIterator, aLimit )258 function self:__call( aQuery, aLimit ) 259 return Try( FindDocument, self.db, aQuery, aLimit ) 325 260 end 326 261 … … 330 265 331 266 function self:__newindex( aKey, aValue ) 332 if type( aKey ) == 'string' and type( aValue ) == ' function' then267 if type( aKey ) == 'string' and type( aValue ) == 'string' then 333 268 return Try( InsertDocument, self.db, aKey, aValue ) 334 elseif type( aKey ) == 'string' and type( aValue ) == 'string' then335 return Try( UpdateDocument, self.db, aKey, aValue )336 269 elseif type( aKey ) == 'string' and type( aValue ) == 'boolean' and aValue == false then 337 270 return Try( DeleteDocument, self.db, aKey ) HTTP/WikiFinder.lua
r1472 r1474 75 75 end 76 76 77 local stop = { [ 'and' ] = true, [ 'are' ] = true, [ 'but' ] = true, [ 'for' ] = true, [ 'into' ] = true, [ 'not' ] = true, [ 'such' ] = true, [ 'that' ] = true, [ 'the' ] = true, [ 'their' ] = true, [ 'then' ] = true, [ 'there' ] = true, [ 'these' ] = true, [ 'they' ] = true, [ 'this' ] = true, [ 'will' ] = true, [ 'with' ] = true, [ 'yes' ] = true, [ 'you' ] = true, [ 'your' ] = true }78 79 local function Token( aValue )80 local Unidecode = require( 'Unidecode' )81 local aValue = Unidecode( aValue or '' ):lower():gsub( '%W', ' ' )82 local anIterator = aValue:gmatch( '([%S]+)' )83 local anIndex = 084 85 return function()86 local aToken = anIterator()87 88 while aToken and ( aToken:len() < 3 or stop[ aToken ] ) do89 aToken = anIterator()90 end91 92 if aToken then93 anIndex = anIndex + 194 95 return anIndex, aToken:sub( 1, 9 )96 end97 end98 end99 100 77 local function Try( aFunction, ... ) 101 78 local ok, aResult = pcall( aFunction, ... ) … … 177 154 local WikiService = require( 'WikiService' ) 178 155 179 aFinder[ aName ] = Token( WikiService.Text( aContent ))156 aFinder[ aName ] = WikiService.Text( aContent ) 180 157 end 181 158 … … 189 166 190 167 if aFinder then 191 return aFinder( Token( aText ), 10)168 return aFinder( aText ) 192 169 end 193 170 HTTP/WikiSearchService.lua
r1471 r1474 34 34 35 35 local getmetatable = getmetatable 36 local next = next 36 37 local pairs = pairs 37 38 local require = require … … 57 58 local hasData = false 58 59 59 for aContent, aURL in anIterator do60 for aContent, aURL, anExtract in anIterator do 60 61 if not someHits[ aContent.name ] then 61 62 local aNameTemplate = aTemplate[ 'names' ] … … 64 65 aNameTemplate[ 'name' ] = Encode( aContent.title ) 65 66 aNameTemplate[ 'tag' ] = Tag( aContent.modification, true ) 67 aNameTemplate[ 'extract' ] = anExtract 66 68 67 69 aTemplate[ 'names' ] = aNameTemplate … … 129 131 end 130 132 133 local function ContentSearch( aQuery ) 134 local anIterator = WikiFinder[ aQuery ] 135 local aContentIterator = function() 136 local HTTPExtra = require( 'HTTPExtra' ) 137 local HTTPService = require( 'HTTPService' ) 138 local WikiContent = require( 'WikiContent' ) 139 local WikiContentService = require( 'WikiContentService' ) 140 local aName, anExtract = anIterator() 141 142 if aName then 143 local aContent = WikiContent( aName ) 144 local aService = WikiContentService( aContent ) 145 local aURL = HTTPService[ aService ] 146 147 return aContent, aURL, anExtract 148 end 149 end 150 151 return aContentIterator 152 end 153 131 154 local function ContentHit( aTemplate, aQuery, someHits ) 132 local anIterator = Content Iterator( WikiFinder[ aQuery ])133 155 local anIterator = ContentSearch( aQuery ) 156 134 157 return Hit( aTemplate, anIterator, someHits ) 158 end 159 160 local function Search( aQuery ) 161 local aTemplate = Template[ 'WikiSearchService.txt' ] 162 local someHits = { n = 0, m = false } 163 local aKey = nil 164 local anIterator = function() 165 aKey = next( someHits, aKey ) 166 167 if aKey then 168 return aKey 169 end 170 end 171 172 aTemplate[ 'directHit' ] = DirectHit( aTemplate[ 'directHit' ], aQuery, someHits ) 173 aTemplate[ 'titleHit' ] = TitleHit( aTemplate[ 'titleHit' ], aQuery, someHits ) 174 aTemplate[ 'contentHit' ] = ContentHit( aTemplate[ 'contentHit' ], aQuery, someHits ) 175 176 return ContentIterator( anIterator ) 135 177 end 136 178 … … 192 234 local WikiFeed = require( 'WikiFeed' ) 193 235 local WikiIndex = require( 'WikiIndex' ) 194 local anIterator , aCount, hasMore = TitleSearch( self.query )236 local anIterator = Search( self.query ) 195 237 local aGenerator = HTML 196 238 local aQuery = self.query or '' HTTP/WikiSearchService.txt
r1473 r1474 20 20 <li> 21 21 <a href='[v:href]' title='[v:name]'>[v:name]</a>[v:tag] 22 < br/>“…”22 <p>“[v:extract]”</p> 23 23 </li> 24 24 [/t:names]