High-level implementation of inverted lists in key-value stores:
(Python and MongoDB)
import re
def getwords(doc):
    """Split *doc* into lowercase words.

    A word is a maximal run of word characters (letters, digits and
    underscore); every run of other characters acts as a separator.

    Args:
        doc: The text to tokenize.

    Returns:
        A list of the lowercased words in order of appearance. Empty
        input, or input without any word character, yields an empty list.
    """
    # Use \W+ rather than \W*: a pattern that can match the empty string
    # makes re.split cut between every single character on Python 3.7+.
    splitter = re.compile(r'\W+')
    # Leading/trailing separators produce empty strings; drop them.
    return [s.lower() for s in splitter.split(doc) if s]
# Sketch of an inverted-index builder.
# NOTE(review): this block reads as pseudo-code, not runnable Python:
# `i++` / `freq(w)++` are not Python operators, `indexed=true` is not valid
# inside a tuple, and `freq`, `offset` and `Field` are not defined in the
# visible source. Indentation is also missing in this view.
class Vocabulary:
# NOTE(review): declared without `self`; `doc` is both passed to getwords()
# and accessed as `doc.id` -- confirm the intended type of `doc`.
# Nothing is returned and the tuples below are rebound on every iteration.
def makeInvertedIndexSortedByKey(doc):
# Running counter, incremented once per word of the document.
i = 0;
for w in getwords(doc):
# Presumably the ordinal position of `w` within the document -- TODO confirm.
Position = i++
# Appears to pair the document id with a per-word count and a reference to
# the position entry -- semantics of freq()/offset() not shown here.
Frequency = (doc.id, freq(w)++, offset(Position))
# Appears to be the postings entry for word `w` in `Field`, referencing the
# frequency entry above -- TODO confirm.
PostingsList = (Field, w, freq(doc.id), offset(Frequency))
# Appears to be the field definition (indexed and stored flags) referencing
# the postings entry -- TODO confirm.
FieldDef = (Field, indexed=true, stored=true, offset(PostingsList))
(Python and MongoDB)
import re
def getwords(doc):
    """Split *doc* into lowercase words.

    A word is a maximal run of word characters (letters, digits and
    underscore); every run of other characters acts as a separator.

    Args:
        doc: The text to tokenize.

    Returns:
        A list of the lowercased words in order of appearance. Empty
        input, or input without any word character, yields an empty list.
    """
    # Use \W+ rather than \W*: a pattern that can match the empty string
    # makes re.split cut between every single character on Python 3.7+.
    splitter = re.compile(r'\W+')
    # Leading/trailing separators produce empty strings; drop them.
    return [s.lower() for s in splitter.split(doc) if s]
# Sketch of an inverted-index builder.
# NOTE(review): this block reads as pseudo-code, not runnable Python:
# `i++` / `freq(w)++` are not Python operators, `indexed=true` is not valid
# inside a tuple, and `freq`, `offset` and `Field` are not defined in the
# visible source. Indentation is also missing in this view.
class Vocabulary:
# NOTE(review): declared without `self`; `doc` is both passed to getwords()
# and accessed as `doc.id` -- confirm the intended type of `doc`.
# Nothing is returned and the tuples below are rebound on every iteration.
def makeInvertedIndexSortedByKey(doc):
# Running counter, incremented once per word of the document.
i = 0;
for w in getwords(doc):
# Presumably the ordinal position of `w` within the document -- TODO confirm.
Position = i++
# Appears to pair the document id with a per-word count and a reference to
# the position entry -- semantics of freq()/offset() not shown here.
Frequency = (doc.id, freq(w)++, offset(Position))
# Appears to be the postings entry for word `w` in `Field`, referencing the
# frequency entry above -- TODO confirm.
PostingsList = (Field, w, freq(doc.id), offset(Frequency))
# Appears to be the field definition (indexed and stored flags) referencing
# the postings entry -- TODO confirm.
FieldDef = (Field, indexed=true, stored=true, offset(PostingsList))
No comments:
Post a Comment