from AccessControl import ClassSecurityInfo
from OFS.SimpleItem import SimpleItem
from zope.interface import implements
from Globals import InitializeClass

from interfaces import IWordTree
from permissions import perm_manage


def _to_unicode(value, encoding='utf-8'):
    # Decode a byte string (e.g. a raw request value) to unicode.  Values
    # that are already unicode are returned untouched, because
    # unicode(u'...', 'utf-8') raises TypeError.
    if isinstance(value, unicode):
        return value
    return unicode(value, encoding)


class WordTree(SimpleItem):
    """Word list browser backed by the SQL methods reachable as ``self.sqls``.

    NOTE: Zope 2's ZPublisher refuses to publish a method that has no
    docstring.  Methods that had no docstring are therefore documented with
    comments only, so that documenting them does not change what can be
    called by URL.
    """

    implements(IWordTree)

    meta_type = 'WordTree'
    security = ClassSecurityInfo()
    security.declareObjectPublic()

    id = 'wordtree'

    # Number of rows shown per page of the "undecided" listing.
    _PAGE_SIZE = 40

    def getStartChars(self):
        """Return the 'char' value of every row of ``sqls.selectFirstChars()``."""
        return [row['char'] for row in self.sqls.selectFirstChars()]

    def getWordsBy(self):
        # List the words starting with the request's 'char', ordered by the
        # request's 'by' ('freq' when absent).
        #
        # Returns a list of [r[0], r[1], r[3], r[2]] lists, one per result
        # row (note that columns 2 and 3 are swapped).
        raw_char = self.REQUEST.get('char')
        # A missing parameter is treated like any other invalid value and
        # falls back to u'a' below instead of crashing in the decode.
        if raw_char is None:
            char = u''
        else:
            char = _to_unicode(raw_char)
        if len(char) != 1:
            char = u'a'

        # Only these fixed ORDER BY fragments are ever handed to the query;
        # the request value itself is never passed through.
        sort_key = self.REQUEST.get('by', 'freq')
        if sort_key == 'alpha':
            sortorder = 'word'
        elif sort_key == 'prim':
            sortorder = 'prim'
        elif sort_key == 'lemma':
            sortorder = 'lemma'
        else:
            sortorder = 'count DESC'

        rows = self.sqls.getWords(char=char + '%', order=sortorder)
        return [[r[0], r[1], r[3], r[2]] for r in rows]

    security.declareProtected(perm_manage, 'saveWordsToFile')
    def saveWordsToFile(self):
        """Write the keys of ``self._word_count`` to /tmp/wordlist.txt.

        One key per line, ISO-8859-15 encoded, CRLF terminated.  Keys that
        cannot be encoded are printed instead of written.  Returns "done".
        """
        f = open('/tmp/wordlist.txt', 'w')
        try:
            for k in self._word_count.keys():
                try:
                    line = k.encode('iso-8859-15') + '\r\n'
                except UnicodeEncodeError:
                    print(k)
                else:
                    f.write(line)
        finally:
            # Close the file even if a write fails part-way through.
            f.close()
        return "done"

    def getUndecided(self):
        # Return one page of undecided entries, grouped by r[1].
        #
        # Request values read: 'start' (row offset, default 0) and
        # 'uni_startswith' (optional filter; a leading/trailing '*' acts as
        # a wildcard).  The page size is fixed at _PAGE_SIZE; an 'end'
        # request value is not consulted by the query.
        #
        # Returns {r[1]: [{'id': r[0], 'option': r[2], 'language': r[3]}, ...]}.
        #
        # NOTE(review): assumes 'start' arrives as an int (e.g. marshalled
        # with "start:int") -- confirm against the calling template.
        start = self.REQUEST.get('start', 0)
        if start < 0:
            start = 0

        filt = self.REQUEST.get('uni_startswith', '')
        if not filt:
            rows = self.sqls.getUndecided(limit=self._PAGE_SIZE, offset=start)
        else:
            # Build a quoted LIKE pattern by hand: drop '%' and "'" from the
            # user's text, then turn a leading/trailing '*' into '%'.
            # NOTE(review): this is string-built SQL.  Only quotes and '%'
            # are stripped (a backslash is not); confirm the SQL method's
            # quoting, or switch it to a properly quoted parameter.
            filt = filt.replace('%', '').replace("'", '')
            pattern = "'"
            if filt.startswith('*'):
                pattern += '%'
            pattern += filt
            if filt.endswith('*'):
                pattern += '%'
            pattern += "'"
            # Any '*' left over (including the ones just translated) is
            # removed.
            pattern = pattern.replace('*', '')
            rows = self.sqls.getUndecidedFilter(limit=self._PAGE_SIZE,
                                                offset=start,
                                                filter=pattern)

        grouped = {}
        for r in rows:
            grouped.setdefault(r[1], []).append(
                {'id': r[0], 'option': r[2], 'language': r[3]})
        return grouped

    def nOfUndecided(self):
        # First column of the first row of sqls.getNofUndecided().
        return self.sqls.getNofUndecided()[0][0]

    def nOfMorf(self):
        # First column of the first row of sqls.getNofWords().
        return self.sqls.getNofWords()[0][0]

    def getNewStart(self):
        # 'start' value for the "previous" link: one page back, never
        # below 0.
        start = self.REQUEST.get('start', 0)
        return max(start - self._PAGE_SIZE, 0)

    def getNewEnd(self):
        # Value for the "next" link: start + page size when only 'start' is
        # present in the request, otherwise the page size itself.
        # NOTE(review): when 'end' is supplied the result is always the page
        # size, regardless of 'start' -- confirm this is intended.
        start = self.REQUEST.get('start', None)
        end = self.REQUEST.get('end', None)
        if end is None and start is not None:
            return start + self._PAGE_SIZE
        return self._PAGE_SIZE

    security.declareProtected(perm_manage, 'unifyWords')
    def unifyWords(self, REQUEST):
        """Apply the unification choices submitted in ``REQUEST.form``.

        Form keys ending in '_custom' or '_alias', and the keys
        'unifyButton' and 'start', are skipped.  For the remaining keys:

        * 'option_*': the value is passed to ``sqls.tickOption(id=...)``.
        * anything else: the key is a word and the value an action
          ('custom' or 'alias'); the companion field '<word>_<action>'
          holds the target, with one leading '?' stripped.

        Redirects to ``absolute_url() + '?sunif=1'`` when done.

        Raises ValueError for an unknown action or an empty target.
        """
        for key in REQUEST.form.keys():
            if (key.endswith(('_custom', '_alias'))
                    or key in ('unifyButton', 'start')):
                continue
            action = REQUEST.get(key)
            if key.startswith('option_'):
                # Tick the option with this id in the undecided table.
                self.sqls.tickOption(id=action)
                continue

            # Validate the action before touching the companion field.
            # String exceptions are themselves a TypeError on Python >= 2.6,
            # so a real exception class is raised here.
            if action not in ('custom', 'alias'):
                raise ValueError('unknown stuff! %r' % (action,))

            word = _to_unicode(key)
            target = _to_unicode(REQUEST.get(key + '_' + action))
            if not target:
                raise ValueError('empty %s value for %r' % (action, key))
            # Drop one leading '?'; the original evaluated target[1:] but
            # discarded the result.
            if target.startswith('?'):
                target = target[1:]

            if action == 'custom':
                self.sqls.tickWithData(word=word, option=target)
            else:
                # action == 'alias': add an alias from word to target.
                self.sqls.addAlias(fromword=word, toword=target)
        return REQUEST.RESPONSE.redirect(self.absolute_url() + '?sunif=1')

    def getDocumentBackrefs(self, word):
        # Per-document statistics for `word` (a UTF-8 byte string or
        # unicode).
        #
        # Returns a list of [r[0], r[1], title] per result row, where title
        # is r[2], or r[0] when r[2] is blank.
        rows = self.sqls.getDocWordStats(word=_to_unicode(word))
        backrefs = []
        for r in rows:
            doc_id = r[0]
            title = r[2]
            if not title.strip():
                title = doc_id
            backrefs.append([doc_id, r[1], title])
        return backrefs

    def getWordCount(self, word):
        # First column of the first row of sqls.getWordCount() for `word`.
        # The decoded value is what gets queried, as in
        # getDocumentBackrefs; previously it was computed and then ignored.
        w = _to_unicode(word)
        return self.sqls.getWordCount(word=w)[0][0]

    def getTotalWordCount(self):
        # First column of the first row of sqls.getTotalWordCount().
        return self.sqls.getTotalWordCount()[0][0]

    def getIsAlias(self, word):
        # First column of every row of sqls.getRefFrom(word), or False when
        # there are no rows.
        refs = [r[0] for r in self.sqls.getRefFrom(word=word)]
        return refs or False

    def getHasAlias(self, word):
        # First column of every row of sqls.getRefTo(word), or False when
        # there are no rows.
        refs = [r[0] for r in self.sqls.getRefTo(word=word)]
        return refs or False

    def getWordsByLemma(self, lemma):
        # First column of every row of sqls.wordsByLemma(lemma), i.e. the
        # words sharing that lemma.
        return [row[0] for row in self.sqls.wordsByLemma(lemma=lemma)]


InitializeClass(WordTree)