# Zope 2 migration script (Python 2) for the EVKK corpus site.
# Each stepNN function performs one idempotent-ish upgrade step on the
# site root object and returns True on completion (step00 returns False
# when its precondition is not met).  Steps are evidently meant to be run
# in ascending order by an external migration driver.
#
# NOTE(review): this file was recovered from a whitespace-collapsed copy;
# indentation has been reconstructed.  The ZSQL method bodies in step08
# contain empty argument slots (e.g. "VALUES ( NEXTVAL('widseq'), , 1, ...)")
# where <dtml-sqlvar ...> bindings appear to have been lost in extraction —
# verify against the original ZSQL method sources before running step08.

# Zope id of the ZPsycopg database connection object created in step08.
PG_CONN_ID = 'evkk_pg_conn'


def step00(obj):
    """Precondition check: succeed only if the PG connection exists on obj."""
    if not hasattr(obj, PG_CONN_ID):
        return False
    return True


def step01(obj):
    """Create base folders and ensure wwwdata has a 'site_titles' lines property."""
    obj.setupFolders()
    from zExceptions import BadRequest
    try:
        # BadRequest is raised when the property already exists; that is fine.
        obj.wwwdata.manage_addProperty('site_titles', (), 'lines')
    except BadRequest:
        pass
    return True


def step02(obj):
    """Run the site's own setup hook."""
    obj.setupSite()
    return True


def step03(obj):
    """ switch to schemas

    Stamp every Document that has no schema yet with the default
    'BasicSchema' and an empty schema-data store (_dsstore).
    """
    for x in obj.Documents.objectValues('Document'):
        if hasattr(x, '_document_schema'):
            # we don't want to mess up documents with schemas
            continue
        x._document_schema = 'BasicSchema'
        x._dsstore = {}
    return True


def step04(obj):
    """ migrate data from document object to schema container

    For each Document with a schema, copy the legacy flat attributes
    (named by each field's getLegacy()) into the schema store via
    schema.update().  Prints the number of documents migrated.
    """
    from schemas import get_schema
    c = 0  # count of migrated documents
    for x in obj.Documents.objectValues('Document'):
        if not hasattr(x, '_document_schema'):
            # no schema!
            continue
        schema = get_schema(x._document_schema)
        u = {}
        for f in schema.fields:
            legAttr = f.getLegacy()
            if legAttr is None:
                # field has no legacy counterpart; nothing to migrate
                continue
            lval = getattr(x, legAttr)
            if lval in ('grammatikaha',):
                # normalize a known-bad legacy value to 'muu' ("other")
                lval = 'muu'
            u[f.__name__] = lval
        schema.update(x, u)
        c = c + 1
        # mark the persistent object dirty so ZODB writes it out
        x._p_changed = True
    print "total documents:", c
    return True


def step05(obj):
    """ delete unused/legacy attributes

    Removes the legacy flat properties (migrated in step04) first from
    the Documents container itself, then from every Document inside it.
    """
    x = obj.Documents
    # Container-level deletions are individually guarded: any one of the
    # properties may already be absent.
    try:
        x._delProperty('elukoht')
    except AttributeError:
        pass
    try:
        x._delProperty('sots')
    except AttributeError:
        pass
    try:
        x._delProperty('vanus')
    except AttributeError:
        pass
    try:
        x._delProperty('sugu')
    except AttributeError:
        pass
    try:
        x._delProperty('emakeel')
    except AttributeError:
        pass
    try:
        x._delProperty('kodus')
    except AttributeError:
        pass
    try:
        x._delProperty('valdamise_tase')
    except AttributeError:
        pass
    try:
        x._delProperty('haridus')
    except AttributeError:
        pass
    try:
        x._delProperty('abivahendid')
    except AttributeError:
        pass
    try:
        x._delProperty('texttype')
    except AttributeError:
        pass
    # NOTE(review): unlike the container deletions above, the per-document
    # deletions below are NOT guarded — presumably every Document is known
    # to carry all ten properties; confirm, or a missing one aborts the step.
    for x in obj.Documents.objectValues('Document'):
        x._delProperty('elukoht')
        x._delProperty('sots')
        x._delProperty('vanus')
        x._delProperty('sugu')
        x._delProperty('emakeel')
        x._delProperty('kodus')
        x._delProperty('valdamise_tase')
        x._delProperty('haridus')
        x._delProperty('abivahendid')
        x._delProperty('texttype')
        x._p_changed = True
    return True


def step06(self):
    """ create indexes based on document schemas

    Adds a FieldIndex and a metadata column to the ZCatalog for every
    distinct field name across all schemas, plus 'getCorpus'.
    """
    from Products.ZCatalog.Catalog import CatalogError
    # NOTE(review): 'all' here is a schemas-module export that shadows the
    # builtin all() within this function.
    from schemas import all
    cat = self.zcatalog
    seen = []  # field names already indexed (dedupe across schemas)
    for s in all():
        for f in s.fields:
            if f.getName() in seen:
                continue
            seen.append(f.getName())
            # CatalogError means the index/column already exists; ignore.
            try:
                cat.addIndex(f.getName(), 'FieldIndex')
            except CatalogError:
                pass
            try:
                cat.addColumn(f.getName())
            except CatalogError:
                pass
    cat.addColumn('getCorpus')
    cat.addIndex('getCorpus', 'FieldIndex')
    return True


def step07(self):
    """Create the 'main' Corpus object and assign every Document to it."""
    docs = self.Documents.objectValues('Document')
    from Corpus import Corpus
    self.corpora._setObject('main', Corpus('main'))
    for d in docs:
        #XXX
        d.setCorpus('main')
    return True


def step08(self):
    """ create SQL methods

    Creates the ZPsycopg DB connection (if missing), an 'sqls' folder,
    and a family of ZSQL methods for the words/docrefs/documents/
    undecided/aliases tables.  Every creation is guarded by hasattr so
    the step can be re-run.

    NOTE(review): the SQL bodies below have empty value slots where
    <dtml-sqlvar ...> argument bindings should appear — they look
    stripped by extraction.  Do not run as-is; restore the bindings from
    the original ZSQL method definitions first.
    """
    if not hasattr(self, PG_CONN_ID):
        from Products.ZPsycopgDA.DA import manage_addZPsycopgConnection
        manage_addZPsycopgConnection(self, PG_CONN_ID,
                                     'PG connection',
                                     'dbname=evkk user=evkk host=127.0.0.1',
                                     None,  # zdatetime
                                     1,     # READ_COMMITED
                                     'UTF8')
    from Products.ZSQLMethods.SQL import SQL
    from Products.ZSQLMethods.SQL import manage_addZSQLMethod
    from OFS.Folder import manage_addFolder
    if not hasattr(self, 'sqls'):
        manage_addFolder(self, 'sqls', 'ZSQL Methods')
    sqls = getattr(self, 'sqls')
    # ================================================================= #
    # words table
    # ================================================================= #
    if not hasattr(sqls, 'wordInsert'):
        manage_addZSQLMethod(sqls, 'wordInsert',
                             'Insert a word in to words table',
                             PG_CONN_ID,
                             'word prim lemma category gram language raw',
                             """
                             INSERT INTO words VALUES (
                             NEXTVAL('widseq'), , 1, , , , , , );
                             """)
    if not hasattr(sqls, 'wordGetByWord'):
        manage_addZSQLMethod(sqls, 'wordGetByWord',
                             "Get word by word",
                             PG_CONN_ID,
                             'word',
                             """
                             SELECT * FROM words WHERE word=;
                             """)
    if not hasattr(sqls, 'wordsByLemma'):
        manage_addZSQLMethod(sqls, 'wordsByLemma',
                             "Get words by lemma",
                             PG_CONN_ID,
                             "lemma",
                             """
                             SELECT word, prim, lemma, category, gram
                             FROM words WHERE lemma=;
                             """)
    if not hasattr(sqls, 'getNofWords'):
        manage_addZSQLMethod(sqls, 'getNofWords',
                             "get a number of words with morf. info",
                             PG_CONN_ID,
                             "",
                             """
                             SELECT count("ID") FROM words;
                             """)
    # ================================================================= #
    # undecided table
    # ================================================================= #
    # ================================================================= #
    # documents
    # ================================================================= #
    if not hasattr(sqls, 'docsInsert'):
        manage_addZSQLMethod(sqls, 'docsInsert',
                             "register word presence in a document",
                             PG_CONN_ID,
                             "word docid language corpus",
                             """
                             INSERT INTO docrefs (word, "DocumentID", "ID", language, corpus)
                             VALUES ( , , NEXTVAL('didseq'), , );
                             """)
    if not hasattr(sqls, 'storeDocument'):
        # NOTE(review): two statements with no visible delimiter between the
        # DELETE and the INSERT — a <dtml-var sql_delimiter> was likely lost.
        manage_addZSQLMethod(sqls, 'storeDocument',
                             "store document's ID and title",
                             PG_CONN_ID,
                             "docid title language corpus",
                             """
                             DELETE FROM documents WHERE "DocumentID"=
                             INSERT INTO documents ("DocumentID", title, language, corpus)
                             VALUES ( , , , );
                             """)
    if not hasattr(sqls, 'deleteDocref'):
        manage_addZSQLMethod(sqls, 'deleteDocref',
                             "delete doc ref from docrefs table",
                             PG_CONN_ID,
                             "docid",
                             """
                             DELETE FROM docrefs WHERE "DocumentID"=;
                             """)
    if not hasattr(sqls, 'selectFirstChars'):
        manage_addZSQLMethod(sqls, 'selectFirstChars',
                             "select first chars",
                             PG_CONN_ID,
                             "",
                             """
                             SELECT * FROM firstchars;
                             """)
        # Cache the first-chars listing: it changes rarely.
        sqls.selectFirstChars.manage_advanced(
            1000,  # max_rows
            100,   # max_cache
            300,   # cache_time, 5 min
            '', '')
    # ================================================================= #
    # Get a list of words ordered by freq
    # Get a list of words ordered by alpha
    # ================================================================= #
    # SELECT word, count("DocumentID") AS count
    # FROM docrefs WHERE word LIKE
    # GROUP BY word
    # ORDER BY ;
    if not hasattr(sqls, 'getWords'):
        manage_addZSQLMethod(sqls, 'getWords',
                             "order by: word, count",
                             PG_CONN_ID,
                             "char order",
                             """
                             SELECT word, count('word'), lemma, prim FROM docrefs
                             LEFT JOIN words USING(word)
                             WHERE word LIKE
                             GROUP BY word, lemma, prim
                             ORDER BY , word;
                             """)
    if not hasattr(sqls, 'getUndecided'):
        manage_addZSQLMethod(sqls, 'getUndecided',
                             "get undecided words",
                             PG_CONN_ID,
                             "limit offset",
                             """
                             SELECT "ID", word, option, language FROM undecided
                             WHERE word IN (
                             SELECT DISTINCT word FROM undecided
                             WHERE processed=false
                             GROUP BY word
                             EXCEPT SELECT fromword FROM aliases
                             LIMIT  OFFSET );
                             """)
    if not hasattr(sqls, 'getUndecidedFilter'):
        manage_addZSQLMethod(sqls, 'getUndecidedFilter',
                             "get undecided words with limit option",
                             PG_CONN_ID,
                             "limit offset filter",
                             """
                             SELECT "ID", word, option, language FROM undecided
                             WHERE word IN (
                             SELECT DISTINCT word FROM undecided
                             WHERE processed=false AND word LIKE
                             GROUP BY word
                             EXCEPT SELECT fromword FROM aliases
                             LIMIT  OFFSET );
                             """)
    if not hasattr(sqls, 'getNofUndecided'):
        manage_addZSQLMethod(sqls, 'getNofUndecided',
                             "get a number of undecided words",
                             PG_CONN_ID,
                             "",
                             """
                             SELECT count(idcount) FROM
                             (SELECT count("ID") AS idcount FROM undecided
                             GROUP BY word) AS grouped;
                             """)
    if not hasattr(sqls, 'tickOption'):
        manage_addZSQLMethod(sqls, 'tickOption',
                             "tick option in undecided table",
                             PG_CONN_ID,
                             "id",
                             """
                             UPDATE undecided SET tick=true WHERE "ID"=;
                             """)
    if not hasattr(sqls, 'tickWithData'):
        manage_addZSQLMethod(sqls, 'tickWithData',
                             "add an option in to undecided and tick it",
                             PG_CONN_ID,
                             "word option",
                             """
                             INSERT INTO undecided ("ID", word, option, language, tick)
                             VALUES ( NEXTVAL('unidseq'), , , 'ET', true );
                             """)
    if not hasattr(sqls, 'addAlias'):
        manage_addZSQLMethod(sqls, 'addAlias',
                             "link word with correct word",
                             PG_CONN_ID,
                             "fromword toword",
                             """
                             INSERT INTO aliases ("ID", fromword, toword)
                             VALUES ( NEXTVAL('aliidseq'), , );
                             """)
    if not hasattr(sqls, 'getTotalWordCount'):
        manage_addZSQLMethod(sqls, 'getTotalWordCount',
                             "total number of words",
                             PG_CONN_ID,
                             "",
                             """
                             SELECT count("ID") from docrefs;
                             """)
    # to get documents and word count in them! rowcount show total number of documents where
    # this word is in.
    if not hasattr(sqls, 'getDocWordStats'):
        manage_addZSQLMethod(sqls, 'getDocWordStats',
                             "per word, doc statistics",
                             PG_CONN_ID,
                             "word",
                             """
                             SELECT "DocumentID", count("DocumentID"), title FROM docrefs
                             LEFT JOIN documents USING("DocumentID")
                             WHERE word=
                             GROUP BY "DocumentID", title
                             ORDER BY count DESC;
                             """)
    if not hasattr(sqls, 'getRefFrom'):
        manage_addZSQLMethod(sqls, 'getRefFrom',
                             "get reference from word",
                             PG_CONN_ID,
                             "word",
                             """
                             SELECT toword FROM aliases WHERE fromword=;
                             """)
    if not hasattr(sqls, 'getRefTo'):
        manage_addZSQLMethod(sqls, 'getRefTo',
                             "get reference words to word",
                             PG_CONN_ID,
                             "word",
                             """
                             SELECT fromword FROM aliases WHERE toword=;
                             """)
    # select "DocumentID",count("DocumentID") from docrefs where word='akulaadija' group by "DocumentID";
    if not hasattr(sqls, 'getWordCount'):
        manage_addZSQLMethod(sqls, 'getWordCount',
                             "freq of one word",
                             PG_CONN_ID,
                             "word",
                             """
                             SELECT count("ID") FROM docrefs WHERE word=;
                             """)
    # ================================================================= #
    #
    # ================================================================= #
    # select "DocumentID", count("DocumentID") from docrefs where word='lenin' group by "DocumentID" order by count desc;
    return True


def step09(self):
    """ update docs refs - migration

    Recompute word/doc reference statistics for every Document.
    """
    docs = self.Documents.objectValues('Document')
    for d in docs:
        d._updateDocStatistics()
    return True


def step10(self):
    """Store per-document info rows (via Document._storeDocInfos) for every Document."""
    docs = self.Documents.objectValues('Document')
    for d in docs:
        d._storeDocInfos()
    return True


def step11(self):
    """Replace the legacy acl_users folder with PluggableAuthService and set it up."""
    from Products.PluggableAuthService.Extensions.upgrade import _replaceUserFolder, replace_acl_users
    replace_acl_users(self)
    self._setupPAS()
    return True