Best Python code snippet using autotest_python
datastore_stats_generator.py
Source:datastore_stats_generator.py
1#!/usr/bin/env python2#3# Copyright 2007 Google Inc.4#5# Licensed under the Apache License, Version 2.0 (the "License");6# you may not use this file except in compliance with the License.7# You may obtain a copy of the License at8#9# http://www.apache.org/licenses/LICENSE-2.010#11# Unless required by applicable law or agreed to in writing, software12# distributed under the License is distributed on an "AS IS" BASIS,13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14# See the License for the specific language governing permissions and15# limitations under the License.16#17"""Generate Datastore Stats over Dev mode appserver's datastore."""18import datetime19import logging20from google.appengine.api import datastore21from google.appengine.api import datastore_admin22from google.appengine.api import datastore_types23from google.appengine.api import users24from google.appengine.ext.db import stats25DELETE_BATCH_SIZE = 10026_GLOBAL_KEY = (stats.GlobalStat, 'total_entity_usage', '')27_PROPERTY_TYPE_TO_DSS_NAME = {28 unicode: ('String', 'STRING'),29 bool: ('Boolean', 'BOOLEAN'),30 long: ('Integer', 'INT64'),31 type(None): ('NULL', 'NULL'),32 float: ('Float', 'DOUBLE'),33 datastore_types.Key: ('Key', 'REFERENCE'),34 datastore_types.Blob: ('Blob', 'STRING'),35 datastore_types.EmbeddedEntity: ('EmbeddedEntity', 'STRING'),36 datastore_types.ByteString: ('ShortBlob', 'STRING'),37 datastore_types.Text: ('Text', 'STRING'),38 users.User: ('User', 'USER'),39 datastore_types.Category: ('Category', 'STRING'),40 datastore_types.Link: ('Link', 'STRING'),41 datastore_types.Email: ('Email', 'STRING'),42 datetime.datetime: ('Date/Time', 'INT64'),43 datastore_types.GeoPt: ('GeoPt', 'POINT'),44 datastore_types.IM: ('IM', 'STRING'),45 datastore_types.PhoneNumber: ('PhoneNumber', 'STRING'),46 datastore_types.PostalAddress: ('PostalAddress', 'STRING'),47 datastore_types.Rating: ('Rating', 'INT64'),48 datastore_types.BlobKey: ('BlobKey', 'STRING'),49 }50class 
DatastoreStatsProcessor(object):51 """Generates datastore stats for an app's an datastore entities."""52 def __init__(self, _app=None):53 self.app_id = datastore_types.ResolveAppId(_app)54 self.whole_app_stats = {}55 self.namespace_stats = {}56 self.found_non_empty_namespace = False57 self.old_stat_keys = []58 self.timestamp = datetime.datetime.utcnow()59 def __ScanAllNamespaces(self):60 """Scans all the namespaces and processes each namespace."""61 namespace_query = datastore.Query('__namespace__', _app=self.app_id)62 for namespace_entity in namespace_query.Run():63 name = namespace_entity.key().name()64 if name is None:65 name = ''66 self.__ProcessNamespace(name)67 def __ProcessNamespace(self, namespace):68 """Process all the entities in a given namespace."""69 all_query = datastore.Query(namespace=namespace, _app=self.app_id)70 for entity in all_query.Run():71 self.found_non_empty_namespace |= (namespace != '')72 proto = entity.ToPb()73 proto_size = len(proto.SerializeToString())74 if entity.key().kind() in stats._DATASTORE_STATS_CLASSES_BY_KIND:75 stat_kind = stats._DATASTORE_STATS_CLASSES_BY_KIND[entity.key().kind()]76 self.old_stat_keys.append(entity.key())77 self.__AggregateTotal(proto_size, entity.key(), proto, namespace,78 stat_kind)79 else:80 self.__ProcessUserEntity(proto_size, entity.key(), proto, namespace)81 def __GetPropertyIndexStat(self, namespace, kind_name,82 entity_key_size, prop):83 """Return the size and count of indexes for a property of an EntityProto."""84 property_index_size = (len(self.app_id) + len(kind_name) +85 len(prop.value().SerializeToString()) +86 len(namespace) + entity_key_size)87 return (property_index_size, 2)88 def __GetTypeIndexStat(self, namespace, kind_name, entity_key_size):89 """Return the size and count of indexes by type of an EntityProto."""90 type_index_size = (len(self.app_id) + len(kind_name) + entity_key_size91 + len(namespace))92 return (type_index_size, 1)93 def __ProcessUserEntity(self, proto_size, key, proto, 
namespace):94 """Increment datastore stats for a non stats record."""95 self.__AggregateTotal(proto_size, key, proto, namespace, None)96 kind_name = key.kind()97 entity_key_size = (len(proto.key().app()) + len(namespace) +98 len(proto.key().path().SerializeToString()) +99 len(proto.entity_group().SerializeToString()))100 self.__AggregateCompositeIndices(proto, namespace, kind_name,101 entity_key_size)102 type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,103 kind_name,104 entity_key_size)105 property_index_count = 0106 property_index_size = 0107 for prop_list in (proto.property_list(), proto.raw_property_list()):108 for prop in prop_list:109 index_size, index_count = self.__GetPropertyIndexStat(namespace,110 kind_name,111 entity_key_size,112 prop)113 property_index_size += index_size114 property_index_count += index_count115 builtin_index_size = type_index_size + property_index_size116 builtin_index_count = type_index_count + property_index_count117 self.__Increment(self.whole_app_stats, 1,118 (stats.KindStat, kind_name, ''),119 proto_size,120 builtin_index_count=builtin_index_count,121 builtin_index_size=builtin_index_size,122 kind_name=kind_name)123 self.__Increment(self.namespace_stats, 1,124 (stats.NamespaceKindStat, kind_name, namespace),125 proto_size,126 builtin_index_count=builtin_index_count,127 builtin_index_size=builtin_index_size,128 kind_name=kind_name)129 if key.parent() is None:130 whole_app_model = stats.KindRootEntityStat131 namespace_model = stats.NamespaceKindRootEntityStat132 else:133 whole_app_model = stats.KindNonRootEntityStat134 namespace_model = stats.NamespaceKindNonRootEntityStat135 self.__Increment(self.whole_app_stats, 1,136 (whole_app_model, kind_name, ''),137 proto_size,138 kind_name=kind_name)139 self.__Increment(self.namespace_stats, 1,140 (namespace_model, kind_name, namespace),141 proto_size,142 kind_name=kind_name)143 self.__ProcessProperties(144 kind_name,145 namespace,146 entity_key_size,147 
(proto.property_list(), proto.raw_property_list()))148 def __ProcessProperties(self, kind_name, namespace, entity_key_size,149 prop_lists):150 for prop_list in prop_lists:151 for prop in prop_list:152 try:153 value = datastore_types.FromPropertyPb(prop)154 self.__AggregateProperty(kind_name, namespace, entity_key_size,155 prop, value)156 except (AssertionError, AttributeError, TypeError, ValueError), e:157 logging.error('Cannot process property %r, exception %s' %158 (prop, e))159 def __AggregateProperty(self, kind_name, namespace, entity_key_size,160 prop, value):161 property_name = prop.name()162 property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][0]163 index_property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][1]164 size = len(prop.SerializeToString())165 index_size, index_count = self.__GetPropertyIndexStat(namespace, kind_name,166 entity_key_size, prop)167 self.__Increment(self.whole_app_stats, 1,168 (stats.PropertyTypeStat, property_type, ''),169 size,170 builtin_index_count=0,171 builtin_index_size=0,172 property_type=property_type)173 self.__Increment(self.whole_app_stats, 0,174 (stats.PropertyTypeStat, index_property_type, ''),175 0,176 builtin_index_count=index_count,177 builtin_index_size=index_size,178 property_type=index_property_type)179 self.__Increment(self.namespace_stats, 1,180 (stats.NamespacePropertyTypeStat,181 property_type, namespace),182 size,183 builtin_index_count=0,184 builtin_index_size=0,185 property_type=property_type)186 self.__Increment(self.namespace_stats, 0,187 (stats.NamespacePropertyTypeStat,188 index_property_type, namespace),189 0,190 builtin_index_count=index_count,191 builtin_index_size=index_size,192 property_type=index_property_type)193 self.__Increment(self.whole_app_stats, 1,194 (stats.KindPropertyTypeStat,195 property_type + '_' + kind_name, ''),196 size,197 builtin_index_count=0,198 builtin_index_size=0,199 property_type=property_type, kind_name=kind_name)200 self.__Increment(self.whole_app_stats, 0,201 
(stats.KindPropertyTypeStat,202 index_property_type + '_' + kind_name, ''),203 0,204 builtin_index_count=index_count,205 builtin_index_size=index_size,206 property_type=index_property_type, kind_name=kind_name)207 self.__Increment(self.namespace_stats, 1,208 (stats.NamespaceKindPropertyTypeStat,209 property_type + '_' + kind_name, namespace),210 size,211 builtin_index_count=0,212 builtin_index_size=0,213 property_type=property_type, kind_name=kind_name)214 self.__Increment(self.namespace_stats, 0,215 (stats.NamespaceKindPropertyTypeStat,216 index_property_type + '_' + kind_name, namespace),217 0,218 builtin_index_count=index_count,219 builtin_index_size=index_size,220 property_type=index_property_type, kind_name=kind_name)221 self.__Increment(self.whole_app_stats, 1,222 (stats.KindPropertyNameStat,223 property_name + '_' + kind_name, ''),224 size,225 builtin_index_count=index_count,226 builtin_index_size=index_size,227 property_name=property_name, kind_name=kind_name)228 self.__Increment(self.namespace_stats, 1,229 (stats.NamespaceKindPropertyNameStat,230 property_name + '_' + kind_name, namespace),231 size,232 builtin_index_count=index_count,233 builtin_index_size=index_size,234 property_name=property_name, kind_name=kind_name)235 self.__Increment(self.whole_app_stats, 1,236 (stats.KindPropertyNamePropertyTypeStat,237 property_type + '_' + property_name + '_' + kind_name,238 ''), size,239 builtin_index_count=0,240 builtin_index_size=0,241 property_type=property_type,242 property_name=property_name, kind_name=kind_name)243 self.__Increment(self.whole_app_stats, 0,244 (stats.KindPropertyNamePropertyTypeStat,245 index_property_type + '_' + property_name + '_' +246 kind_name,247 ''), 0,248 builtin_index_count=index_count,249 builtin_index_size=index_size,250 property_type=index_property_type,251 property_name=property_name, kind_name=kind_name)252 self.__Increment(self.namespace_stats, 1,253 (stats.NamespaceKindPropertyNamePropertyTypeStat,254 property_type + '_' + 
property_name + '_' + kind_name,255 namespace),256 size,257 builtin_index_count=0,258 builtin_index_size=0,259 property_type=property_type,260 property_name=property_name, kind_name=kind_name)261 self.__Increment(self.namespace_stats, 0,262 (stats.NamespaceKindPropertyNamePropertyTypeStat,263 index_property_type + '_' + property_name + '_' +264 kind_name,265 namespace),266 0,267 builtin_index_count=index_count,268 builtin_index_size=index_size,269 property_type=index_property_type,270 property_name=property_name, kind_name=kind_name)271 def __GetCompositeIndexStat(self, definition, proto, namespace, kind_name,272 entity_key_size):273 """Get statistics of composite index for a index definition of an entity."""274 property_list = proto.property_list()275 property_count = []276 property_size = []277 index_count = 1278 for indexed_prop in definition.property_list():279 name = indexed_prop.name()280 count = 0281 prop_size = 0282 for prop in property_list:283 if prop.name() == name:284 count += 1285 prop_size += len(prop.SerializeToString())286 property_count.append(count)287 property_size.append(prop_size)288 index_count *= count289 if index_count == 0:290 return (0, 0)291 index_only_size = 0292 for i in range(len(property_size)):293 index_only_size += property_size[i] * (index_count / property_count[i])294 index_size = (index_count * (entity_key_size + len(kind_name) +295 len(self.app_id) + len(namespace)) +296 index_only_size * 2)297 return (index_size, index_count)298 def __AggregateCompositeIndices(self, proto, namespace, kind_name,299 entity_key_size):300 """Aggregate statistics of composite indexes for an entity."""301 composite_indices = datastore_admin.GetIndices(self.app_id)302 for index in composite_indices:303 definition = index.definition()304 if kind_name != definition.entity_type():305 continue306 index_size, index_count = self.__GetCompositeIndexStat(definition, proto,307 namespace,308 kind_name,309 entity_key_size)310 if index_count == 0:311 continue312 
name_id = namespace313 if not name_id:314 name_id = 1315 self.__Increment(self.whole_app_stats, 0, _GLOBAL_KEY, 0,316 composite_index_count=index_count,317 composite_index_size=index_size)318 self.__Increment(self.whole_app_stats, 0,319 (stats.NamespaceStat, name_id, ''), 0,320 composite_index_count=index_count,321 composite_index_size=index_size,322 subject_namespace=namespace)323 self.__Increment(self.namespace_stats, 0,324 (stats.NamespaceGlobalStat, 'total_entity_usage',325 namespace), 0,326 composite_index_count=index_count,327 composite_index_size=index_size)328 self.__Increment(self.whole_app_stats, 0,329 (stats.KindStat, kind_name, ''), 0,330 composite_index_count=index_count,331 composite_index_size=index_size,332 kind_name=kind_name)333 self.__Increment(self.namespace_stats, 0,334 (stats.NamespaceKindStat, kind_name, namespace), 0,335 composite_index_count=index_count,336 composite_index_size=index_size,337 kind_name=kind_name)338 index_id = index.id()339 self.__Increment(self.whole_app_stats, index_count,340 (stats.KindCompositeIndexStat,341 kind_name + '_%s' % index_id, ''), index_size,342 kind_name=kind_name, index_id=index_id)343 self.__Increment(self.namespace_stats, index_count,344 (stats.NamespaceKindCompositeIndexStat,345 kind_name + '_%s' % index_id, namespace), index_size,346 kind_name=kind_name, index_id=index_id)347 def __AggregateTotal(self, size, key, proto, namespace, stat_kind):348 """Aggregate total datastore stats."""349 kind_name = key.kind()350 entity_key_size = (len(proto.key().app()) +351 len(proto.key().path().SerializeToString()) +352 len(proto.entity_group().SerializeToString()))353 type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,354 kind_name,355 entity_key_size)356 property_index_count = 0357 property_index_size = 0358 for prop_list in (proto.property_list(), proto.raw_property_list()):359 for prop in prop_list:360 index_size, index_count = self.__GetPropertyIndexStat(namespace,361 kind_name,362 
entity_key_size,363 prop)364 property_index_size += index_size365 property_index_count += index_count366 builtin_index_size = type_index_size + property_index_size367 builtin_index_count = type_index_count + property_index_count368 if stat_kind == stats.GlobalStat:369 count = 0370 else:371 count = 1372 self.__Increment(self.whole_app_stats, count, _GLOBAL_KEY, size,373 builtin_index_count=builtin_index_count,374 builtin_index_size=builtin_index_size)375 name_id = namespace376 if not name_id:377 name_id = 1378 if (stat_kind == stats.NamespaceStat) and (namespace == ''):379 count = 0380 self.__Increment(self.whole_app_stats, count,381 (stats.NamespaceStat, name_id, ''),382 size,383 builtin_index_count=builtin_index_count,384 builtin_index_size=builtin_index_size,385 subject_namespace=namespace)386 if stat_kind == stats.NamespaceGlobalStat:387 count = 0388 self.__Increment(389 self.namespace_stats, count,390 (stats.NamespaceGlobalStat, 'total_entity_usage', namespace), size,391 builtin_index_count=builtin_index_count,392 builtin_index_size=builtin_index_size)393 def __Increment(self, stats_dict, count, stat_key, size,394 builtin_index_count=0, builtin_index_size=0,395 composite_index_count=0, composite_index_size=0, **kwds):396 """Increment stats for a particular kind.397 Args:398 stats_dict: The dictionary where the entities are held.399 The entities are keyed by stat_key. e.g. 
The400 __Stat_Total__ entity will be found in stats_dict[_GLOBAL_KEY].401 count: The amount to increment the datastore stat by.402 stat_key: A tuple of (db.Model of the stat, key value, namespace).403 size: The "bytes" to increment the size by.404 builtin_index_count: The bytes of builtin index to add in to a stat.405 builtin_index_size: The count of builtin index to add in to a stat.406 composite_index_count: The bytes of composite index to add in to a stat.407 composite_index_size: The count of composite index to add in to a stat.408 kwds: Name value pairs that are set on the created entities.409 """410 if stat_key not in stats_dict:411 stat_model = stat_key[0](412 key=datastore_types.Key.from_path(stat_key[0].STORED_KIND_NAME,413 stat_key[1],414 namespace=stat_key[2],415 _app=self.app_id),416 _app=self.app_id)417 stats_dict[stat_key] = stat_model418 for field, value in kwds.iteritems():419 setattr(stat_model, field, value)420 stat_model.count = count421 if size:422 stat_model.entity_bytes = size423 if builtin_index_size:424 stat_model.builtin_index_bytes = builtin_index_size425 stat_model.builtin_index_count = builtin_index_count426 if composite_index_size:427 stat_model.composite_index_bytes = composite_index_size428 stat_model.composite_index_count = composite_index_count429 stat_model.bytes = size + builtin_index_size + composite_index_size430 stat_model.timestamp = self.timestamp431 else:432 stat_model = stats_dict[stat_key]433 stat_model.count += count434 if size:435 stat_model.entity_bytes += size436 if builtin_index_size:437 stat_model.builtin_index_bytes += builtin_index_size438 stat_model.builtin_index_count += builtin_index_count439 if composite_index_size:440 stat_model.composite_index_bytes += composite_index_size441 stat_model.composite_index_count += composite_index_count442 stat_model.bytes += size + builtin_index_size + composite_index_size443 def __Finalize(self):444 """Finishes processing, deletes all old stats and writes new ones."""445 for i 
in range(0, len(self.old_stat_keys), DELETE_BATCH_SIZE):446 datastore.Delete(self.old_stat_keys[i:i+DELETE_BATCH_SIZE])447 self.written = 0448 for stat in self.whole_app_stats.itervalues():449 if stat.count or not (isinstance(stat, stats.GlobalStat) or450 isinstance(stat, stats.NamespaceStat)):451 stat.put()452 self.written += 1453 if self.found_non_empty_namespace:454 for stat in self.namespace_stats.itervalues():455 if stat.count or not isinstance(stat, stats.NamespaceGlobalStat):456 stat.put()457 self.written += 1458 def Run(self):459 """Scans the datastore, computes new stats and writes them."""460 self.__ScanAllNamespaces()461 self.__Finalize()462 return self463 def Report(self):464 """Produce a small report about the result."""465 stat = self.whole_app_stats.get(_GLOBAL_KEY, None)466 entity_size = 0467 entity_count = 0468 builtin_index_size = 0469 builtin_index_count = 0470 composite_index_size = 0471 composite_index_count = 0472 if stat:473 entity_size = stat.entity_bytes474 entity_count = stat.count475 builtin_index_size = stat.builtin_index_bytes476 builtin_index_count = stat.builtin_index_count477 composite_index_size = stat.composite_index_bytes478 composite_index_count = stat.composite_index_count479 if not entity_count:480 entity_count = 1481 return ('Scanned %d entities of total %d bytes, %d index entries of total '482 '%d bytes and %d composite index entries of total %d bytes. '483 'Inserted %d new records.'484 % (entity_count, entity_size, builtin_index_count,485 builtin_index_size, composite_index_count, composite_index_size,...
test_database.py
Source:test_database.py
...10# the test cases for database11class database_test_cases(unittest.TestCase):12 def setUp(self):13 self.db = Database()14 self.sql_count_article = 'SELECT count(article_id) FROM HooliASE.articles'15 self.sql_count_question = 'SELECT count(question_id) FROM HooliASE.questions'16 self.sql_count_history = 'SELECT count(history_id) FROM HooliASE.history'17 self.sql_count_feedback = 'SELECT count(id) FROM HooliASE.answer_feedback'18 self.sql_delete_article = 'DELETE from HooliASE.articles WHERE article_id=%s'19 self.sql_delete_question = 'DELETE from HooliASE.questions WHERE question_id=%s'20 self.sql_delete_history = 'DELETE from HooliASE.history WHERE history_id=%s'21 self.sql_delete_feedback = 'DELETE from HooliASE.answer_feedback WHERE id=%s'22 # add_article(title, content)23 def test_add_article(self):24 # count the original row number25 self.db.mycursor.execute(self.sql_count_article)26 ori_row = count_rows(self.db.mycursor)27 # add a new test record into articles table28 article_id = self.db.add_article('ASE', 'We all love ASE')29 # assert the success of insertion30 self.db.mycursor.execute(self.sql_count_article)31 new_row = count_rows(self.db.mycursor)...
ets_cleaning_nltk.py
Source:ets_cleaning_nltk.py
...84 # Counting85 ZHO_length.append(words)86 ZHO_num_sentences.append(num_sentence)87 ZHO_one_count.append(ones)88 ZHO_the_count.append(determiners.count('the'))89 ZHO_aan_count.append(determiners.count('a') + determiners.count('an'))90 ZHO_this_count.append(determiners.count('this'))91 ZHO_that_count.append(determiners.count('that'))92 ZHO_these_count.append(determiners.count('these'))93 ZHO_those_count.append(determiners.count('those'))94 determiners = [x for x in determiners if x not in det_in_question]95 ZHO_other_det_count.append(len(determiners))96 ZHO_noun_count.append(len(nouns))97 ZHO_verb_count.append(len(verbs))98 ZHO_adj_count.append(len(adjs))99 ZHO_modal_count.append(len(modal))100 ZHO_PRP_count.append(prp)101mandarin_L1['essay_len'] = ZHO_length102mandarin_L1['num_sentence'] = ZHO_num_sentences103mandarin_L1['noun_count'] = ZHO_noun_count104mandarin_L1['verb_count'] = ZHO_verb_count105mandarin_L1['adj_count'] = ZHO_adj_count106mandarin_L1['modal_count'] = ZHO_modal_count107mandarin_L1['noun_per_sentence'] = mandarin_L1['noun_count'] / mandarin_L1['num_sentence']108mandarin_L1['verb_per_sentence'] = mandarin_L1['verb_count'] / mandarin_L1['num_sentence']109mandarin_L1['adj_per_sentence'] = mandarin_L1['adj_count'] / mandarin_L1['num_sentence']110mandarin_L1['modal_per_sentence'] = mandarin_L1['modal_count'] / mandarin_L1['num_sentence']111mandarin_L1['the_count'] = ZHO_the_count112mandarin_L1['aan_count'] = ZHO_aan_count113mandarin_L1['one_count'] = ZHO_one_count114mandarin_L1['this_count'] = ZHO_this_count115mandarin_L1['that_count'] = ZHO_that_count116mandarin_L1['these_count'] = ZHO_these_count117mandarin_L1['those_count'] = ZHO_those_count118mandarin_L1['other_det_count'] = ZHO_other_det_count119mandarin_L1['PRP_count'] = ZHO_PRP_count120mandarin_L1['the_freq'] = mandarin_L1['the_count'] / mandarin_L1['essay_len']121mandarin_L1['aan_freq'] = mandarin_L1['aan_count'] / mandarin_L1['essay_len']122mandarin_L1['one_freq'] = mandarin_L1['one_count'] / 
mandarin_L1['essay_len']123mandarin_L1['this_freq'] = mandarin_L1['this_count'] / mandarin_L1['essay_len']124mandarin_L1['that_freq'] = mandarin_L1['that_count'] / mandarin_L1['essay_len']125mandarin_L1['these_freq'] = mandarin_L1['these_count'] / mandarin_L1['essay_len']126mandarin_L1['those_freq'] = mandarin_L1['those_count'] / mandarin_L1['essay_len']127mandarin_L1['other_det_freq'] = mandarin_L1['other_det_count'] / mandarin_L1['essay_len']128mandarin_L1['PRP_freq'] = mandarin_L1['PRP_count'] / mandarin_L1['essay_len']129'''SPANISH L1'''130SPA_length = []131SPA_num_sentences = []132SPA_the_count = []133SPA_aan_count = []134SPA_one_count = []135SPA_this_count = []136SPA_that_count = []137SPA_these_count = []138SPA_those_count = []139SPA_other_det_count = []140SPA_noun_count = []141SPA_verb_count = []142SPA_modal_count = []143SPA_adj_count = []144SPA_PRP_count = []145for f in SPA_file:146 f_name = SPA_path + f147 temp = open(f_name, 'r')148 num_sentence = 0149 ones = 0150 determiners = []151 nouns = []152 verbs = []153 adjs = []154 modal = []155 words = 0 # number of words156 prp = 0157 for line in temp:158 arr = line.strip().split()159 if not len(arr) == 0:160 line_tokenized = nltk.word_tokenize(line.lower())161 tagged = [list(ele) for ele in nltk.pos_tag(line_tokenized)]162 tagged = [ele for ele in tagged if ele[0] not in puncs] # leave out punctuations163 words += len(tagged)164 one = np.array([x for x in tagged if x[0] == 'one'])165 dt = np.array([x for x in tagged if x[1] == 'DT'])166 n = np.array([x for x in tagged if x[1] in noun_tag])167 v = np.array([x for x in tagged if x[1] in verb_tag])168 a = np.array([x for x in tagged if x[1] in adj_tag])169 m = np.array([x for x in tagged if x[1] == 'MD'])170 ones += len(one)171 prp += find_PRP(tagged)172 if dt.shape[0] != 0:173 determiners += list(dt[:, 0])174 if n.shape[0] != 0:175 nouns += list(n[:, 0])176 if v.shape[0] != 0:177 verbs += list(v[:, 0])178 if a.shape[0] != 0:179 adjs += list(a[:, 0])180 if 
m.shape[0] != 0:181 modal += list(m[:, 0])182 num_sentence += 1183 temp.close()184 # Counting185 SPA_length.append(words)186 SPA_num_sentences.append(num_sentence)187 SPA_one_count.append(ones)188 SPA_the_count.append(determiners.count('the'))189 SPA_aan_count.append(determiners.count('a') + determiners.count('an'))190 SPA_this_count.append(determiners.count('this'))191 SPA_that_count.append(determiners.count('that'))192 SPA_these_count.append(determiners.count('these'))193 SPA_those_count.append(determiners.count('those'))194 determiners = [x for x in determiners if x not in det_in_question]195 SPA_other_det_count.append(len(determiners))196 SPA_noun_count.append(len(nouns))197 SPA_verb_count.append(len(verbs))198 SPA_adj_count.append(len(adjs))199 SPA_modal_count.append(len(modal))200 SPA_PRP_count.append(prp)201spanish_L1['essay_len'] = SPA_length202spanish_L1['num_sentence'] = SPA_num_sentences203spanish_L1['noun_count'] = SPA_noun_count204spanish_L1['verb_count'] = SPA_verb_count205spanish_L1['adj_count'] = SPA_adj_count206spanish_L1['modal_count'] = SPA_modal_count207spanish_L1['noun_per_sentence'] = spanish_L1['noun_count'] / spanish_L1['num_sentence']...
analysis.py
Source:analysis.py
...34 prov = (each.split())[0]35 data.append(prov)36 count={}37 for i in data:38 count[i]=data.count(i)39 # print(count)40 prov=list(count.keys())41 nums=list(count.values())42 return prov,nums43def prov_plt():44 prov, nums = provience()45 plt.figure(figsize=(8, 4))46 plt.xticks(rotation=0)47 plt.bar(prov, nums, color='g')48 plt.xlabel('ç份')49 plt.ylabel('æ°é')50 plt.title('ä¸åç份æ°éåå¸å¾')51 plt.legend()52 plt.show()53# è¯äºåæ ¹æ®è¯äºæ°æ®è¿è¡åæçæ±ç¶å¾54def cloud_plt():55 def cloud_data():56 title=data_analysis('title')57 titles=[]58 # 对æ¯ä¸ªæ é¢è¿è¡åè¯59 for each in title:60 title_cut=jieba.lcut(each)61 titles.append(title_cut)62 # åé¤ä¸éè¦çè¯è¯63 title_del=[]64 for line in titles:65 line_del=[]66 for word in line:67 if word not in ['2018','å¦å¦','â¤','ã','ã',' ','Chinism','å·¥ä½å®¤','å强']:68 line_del.append(word)69 title_del.append(line_del)70 # print(title_del)71 # å
ç´ å»é,æ¯ä¸ªæ é¢ä¸ä¸å«éå¤å
ç´ 72 title_clean=[]73 for each in title_del:74 line_dist=[]75 for word in each:76 if word not in line_dist:77 line_dist.append(word)78 title_clean.append(line_dist)79 # å°ææè¯è¯è½¬ä¸ºä¸ä¸ªlist80 allwords_dist=[]81 for line in title_clean:82 for word in line:83 allwords_dist.append(word)84 # æå表转为æ°æ®æ¡85 allwords_dist=pandas.DataFrame({'allwords':allwords_dist})86 # 对è¯è¯è¿è¡åç±»æ±æ»87 word_count=allwords_dist.allwords.value_counts().reset_index()88 # æ·»å åå89 word_count.columns=['word','count']90 # print(allwords_dist)91 return word_count,title_clean92 def cloud_data_count():93 # è·åååééæ°æ®94 sell_count = data_analysis('sell_count')95 word_count, title_clean = cloud_data()96 ws_count = []97 # ååä¸å
å«ç»è®¡çè¯æ¶ï¼å°å
¶ééå å
¥list98 for each in word_count.word:99 i = 0100 s_list = []101 for t in title_clean:102 if each in t:103 s_list.append(int(sell_count[i]))104 # print(s_list)105 i += 1106 # ç»è®¡ä¸ä¸ªå
³é®è¯æå
å«ååçééæ»æ°107 ws_count.append(sum(s_list))108 # æå表转为æ°æ®æ¡109 ws_count = pandas.DataFrame({'ws_count': ws_count})110 # æword_count, ws_countå并为ä¸ä¸ªè¡¨111 word_count = pandas.concat([word_count, ws_count], axis=1, ignore_index=True)112 word_count.columns = ['word', 'count', 'ws_count']113 # ååºæå114 word_count.sort_values('ws_count', inplace=True, ascending=True)115 # åæ大30è¡æ°æ®116 df_ws = word_count.tail(30)117 return df_ws118 # å¾äºé¨å119 word_count=cloud_data()[0]120 # 设置åä½ï¼èæ¯é¢è²ï¼åä½æ大å·ï¼121 w_c=WordCloud(font_path='/usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',122 background_color='white',123 max_font_size=60,124 margin=1)125 # åå400个è¯è¿è¡å¯è§å126 wc=w_c.fit_words({x[0]:x[1] for x in word_count.head(1000).values})127 # 设置å¾ä¼å128 plt.imshow(wc,interpolation='bilinear')129 # å»é¤è¾¹æ¡130 plt.axis('off')131 plt.show()132 # ç»è®¡åææ±ç¶å¾é¨å133 data = cloud_data_count()134 index = np.arange(data.word.size)135 # plt.figure(figsize=(6,12))136 plt.barh(index, data.ws_count, align='center', alpha=0.8)137 plt.yticks(index, data.word)138 # æ·»å æ°æ®æ ç¾139 for y, x in zip(index, data.ws_count):140 plt.text(x, y, '%.0f' % x, ha='left', va='center')141 plt.show()142def impact_analysis():143 sell_count=pandas.DataFrame({'sell_count': data_analysis('sell_count')})144 price=[]145 for i in data_analysis('price'):146 p=i.split('-')147 p_i=p[0].split('.')...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!