Best Python code snippet using playwright-python
datastore_stats_generator.py
Source:datastore_stats_generator.py
1#!/usr/bin/env python2#3# Copyright 2007 Google Inc.4#5# Licensed under the Apache License, Version 2.0 (the "License");6# you may not use this file except in compliance with the License.7# You may obtain a copy of the License at8#9# http://www.apache.org/licenses/LICENSE-2.010#11# Unless required by applicable law or agreed to in writing, software12# distributed under the License is distributed on an "AS IS" BASIS,13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14# See the License for the specific language governing permissions and15# limitations under the License.16#17"""Generate Datastore Stats over Dev mode appserver's datastore."""18import datetime19import logging20from google.appengine.api import datastore21from google.appengine.api import datastore_admin22from google.appengine.api import datastore_types23from google.appengine.api import users24from google.appengine.ext.db import stats25DELETE_BATCH_SIZE = 10026_GLOBAL_KEY = (stats.GlobalStat, 'total_entity_usage', '')27_PROPERTY_TYPE_TO_DSS_NAME = {28 unicode: ('String', 'STRING'),29 bool: ('Boolean', 'BOOLEAN'),30 long: ('Integer', 'INT64'),31 type(None): ('NULL', 'NULL'),32 float: ('Float', 'DOUBLE'),33 datastore_types.Key: ('Key', 'REFERENCE'),34 datastore_types.Blob: ('Blob', 'STRING'),35 datastore_types.EmbeddedEntity: ('EmbeddedEntity', 'STRING'),36 datastore_types.ByteString: ('ShortBlob', 'STRING'),37 datastore_types.Text: ('Text', 'STRING'),38 users.User: ('User', 'USER'),39 datastore_types.Category: ('Category', 'STRING'),40 datastore_types.Link: ('Link', 'STRING'),41 datastore_types.Email: ('Email', 'STRING'),42 datetime.datetime: ('Date/Time', 'INT64'),43 datastore_types.GeoPt: ('GeoPt', 'POINT'),44 datastore_types.IM: ('IM', 'STRING'),45 datastore_types.PhoneNumber: ('PhoneNumber', 'STRING'),46 datastore_types.PostalAddress: ('PostalAddress', 'STRING'),47 datastore_types.Rating: ('Rating', 'INT64'),48 datastore_types.BlobKey: ('BlobKey', 'STRING'),49 }50class 
DatastoreStatsProcessor(object):51 """Generates datastore stats for an app's an datastore entities."""52 def __init__(self, _app=None):53 self.app_id = datastore_types.ResolveAppId(_app)54 self.whole_app_stats = {}55 self.namespace_stats = {}56 self.found_non_empty_namespace = False57 self.old_stat_keys = []58 self.timestamp = datetime.datetime.utcnow()59 def __ScanAllNamespaces(self):60 """Scans all the namespaces and processes each namespace."""61 namespace_query = datastore.Query('__namespace__', _app=self.app_id)62 for namespace_entity in namespace_query.Run():63 name = namespace_entity.key().name()64 if name is None:65 name = ''66 self.__ProcessNamespace(name)67 def __ProcessNamespace(self, namespace):68 """Process all the entities in a given namespace."""69 all_query = datastore.Query(namespace=namespace, _app=self.app_id)70 for entity in all_query.Run():71 self.found_non_empty_namespace |= (namespace != '')72 proto = entity.ToPb()73 proto_size = len(proto.SerializeToString())74 if entity.key().kind() in stats._DATASTORE_STATS_CLASSES_BY_KIND:75 stat_kind = stats._DATASTORE_STATS_CLASSES_BY_KIND[entity.key().kind()]76 self.old_stat_keys.append(entity.key())77 self.__AggregateTotal(proto_size, entity.key(), proto, namespace,78 stat_kind)79 else:80 self.__ProcessUserEntity(proto_size, entity.key(), proto, namespace)81 def __GetPropertyIndexStat(self, namespace, kind_name,82 entity_key_size, prop):83 """Return the size and count of indexes for a property of an EntityProto."""84 property_index_size = (len(self.app_id) + len(kind_name) +85 len(prop.value().SerializeToString()) +86 len(namespace) + entity_key_size)87 return (property_index_size, 2)88 def __GetTypeIndexStat(self, namespace, kind_name, entity_key_size):89 """Return the size and count of indexes by type of an EntityProto."""90 type_index_size = (len(self.app_id) + len(kind_name) + entity_key_size91 + len(namespace))92 return (type_index_size, 1)93 def __ProcessUserEntity(self, proto_size, key, proto, 
namespace):94 """Increment datastore stats for a non stats record."""95 self.__AggregateTotal(proto_size, key, proto, namespace, None)96 kind_name = key.kind()97 entity_key_size = (len(proto.key().app()) + len(namespace) +98 len(proto.key().path().SerializeToString()) +99 len(proto.entity_group().SerializeToString()))100 self.__AggregateCompositeIndices(proto, namespace, kind_name,101 entity_key_size)102 type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,103 kind_name,104 entity_key_size)105 property_index_count = 0106 property_index_size = 0107 for prop_list in (proto.property_list(), proto.raw_property_list()):108 for prop in prop_list:109 index_size, index_count = self.__GetPropertyIndexStat(namespace,110 kind_name,111 entity_key_size,112 prop)113 property_index_size += index_size114 property_index_count += index_count115 builtin_index_size = type_index_size + property_index_size116 builtin_index_count = type_index_count + property_index_count117 self.__Increment(self.whole_app_stats, 1,118 (stats.KindStat, kind_name, ''),119 proto_size,120 builtin_index_count=builtin_index_count,121 builtin_index_size=builtin_index_size,122 kind_name=kind_name)123 self.__Increment(self.namespace_stats, 1,124 (stats.NamespaceKindStat, kind_name, namespace),125 proto_size,126 builtin_index_count=builtin_index_count,127 builtin_index_size=builtin_index_size,128 kind_name=kind_name)129 if key.parent() is None:130 whole_app_model = stats.KindRootEntityStat131 namespace_model = stats.NamespaceKindRootEntityStat132 else:133 whole_app_model = stats.KindNonRootEntityStat134 namespace_model = stats.NamespaceKindNonRootEntityStat135 self.__Increment(self.whole_app_stats, 1,136 (whole_app_model, kind_name, ''),137 proto_size,138 kind_name=kind_name)139 self.__Increment(self.namespace_stats, 1,140 (namespace_model, kind_name, namespace),141 proto_size,142 kind_name=kind_name)143 self.__ProcessProperties(144 kind_name,145 namespace,146 entity_key_size,147 
(proto.property_list(), proto.raw_property_list()))148 def __ProcessProperties(self, kind_name, namespace, entity_key_size,149 prop_lists):150 for prop_list in prop_lists:151 for prop in prop_list:152 try:153 value = datastore_types.FromPropertyPb(prop)154 self.__AggregateProperty(kind_name, namespace, entity_key_size,155 prop, value)156 except (AssertionError, AttributeError, TypeError, ValueError), e:157 logging.error('Cannot process property %r, exception %s' %158 (prop, e))159 def __AggregateProperty(self, kind_name, namespace, entity_key_size,160 prop, value):161 property_name = prop.name()162 property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][0]163 index_property_type = _PROPERTY_TYPE_TO_DSS_NAME[type(value)][1]164 size = len(prop.SerializeToString())165 index_size, index_count = self.__GetPropertyIndexStat(namespace, kind_name,166 entity_key_size, prop)167 self.__Increment(self.whole_app_stats, 1,168 (stats.PropertyTypeStat, property_type, ''),169 size,170 builtin_index_count=0,171 builtin_index_size=0,172 property_type=property_type)173 self.__Increment(self.whole_app_stats, 0,174 (stats.PropertyTypeStat, index_property_type, ''),175 0,176 builtin_index_count=index_count,177 builtin_index_size=index_size,178 property_type=index_property_type)179 self.__Increment(self.namespace_stats, 1,180 (stats.NamespacePropertyTypeStat,181 property_type, namespace),182 size,183 builtin_index_count=0,184 builtin_index_size=0,185 property_type=property_type)186 self.__Increment(self.namespace_stats, 0,187 (stats.NamespacePropertyTypeStat,188 index_property_type, namespace),189 0,190 builtin_index_count=index_count,191 builtin_index_size=index_size,192 property_type=index_property_type)193 self.__Increment(self.whole_app_stats, 1,194 (stats.KindPropertyTypeStat,195 property_type + '_' + kind_name, ''),196 size,197 builtin_index_count=0,198 builtin_index_size=0,199 property_type=property_type, kind_name=kind_name)200 self.__Increment(self.whole_app_stats, 0,201 
(stats.KindPropertyTypeStat,202 index_property_type + '_' + kind_name, ''),203 0,204 builtin_index_count=index_count,205 builtin_index_size=index_size,206 property_type=index_property_type, kind_name=kind_name)207 self.__Increment(self.namespace_stats, 1,208 (stats.NamespaceKindPropertyTypeStat,209 property_type + '_' + kind_name, namespace),210 size,211 builtin_index_count=0,212 builtin_index_size=0,213 property_type=property_type, kind_name=kind_name)214 self.__Increment(self.namespace_stats, 0,215 (stats.NamespaceKindPropertyTypeStat,216 index_property_type + '_' + kind_name, namespace),217 0,218 builtin_index_count=index_count,219 builtin_index_size=index_size,220 property_type=index_property_type, kind_name=kind_name)221 self.__Increment(self.whole_app_stats, 1,222 (stats.KindPropertyNameStat,223 property_name + '_' + kind_name, ''),224 size,225 builtin_index_count=index_count,226 builtin_index_size=index_size,227 property_name=property_name, kind_name=kind_name)228 self.__Increment(self.namespace_stats, 1,229 (stats.NamespaceKindPropertyNameStat,230 property_name + '_' + kind_name, namespace),231 size,232 builtin_index_count=index_count,233 builtin_index_size=index_size,234 property_name=property_name, kind_name=kind_name)235 self.__Increment(self.whole_app_stats, 1,236 (stats.KindPropertyNamePropertyTypeStat,237 property_type + '_' + property_name + '_' + kind_name,238 ''), size,239 builtin_index_count=0,240 builtin_index_size=0,241 property_type=property_type,242 property_name=property_name, kind_name=kind_name)243 self.__Increment(self.whole_app_stats, 0,244 (stats.KindPropertyNamePropertyTypeStat,245 index_property_type + '_' + property_name + '_' +246 kind_name,247 ''), 0,248 builtin_index_count=index_count,249 builtin_index_size=index_size,250 property_type=index_property_type,251 property_name=property_name, kind_name=kind_name)252 self.__Increment(self.namespace_stats, 1,253 (stats.NamespaceKindPropertyNamePropertyTypeStat,254 property_type + '_' + 
property_name + '_' + kind_name,255 namespace),256 size,257 builtin_index_count=0,258 builtin_index_size=0,259 property_type=property_type,260 property_name=property_name, kind_name=kind_name)261 self.__Increment(self.namespace_stats, 0,262 (stats.NamespaceKindPropertyNamePropertyTypeStat,263 index_property_type + '_' + property_name + '_' +264 kind_name,265 namespace),266 0,267 builtin_index_count=index_count,268 builtin_index_size=index_size,269 property_type=index_property_type,270 property_name=property_name, kind_name=kind_name)271 def __GetCompositeIndexStat(self, definition, proto, namespace, kind_name,272 entity_key_size):273 """Get statistics of composite index for a index definition of an entity."""274 property_list = proto.property_list()275 property_count = []276 property_size = []277 index_count = 1278 for indexed_prop in definition.property_list():279 name = indexed_prop.name()280 count = 0281 prop_size = 0282 for prop in property_list:283 if prop.name() == name:284 count += 1285 prop_size += len(prop.SerializeToString())286 property_count.append(count)287 property_size.append(prop_size)288 index_count *= count289 if index_count == 0:290 return (0, 0)291 index_only_size = 0292 for i in range(len(property_size)):293 index_only_size += property_size[i] * (index_count / property_count[i])294 index_size = (index_count * (entity_key_size + len(kind_name) +295 len(self.app_id) + len(namespace)) +296 index_only_size * 2)297 return (index_size, index_count)298 def __AggregateCompositeIndices(self, proto, namespace, kind_name,299 entity_key_size):300 """Aggregate statistics of composite indexes for an entity."""301 composite_indices = datastore_admin.GetIndices(self.app_id)302 for index in composite_indices:303 definition = index.definition()304 if kind_name != definition.entity_type():305 continue306 index_size, index_count = self.__GetCompositeIndexStat(definition, proto,307 namespace,308 kind_name,309 entity_key_size)310 if index_count == 0:311 continue312 
name_id = namespace313 if not name_id:314 name_id = 1315 self.__Increment(self.whole_app_stats, 0, _GLOBAL_KEY, 0,316 composite_index_count=index_count,317 composite_index_size=index_size)318 self.__Increment(self.whole_app_stats, 0,319 (stats.NamespaceStat, name_id, ''), 0,320 composite_index_count=index_count,321 composite_index_size=index_size,322 subject_namespace=namespace)323 self.__Increment(self.namespace_stats, 0,324 (stats.NamespaceGlobalStat, 'total_entity_usage',325 namespace), 0,326 composite_index_count=index_count,327 composite_index_size=index_size)328 self.__Increment(self.whole_app_stats, 0,329 (stats.KindStat, kind_name, ''), 0,330 composite_index_count=index_count,331 composite_index_size=index_size,332 kind_name=kind_name)333 self.__Increment(self.namespace_stats, 0,334 (stats.NamespaceKindStat, kind_name, namespace), 0,335 composite_index_count=index_count,336 composite_index_size=index_size,337 kind_name=kind_name)338 index_id = index.id()339 self.__Increment(self.whole_app_stats, index_count,340 (stats.KindCompositeIndexStat,341 kind_name + '_%s' % index_id, ''), index_size,342 kind_name=kind_name, index_id=index_id)343 self.__Increment(self.namespace_stats, index_count,344 (stats.NamespaceKindCompositeIndexStat,345 kind_name + '_%s' % index_id, namespace), index_size,346 kind_name=kind_name, index_id=index_id)347 def __AggregateTotal(self, size, key, proto, namespace, stat_kind):348 """Aggregate total datastore stats."""349 kind_name = key.kind()350 entity_key_size = (len(proto.key().app()) +351 len(proto.key().path().SerializeToString()) +352 len(proto.entity_group().SerializeToString()))353 type_index_size, type_index_count = self.__GetTypeIndexStat(namespace,354 kind_name,355 entity_key_size)356 property_index_count = 0357 property_index_size = 0358 for prop_list in (proto.property_list(), proto.raw_property_list()):359 for prop in prop_list:360 index_size, index_count = self.__GetPropertyIndexStat(namespace,361 kind_name,362 
entity_key_size,363 prop)364 property_index_size += index_size365 property_index_count += index_count366 builtin_index_size = type_index_size + property_index_size367 builtin_index_count = type_index_count + property_index_count368 if stat_kind == stats.GlobalStat:369 count = 0370 else:371 count = 1372 self.__Increment(self.whole_app_stats, count, _GLOBAL_KEY, size,373 builtin_index_count=builtin_index_count,374 builtin_index_size=builtin_index_size)375 name_id = namespace376 if not name_id:377 name_id = 1378 if (stat_kind == stats.NamespaceStat) and (namespace == ''):379 count = 0380 self.__Increment(self.whole_app_stats, count,381 (stats.NamespaceStat, name_id, ''),382 size,383 builtin_index_count=builtin_index_count,384 builtin_index_size=builtin_index_size,385 subject_namespace=namespace)386 if stat_kind == stats.NamespaceGlobalStat:387 count = 0388 self.__Increment(389 self.namespace_stats, count,390 (stats.NamespaceGlobalStat, 'total_entity_usage', namespace), size,391 builtin_index_count=builtin_index_count,392 builtin_index_size=builtin_index_size)393 def __Increment(self, stats_dict, count, stat_key, size,394 builtin_index_count=0, builtin_index_size=0,395 composite_index_count=0, composite_index_size=0, **kwds):396 """Increment stats for a particular kind.397 Args:398 stats_dict: The dictionary where the entities are held.399 The entities are keyed by stat_key. e.g. 
The400 __Stat_Total__ entity will be found in stats_dict[_GLOBAL_KEY].401 count: The amount to increment the datastore stat by.402 stat_key: A tuple of (db.Model of the stat, key value, namespace).403 size: The "bytes" to increment the size by.404 builtin_index_count: The bytes of builtin index to add in to a stat.405 builtin_index_size: The count of builtin index to add in to a stat.406 composite_index_count: The bytes of composite index to add in to a stat.407 composite_index_size: The count of composite index to add in to a stat.408 kwds: Name value pairs that are set on the created entities.409 """410 if stat_key not in stats_dict:411 stat_model = stat_key[0](412 key=datastore_types.Key.from_path(stat_key[0].STORED_KIND_NAME,413 stat_key[1],414 namespace=stat_key[2],415 _app=self.app_id),416 _app=self.app_id)417 stats_dict[stat_key] = stat_model418 for field, value in kwds.iteritems():419 setattr(stat_model, field, value)420 stat_model.count = count421 if size:422 stat_model.entity_bytes = size423 if builtin_index_size:424 stat_model.builtin_index_bytes = builtin_index_size425 stat_model.builtin_index_count = builtin_index_count426 if composite_index_size:427 stat_model.composite_index_bytes = composite_index_size428 stat_model.composite_index_count = composite_index_count429 stat_model.bytes = size + builtin_index_size + composite_index_size430 stat_model.timestamp = self.timestamp431 else:432 stat_model = stats_dict[stat_key]433 stat_model.count += count434 if size:435 stat_model.entity_bytes += size436 if builtin_index_size:437 stat_model.builtin_index_bytes += builtin_index_size438 stat_model.builtin_index_count += builtin_index_count439 if composite_index_size:440 stat_model.composite_index_bytes += composite_index_size441 stat_model.composite_index_count += composite_index_count442 stat_model.bytes += size + builtin_index_size + composite_index_size443 def __Finalize(self):444 """Finishes processing, deletes all old stats and writes new ones."""445 for i 
in range(0, len(self.old_stat_keys), DELETE_BATCH_SIZE):446 datastore.Delete(self.old_stat_keys[i:i+DELETE_BATCH_SIZE])447 self.written = 0448 for stat in self.whole_app_stats.itervalues():449 if stat.count or not (isinstance(stat, stats.GlobalStat) or450 isinstance(stat, stats.NamespaceStat)):451 stat.put()452 self.written += 1453 if self.found_non_empty_namespace:454 for stat in self.namespace_stats.itervalues():455 if stat.count or not isinstance(stat, stats.NamespaceGlobalStat):456 stat.put()457 self.written += 1458 def Run(self):459 """Scans the datastore, computes new stats and writes them."""460 self.__ScanAllNamespaces()461 self.__Finalize()462 return self463 def Report(self):464 """Produce a small report about the result."""465 stat = self.whole_app_stats.get(_GLOBAL_KEY, None)466 entity_size = 0467 entity_count = 0468 builtin_index_size = 0469 builtin_index_count = 0470 composite_index_size = 0471 composite_index_count = 0472 if stat:473 entity_size = stat.entity_bytes474 entity_count = stat.count475 builtin_index_size = stat.builtin_index_bytes476 builtin_index_count = stat.builtin_index_count477 composite_index_size = stat.composite_index_bytes478 composite_index_count = stat.composite_index_count479 if not entity_count:480 entity_count = 1481 return ('Scanned %d entities of total %d bytes, %d index entries of total '482 '%d bytes and %d composite index entries of total %d bytes. '483 'Inserted %d new records.'484 % (entity_count, entity_size, builtin_index_count,485 builtin_index_size, composite_index_count, composite_index_size,...
test_database.py
Source:test_database.py
...10# the test cases for database11class database_test_cases(unittest.TestCase):12 def setUp(self):13 self.db = Database()14 self.sql_count_article = 'SELECT count(article_id) FROM HooliASE.articles'15 self.sql_count_question = 'SELECT count(question_id) FROM HooliASE.questions'16 self.sql_count_history = 'SELECT count(history_id) FROM HooliASE.history'17 self.sql_count_feedback = 'SELECT count(id) FROM HooliASE.answer_feedback'18 self.sql_delete_article = 'DELETE from HooliASE.articles WHERE article_id=%s'19 self.sql_delete_question = 'DELETE from HooliASE.questions WHERE question_id=%s'20 self.sql_delete_history = 'DELETE from HooliASE.history WHERE history_id=%s'21 self.sql_delete_feedback = 'DELETE from HooliASE.answer_feedback WHERE id=%s'22 # add_article(title, content)23 def test_add_article(self):24 # count the original row number25 self.db.mycursor.execute(self.sql_count_article)26 ori_row = count_rows(self.db.mycursor)27 # add a new test record into articles table28 article_id = self.db.add_article('ASE', 'We all love ASE')29 # assert the success of insertion30 self.db.mycursor.execute(self.sql_count_article)31 new_row = count_rows(self.db.mycursor)...
ets_cleaning_nltk.py
Source:ets_cleaning_nltk.py
...84 # Counting85 ZHO_length.append(words)86 ZHO_num_sentences.append(num_sentence)87 ZHO_one_count.append(ones)88 ZHO_the_count.append(determiners.count('the'))89 ZHO_aan_count.append(determiners.count('a') + determiners.count('an'))90 ZHO_this_count.append(determiners.count('this'))91 ZHO_that_count.append(determiners.count('that'))92 ZHO_these_count.append(determiners.count('these'))93 ZHO_those_count.append(determiners.count('those'))94 determiners = [x for x in determiners if x not in det_in_question]95 ZHO_other_det_count.append(len(determiners))96 ZHO_noun_count.append(len(nouns))97 ZHO_verb_count.append(len(verbs))98 ZHO_adj_count.append(len(adjs))99 ZHO_modal_count.append(len(modal))100 ZHO_PRP_count.append(prp)101mandarin_L1['essay_len'] = ZHO_length102mandarin_L1['num_sentence'] = ZHO_num_sentences103mandarin_L1['noun_count'] = ZHO_noun_count104mandarin_L1['verb_count'] = ZHO_verb_count105mandarin_L1['adj_count'] = ZHO_adj_count106mandarin_L1['modal_count'] = ZHO_modal_count107mandarin_L1['noun_per_sentence'] = mandarin_L1['noun_count'] / mandarin_L1['num_sentence']108mandarin_L1['verb_per_sentence'] = mandarin_L1['verb_count'] / mandarin_L1['num_sentence']109mandarin_L1['adj_per_sentence'] = mandarin_L1['adj_count'] / mandarin_L1['num_sentence']110mandarin_L1['modal_per_sentence'] = mandarin_L1['modal_count'] / mandarin_L1['num_sentence']111mandarin_L1['the_count'] = ZHO_the_count112mandarin_L1['aan_count'] = ZHO_aan_count113mandarin_L1['one_count'] = ZHO_one_count114mandarin_L1['this_count'] = ZHO_this_count115mandarin_L1['that_count'] = ZHO_that_count116mandarin_L1['these_count'] = ZHO_these_count117mandarin_L1['those_count'] = ZHO_those_count118mandarin_L1['other_det_count'] = ZHO_other_det_count119mandarin_L1['PRP_count'] = ZHO_PRP_count120mandarin_L1['the_freq'] = mandarin_L1['the_count'] / mandarin_L1['essay_len']121mandarin_L1['aan_freq'] = mandarin_L1['aan_count'] / mandarin_L1['essay_len']122mandarin_L1['one_freq'] = mandarin_L1['one_count'] / 
mandarin_L1['essay_len']123mandarin_L1['this_freq'] = mandarin_L1['this_count'] / mandarin_L1['essay_len']124mandarin_L1['that_freq'] = mandarin_L1['that_count'] / mandarin_L1['essay_len']125mandarin_L1['these_freq'] = mandarin_L1['these_count'] / mandarin_L1['essay_len']126mandarin_L1['those_freq'] = mandarin_L1['those_count'] / mandarin_L1['essay_len']127mandarin_L1['other_det_freq'] = mandarin_L1['other_det_count'] / mandarin_L1['essay_len']128mandarin_L1['PRP_freq'] = mandarin_L1['PRP_count'] / mandarin_L1['essay_len']129'''SPANISH L1'''130SPA_length = []131SPA_num_sentences = []132SPA_the_count = []133SPA_aan_count = []134SPA_one_count = []135SPA_this_count = []136SPA_that_count = []137SPA_these_count = []138SPA_those_count = []139SPA_other_det_count = []140SPA_noun_count = []141SPA_verb_count = []142SPA_modal_count = []143SPA_adj_count = []144SPA_PRP_count = []145for f in SPA_file:146 f_name = SPA_path + f147 temp = open(f_name, 'r')148 num_sentence = 0149 ones = 0150 determiners = []151 nouns = []152 verbs = []153 adjs = []154 modal = []155 words = 0 # number of words156 prp = 0157 for line in temp:158 arr = line.strip().split()159 if not len(arr) == 0:160 line_tokenized = nltk.word_tokenize(line.lower())161 tagged = [list(ele) for ele in nltk.pos_tag(line_tokenized)]162 tagged = [ele for ele in tagged if ele[0] not in puncs] # leave out punctuations163 words += len(tagged)164 one = np.array([x for x in tagged if x[0] == 'one'])165 dt = np.array([x for x in tagged if x[1] == 'DT'])166 n = np.array([x for x in tagged if x[1] in noun_tag])167 v = np.array([x for x in tagged if x[1] in verb_tag])168 a = np.array([x for x in tagged if x[1] in adj_tag])169 m = np.array([x for x in tagged if x[1] == 'MD'])170 ones += len(one)171 prp += find_PRP(tagged)172 if dt.shape[0] != 0:173 determiners += list(dt[:, 0])174 if n.shape[0] != 0:175 nouns += list(n[:, 0])176 if v.shape[0] != 0:177 verbs += list(v[:, 0])178 if a.shape[0] != 0:179 adjs += list(a[:, 0])180 if 
m.shape[0] != 0:181 modal += list(m[:, 0])182 num_sentence += 1183 temp.close()184 # Counting185 SPA_length.append(words)186 SPA_num_sentences.append(num_sentence)187 SPA_one_count.append(ones)188 SPA_the_count.append(determiners.count('the'))189 SPA_aan_count.append(determiners.count('a') + determiners.count('an'))190 SPA_this_count.append(determiners.count('this'))191 SPA_that_count.append(determiners.count('that'))192 SPA_these_count.append(determiners.count('these'))193 SPA_those_count.append(determiners.count('those'))194 determiners = [x for x in determiners if x not in det_in_question]195 SPA_other_det_count.append(len(determiners))196 SPA_noun_count.append(len(nouns))197 SPA_verb_count.append(len(verbs))198 SPA_adj_count.append(len(adjs))199 SPA_modal_count.append(len(modal))200 SPA_PRP_count.append(prp)201spanish_L1['essay_len'] = SPA_length202spanish_L1['num_sentence'] = SPA_num_sentences203spanish_L1['noun_count'] = SPA_noun_count204spanish_L1['verb_count'] = SPA_verb_count205spanish_L1['adj_count'] = SPA_adj_count206spanish_L1['modal_count'] = SPA_modal_count207spanish_L1['noun_per_sentence'] = spanish_L1['noun_count'] / spanish_L1['num_sentence']...
analysis.py
Source:analysis.py
...34 prov = (each.split())[0]35 data.append(prov)36 count={}37 for i in data:38 count[i]=data.count(i)39 # print(count)40 prov=list(count.keys())41 nums=list(count.values())42 return prov,nums43def prov_plt():44 prov, nums = provience()45 plt.figure(figsize=(8, 4))46 plt.xticks(rotation=0)47 plt.bar(prov, nums, color='g')48 plt.xlabel('ç份')49 plt.ylabel('æ°é')50 plt.title('ä¸åç份æ°éåå¸å¾')51 plt.legend()52 plt.show()53# è¯äºåæ ¹æ®è¯äºæ°æ®è¿è¡åæçæ±ç¶å¾54def cloud_plt():55 def cloud_data():56 title=data_analysis('title')57 titles=[]58 # 对æ¯ä¸ªæ é¢è¿è¡åè¯59 for each in title:60 title_cut=jieba.lcut(each)61 titles.append(title_cut)62 # åé¤ä¸éè¦çè¯è¯63 title_del=[]64 for line in titles:65 line_del=[]66 for word in line:67 if word not in ['2018','å¦å¦','â¤','ã','ã',' ','Chinism','å·¥ä½å®¤','å强']:68 line_del.append(word)69 title_del.append(line_del)70 # print(title_del)71 # å
ç´ å»é,æ¯ä¸ªæ é¢ä¸ä¸å«éå¤å
ç´ 72 title_clean=[]73 for each in title_del:74 line_dist=[]75 for word in each:76 if word not in line_dist:77 line_dist.append(word)78 title_clean.append(line_dist)79 # å°ææè¯è¯è½¬ä¸ºä¸ä¸ªlist80 allwords_dist=[]81 for line in title_clean:82 for word in line:83 allwords_dist.append(word)84 # æå表转为æ°æ®æ¡85 allwords_dist=pandas.DataFrame({'allwords':allwords_dist})86 # 对è¯è¯è¿è¡åç±»æ±æ»87 word_count=allwords_dist.allwords.value_counts().reset_index()88 # æ·»å åå89 word_count.columns=['word','count']90 # print(allwords_dist)91 return word_count,title_clean92 def cloud_data_count():93 # è·åååééæ°æ®94 sell_count = data_analysis('sell_count')95 word_count, title_clean = cloud_data()96 ws_count = []97 # ååä¸å
å«ç»è®¡çè¯æ¶ï¼å°å
¶ééå å
¥list98 for each in word_count.word:99 i = 0100 s_list = []101 for t in title_clean:102 if each in t:103 s_list.append(int(sell_count[i]))104 # print(s_list)105 i += 1106 # ç»è®¡ä¸ä¸ªå
³é®è¯æå
å«ååçééæ»æ°107 ws_count.append(sum(s_list))108 # æå表转为æ°æ®æ¡109 ws_count = pandas.DataFrame({'ws_count': ws_count})110 # æword_count, ws_countå并为ä¸ä¸ªè¡¨111 word_count = pandas.concat([word_count, ws_count], axis=1, ignore_index=True)112 word_count.columns = ['word', 'count', 'ws_count']113 # ååºæå114 word_count.sort_values('ws_count', inplace=True, ascending=True)115 # åæ大30è¡æ°æ®116 df_ws = word_count.tail(30)117 return df_ws118 # å¾äºé¨å119 word_count=cloud_data()[0]120 # 设置åä½ï¼èæ¯é¢è²ï¼åä½æ大å·ï¼121 w_c=WordCloud(font_path='/usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',122 background_color='white',123 max_font_size=60,124 margin=1)125 # åå400个è¯è¿è¡å¯è§å126 wc=w_c.fit_words({x[0]:x[1] for x in word_count.head(1000).values})127 # 设置å¾ä¼å128 plt.imshow(wc,interpolation='bilinear')129 # å»é¤è¾¹æ¡130 plt.axis('off')131 plt.show()132 # ç»è®¡åææ±ç¶å¾é¨å133 data = cloud_data_count()134 index = np.arange(data.word.size)135 # plt.figure(figsize=(6,12))136 plt.barh(index, data.ws_count, align='center', alpha=0.8)137 plt.yticks(index, data.word)138 # æ·»å æ°æ®æ ç¾139 for y, x in zip(index, data.ws_count):140 plt.text(x, y, '%.0f' % x, ha='left', va='center')141 plt.show()142def impact_analysis():143 sell_count=pandas.DataFrame({'sell_count': data_analysis('sell_count')})144 price=[]145 for i in data_analysis('price'):146 p=i.split('-')147 p_i=p[0].split('.')...
LambdaTest’s Playwright tutorial gives you a broader view of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. The tutorial provides A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing FREE!!