Best Python code snippet using lisa_python
domain_classify.py
Source:domain_classify.py
# -*- coding: utf-8 -*-

import os
import re
import scws
import sys
import csv
import opencc
from global_utils_do import *

sys.path.append('../../trans')
from trans import trans, traditional2simplified

# Traditional -> simplified conversion is currently disabled:
# cc = opencc.OpenCC('t2s', opencc_path='/usr/bin/opencc')
s = load_scws()


def classify_by_biostring(bio_string):
    """Classify a user by counting domain keywords found in *bio_string*.

    Each domain has a keyword list (``businessw``, ``adminw``, ``mediaw``,
    ``lawyerw``, ``govw``, ``mediaworkerw``, ``universityw`` -- presumably
    supplied by the star import from ``global_utils_do``).  A domain's
    weight is the number of its keywords that occur as substrings of the
    bio string.

    Args:
        bio_string: UTF-8 encoded byte string (Python 2 ``str``), typically
            ``username + '_' + description``.

    Returns:
        One of ``'business'``, ``'admin'``, ``'media'``, ``'lawyer'``,
        ``'politician'``, ``'mediaworker'``, ``'university'``, or
        ``'other'`` when no keyword matches.
    """
    # Round-trip through unicode (this also rejects invalid UTF-8 input).
    # Word segmentation via cut(s, ...) is disabled, so matching is a plain
    # substring test against the encoded bytes.
    bio_string_s = bio_string.decode('utf-8')
    haystack = bio_string_s.encode('utf-8')

    # Order matters: a later domain replaces the current winner only when
    # its weight is strictly greater, so ties go to the earlier entry.
    domains = (
        ('business', businessw),          # business people
        ('admin', adminw),                # organizations
        ('media', mediaw),                # media outlets
        ('lawyer', lawyerw),              # lawyers
        ('politician', govw),             # government officials
        ('mediaworker', mediaworkerw),    # media professionals
        ('university', universityw),      # universities
    )

    label = 'other'
    max_weight = 0
    for name, keywords in domains:
        weight = sum(1 for keyword in keywords if keyword in haystack)
        if weight > max_weight:
            max_weight = weight
            # The government branch used to assign to a stray variable
            # (``gov``), so 'politician' could never be returned.
            label = name

    return label


def classify_inner_outer(location):
    """Return 1 if *location* mentions any entry of ``inner_city``, else 0.

    Args:
        location: UTF-8 encoded byte string (Python 2 ``str``).
    """
    # bio_string_s = traditional2simplified(location.decode('utf-8'))
    new_location = location.decode('utf-8').encode('utf-8')
    return 1 if any(city in new_location for city in inner_city) else 0


def _qualify_by_location(label, location):
    """Prefix *label* with 'inner_' or 'outer_' based on *location*.

    The label is returned unchanged when no location is available.
    """
    if not location:
        return label
    if classify_inner_outer(location) == 1:
        return 'inner_' + label
    return 'outer_' + label


def domain_main(user_data):
    """Main entry point for classifying Twitter users by identity/domain.

    Args:
        user_data: dict keyed by uid, each value a dict of the form
            ``{'description': ..., 'username': ..., 'location': ...,
            'number_of_text': ...}`` where

            * ``description`` -- description from the user's profile
              (note from the original author: part of the content is in
              English and needs to be converted to Chinese),
            * ``username`` -- username from the user's profile,
            * ``location`` -- location from the user's profile (same note
              as ``description``),
            * ``number_of_text`` -- number of posts in the last 7 days.

            Missing or ``None`` fields are treated as empty / zero.

    Returns:
        dict mapping uid to label: ``{'uid': label, 'uid': label, ...}``.
        An empty dict is returned for empty input.
    """
    if not user_data:
        return {}

    user_label = dict()
    for uid, profile in user_data.iteritems():
        description = profile.get('description') or ''
        username = profile.get('username') or ''
        location = profile.get('location') or ''
        number_of_text = profile.get('number_of_text') or 0

        label = 'other'

        # Classify by the bio string; it always contains the '_' separator,
        # so a length above 1 means at least one real field is present.
        bio_string = username + '_' + description
        if len(bio_string) > 1:
            label = classify_by_biostring(bio_string)

        # Organizations and media outlets are further split by location.
        if label in ('admin', 'media'):
            label = _qualify_by_location(label, location)

        # Fall back to posting volume when no domain keyword matched.
        if label == 'other' and number_of_text >= ACTIVE_COUNT:
            label = 'active'

        # NOTE(review): every uid is recorded, including 'other' -- this
        # matches the docstring's output contract; confirm against upstream.
        user_label[uid] = label

    return user_label
...
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!