Best Python code snippet using avocado_python
slim.js
Source:slim.js
...
      if (stream.pos < pos) {
        stream.pos = pos;
        return style;
      }
      return state.tokenize(stream, state);
    };
    return function(stream, state) {
      state.tokenize = restore;
      return tokenize(stream, state);
    };
  }

  function maybeBackup(stream, state, pat, offset, style) {
    var cur = stream.current();
    var idx = cur.search(pat);
    if (idx > -1) {
      state.tokenize = backup(stream.pos, state.tokenize, style);
      stream.backUp(cur.length - idx - offset);
    }
    return style;
  }

  function continueLine(state, column) {
    state.stack = {
      parent: state.stack,
      style: "continuation",
      indented: column,
      tokenize: state.line
    };
    state.line = state.tokenize;
  }
  function finishContinue(state) {
    if (state.line == state.tokenize) {
      state.line = state.stack.tokenize;
      state.stack = state.stack.parent;
    }
  }

  function lineContinuable(column, tokenize) {
    return function(stream, state) {
      finishContinue(state);
      if (stream.match(/^\\$/)) {
        continueLine(state, column);
        return "lineContinuation";
      }
      var style = tokenize(stream, state);
      if (stream.eol() && stream.current().match(/(?:^|[^\\])(?:\\\\)*\\$/)) {
        stream.backUp(1);
      }
      return style;
    };
  }
  function commaContinuable(column, tokenize) {
    return function(stream, state) {
      finishContinue(state);
      var style = tokenize(stream, state);
      if (stream.eol() && stream.current().match(/,$/)) {
        continueLine(state, column);
      }
      return style;
    };
  }

  function rubyInQuote(endQuote, tokenize) {
    // TODO: add multi line support
    return function(stream, state) {
      var ch = stream.peek();
      if (ch == endQuote && state.rubyState.tokenize.length == 1) {
        // step out of ruby context as it seems to complete processing all the braces
        stream.next();
        state.tokenize = tokenize;
        return "closeAttributeTag";
      } else {
        return ruby(stream, state);
      }
    };
  }
  function startRubySplat(tokenize) {
    var rubyState;
    var runSplat = function(stream, state) {
      if (state.rubyState.tokenize.length == 1 && !state.rubyState.context.prev) {
        stream.backUp(1);
        if (stream.eatSpace()) {
          state.rubyState = rubyState;
          state.tokenize = tokenize;
          return tokenize(stream, state);
        }
        stream.next();
      }
      return ruby(stream, state);
    };
    return function(stream, state) {
      rubyState = state.rubyState;
      state.rubyState = CodeMirror.startState(rubyMode);
      state.tokenize = runSplat;
      return ruby(stream, state);
    };
  }
  function ruby(stream, state) {
    return rubyMode.token(stream, state.rubyState);
  }

  function htmlLine(stream, state) {
    if (stream.match(/^\\$/)) {
      return "lineContinuation";
    }
    return html(stream, state);
  }
  function html(stream, state) {
    if (stream.match(/^#\{/)) {
      state.tokenize = rubyInQuote("}", state.tokenize);
      return null;
    }
    return maybeBackup(stream, state, /[^\\]#\{/, 1, htmlMode.token(stream, state.htmlState));
  }

  function startHtmlLine(lastTokenize) {
    return function(stream, state) {
      var style = htmlLine(stream, state);
      if (stream.eol()) state.tokenize = lastTokenize;
      return style;
    };
  }

  function startHtmlMode(stream, state, offset) {
    state.stack = {
      parent: state.stack,
      style: "html",
      indented: stream.column() + offset, // pipe + space
      tokenize: state.line
    };
    state.line = state.tokenize = html;
    return null;
  }

  function comment(stream, state) {
    stream.skipToEnd();
    return state.stack.style;
  }

  function commentMode(stream, state) {
    state.stack = {
      parent: state.stack,
      style: "comment",
      indented: state.indented + 1,
      tokenize: state.line
    };
    state.line = comment;
    return comment(stream, state);
  }

  function attributeWrapper(stream, state) {
    if (stream.eat(state.stack.endQuote)) {
      state.line = state.stack.line;
      state.tokenize = state.stack.tokenize;
      state.stack = state.stack.parent;
      return null;
    }
    if (stream.match(wrappedAttributeNameRegexp)) {
      state.tokenize = attributeWrapperAssign;
      return "slimAttribute";
    }
    stream.next();
    return null;
  }
  function attributeWrapperAssign(stream, state) {
    if (stream.match(/^==?/)) {
      state.tokenize = attributeWrapperValue;
      return null;
    }
    return attributeWrapper(stream, state);
  }
  function attributeWrapperValue(stream, state) {
    var ch = stream.peek();
    if (ch == '"' || ch == "\'") {
      state.tokenize = readQuoted(ch, "string", true, false, attributeWrapper);
      stream.next();
      return state.tokenize(stream, state);
    }
    if (ch == '[') {
      return startRubySplat(attributeWrapper)(stream, state);
    }
    if (stream.match(/^(true|false|nil)\b/)) {
      state.tokenize = attributeWrapper;
      return "keyword";
    }
    return startRubySplat(attributeWrapper)(stream, state);
  }

  function startAttributeWrapperMode(state, endQuote, tokenize) {
    state.stack = {
      parent: state.stack,
      style: "wrapper",
      indented: state.indented + 1,
      tokenize: tokenize,
      line: state.line,
      endQuote: endQuote
    };
    state.line = state.tokenize = attributeWrapper;
    return null;
  }

  function sub(stream, state) {
    if (stream.match(/^#\{/)) {
      state.tokenize = rubyInQuote("}", state.tokenize);
      return null;
    }
    var subStream = new CodeMirror.StringStream(stream.string.slice(state.stack.indented), stream.tabSize);
    subStream.pos = stream.pos - state.stack.indented;
    subStream.start = stream.start - state.stack.indented;
    subStream.lastColumnPos = stream.lastColumnPos - state.stack.indented;
    subStream.lastColumnValue = stream.lastColumnValue - state.stack.indented;
    var style = state.subMode.token(subStream, state.subState);
    stream.pos = subStream.pos + state.stack.indented;
    return style;
  }
  function firstSub(stream, state) {
    state.stack.indented = stream.column();
    state.line = state.tokenize = sub;
    return state.tokenize(stream, state);
  }

  function createMode(mode) {
    var query = embedded[mode];
    var spec = CodeMirror.mimeModes[query];
    if (spec) {
      return CodeMirror.getMode(config, spec);
    }
    var factory = CodeMirror.modes[query];
    if (factory) {
      return factory(config, {name: query});
    }
    return CodeMirror.getMode(config, "null");
  }

  function getMode(mode) {
    if (!modes.hasOwnProperty(mode)) {
      return modes[mode] = createMode(mode);
    }
    return modes[mode];
  }

  function startSubMode(mode, state) {
    var subMode = getMode(mode);
    var subState = CodeMirror.startState(subMode);

    state.subMode = subMode;
    state.subState = subState;

    state.stack = {
      parent: state.stack,
      style: "sub",
      indented: state.indented + 1,
      tokenize: state.line
    };
    state.line = state.tokenize = firstSub;
    return "slimSubmode";
  }

  function doctypeLine(stream, _state) {
    stream.skipToEnd();
    return "slimDoctype";
  }

  function startLine(stream, state) {
    var ch = stream.peek();
    if (ch == '<') {
      return (state.tokenize = startHtmlLine(state.tokenize))(stream, state);
    }
    if (stream.match(/^[|']/)) {
      return startHtmlMode(stream, state, 1);
    }
    if (stream.match(/^\/(!|\[\w+])?/)) {
      return commentMode(stream, state);
    }
    if (stream.match(/^(-|==?[<>]?)/)) {
      state.tokenize = lineContinuable(stream.column(), commaContinuable(stream.column(), ruby));
      return "slimSwitch";
    }
    if (stream.match(/^doctype\b/)) {
      state.tokenize = doctypeLine;
      return "keyword";
    }

    var m = stream.match(embeddedRegexp);
    if (m) {
      return startSubMode(m[1], state);
    }

    return slimTag(stream, state);
  }

  function slim(stream, state) {
    if (state.startOfLine) {
      return startLine(stream, state);
    }
    return slimTag(stream, state);
  }

  function slimTag(stream, state) {
    if (stream.eat('*')) {
      state.tokenize = startRubySplat(slimTagExtras);
      return null;
    }
    if (stream.match(nameRegexp)) {
      state.tokenize = slimTagExtras;
      return "slimTag";
    }
    return slimClass(stream, state);
  }
  function slimTagExtras(stream, state) {
    if (stream.match(/^(<>?|><?)/)) {
      state.tokenize = slimClass;
      return null;
    }
    return slimClass(stream, state);
  }
  function slimClass(stream, state) {
    if (stream.match(classIdRegexp)) {
      state.tokenize = slimClass;
      return "slimId";
    }
    if (stream.match(classNameRegexp)) {
      state.tokenize = slimClass;
      return "slimClass";
    }
    return slimAttribute(stream, state);
  }
  function slimAttribute(stream, state) {
    if (stream.match(/^([\[\{\(])/)) {
      return startAttributeWrapperMode(state, closing[RegExp.$1], slimAttribute);
    }

    if (stream.match(attributeNameRegexp)) {
      state.tokenize = slimAttributeAssign;
      return "slimAttribute";
    }
    if (stream.peek() == '*') {
      stream.next();
      state.tokenize = startRubySplat(slimContent);
      return null;
    }
    return slimContent(stream, state);
  }
  function slimAttributeAssign(stream, state) {
    if (stream.match(/^==?/)) {
      state.tokenize = slimAttributeValue;
      return null;
    }
    // should never happen, because of forward lookup
    return slimAttribute(stream, state);
  }

  function slimAttributeValue(stream, state) {
    var ch = stream.peek();
    if (ch == '"' || ch == "\'") {
      state.tokenize = readQuoted(ch, "string", true, false, slimAttribute);
      stream.next();
      return state.tokenize(stream, state);
    }
    if (ch == '[') {
      return startRubySplat(slimAttribute)(stream, state);
    }
    if (ch == ':') {
      return startRubySplat(slimAttributeSymbols)(stream, state);
    }
    if (stream.match(/^(true|false|nil)\b/)) {
      state.tokenize = slimAttribute;
      return "keyword";
    }
    return startRubySplat(slimAttribute)(stream, state);
  }
  function slimAttributeSymbols(stream, state) {
    stream.backUp(1);
    if (stream.match(/^[^\s],(?=:)/)) {
      state.tokenize = startRubySplat(slimAttributeSymbols);
      return null;
    }
    stream.next();
    return slimAttribute(stream, state);
  }
  function readQuoted(quote, style, embed, unescaped, nextTokenize) {
    return function(stream, state) {
      finishContinue(state);
      var fresh = stream.current().length == 0;
      if (stream.match(/^\\$/, fresh)) {
        if (!fresh) return style;
        continueLine(state, state.indented);
        return "lineContinuation";
      }
      if (stream.match(/^#\{/, fresh)) {
        if (!fresh) return style;
        state.tokenize = rubyInQuote("}", state.tokenize);
        return null;
      }
      var escaped = false, ch;
      while ((ch = stream.next()) != null) {
        if (ch == quote && (unescaped || !escaped)) {
          state.tokenize = nextTokenize;
          break;
        }
        if (embed && ch == "#" && !escaped) {
          if (stream.eat("{")) {
            stream.backUp(2);
            break;
          }
        }
        escaped = !escaped && ch == "\\";
      }
      if (stream.eol() && escaped) {
        stream.backUp(1);
      }
      return style;
    };
  }
  function slimContent(stream, state) {
    if (stream.match(/^==?/)) {
      state.tokenize = ruby;
      return "slimSwitch";
    }
    if (stream.match(/^\/$/)) { // tag close hint
      state.tokenize = slim;
      return null;
    }
    if (stream.match(/^:/)) { // inline tag
      state.tokenize = slimTag;
      return "slimSwitch";
    }
    startHtmlMode(stream, state, 0);
    return state.tokenize(stream, state);
  }

  var mode = {
    // default to html mode
    startState: function() {
      var htmlState = CodeMirror.startState(htmlMode);
      var rubyState = CodeMirror.startState(rubyMode);
      return {
        htmlState: htmlState,
        rubyState: rubyState,
        stack: null,
        last: null,
        tokenize: slim,
        line: slim,
        indented: 0
      };
    },
    copyState: function(state) {
      return {
        htmlState : CodeMirror.copyState(htmlMode, state.htmlState),
        rubyState: CodeMirror.copyState(rubyMode, state.rubyState),
        subMode: state.subMode,
        subState: state.subMode && CodeMirror.copyState(state.subMode, state.subState),
        stack: state.stack,
        last: state.last,
        tokenize: state.tokenize,
        line: state.line
      };
    },
    token: function(stream, state) {
      if (stream.sol()) {
        state.indented = stream.indentation();
        state.startOfLine = true;
        state.tokenize = state.line;
        while (state.stack && state.stack.indented > state.indented && state.last != "slimSubmode") {
          state.line = state.tokenize = state.stack.tokenize;
          state.stack = state.stack.parent;
          state.subMode = null;
          state.subState = null;
        }
      }
      if (stream.eatSpace()) return null;
      var style = state.tokenize(stream, state);
      state.startOfLine = false;
      if (style) state.last = style;
      return styleMap.hasOwnProperty(style) ? styleMap[style] : style;
    },
    blankLine: function(state) {
      if (state.subMode && state.subMode.blankLine) {
        return state.subMode.blankLine(state.subState);
      }
    },
    innerMode: function(state) {
      if (state.subMode) return {state: state.subState, mode: state.subMode};
      return {state: state, mode: mode};
    }

    //indent: function(state) {
...
test_base.py
Source:test_base.py
...
    assert (normalize_function(tz.curry(f2, b=1)) ==
            normalize_function(tz.curry(f2, b=1)))
    assert (normalize_function(tz.curry(f2, b=1)) !=
            normalize_function(tz.curry(f2, b=2)))


def test_tokenize():
    a = (1, 2, 3)
    assert isinstance(tokenize(a), (str, bytes))


@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_consistent_on_values():
    assert (tokenize(np.random.RandomState(1234).random_sample(1000)) ==
            tokenize(np.random.RandomState(1234).random_sample(1000)))


@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_supports_uneven_sizes():
    tokenize(np.random.random(7).astype(dtype='i2'))


@pytest.mark.skipif('not np')
def test_tokenize_discontiguous_numpy_array():
    tokenize(np.random.random(8)[::2])


@pytest.mark.skipif('not np')
def test_tokenize_numpy_datetime():
    tokenize(np.array(['2000-01-01T12:00:00'], dtype='M8[ns]'))


@pytest.mark.skipif('not np')
def test_tokenize_numpy_scalar():
    assert tokenize(np.array(1.0, dtype='f8')) == tokenize(np.array(1.0, dtype='f8'))
    assert (tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0]) ==
            tokenize(np.array([(1, 2)], dtype=[('a', 'i4'), ('b', 'i8')])[0]))


@pytest.mark.skipif('not np')
def test_tokenize_numpy_array_on_object_dtype():
    assert (tokenize(np.array(['a', 'aa', 'aaa'], dtype=object)) ==
            tokenize(np.array(['a', 'aa', 'aaa'], dtype=object)))
    assert (tokenize(np.array(['a', None, 'aaa'], dtype=object)) ==
            tokenize(np.array(['a', None, 'aaa'], dtype=object)))
    assert (tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object)) ==
            tokenize(np.array([(1, 'a'), (1, None), (1, 'aaa')], dtype=object)))
    if sys.version_info[0] == 2:
        assert (tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object)) ==
                tokenize(np.array([unicode("Rebeca Alón", encoding="utf-8")], dtype=object)))


@pytest.mark.skipif('not np')
def test_tokenize_numpy_memmap():
    with tmpfile('.npy') as fn:
        x = np.arange(5)
        np.save(fn, x)
        y = tokenize(np.load(fn, mmap_mode='r'))

    with tmpfile('.npy') as fn:
        x = np.arange(5)
        np.save(fn, x)
        z = tokenize(np.load(fn, mmap_mode='r'))

    assert y != z

    with tmpfile('.npy') as fn:
        x = np.random.normal(size=(10, 10))
        np.save(fn, x)
        mm = np.load(fn, mmap_mode='r')
        mm2 = np.load(fn, mmap_mode='r')
        a = tokenize(mm[0, :])
        b = tokenize(mm[1, :])
        c = tokenize(mm[0:3, :])
        d = tokenize(mm[:, 0])
        assert len(set([a, b, c, d])) == 4
        assert tokenize(mm) == tokenize(mm2)
        assert tokenize(mm[1, :]) == tokenize(mm2[1, :])


@pytest.mark.skipif('not np')
def test_tokenize_numpy_memmap_no_filename():
    # GH 1562:
    with tmpfile('.npy') as fn1, tmpfile('.npy') as fn2:
        x = np.arange(5)
        np.save(fn1, x)
        np.save(fn2, x)

        a = np.load(fn1, mmap_mode='r')
        b = a + a
        assert tokenize(b) == tokenize(b)


@pytest.mark.skipif('not np')
def test_tokenize_numpy_ufunc_consistent():
    assert tokenize(np.sin) == '02106e2c67daf452fb480d264e0dac21'
    assert tokenize(np.cos) == 'c99e52e912e4379882a9a4b387957a0b'

    # Make a ufunc that isn't in the numpy namespace.  Similar to
    # any found in other packages.
    inc = np.frompyfunc(lambda x: x + 1, 1, 1)
    assert tokenize(inc) == tokenize(inc)


def test_normalize_base():
    for i in [1, long(1), 1.1, '1', slice(1, 2, 3)]:
        assert normalize_token(i) is i


@pytest.mark.skipif('not pd')
def test_tokenize_pandas():
    a = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3])
    b = pd.DataFrame({'x': [1, 2, 3], 'y': ['4', 'asd', None]}, index=[1, 2, 3])

    assert tokenize(a) == tokenize(b)
    b.index.name = 'foo'
    assert tokenize(a) != tokenize(b)

    a = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']})
    b = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'a']})
    a['z'] = a.y.astype('category')
    assert tokenize(a) != tokenize(b)
    b['z'] = a.y.astype('category')
    assert tokenize(a) == tokenize(b)


def test_tokenize_kwargs():
    assert tokenize(5, x=1) == tokenize(5, x=1)
    assert tokenize(5) != tokenize(5, x=1)
    assert tokenize(5, x=1) != tokenize(5, x=2)
    assert tokenize(5, x=1) != tokenize(5, y=1)


def test_tokenize_same_repr():
    class Foo(object):
        def __init__(self, x):
            self.x = x

        def __repr__(self):
            return 'a foo'

    assert tokenize(Foo(1)) != tokenize(Foo(2))


@pytest.mark.skipif('not np')
def test_tokenize_sequences():
    assert tokenize([1]) != tokenize([2])
    assert tokenize([1]) != tokenize((1,))
    assert tokenize([1]) == tokenize([1])

    x = np.arange(2000)  # long enough to drop information in repr
    y = np.arange(2000)
    y[1000] = 0  # middle isn't printed in repr
    assert tokenize([x]) != tokenize([y])


def test_tokenize_dict():
    assert tokenize({'x': 1, 1: 'x'}) == tokenize({'x': 1, 1: 'x'})


def test_tokenize_set():
    assert tokenize({1, 2, 'x', (1, 'x')}) == tokenize({1, 2, 'x', (1, 'x')})


def test_tokenize_ordered_dict():
    with ignoring(ImportError):
        from collections import OrderedDict
        a = OrderedDict([('a', 1), ('b', 2)])
        b = OrderedDict([('a', 1), ('b', 2)])
        c = OrderedDict([('b', 2), ('a', 1)])

        assert tokenize(a) == tokenize(b)
        assert tokenize(a) != tokenize(c)


@pytest.mark.skipif('not np')
def test_tokenize_object_array_with_nans():
    a = np.array([u'foo', u'Jos\xe9', np.nan], dtype='O')
    assert tokenize(a) == tokenize(a)


@pytest.mark.parametrize('x', [1, True, 'a', b'a', 1.0, 1j, 1.0j,
                               [], (), {}, None, str, int])
def test_tokenize_base_types(x):
    assert tokenize(x) == tokenize(x), x


@pytest.mark.skipif('not db')
def test_compute_no_opt():
    # Bag does `fuse` by default. Test that with `optimize_graph=False` that
    # doesn't get called. We check this by using a callback to track the keys
    # that are computed.
    from dask.callbacks import Callback
    b = db.from_sequence(range(100), npartitions=4)
    add1 = tz.partial(add, 1)
    mul2 = tz.partial(mul, 2)
    o = b.map(add1).map(mul2)
    # Check that with the kwarg, the optimization doesn't happen
    keys = []
    with Callback(pretask=lambda key, *args: keys.append(key)):
        o.compute(get=dask.get, optimize_graph=False)
...
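The tests above all exercise the same contract: dask.base.tokenize returns a deterministic, content-based hash for (nearly) arbitrary Python objects, including keyword arguments. A minimal sketch of that contract, assuming only that dask is installed (the values used here are illustrative, not taken from the test suite):

    # Sketch: the tokenize() contract that test_base.py asserts repeatedly.
    from dask.base import tokenize

    # Equal content gives an equal token; different content or type does not.
    assert tokenize([1, 2, 3]) == tokenize([1, 2, 3])
    assert tokenize([1, 2, 3]) != tokenize((1, 2, 3))   # container type matters
    # Keyword arguments participate in the hash as well.
    assert tokenize(5, x=1) == tokenize(5, x=1)
    assert tokenize(5, x=1) != tokenize(5, x=2)

    print(tokenize({'x': 1, 1: 'x'}))  # a stable hex digest string

Dask uses these tokens as cache/graph keys, which is why the tests insist that equal inputs always hash equally and that superficially similar inputs (same repr, different contents) do not collide.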
Tokenizer.test.js
Source:Tokenizer.test.js
...
  })

  it("intentionally fail with invalid character", () => {
    expect(() => {
      tokenizer.tokenize('\\');
    }).toThrow();
  })

  it("intentionally fail with open ended string", () => {
    expect(() => {
      tokenizer.tokenize('"');
    }).toThrow();

    expect(() => {
      tokenizer.tokenize("'");
    }).toThrow();
  })

  it("intentionally fail with incomplete OR", () => {
    expect(() => {
      tokenizer.tokenize('|');
    }).toThrow();
  })

  it("intentionally fail with incomplete AND", () => {
    expect(() => {
      tokenizer.tokenize('&');
    }).toThrow();
  })

  it("whitespace program", () => {
    expect(tokenizer.tokenize(' ')).toEqual([{line: 1, type: TokenType.NewLine, value: ""}]);
  })

  it("blank program", () => {
    expect(tokenizer.tokenize('')).toEqual([{line: 1, type: TokenType.NewLine, value: null}]);
  })

  it("comment only program has just a newline token with the comment", () => {
    const commentCode = '//adsasdas';
    let tokenList = tokenizer.tokenize(commentCode);
    expect(tokenList).toEqual([{line: 1, type: TokenType.NewLine, value: commentCode}]);
  })

  it("LineDelim", () => {
    expect(tokenizer.tokenize(';')[0]).toEqual({line: 1, type: TokenType.LineDelim, value: null});
  })

  it("NewLine", () => {
    expect(tokenizer.tokenize('\n')[0]).toEqual({line: 2, type: TokenType.NewLine, value: ""});
  })

  it("Double", () => {
    expect(tokenizer.tokenize('double')[0]).toEqual({line: 1, type: TokenType.Double, value: null});
  })

  it("String", () => {
    expect(tokenizer.tokenize('string')[0]).toEqual({line: 1, type: TokenType.String, value: null});
  })

  it("Bool", () => {
    expect(tokenizer.tokenize('bool')[0]).toEqual({line: 1, type: TokenType.Bool, value: null});
  })

  it("DoubleLiteral", () => {
    expect(tokenizer.tokenize('123.45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 123.45});
    expect(tokenizer.tokenize('45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 45});
    expect(tokenizer.tokenize('.45')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: .45});
    expect(tokenizer.tokenize('45.')[0]).toEqual({line: 1, type: TokenType.DoubleLiteral, value: 45.});
  })

  it("StringLiteral", () => {
    expect(tokenizer.tokenize('""')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: ""});
    expect(tokenizer.tokenize('"1"')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: "1"});
    const allCharDQ = '`1234567890-=qwertyuiop[]\\asdfghjkl;\'zxcvbnm,./~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:ZXCVBNM<>?';
    expect(tokenizer.tokenize('"' + allCharDQ + '"')[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: allCharDQ});

    expect(tokenizer.tokenize("''")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: ""});
    expect(tokenizer.tokenize("'1'")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: "1"});
    const allCharSQ = "`1234567890-=qwertyuiop[]\\asdfghjkl;\"zxcvbnm,./~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:ZXCVBNM<>?";
    expect(tokenizer.tokenize("'" + allCharSQ + "'")[0]).toEqual({line: 1, type: TokenType.StringLiteral, value: allCharSQ});
  })

  it("Ident", () => {
    const expectGoodIdent = (name) => {
      expect(tokenizer.tokenize(name)[0]).toEqual({line: 1, type: TokenType.Ident, value: name});
    }
    const expectBadIdent = (name) => {
      expect(tokenizer.tokenize(name)[0]).not.toEqual({line: 1, type: TokenType.Ident, value: name});
    }

    expectGoodIdent("a");
    expectGoodIdent("_");
    expectGoodIdent("True");
    expectGoodIdent("False");
    expectGoodIdent("Null");
    expectGoodIdent("_qwertyuiopasdfghjklzxcvbnm_QWERTYUIOPASDFGHJKLZXCVBNM1234567890_");

    expectBadIdent("0NAME");
    expectBadIdent("A.B");
  })

  it("True", () => {
    expect(tokenizer.tokenize('true')[0]).toEqual({line: 1, type: TokenType.True, value: null});
  })

  it("False", () => {
    expect(tokenizer.tokenize('false')[0]).toEqual({line: 1, type: TokenType.False, value: null});
  })

  it("Null", () => {
    expect(tokenizer.tokenize('null')[0]).toEqual({line: 1, type: TokenType.Null, value: null});
  })

  it("LeftParen", () => {
    expect(tokenizer.tokenize('(')[0]).toEqual({line: 1, type: TokenType.LeftParen, value: null});
  })

  it("RightParen", () => {
    expect(tokenizer.tokenize(')')[0]).toEqual({line: 1, type: TokenType.RightParen, value: null});
  })

  it("LeftSqaure", () => {
    expect(tokenizer.tokenize('[')[0]).toEqual({line: 1, type: TokenType.LeftSqaure, value: null});
  })

  it("RightSqaure", () => {
    expect(tokenizer.tokenize(']')[0]).toEqual({line: 1, type: TokenType.RightSqaure, value: null});
  })

  it("Comma", () => {
    expect(tokenizer.tokenize(',')[0]).toEqual({line: 1, type: TokenType.Comma, value: null});
  })

  it("Dot", () => {
    expect(tokenizer.tokenize('.')[0]).toEqual({line: 1, type: TokenType.Dot, value: null});
  })

  it("Not", () => {
    expect(tokenizer.tokenize('!')[0]).toEqual({line: 1, type: TokenType.Not, value: null});
  })

  it("And", () => {
    expect(tokenizer.tokenize('&&')[0]).toEqual({line: 1, type: TokenType.And, value: null});
  })

  it("Or", () => {
    expect(tokenizer.tokenize('||')[0]).toEqual({line: 1, type: TokenType.Or, value: null});
  })

  it("Plus", () => {
    expect(tokenizer.tokenize('+')[0]).toEqual({line: 1, type: TokenType.Plus, value: null});
  })

  it("Minus", () => {
    expect(tokenizer.tokenize('-')[0]).toEqual({line: 1, type: TokenType.Minus, value: null});
  })

  it("Divide", () => {
    expect(tokenizer.tokenize('/')[0]).toEqual({line: 1, type: TokenType.Divide, value: null});
  })

  it("Multiply", () => {
    expect(tokenizer.tokenize('*')[0]).toEqual({line: 1, type: TokenType.Multiply, value: null});
  })

  it("Mod", () => {
    expect(tokenizer.tokenize('%')[0]).toEqual({line: 1, type: TokenType.Mod, value: null});
  })

  it("Exponent", () => {
    expect(tokenizer.tokenize('^')[0]).toEqual({line: 1, type: TokenType.Exponent, value: null});
  })

  it("Question", () => {
    expect(tokenizer.tokenize('?')[0]).toEqual({line: 1, type: TokenType.Question, value: null});
  })

  it("Colon", () => {
    expect(tokenizer.tokenize(':')[0]).toEqual({line: 1, type: TokenType.Colon, value: null});
  })

  it("Assignment", () => {
    expect(tokenizer.tokenize('=')[0]).toEqual({line: 1, type: TokenType.Assignment, value: null});
  })

  it("Equals", () => {
    expect(tokenizer.tokenize('==')[0]).toEqual({line: 1, type: TokenType.Equals, value: null});
  })

  it("NotEquals", () => {
    expect(tokenizer.tokenize('!=')[0]).toEqual({line: 1, type: TokenType.NotEquals, value: null});
  })

  it("Lesser", () => {
    expect(tokenizer.tokenize('<')[0]).toEqual({line: 1, type: TokenType.Lesser, value: null});
  })

  it("LesserEquals", () => {
    expect(tokenizer.tokenize('<=')[0]).toEqual({line: 1, type: TokenType.LesserEquals, value: null});
  })

  it("Greater", () => {
    expect(tokenizer.tokenize('>')[0]).toEqual({line: 1, type: TokenType.Greater, value: null});
  })

  it("GreaterEquals", () => {
    expect(tokenizer.tokenize('>=')[0]).toEqual({line: 1, type: TokenType.GreaterEquals, value: null});
  })

  it("Min", () => {
    expect(tokenizer.tokenize('min')[0]).toEqual({line: 1, type: TokenType.Min, value: null});
  })

  it("Max", () => {
    expect(tokenizer.tokenize('max')[0]).toEqual({line: 1, type: TokenType.Max, value: null});
  })

  it("Abs", () => {
    expect(tokenizer.tokenize('abs')[0]).toEqual({line: 1, type: TokenType.Abs, value: null});
  })

  it("Clamp", () => {
    expect(tokenizer.tokenize('clamp')[0]).toEqual({line: 1, type: TokenType.Clamp, value: null});
  })

  it("Floor", () => {
    expect(tokenizer.tokenize('floor')[0]).toEqual({line: 1, type: TokenType.Floor, value: null});
  })

  it("Ceil", () => {
    expect(tokenizer.tokenize('ceil')[0]).toEqual({line: 1, type: TokenType.Ceil, value: null});
  })

  it("LCase", () => {
    expect(tokenizer.tokenize('lcase')[0]).toEqual({line: 1, type: TokenType.LCase, value: null});
  })

  it("UCase", () => {
    expect(tokenizer.tokenize('ucase')[0]).toEqual({line: 1, type: TokenType.UCase, value: null});
  })

  it("Trim", () => {
    expect(tokenizer.tokenize('trim')[0]).toEqual({line: 1, type: TokenType.Trim, value: null});
  })

  it("Len", () => {
    expect(tokenizer.tokenize('len')[0]).toEqual({line: 1, type: TokenType.Len, value: null});
  })

  it("SubStr", () => {
    expect(tokenizer.tokenize('substr')[0]).toEqual({line: 1, type: TokenType.SubStr, value: null});
  })

  it("While", () => {
    expect(tokenizer.tokenize('while')[0]).toEqual({line: 1, type: TokenType.While, value: null});
  })

  it("For", () => {
    expect(tokenizer.tokenize('for')[0]).toEqual({line: 1, type: TokenType.For, value: null});
  })

  it("Loop", () => {
    expect(tokenizer.tokenize('loop')[0]).toEqual({line: 1, type: TokenType.Loop, value: null});
  })

  it("If", () => {
    expect(tokenizer.tokenize('if')[0]).toEqual({line: 1, type: TokenType.If, value: null});
  })

  it("Else", () => {
    expect(tokenizer.tokenize('else')[0]).toEqual({line: 1, type: TokenType.Else, value: null});
  })

  it("Break", () => {
    expect(tokenizer.tokenize('break')[0]).toEqual({line: 1, type: TokenType.Break, value: null});
  })

  it("LeftCurly", () => {
    expect(tokenizer.tokenize('{')[0]).toEqual({line: 1, type: TokenType.LeftCurly, value: null});
  })

  it("RightCurly", () => {
    expect(tokenizer.tokenize('}')[0]).toEqual({line: 1, type: TokenType.RightCurly, value: null});
  })

  it("Return", () => {
    expect(tokenizer.tokenize('return')[0]).toEqual({line: 1, type: TokenType.Return, value: null});
  })

  it("Exit", () => {
    expect(tokenizer.tokenize('exit')[0]).toEqual({line: 1, type: TokenType.Exit, value: null});
  })
...
__init__.py
Source:__init__.py
...
tokenizers can be used to find the words and punctuation in a string:

    >>> from nltk.tokenize import word_tokenize
    >>> s = '''Good muffins cost $3.88\nin New York.  Please buy me
    ... two of them.\n\nThanks.'''
    >>> word_tokenize(s)
    ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.',
    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']

This particular tokenizer requires the Punkt sentence tokenization
models to be installed. NLTK also provides a simpler,
regular-expression based tokenizer, which splits text on whitespace
and punctuation:

    >>> from nltk.tokenize import wordpunct_tokenize
    >>> wordpunct_tokenize(s)
    ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']

We can also operate at the level of sentences, using the sentence
tokenizer directly as follows:

    >>> from nltk.tokenize import sent_tokenize, word_tokenize
    >>> sent_tokenize(s)
    ['Good muffins cost $3.88\nin New York.', 'Please buy me\ntwo of them.', 'Thanks.']
    >>> [word_tokenize(t) for t in sent_tokenize(s)]
    [['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.'],
    ['Please', 'buy', 'me', 'two', 'of', 'them', '.'], ['Thanks', '.']]

Caution: when tokenizing a Unicode string, make sure you are not
using an encoded version of the string (it may be necessary to
decode it first, e.g. with ``s.decode("utf8")``.

NLTK tokenizers can produce token-spans, represented as tuples of integers
having the same semantics as string slices, to support efficient comparison
of tokenizers.  (These methods are implemented as generators.)

    >>> from nltk.tokenize import WhitespaceTokenizer
    >>> list(WhitespaceTokenizer().span_tokenize(s))
    [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44),
    (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]

There are numerous ways to tokenize text.  If you need more control over
tokenization, see the other methods provided in this package.

For further information, please see Chapter 3 of the NLTK book.
"""

import re

from nltk.data import load
from nltk.tokenize.casual import TweetTokenizer, casual_tokenize
from nltk.tokenize.mwe import MWETokenizer
from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk.tokenize.regexp import (
    RegexpTokenizer,
    WhitespaceTokenizer,
    BlanklineTokenizer,
    WordPunctTokenizer,
    wordpunct_tokenize,
    regexp_tokenize,
    blankline_tokenize,
)
from nltk.tokenize.repp import ReppTokenizer
from nltk.tokenize.sexpr import SExprTokenizer, sexpr_tokenize
from nltk.tokenize.simple import (
    SpaceTokenizer,
    TabTokenizer,
    LineTokenizer,
    line_tokenize,
)
from nltk.tokenize.texttiling import TextTilingTokenizer
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.tokenize.treebank import TreebankWordTokenizer
from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize
from nltk.tokenize.stanford_segmenter import StanfordSegmenter
from nltk.tokenize.sonority_sequencing import SyllableTokenizer


# Standard sentence tokenizer.
def sent_tokenize(text, language='english'):
    """
    Return a sentence-tokenized copy of *text*,
    using NLTK's recommended sentence tokenizer
    (currently :class:`.PunktSentenceTokenizer`
    for the specified language).

    :param text: text to split into sentences
    :param language: the model name in the Punkt corpus
    """
    tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
    return tokenizer.tokenize(text)


# Standard word tokenizer.
_treebank_word_tokenizer = TreebankWordTokenizer()

# See discussion on https://github.com/nltk/nltk/pull/1437
# Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on
# - chervon quotes u'\xab' and u'\xbb' .
# - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d'
# See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608
# Also, behavior of splitting on clitics now follows Stanford CoreNLP
# - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b
improved_open_quote_regex = re.compile(u'([«“‘„]|[`]+)', re.U)
improved_open_single_quote_regex = re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U)
improved_close_quote_regex = re.compile(u'([»”’])', re.U)
improved_punct_regex = re.compile(r'([^\.])(\.)([\]\)}>"\'' u'»”’ ' r']*)\s*$', re.U)
_treebank_word_tokenizer.STARTING_QUOTES.insert(0, (improved_open_quote_regex, r' \1 '))
_treebank_word_tokenizer.STARTING_QUOTES.append((improved_open_single_quote_regex, r'\1 \2'))
_treebank_word_tokenizer.ENDING_QUOTES.insert(0, (improved_close_quote_regex, r' \1 '))
_treebank_word_tokenizer.PUNCTUATION.insert(0, (improved_punct_regex, r'\1 \2 \3 '))


def word_tokenize(text, language='english', preserve_line=False):
    """
    Return a tokenized copy of *text*,
    using NLTK's recommended word tokenizer
    (currently an improved :class:`.TreebankWordTokenizer`
    along with :class:`.PunktSentenceTokenizer`
    for the specified language).

    :param text: text to split into words
    :type text: str
    :param language: the model name in the Punkt corpus
    :type language: str
    :param preserve_line: An option to keep the sentence intact and not sentence-tokenize it.
    :type preserve_line: bool
    """
    sentences = [text] if preserve_line else sent_tokenize(text, language)
    return [
        token for sent in sentences for token in _treebank_word_tokenizer.tokenize(sent)
...
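For the two entry points defined in this module, a short usage sketch follows; it assumes nltk is installed and the Punkt model has been downloaded (for example via nltk.download('punkt')), and the sample sentence is the one from the module docstring:

    # Sketch: calling the package-level tokenizers defined above.
    from nltk.tokenize import sent_tokenize, word_tokenize

    s = "Good muffins cost $3.88 in New York. Please buy me two of them."

    print(sent_tokenize(s))                      # Punkt sentence segmentation
    print(word_tokenize(s))                      # sentence split, then Treebank word tokenization
    print(word_tokenize(s, preserve_line=True))  # skip the sentence-splitting step

The preserve_line flag corresponds directly to the branch in word_tokenize above: when it is True the text is passed to the Treebank tokenizer as a single "sentence" instead of being run through sent_tokenize first.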