Best K6 code snippet using html.ParseHTML
parsehtml.go
Source:parsehtml.go
1package parsehtml2import (3 "bufio"4 "io/ioutil"5 "os"6 "regexp"7 "strconv"8 "strings"9 "github.com/MaestroError/html-strings-affixer/config"10 "golang.org/x/exp/slices"11)12type Parsehtml struct {13 file string14 found_strings map[string][]map[string]string15 content string16 original_content string17 // options18 ignore_characters []string19 extractions []string20 // Affixes to search string21 prefix string22 suffix string23 // regex24 regexp *regexp.Regexp25 search_regex string26}27/*28*29 */30func (parse *Parsehtml) Init(file string, c config.Config) {31 parse.found_strings = make(map[string][]map[string]string)32 parse.SetFile(file)33 parse.getFileContent()34 // set options from config35 parse.setIgnoreCharacters(c.GetIgnoreCharacters())36 parse.setExtractions(c.GetAllowedMethods())37}38func (parse *Parsehtml) ParseFile(file string, c config.Config) *Parsehtml {39 parse.Init(file, c)40 if slices.Contains(parse.extractions, "text") {41 parse.ExtractText()42 }43 if slices.Contains(parse.extractions, "placeholder") {44 parse.ExtractPlaceholder()45 }46 if slices.Contains(parse.extractions, "alt") {47 parse.ExtractAlt()48 }49 if slices.Contains(parse.extractions, "title") {50 parse.ExtractTitle()51 }52 if slices.Contains(parse.extractions, "hastag") {53 parse.ExtractHashtag()54 }55 return parse56}57// setters58func (parse *Parsehtml) SetFile(file string) {59 parse.file = file60}61// Adds new string in found_strings62// sets trimmed string as "found" and original string as "original_string"63// type -> string describing type of visible html, you can specify it while calling parse.parseContent method64// lines -> lines where found string exists, you can get it with parse.findLineOfString method65func (parse *Parsehtml) AddNewString(found string, original_string string, found_type string, lines string) {66 foundObject := make(map[string]string)67 foundObject["found"] = found68 foundObject["original_string"] = original_string69 foundObject["type"] = found_type70 
foundObject["lines"] = lines71 parse.found_strings["data"] = append(parse.found_strings["data"], foundObject)72}73func (parse *Parsehtml) GetFoundStrings() map[string][]map[string]string {74 return parse.found_strings75}76func (parse *Parsehtml) AddIgnoreCharacter(char string) {77 parse.ignore_characters = append(parse.ignore_characters, char)78}79func (parse *Parsehtml) SetPrefix(prefix string) {80 parse.prefix = prefix81}82func (parse *Parsehtml) SetSuffix(suffix string) {83 parse.suffix = suffix84}85// Simple strings extraction method - just plain strings in HTML86func (parse *Parsehtml) ExtractText() {87 // set affixes for simple strings extraction88 parse.SetPrefix("\\>")89 parse.SetSuffix("\\<")90 // Generates regex based on prefix, suffix and denied characters91 parse.generateRegex()92 // Parses content and adds strings in found_strings with specific type93 parse.parseContent("text")94}95// HTML input's Placeholders attributes extraction method96// XX - Can't use word "placeholder" inside placeholder - XX ?? why? 
it does well97func (parse *Parsehtml) ExtractPlaceholder() {98 // set affixes for simple strings extraction99 // (?i) = case insensitive100 parse.SetPrefix("(?i)placeholder=(\"|')")101 parse.SetSuffix("(\"|')")102 // Generates regex based on prefix, suffix and denied characters103 parse.generateRegex()104 // Parses content and adds strings in found_strings with specific type105 parse.parseContent("placeholder")106}107// HTML img's alt attributes extraction method108func (parse *Parsehtml) ExtractAlt() {109 // set affixes for simple strings extraction110 parse.SetPrefix("(?i)alt=(\"|')")111 parse.SetSuffix("(\"|')")112 // Generates regex based on prefix, suffix and denied characters113 parse.generateRegex()114 // Parses content and adds strings in found_strings with specific type115 parse.parseContent("alt")116}117// HTML title attributes extraction method118func (parse *Parsehtml) ExtractTitle() {119 // set affixes for simple strings extraction120 parse.SetPrefix("(?i)title=(\"|')")121 parse.SetSuffix("(\"|')")122 // Generates regex based on prefix, suffix and denied characters123 parse.generateRegex()124 // Parses content and adds strings in found_strings with specific type125 parse.parseContent("title")126}127// Extracts "#text" type (selected) strings128func (parse *Parsehtml) ExtractHashtag() {129 // set affixes for simple strings extraction130 parse.SetPrefix("(\"|'|>)\\s*#")131 parse.SetSuffix("(\"|'|<)")132 // Generates regex based on prefix, suffix and denied characters133 parse.generateRegex()134 // Parses content and adds strings in found_strings with specific type135 // @todo add "#" as strip to remove it while replacing136 parse.parseContent("hashtag")137}138// privates139func (parse *Parsehtml) setFoundStrings(found_strings map[string][]map[string]string) {140 parse.found_strings = found_strings141}142func (parse *Parsehtml) renewContent() {143 parse.content = parse.original_content144}145func (parse *Parsehtml) findLineOfString(str string) []string 
{146 f, err := os.Open(parse.file)147 if err != nil {148 // return 0, err149 panic(err)150 }151 defer f.Close()152 // Splits on newlines by default.153 scanner := bufio.NewScanner(f)154 foundOnLines := []string{}155 line := 1156 // check each line for founded string existence157 for scanner.Scan() {158 if strings.Contains(scanner.Text(), str) {159 // append line as string in foundOnLines array160 foundOnLines = append(foundOnLines, strconv.Itoa(line))161 }162 line++163 }164 if err := scanner.Err(); err != nil {165 // Handle the error166 panic(err)167 }168 return foundOnLines169}170// Reads file and sets content (as content and original_content properties)171func (parse *Parsehtml) getFileContent() {172 var r []byte173 var err error174 r, err = ioutil.ReadFile(parse.file)175 if err != nil {176 panic(err)177 }178 content := string(r)179 parse.content = content180 parse.original_content = content181}182func (parse *Parsehtml) setIgnoreCharacters(ignore_characters []string) {183 parse.ignore_characters = ignore_characters184}185func (parse *Parsehtml) setExtractions(allowed_parse_methods []string) {186 parse.extractions = allowed_parse_methods187}188// Generates regex based on prefix, suffix and denied characters189// sets search_regex as regular expression string190// and regexp as regexp object191func (parse *Parsehtml) generateRegex() {192 if parse.prefix != "" && parse.suffix != "" {193 deniedCharString := strings.Join(parse.ignore_characters, "\\")194 // [^\s+] -> used to not match whitespace195 reg := regexp.MustCompile(parse.prefix + `[^` + deniedCharString + `].[^\s+][^` + deniedCharString + `]+` + parse.suffix)196 parse.search_regex = reg.String()197 parse.regexp = reg198 }199}200// parses content, trims found strings and adds in found_strings if not already exists201func (parse *Parsehtml) parseContent(htmlType string) {202 // find all strings based on regex203 submatchall := parse.regexp.FindAllString(parse.content, -1)204 for _, element := range submatchall 
{205 // removes (trims) finding prefix and suffix206 re := regexp.MustCompile(parse.prefix)207 found := re.ReplaceAllString(element, "")208 re = regexp.MustCompile(parse.suffix)209 found = re.ReplaceAllString(found, "")210 // add as new string if no duplicates found211 if !parse.checkDuplicate(found) {212 lines := parse.findLineOfString(found)213 parse.AddNewString(found, element, htmlType, strings.Join(lines, ", "))214 }215 }216}217// check if string already exists in found strings218func (parse *Parsehtml) checkDuplicate(found string) bool {219 result := false220 // @todo check also type of string or "original_string" (maybe some string will need different methods to replace)221 for _, fs := range parse.found_strings[parse.file] {222 if fs["found"] == found {223 result = true224 break225 }226 }227 return result228}...
page_test.go
Source:page_test.go
...11 panic(fmt.Sprintf("can't open test data: %v", err))12 }13 return f14}15func TestParseHTML(t *testing.T) {16 baseURL, err := SanitizedURLFromString("http://www.example.com/a/b/c")17 if err != nil {18 panic(fmt.Sprintf("can't build sanitized url"))19 }20 t.Run("ä¸è¬çãªHTMLã®å ´å", func(t *testing.T) {21 html, err := ParseHTML(openTestData("testdata/test.html"), baseURL)22 if err != nil {23 t.Errorf("ParseHTML(testdata/test.html) = error, want = no error")24 return25 }26 if html.Title() != "ãã¹ãç¨HTML" {27 t.Errorf("ParseHTML(testdata/test.html).Title() = %s, want = \"ãã¹ãç¨HTML\"", html.Title())28 }29 if !html.NoIndex() {30 t.Errorf("ParseHTML(testdata/test.html).NoIndex() = false, want = true")31 }32 if len(html.AllURL()) != 3 {33 t.Errorf("len(ParseHTML(testdata/test.html).AllURL()) = %d, want = 3", len(html.AllURL()))34 }35 wantURL := []string{"http://example1.com", "https://example2.com", "http://www.example.com/a/b/rel.html"}36 for i, want := range wantURL {37 if html.AllURL()[i].String() != want {38 t.Errorf("ParseHTML(testdata/test.html).AllURL()[%d] = %s, want = %s", i, html.AllURL()[i].String(), want)39 }40 }41 })42 t.Run("nofollowãå
¨é¢çã«æå®ããã¦ããHTMLã®å ´å", func(t *testing.T) {43 html, err := ParseHTML(openTestData("testdata/nofollow.html"), baseURL)44 if err != nil {45 t.Errorf("ParseHTML(testdata/nofollow.html) = error, want = no error")46 return47 }48 if html.Title() != "ãã¹ãç¨HTML" {49 t.Errorf("ParseHTML(testdata/nofollow.html).Title() = %s, want = \"ãã¹ãç¨HTML\"", html.Title())50 }51 if html.NoIndex() {52 t.Errorf("ParseHTML(testdata/nofollow.html).NoIndex() = true, want = false")53 }54 if len(html.AllURL()) != 0 {55 t.Errorf("len(ParseHTML(testdata/nofollow.html).AllURL()) = %d, want = 0", len(html.AllURL()))56 }57 })58}...
parsekind_yamlenums.go
Source:parsekind_yamlenums.go
...6)7var (8 _ParseKindNameToValue = map[string]ParseKind{9 "ParseRaw": ParseRaw,10 "ParseHTML": ParseHTML,11 "ParseMarkdown": ParseMarkdown,12 }13 _ParseKindValueToName = map[ParseKind]string{14 ParseRaw: "ParseRaw",15 ParseHTML: "ParseHTML",16 ParseMarkdown: "ParseMarkdown",17 }18)19func init() {20 var v ParseKind21 if _, ok := interface{}(v).(fmt.Stringer); ok {22 _ParseKindNameToValue = map[string]ParseKind{23 interface{}(ParseRaw).(fmt.Stringer).String(): ParseRaw,24 interface{}(ParseHTML).(fmt.Stringer).String(): ParseHTML,25 interface{}(ParseMarkdown).(fmt.Stringer).String(): ParseMarkdown,26 }27 }28}29// MarshalYAML is generated so ParseKind satisfies yaml.Marshaler.30func (r ParseKind) MarshalYAML() ([]byte, error) {31 if s, ok := interface{}(r).(fmt.Stringer); ok {32 return yaml.Marshal(s.String())33 }34 s, ok := _ParseKindValueToName[r]35 if !ok {36 return nil, fmt.Errorf("invalid ParseKind: %d", r)37 }38 return yaml.Marshal(s)...
ParseHTML
Using AI Code Generation
1import (2func main() {3 if err != nil {4 fmt.Println("Error in loading URL")5 }6}
ParseHTML
Using AI Code Generation
1import (2func main() {3 z := html.NewTokenizer(res.Body)4 for {5 tt := z.Next()6 switch {7 t := z.Token()8 if isAnchor {9 for _, a := range t.Attr {10 if a.Key == "href" {11 fmt.Printf("Link: %q12 }13 }14 }15 }16 }17}
ParseHTML
Using AI Code Generation
1import (2func main() {3 if err != nil {4 log.Fatal(err)5 }6 defer resp.Body.Close()7 doc, err := html.Parse(resp.Body)8 if err != nil {9 log.Fatal(err)10 }11 var f func(*html.Node)12 f = func(n *html.Node) {13 if n.Type == html.ElementNode {14 fmt.Println(n.Data)15 }16 for c := n.FirstChild; c != nil; c = c.NextSibling {17 f(c)18 }19 }20 f(doc)21}
ParseHTML
Using AI Code Generation
1import (2func main() {3 doc, err := html.Parse(strings.NewReader("<html><head></head><body><h1>Hi</h1></body></html>"))4 if err != nil {5 fmt.Println("Error in parsing the HTML")6 }7 fmt.Println(doc)8}9&{0xc00000c0c0 [] []}10import (11func main() {12 doc, err := html.ParseFragment(strings.NewReader("<h1>Hi</h1>"), &html.Node{Type: html.ElementNode, Data: "body"})13 if err != nil {14 fmt.Println("Error in parsing the HTML")15 }16 fmt.Println(doc)17}18[&{0xc00000c0c0 [] []}]19import (20func main() {21 doc, err := html.Parse(strings.NewReader("<html><head></head><body><h1>Hi</h1></body></html
ParseHTML
Using AI Code Generation
1import (2func main() {3 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))4 if err != nil {5 log.Fatal(err)6 }7 fmt.Println(doc.Find("h1").Text())8}9import (10func main() {11 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))12 if err != nil {13 log.Fatal(err)14 }15 fmt.Println(doc.Find("h1").Text())16}17import (18func main() {19 if err != nil {20 log.Fatal(err)21 }22 fmt.Println(doc.Find("h1").Text())23}24import (25func main() {26 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))27 if err != nil {28 log.Fatal(err)29 }30 fmt.Println(doc.Find("h1").Text())31}32import (33func main() {34 if err != nil {35 log.Fatal(err)36 }37 fmt.Println(doc.Find("h1").Text())38}
ParseHTML
Using AI Code Generation
1import (2func main() {3 if err != nil {4 panic(err)5 }6}7import (8func main() {9 if err != nil {10 panic(err)11 }12}13import (14func main() {15 if err != nil {16 panic(err)17 }18 fmt.Println(htmlquery.InnerText(html))19}20import (21func main() {22 if err != nil {23 panic(err)24 }25}26import (27func main() {28 if err != nil {29 panic(err)30 }31}32import (33func main() {34 if err != nil {35 panic(err)36 }37}38import (
ParseHTML
Using AI Code Generation
1import "fmt"2import "github.com/antchfx/htmlquery"3func main() {4 if err != nil {5 panic(err)6 }7 for _, n := range nodes {8 fmt.Println(htmlquery.InnerText(n))9 }10}
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation testing for FREE!