chenzhaoyang
2025-12-17 d3e5a4b7658ece4f845bbc0c4f95acf3fbdf8a61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Template metadata: display title, template type, the group it is filed
# under, and the path of its preview image.
# NOTE(review): presumably consumed by a template gallery/loader; the exact
# meaning of `type: community` is not visible in this file - confirm.
title: HTML Entity Recognition
type: community
group: Structured Data Parsing
image: /static/templates/html-entity-recognition.png
# Description of the template as an HTML fragment (literal block scalar, so
# line breaks are preserved): one heading followed by a definition list with
# three entries - industry applications, associated models, and domain
# terminology. Everything indented below the key is part of the string value.
details: |
  <h1>Extract entities from hypertext documents</h1>
  <dl>
    <dt>Industry Applications</dt>
    <dd>web scraping, content extraction, digital publishing, news aggregation, e-commerce data mining, social media monitoring, academic research, market research, competitive intelligence, SEO analysis, content management, data journalism</dd>
    <dt>Associated Models</dt>
    <dd>DOM parsing, web content extraction, structured data extraction, HTML parsing</dd>
    <dt>Domain Terminology</dt>
    <dd>web entity extraction, HTML annotation, web document processing, hypertext analysis, markup extraction</dd>
  </dl>
# Labeling interface markup (literal block scalar; the indented lines below
# are the string value, so no comments may be placed inside it).
#   - <HyperTextLabels name="ner" toName="text"> declares three labels
#     (Title/green, Author/blue, Body/yellow) and points at the element
#     named "text" via toName.
#   - <HyperText name="text" value="$html"> is that target element; it sits
#     inside a <View> that only adds a border, rounded corners, and padding.
# NOTE(review): presumably "$html" is resolved from an `html` field of each
# task's data - confirm against the labeling tool's documentation.
config: |
  <View>
    <HyperTextLabels name="ner" toName="text">
      <Label value="Title" background="green"/>
      <Label value="Author" background="blue"/>
      <Label value="Body" background="yellow"/>
    </HyperTextLabels>
 
    <View style="border: 1px solid #CCC;
                 border-radius: 10px;
                 padding: 5px">
      <HyperText name="text" value="$html"/>
    </View>
  </View>