Capricorn

Parse HTML according to a configuration.

Capricorn is an HTML parsing library that supports recursion and a custom execution order.

Version info Downloads docs example branch parameter

Default execution order

vec![String::from("selects"),
    String::from("each"),
    String::from("select_params"),
    String::from("nodes"),
    String::from("has"),
    String::from("contains")];

selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html);
selects > select_params > selects > ... text_attr_html > (text or attr or html);
selects > nodes > has > contains > text_attr_html > (text or attr or html);

Support:

| Capricorn | support | example | val type |
| :----: | :----: | :----- | :----: |
| selects element | ✔ | <pre>field_name:<br>  selects:<br>      - element_name</pre> | String |
| selects class | ✔ | <pre>field_name:<br>  selects:<br>      - .class_name</pre> | String |
| selects class element | ✔ | <pre>field_name:<br>  selects:<br>      - .class_name<br>      - element_name</pre> | String |
| first | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      first: true</pre> | String |
| last | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      last: true</pre> | String |
| eq | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      eq: 0</pre> | String |
| parent | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      parent: true</pre> | String |
| children | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      children: true</pre> | String |
| prev_sibling | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      prev_sibling: true</pre> | String |
| next_sibling | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  nodes:<br>      next_sibling: true</pre> | String |
| has_class | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  has:<br>      class: class_name</pre> | String |
| has_attr | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  has:<br>      attr: attr_name</pre> | String |
| each one | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  each:<br>      one:<br>          selects:<br>              - .class_name<br>          ...</pre> | String |
| each all | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  each:<br>      all:<br>          selects:<br>              - .class_name<br>          ...</pre> | Array |
| each fields | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  each:<br>      fields:<br>        field_name:<br>          selects:<br>              - .class_name<br>          ...<br>        field_name1:<br>          selects:<br>              - .class_name<br>          ...</pre> | Map |
| select_params | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  select_params:<br>      selects:<br>          - .class_name<br>      ...</pre> | ... |
| text | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  text_attr_html:<br>      text: true</pre> | String |
| attr | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  text_attr_html:<br>      attr: true</pre> | String |
| html | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  text_attr_html:<br>      html: true</pre> | String |
| text contains | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  contains:<br>      contains:<br>          text:<br>              - test</pre> | String |
| text not contains | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  contains:<br>      not_contains:<br>          text:<br>              - test</pre> | String |
| html contains | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  contains:<br>      contains:<br>          html:<br>              - test</pre> | String |
| html not contains | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  contains:<br>      not_contains:<br>          html:<br>              - test</pre> | String |
| exec order | ✔ | <pre>field_name:<br>  exec_order:<br>      - selects<br>      - has<br>      - nodes<br>  selects:<br>      - element_name<br>  has:<br>      class: class_name<br>  nodes:<br>      first: true</pre> | String |
| data format splits | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      splits:<br>          - { key: str }</pre> | Array |
| data format splits | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      splits:<br>          - { key: str, index: 0 }</pre> | String |
| data format replaces | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      replaces:<br>          - str</pre> | String |
| data format deletes | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      deletes:<br>          - str</pre> | String |
| data format find | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      find: regex</pre> | String |
| data format find_iter | ✔ | <pre>field_name:<br>  selects:<br>      - element_name<br>  data_format:<br>      find_iter: regex</pre> | Array |
| Multi-version regular matching err | ✔ | <pre>regexes_match_parse_html:<br>    - regex: regex<br>      version: 1<br>      err: err_msg</pre> | Err |
| Multi-version regular matching fields | ✔ | <pre>regexes_match_parse_html:<br>    - regex: regex<br>      version: 1<br>      fields:<br>        field_name:<br>          selects:<br>          ...<br>        field_name:<br>          selects:<br>          ...</pre> | Map |

Parse HTML code (see the project's examples for more):

let yml = read_file("./test_html/test.yml").unwrap();
let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = parse::parse_html(&params, &html);

Parse HTML code with multi-version regular-expression matching (see the project's examples for more):

let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap();
let v:  match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r =  v.regexes_match_parse_html(html)?;