pastebin - collaborative debugging tool
rovema.kpaste.net RSS


ksh93 parse_rfc1738_url - parse RFC 1738 URLs
Posted by Anonymous on Wed 19th Jul 2023 13:58
raw | new post

  1. #!/bin/ksh93
  2.  
  3. # Usage: parse_rfc1738_url <output-compound-variable-name> <url>
  4. # parse_rfc1738_url - parse RFC 1738 URLs
  5. # Splits <url> into fields stored in the named compound variable.
  6. # Output variables are named after RFC 1738 Section 5 ("BNF for
  7. # specific URL schemes")
  8. # Returns 0 on success; prints a message to stderr and returns 1 otherwise
  9. function parse_rfc1738_url
  10. {
  11.         typeset url="$2" # URL string to parse
  12.         typeset leftover
  13.         nameref data="$1" # output compound variable
  14.        
  15.         # ~(E) is POSIX extended regular expression matching (instead
  16.         # of shell pattern), "x" lets the pattern below contain ignored
  17.         # whitespace and "#" notes, "l"/"r" mean "left"/"right anchor"
  18.         leftover="${url/~(Elrx)
  19.                 (.+?)                           # scheme
  20.                 :\/\/                           # '://'
  21.                 (                               # login
  22.                         (?:
  23.                                 (.+?)           # user (optional)
  24.                                 (?::(.+))?      # password (optional)
  25.                                 @
  26.                         )?
  27.                         (                       # hostport
  28.                                 (.+?)           # host
  29.                                 (?::([[:digit:]]+))? # port (optional)
  30.                         )
  31.                 )
  32.                 (?:\/(.*?))?/X}"                # path (optional)
  33.  
  34.         # All parsed data should be captured via eregex in .sh.match - if
  35.         # there is anything left (except the 'X') then the input string did
  36.         # not properly match the eregex
  37.         [[ "$leftover" == 'X' ]] ||
  38.                 { print -u2 -f $"%s: Parser error, leftover=%q\n" \
  39.                         "$0" "$leftover" ; return 1 ; }
  40.  
  41.         data.url="${.sh.match[0]}" # whole match; pattern is anchored at both ends
  42.         data.scheme="${.sh.match[1]}"
  43.         data.login="${.sh.match[2]}" # everything between '://' and the path
  44.         # FIXME: This should use [[ ! -v .sh.match[3] ]], but ksh93u has bugs
  45.         [[ "${.sh.match[3]}" != '' ]] && data.user="${.sh.match[3]}" # optional fields are only assigned when non-empty
  46.         [[ "${.sh.match[4]}" != '' ]] && data.password="${.sh.match[4]}"
  47.         data.hostport="${.sh.match[5]}"
  48.         data.host="${.sh.match[6]}"
  49.         [[ "${.sh.match[7]}" != '' ]] && integer data.port="${.sh.match[7]}" # stored as an integer variable
  50.         [[ "${.sh.match[8]}" != '' ]] && data.uripath="${.sh.match[8]}" # path without its leading '/'
  51.  
  52.         return 0
  53. }
  54.  
  55. function main # demo driver: parses three sample URLs and prints each result
  56. {
  57.         compound c # output variable, reused by all calls below. NOTE(review): never cleared between calls and optional fields are only conditionally assigned - confirm stale values cannot carry over
  58.        
  59.         input="foo://host/path1/path2" ; printf "## input=%q\n" "$input" # NOTE(review): 'input' is not declared with typeset here - confirm the wider scope is intended
  60.         parse_rfc1738_url c "$input"
  61.         print -v c
  62.  
  63.         input="foo://myusr@host:14/path1/path2" ; printf "## input=%q\n" "$input" # sample with user and port
  64.         parse_rfc1738_url c "$input"
  65.         print -v c
  66.  
  67.         input="foo://myusr:mypasswd@host:14/path1/path2" ; printf "## input=%q\n" "$input" # sample with user, password and port
  68.         parse_rfc1738_url c "$input"
  69.         print -v c
  70.  
  71. }
  72.  
  73. main # script entry point: run the demo

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at