pastebin - collaborative debugging tool
rovema.kpaste.net RSS


ksh93 parse_rfc1738_url - parse RFC 1738 URLs
Posted by Anonymous on Wed 19th Jul 2023 14:59
raw | new post

  1.  
  2. #
  3. # parse_rfc1738_url - parse RFC 1738 URLs
  4. # Usage: parse_rfc1738_url <output compound variable name> <url>
  5. # Output variables are named after RFC 1738 Section 5 ("BNF for
  6. # specific URL schemes")
  7. # Returns 0 on success; prints an error to stderr and returns 1 on mismatch
  8. function parse_rfc1738_url
  9. {
  10.         set -o nounset
  11.  
  12.         typeset url="$2"
  13.         typeset leftover
  14.         nameref data="$1" # output compound variable
  15.        
  16.         # ~(E) is POSIX extended regular expression matching (instead
  17.         # of shell pattern), "x" allows the whitespace and "#" comments used in
  18.         # the pattern below, "l" means "left anchor", "r" means "right anchor"
  19.         leftover="${url/~(Elrx)
  20.                 (.+?)                           # scheme
  21.                 :\/\/                           # '://'
  22.                 (                               # login
  23.                         (?:
  24.                                 (.+?)           # user (optional)
  25.                                 (?::(.+))?      # password (optional)
  26.                                 @
  27.                         )?
  28.                         (                       # hostport
  29.                                 (.+?)           # host
  30.                                 (?::([[:digit:]]+))? # port (optional)
  31.                         )
  32.                 )
  33.                 (?:\/(.*?))?/X}"                # path (optional)
  34.  
  35.         # On a full match the whole URL is replaced by 'X' and the pieces are
  36.         # in .sh.match; any other leftover means the eregex did not match.
  37.         # NOTE(review): a URL that is literally 'X' also passes this check.
  38.         [[ "$leftover" == 'X' ]] ||
  39.                 { print -u2 -f $"%s: Parser error, leftover=%q\n" \
  40.                         "$0" "$leftover" ; return 1 ; }
  41.         # Mandatory captures are always stored; optional ones only if non-empty
  42.         data.url="${.sh.match[0]}"
  43.         data.scheme="${.sh.match[1]}"
  44.         data.login="${.sh.match[2]}"
  45.         # FIXME: This should use [[ ! -v .sh.match[3] ]], but ksh93u has bugs
  46.         [[ "${.sh.match[3]-}" != '' ]] && data.user="${.sh.match[3]}"
  47.         [[ "${.sh.match[4]-}" != '' ]] && data.password="${.sh.match[4]}"
  48.         data.hostport="${.sh.match[5]}"
  49.         data.host="${.sh.match[6]}"
  50.         [[ "${.sh.match[7]-}" != '' ]] && integer data.port="${.sh.match[7]}"
  51.         [[ "${.sh.match[8]-}" != '' ]] && data.uripath="${.sh.match[8]}"
  52.  
  53.         return 0
  54. }
  55.  
  56. function main
  57. {
  58.         compound c
  59.         # Demo: parse three sample URLs into c, printing c after each; 1) host + path
  60.         input="foo://host/path1/path2" ; printf "## input=%q\n" "$input"
  61.         parse_rfc1738_url c "$input"
  62.         print -v c
  63.         # 2) user, host, port and path
  64.         input="foo://myusr@host:14/path1/path2" ; printf "## input=%q\n" "$input"
  65.         parse_rfc1738_url c "$input"
  66.         print -v c
  67.         # 3) user, password, host, port and path
  68.         input="foo://myusr:mypasswd@host:14/path1/path2" ; printf "## input=%q\n" "$input"
  69.         parse_rfc1738_url c "$input"
  70.         print -v c
  71.         # NOTE(review): c is reused across the calls without being reset in between
  72. }
  73. # Script entry point: run the demo
  74. main

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at