jsoup connect

jsoup connect

 

/**
 * Opens a new {@link Connection} for the given URL, ready to be configured and then executed
 * in order to fetch and parse an HTML page.
 * <p>
 * Typical usage:
 * <ul>
 *  <li><code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code></li>
 *  <li><code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code></li>
 * </ul>
 * @param url the address to connect to; its protocol must be {@code http} or {@code https}.
 * @return the new connection. Data, cookies and headers may be added, and the user-agent, referrer and
 *         method set, before the request is executed.
 */
public static Connection connect(String url) {
    // Pure delegation: the HTTP implementation builds and configures the connection.
    final Connection connection = HttpConnection.connect(url);
    return connection;
}

 

 

/**
 * Builds a fresh {@code HttpConnection} and points it at the supplied URL.
 *
 * @param url the URL string handed to the new connection's {@code url(String)} method
 * @return the newly created connection, exposed through the {@code Connection} type
 */
public static Connection connect(String url) {
    final Connection connection = new HttpConnection();
    connection.url(url);
    return connection;
}

 

 

/**
 * Sets the request URL from its string form.
 * <p>
 * The string is first checked with {@code Validate.notEmpty}, then passed through
 * {@code encodeUrl} and parsed into a {@link URL} that is stored on the request.
 *
 * @param url the URL to request, as a string
 * @return this connection, so calls can be chained
 * @throws IllegalArgumentException if the encoded string cannot be parsed as a URL; the
 *         underlying {@link MalformedURLException} is attached as the cause
 */
public Connection url(String url) {
    Validate.notEmpty(url, "Must supply a valid URL");
    try {
        final URL parsed = new URL(encodeUrl(url));
        req.url(parsed);
        return this;
    } catch (MalformedURLException malformed) {
        // Surface the checked parse failure as an unchecked argument error for callers.
        throw new IllegalArgumentException("Malformed URL: " + url, malformed);
    }
}

 

 

/**
 * Stores the given URL on this object.
 *
 * @param url the URL to store; checked with {@code Validate.notNull} before assignment
 * @return this object, cast to {@code T}, so calls can be chained
 */
public T url(URL url) {
    Validate.notNull(url, "URL must not be null");
    this.url = url;
    final T self = (T) this;
    return self;
}

 

/**
 * Creates a URL by parsing the given spec with the specified handler
 * within a specified context. If the handler is null, the parsing
 * occurs as with the two argument constructor.
 * <p>
 * The protocol name found in {@code spec} is lower-cased independently of the
 * default locale, so scheme detection gives the same result on every platform.
 * Any non-{@code MalformedURLException} failure raised while parsing is wrapped
 * in a {@code MalformedURLException} that carries the original exception as its cause.
 *
 * @param      context   the context in which to parse the specification.
 * @param      spec      the {@code String} to parse as a URL.
 * @param      handler   the stream handler for the URL.
 * @exception  MalformedURLException  if no protocol is specified, or an
 *               unknown protocol is found, or {@code spec} is {@code null}.
 * @exception  SecurityException
 *        if a security manager exists and its
 *        {@code checkPermission} method doesn't allow
 *        specifying a stream handler.
 * @see        java.net.URL#URL(java.lang.String, java.lang.String,
 *                  int, java.lang.String)
 * @see        java.net.URLStreamHandler
 * @see        java.net.URLStreamHandler#parseURL(java.net.URL,
 *                  java.lang.String, int, int)
 */
public URL(URL context, String spec, URLStreamHandler handler)
throws MalformedURLException
{
    String original = spec;
    int i, limit, c;
    int start = 0;
    String newProtocol = null;
    boolean aRef=false;
    boolean isRelative = false;

    // Check for permission to specify a handler
    if (handler != null) {
        SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            checkSpecifyHandler(sm);
        }
    }

    try {
        // [start, limit) becomes the part of spec that is still to be parsed.
        // A null spec fails here and is reported through the generic catch below.
        limit = spec.length();
        while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
            limit--;        //eliminate trailing whitespace
        }
        while ((start < limit) && (spec.charAt(start) <= ' ')) {
            start++;        // eliminate leading whitespace
        }

        // Skip an optional, case-insensitive "url:" prefix.
        if (spec.regionMatches(true, start, "url:", 0, 4)) {
            start += 4;
        }
        if (start < spec.length() && spec.charAt(start) == '#') {
            /* we're assuming this is a ref relative to the context URL.
            * This means protocols cannot start w/ '#', but we must parse
            * ref URL's like: "hello:there" w/ a ':' in them.
            */
            aRef=true;
        }
        // Scan for a "scheme:" prefix; the scan stops at the first '/' or ':'.
        for (i = start ; !aRef && (i < limit) &&((c = spec.charAt(i)) != '/') ; i++) {
            if (c == ':') {

                // Lower-case with Locale.ROOT: the default-locale variant can map
                // ASCII letters to non-ASCII ones (e.g. 'I' under a Turkish locale),
                // which would turn a valid scheme into an unknown protocol.
                String s = spec.substring(start, i).toLowerCase(java.util.Locale.ROOT);
                if (isValidProtocol(s)) {
                    newProtocol = s;
                    start = i + 1;
                }
                break;
            }
        }

        // Only use our context if the protocols match.
        protocol = newProtocol;
        if ((context != null) && ((newProtocol == null) ||
        newProtocol.equalsIgnoreCase(context.protocol))) {
            // inherit the protocol handler from the context
            // if not specified to the constructor
            if (handler == null) {
                handler = context.handler;
            }

            // If the context is a hierarchical URL scheme and the spec
            // contains a matching scheme then maintain backwards
            // compatibility and treat it as if the spec didn't contain
            // the scheme; see 5.2.3 of RFC2396
            if (context.path != null && context.path.startsWith("/"))
            {
                newProtocol = null;
            }

            // No scheme of its own: start from a copy of the context's components.
            if (newProtocol == null) {
                protocol = context.protocol;
                authority = context.authority;
                userInfo = context.userInfo;
                host = context.host;
                port = context.port;
                file = context.file;
                path = context.path;
                isRelative = true;
            }
        }

        if (protocol == null) {
            throw new MalformedURLException("no protocol: "+original);
        }

        // Get the protocol handler if not specified or the protocol
        // of the context could not be used
        if (handler == null &&(handler = getURLStreamHandler(protocol)) == null) {
            throw new MalformedURLException("unknown protocol: "+protocol);
        }

        this.handler = handler;

        // Split off the fragment: everything after the first '#' at or after start.
        i = spec.indexOf('#', start);
        if (i >= 0) {
            ref = spec.substring(i + 1, limit);
            limit = i;
        }

        /*
        * Handle special case inheritance of query and fragment
        * implied by RFC2396 section 5.2.2.
        */
        if (isRelative && start == limit) {
            query = context.query;
            if (ref == null) {
                ref = context.ref;
            }
        }

        // The handler parses whatever remains in [start, limit).
        handler.parseURL(this, spec, start, limit);

    } catch(MalformedURLException e) {
        throw e;
    } catch(Exception e) {
        // Report every other failure as a malformed URL, keeping the cause attached.
        MalformedURLException exception = new MalformedURLException(e.getMessage());
        exception.initCause(e);
        throw exception;
    }
}

 

 

####################################

posted @ 2021-11-01 08:53  西北逍遥  阅读(464)  评论(0)  编辑  收藏  举报