Useful utilities, written in Go, for managing a MongoDB instance. - bsondump - display BSON files in a human-readable format - mongoimport - Convert data from JSON, TSV or CSV and insert it into a collection - mongoexport - Write an existing collection to CSV or JSON format - mongodump/mongorestore - Dump MongoDB backups to disk in .BSON format, or restore them to a live database - mongostat - Monitor live MongoDB servers, replica sets, or sharded clusters - mongofiles - Read, write, delete, or update files in GridFS - mongotop - Monitor read/write activity on a mongo server - mongoreplay - Capture, observe, and replay traffic for MongoDB WWW: https://github.com/mongodb/mongo-tools PR: 237352 Submitted by: Andrew Shevchuk <dev.ashevchuk@gmail.com> (based on)
325 lines
8.4 KiB
Go
325 lines
8.4 KiB
Go
--- common/util/file.go.orig 2018-11-21 17:52:58 UTC
|
|
+++ common/util/file.go
|
|
@@ -9,11 +9,68 @@
|
|
import (
|
|
"bufio"
|
|
"io"
|
|
- "net/url"
|
|
"os"
|
|
"path/filepath"
|
|
+ "strconv"
|
|
)
|
|
|
|
+// Error reports an error and the operation and URL that caused it.
|
|
+type Error struct {
|
|
+ Op string
|
|
+ URL string
|
|
+ Err error
|
|
+}
|
|
+
|
|
+func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
|
|
+
|
|
+func ishex(c byte) bool {
|
|
+ switch {
|
|
+ case '0' <= c && c <= '9':
|
|
+ return true
|
|
+ case 'a' <= c && c <= 'f':
|
|
+ return true
|
|
+ case 'A' <= c && c <= 'F':
|
|
+ return true
|
|
+ }
|
|
+ return false
|
|
+}
|
|
+
|
|
+func unhex(c byte) byte {
|
|
+ switch {
|
|
+ case '0' <= c && c <= '9':
|
|
+ return c - '0'
|
|
+ case 'a' <= c && c <= 'f':
|
|
+ return c - 'a' + 10
|
|
+ case 'A' <= c && c <= 'F':
|
|
+ return c - 'A' + 10
|
|
+ }
|
|
+ return 0
|
|
+}
|
|
+
|
|
+type encoding int
|
|
+
|
|
+const (
|
|
+ encodePath encoding = 1 + iota
|
|
+ encodePathSegment
|
|
+ encodeHost
|
|
+ encodeZone
|
|
+ encodeUserPassword
|
|
+ encodeQueryComponent
|
|
+ encodeFragment
|
|
+)
|
|
+
|
|
+type EscapeError string
|
|
+
|
|
+func (e EscapeError) Error() string {
|
|
+ return "invalid URL escape " + strconv.Quote(string(e))
|
|
+}
|
|
+
|
|
+type InvalidHostError string
|
|
+
|
|
+func (e InvalidHostError) Error() string {
|
|
+ return "invalid character " + strconv.Quote(string(e)) + " in host name"
|
|
+}
|
|
+
|
|
// GetFieldsFromFile fetches the first line from the contents of the file
|
|
// at "path"
|
|
func GetFieldsFromFile(path string) ([]string, error) {
|
|
@@ -42,11 +99,11 @@
|
|
}
|
|
|
|
func EscapeCollectionName(collName string) string {
|
|
- return url.PathEscape(collName)
|
|
+ return PathEscape(collName)
|
|
}
|
|
|
|
func UnescapeCollectionName(escapedCollName string) (string, error) {
|
|
- return url.PathUnescape(escapedCollName)
|
|
+ return PathUnescape(escapedCollName)
|
|
}
|
|
|
|
type WrappedReadCloser struct {
|
|
@@ -76,3 +133,238 @@
|
|
}
|
|
return innerErr
|
|
}
|
|
+
|
|
+// shouldEscape reports whether the specified character should be escaped when
|
|
+// appearing in a URL string, according to RFC 3986.
|
|
+//
|
|
+// Please be informed that for now shouldEscape does not check all
|
|
+// reserved characters correctly. See golang.org/issue/5684.
|
|
+func shouldEscape(c byte, mode encoding) bool {
|
|
+ // §2.3 Unreserved characters (alphanum)
|
|
+ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
|
|
+ return false
|
|
+ }
|
|
+
|
|
+ if mode == encodeHost || mode == encodeZone {
|
|
+ // §3.2.2 Host allows
|
|
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
|
+ // as part of reg-name.
|
|
+ // We add : because we include :port as part of host.
|
|
+ // We add [ ] because we include [ipv6]:port as part of host.
|
|
+ // We add < > because they're the only characters left that
|
|
+ // we could possibly allow, and Parse will reject them if we
|
|
+ // escape them (because hosts can't use %-encoding for
|
|
+ // ASCII bytes).
|
|
+ switch c {
|
|
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
|
|
+ return false
|
|
+ }
|
|
+ }
|
|
+
|
|
+ switch c {
|
|
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
|
|
+ return false
|
|
+
|
|
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
|
|
+ // Different sections of the URL allow a few of
|
|
+ // the reserved characters to appear unescaped.
|
|
+ switch mode {
|
|
+ case encodePath: // §3.3
|
|
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
+ // meaning to individual path segments. This package
|
|
+ // only manipulates the path as a whole, so we allow those
|
|
+ // last three as well. That leaves only ? to escape.
|
|
+ return c == '?'
|
|
+
|
|
+ case encodePathSegment: // §3.3
|
|
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
+ // meaning to individual path segments.
|
|
+ return c == '/' || c == ';' || c == ',' || c == '?'
|
|
+
|
|
+ case encodeUserPassword: // §3.2.1
|
|
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
|
|
+ // userinfo, so we must escape only '@', '/', and '?'.
|
|
+ // The parsing of userinfo treats ':' as special so we must escape
|
|
+ // that too.
|
|
+ return c == '@' || c == '/' || c == '?' || c == ':'
|
|
+
|
|
+ case encodeQueryComponent: // §3.4
|
|
+ // The RFC reserves (so we must escape) everything.
|
|
+ return true
|
|
+
|
|
+ case encodeFragment: // §4.1
|
|
+ // The RFC text is silent but the grammar allows
|
|
+ // everything, so escape nothing.
|
|
+ return false
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if mode == encodeFragment {
|
|
+ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
|
|
+ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
|
|
+ // need to be escaped. To minimize potential breakage, we apply two restrictions:
|
|
+ // (1) we always escape sub-delims outside of the fragment, and (2) we always
|
|
+ // escape single quote to avoid breaking callers that had previously assumed that
|
|
+ // single quotes would be escaped. See issue #19917.
|
|
+ switch c {
|
|
+ case '!', '(', ')', '*':
|
|
+ return false
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Everything else must be escaped.
|
|
+ return true
|
|
+}
|
|
+
|
|
+// PathUnescape does the inverse transformation of PathEscape,
|
|
+// converting each 3-byte encoded substring of the form "%AB" into the
|
|
+// hex-decoded byte 0xAB. It returns an error if any % is not followed
|
|
+// by two hexadecimal digits.
|
|
+//
|
|
+// PathUnescape is identical to net/url's QueryUnescape except that it does not
|
|
+// unescape '+' to ' ' (space).
|
|
+func PathUnescape(s string) (string, error) {
|
|
+ return unescape(s, encodePathSegment)
|
|
+}
|
|
+
|
|
+// unescape unescapes a string; the mode specifies
|
|
+// which section of the URL string is being unescaped.
|
|
+func unescape(s string, mode encoding) (string, error) {
|
|
+ // Count %, check that they're well-formed.
|
|
+ n := 0
|
|
+ hasPlus := false
|
|
+ for i := 0; i < len(s); {
|
|
+ switch s[i] {
|
|
+ case '%':
|
|
+ n++
|
|
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
|
+ s = s[i:]
|
|
+ if len(s) > 3 {
|
|
+ s = s[:3]
|
|
+ }
|
|
+ return "", EscapeError(s)
|
|
+ }
|
|
+ // Per https://tools.ietf.org/html/rfc3986#page-21
|
|
+ // in the host component %-encoding can only be used
|
|
+ // for non-ASCII bytes.
|
|
+ // But https://tools.ietf.org/html/rfc6874#section-2
|
|
+ // introduces %25 being allowed to escape a percent sign
|
|
+ // in IPv6 scoped-address literals. Yay.
|
|
+ if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
|
|
+ return "", EscapeError(s[i : i+3])
|
|
+ }
|
|
+ if mode == encodeZone {
|
|
+ // RFC 6874 says basically "anything goes" for zone identifiers
|
|
+ // and that even non-ASCII can be redundantly escaped,
|
|
+ // but it seems prudent to restrict %-escaped bytes here to those
|
|
+ // that are valid host name bytes in their unescaped form.
|
|
+ // That is, you can use escaping in the zone identifier but not
|
|
+ // to introduce bytes you couldn't just write directly.
|
|
+ // But Windows puts spaces here! Yay.
|
|
+ v := unhex(s[i+1])<<4 | unhex(s[i+2])
|
|
+ if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
|
|
+ return "", EscapeError(s[i : i+3])
|
|
+ }
|
|
+ }
|
|
+ i += 3
|
|
+ case '+':
|
|
+ hasPlus = mode == encodeQueryComponent
|
|
+ i++
|
|
+ default:
|
|
+ if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
|
|
+ return "", InvalidHostError(s[i : i+1])
|
|
+ }
|
|
+ i++
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if n == 0 && !hasPlus {
|
|
+ return s, nil
|
|
+ }
|
|
+
|
|
+ t := make([]byte, len(s)-2*n)
|
|
+ j := 0
|
|
+ for i := 0; i < len(s); {
|
|
+ switch s[i] {
|
|
+ case '%':
|
|
+ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
|
|
+ j++
|
|
+ i += 3
|
|
+ case '+':
|
|
+ if mode == encodeQueryComponent {
|
|
+ t[j] = ' '
|
|
+ } else {
|
|
+ t[j] = '+'
|
|
+ }
|
|
+ j++
|
|
+ i++
|
|
+ default:
|
|
+ t[j] = s[i]
|
|
+ j++
|
|
+ i++
|
|
+ }
|
|
+ }
|
|
+ return string(t), nil
|
|
+}
|
|
+
|
|
+// PathEscape escapes the string so it can be safely placed
|
|
+// inside a URL path segment.
|
|
+func PathEscape(s string) string {
|
|
+ return escape(s, encodePathSegment)
|
|
+}
|
|
+
|
|
+func escape(s string, mode encoding) string {
|
|
+ spaceCount, hexCount := 0, 0
|
|
+ for i := 0; i < len(s); i++ {
|
|
+ c := s[i]
|
|
+ if shouldEscape(c, mode) {
|
|
+ if c == ' ' && mode == encodeQueryComponent {
|
|
+ spaceCount++
|
|
+ } else {
|
|
+ hexCount++
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if spaceCount == 0 && hexCount == 0 {
|
|
+ return s
|
|
+ }
|
|
+
|
|
+ var buf [64]byte
|
|
+ var t []byte
|
|
+
|
|
+ required := len(s) + 2*hexCount
|
|
+ if required <= len(buf) {
|
|
+ t = buf[:required]
|
|
+ } else {
|
|
+ t = make([]byte, required)
|
|
+ }
|
|
+
|
|
+ if hexCount == 0 {
|
|
+ copy(t, s)
|
|
+ for i := 0; i < len(s); i++ {
|
|
+ if s[i] == ' ' {
|
|
+ t[i] = '+'
|
|
+ }
|
|
+ }
|
|
+ return string(t)
|
|
+ }
|
|
+
|
|
+ j := 0
|
|
+ for i := 0; i < len(s); i++ {
|
|
+ switch c := s[i]; {
|
|
+ case c == ' ' && mode == encodeQueryComponent:
|
|
+ t[j] = '+'
|
|
+ j++
|
|
+ case shouldEscape(c, mode):
|
|
+ t[j] = '%'
|
|
+ t[j+1] = "0123456789ABCDEF"[c>>4]
|
|
+ t[j+2] = "0123456789ABCDEF"[c&15]
|
|
+ j += 3
|
|
+ default:
|
|
+ t[j] = s[i]
|
|
+ j++
|
|
+ }
|
|
+ }
|
|
+ return string(t)
|
|
+}
|