[{"title":"( 90 个子文件 431KB ) Python-PySpark编程最佳实践指南","children":[{"title":"spark-syntax-master","children":[{"title":".github","children":[{"title":"ISSUE_TEMPLATE","children":[{"title":"feature-topic-request.md <span style='color:#111;'> 539B </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"src","children":[{"title":"Chapter 7 - High Performance Code","children":[{"title":"Section 1.3 - Joins on Skewed Data (High Frequency Keys I).ipynb <span style='color:#111;'> 24.90KB </span>","children":null,"spread":false},{"title":"Section 1.4 - Joins on Skewed Data (High Frequency Keys II) (WIP).ipynb <span style='color:#111;'> 12.01KB </span>","children":null,"spread":false},{"title":"Section 1.1 - Filter Pushdown.ipynb <span style='color:#111;'> 18.59KB </span>","children":null,"spread":false},{"title":"Section 1.2 - Joins on Skewed Data (Null Keys).ipynb <span style='color:#111;'> 20.78KB </span>","children":null,"spread":false}],"spread":true},{"title":"Random","children":[{"title":"spark-sugar.ipynb <span style='color:#111;'> 17.88KB </span>","children":null,"spread":false},{"title":"understanding-error-logs.ipynb <span style='color:#111;'> 2.46KB </span>","children":null,"spread":false},{"title":"Agg Optimization? oooo.ipynb <span style='color:#111;'> 6.94KB </span>","children":null,"spread":false},{"title":"Project before Join?.ipynb <span style='color:#111;'> 7.54KB </span>","children":null,"spread":false},{"title":"spark - freqItems.ipynb <span style='color:#111;'> 8.06KB </span>","children":null,"spread":false},{"title":"is it better to do fillna or coalesce spark.ipynb <span style='color:#111;'> 7.40KB </span>","children":null,"spread":false}],"spread":true},{"title":"images","children":[{"title":"master-slave.png <span style='color:#111;'> 123.56KB </span>","children":null,"spread":false},{"title":"mapreduce.png <span style='color:#111;'> 36.09KB </span>","children":null,"spread":false},{"title":"ieee-floating-point-representation.png <span style='color:#111;'> 7.94KB </span>","children":null,"spread":false},{"title":"key-terms.png <span style='color:#111;'> 41.69KB </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 1 - Basics","children":[{"title":"Section 1 - Useful Material.md <span style='color:#111;'> 2.66KB </span>","children":null,"spread":false},{"title":"Section 4 - More Comfortable with SQL?.ipynb <span style='color:#111;'> 6.41KB </span>","children":null,"spread":false},{"title":"Section 2 - Creating your First Data Object.ipynb <span style='color:#111;'> 4.88KB </span>","children":null,"spread":false},{"title":"Section 3 - Reading your First Dataset.ipynb <span style='color:#111;'> 7.05KB </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 6 - Tuning & Spark Parameters","children":[{"title":"Section 1.1 - Understanding how Spark Works.md <span style='color:#111;'> 6.04KB </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 3 - Aggregates","children":[{"title":"Section 2 - Non Deterministic Ordering for GroupBys.ipynb <span style='color:#111;'> 11.15KB </span>","children":null,"spread":false},{"title":"Section 1 - Clean Aggregations.ipynb <span style='color:#111;'> 10.31KB </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 2 - Exploring the Spark APIs","children":[{"title":"Section 1.4 - Decimals and Why did my Decimals Overflow.ipynb <span style='color:#111;'> 22.82KB </span>","children":null,"spread":false},{"title":"Section 1.2 - Arrays and Lists.ipynb <span style='color:#111;'> 5.44KB </span>","children":null,"spread":false},{"title":"Section 2.11 - Unionizing Multiple Dataframes.ipynb <span style='color:#111;'> 23.59KB </span>","children":null,"spread":false},{"title":"Section 1.1 - Struct Types.ipynb <span style='color:#111;'> 4.48KB </span>","children":null,"spread":false},{"title":"Section 2.5 - Casting Columns to Different Type.ipynb <span style='color:#111;'> 7.01KB </span>","children":null,"spread":false},{"title":"Section 2.9 - Filling in Null Values.ipynb <span style='color:#111;'> 16.54KB </span>","children":null,"spread":false},{"title":"Section 2.7 - Equality Statements in Spark and Comparison with Nulls.ipynb <span style='color:#111;'> 13.86KB </span>","children":null,"spread":false},{"title":"Section 2.10 - Spark Functions aren't Enough, I Need my Own!.ipynb <span style='color:#111;'> 16.21KB </span>","children":null,"spread":false},{"title":"Section 2 - Performing your First Transformations.ipynb <span style='color:#111;'> 7.03KB </span>","children":null,"spread":false},{"title":"Section 1.3 - Maps and Dictionaries.ipynb <span style='color:#111;'> 6.96KB </span>","children":null,"spread":false},{"title":"Section 2.1 - Looking at Your Data.ipynb <span style='color:#111;'> 11.14KB </span>","children":null,"spread":false},{"title":"Section 2.6 - Filtering Data.ipynb <span style='color:#111;'> 9.17KB </span>","children":null,"spread":false},{"title":"Section 2.8 - Case Statements.ipynb <span style='color:#111;'> 9.13KB </span>","children":null,"spread":false},{"title":"Section 3.1 - One to Many Rows.ipynb <span style='color:#111;'> 17.74KB </span>","children":null,"spread":false},{"title":"Section 3.2 - Range Join Conditions (WIP).ipynb <span style='color:#111;'> 12.49KB </span>","children":null,"spread":false},{"title":"Section 2.4 - Constant Values and Column Expressions.ipynb <span style='color:#111;'> 13.07KB </span>","children":null,"spread":false},{"title":"Section 2.2 - Selecting a Subset of Columns.ipynb <span style='color:#111;'> 9.79KB </span>","children":null,"spread":false},{"title":"Section 2.3 - Creating New Columns and Transforming Data.ipynb <span style='color:#111;'> 10.82KB </span>","children":null,"spread":false},{"title":"Section 2.12 - Performing Joins (clean one).ipynb <span style='color:#111;'> 14.23KB </span>","children":null,"spread":false}],"spread":false},{"title":"data","children":[{"title":"pets.csv <span style='color:#111;'> 252B </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 4 - Window Objects","children":[{"title":"Section 2 - Ordering High Frequency Data with a Window Object.ipynb <span style='color:#111;'> 12.13KB </span>","children":null,"spread":false},{"title":"Section 1 - Default Behaviour of a Window Object.ipynb <span style='color:#111;'> 13.77KB </span>","children":null,"spread":false}],"spread":true}],"spread":true},{"title":"README.md <span style='color:#111;'> 9.38KB </span>","children":null,"spread":false},{"title":".gitignore <span style='color:#111;'> 1.21KB </span>","children":null,"spread":false},{"title":"gitbook","children":[{"title":"Vagrantfile <span style='color:#111;'> 879B </span>","children":null,"spread":false},{"title":"gitbook-auto-summary.py <span style='color:#111;'> 4.65KB </span>","children":null,"spread":false},{"title":"src","children":[{"title":"Chapter 7 - High Performance Code","children":[{"title":"Section 1.2 - Joins on Skewed Data <Null Keys>.md <span style='color:#111;'> 10.07KB </span>","children":null,"spread":false},{"title":"Section 1.3 - Joins on Skewed Data <High Frequency Keys I>.md <span style='color:#111;'> 11.47KB </span>","children":null,"spread":false},{"title":"Section 1.4 - Joins on Skewed Data <High Frequency Keys II> <WIP>.md <span style='color:#111;'> 5.35KB </span>","children":null,"spread":false},{"title":"Section 1.1 - Filter Pushdown.md <span style='color:#111;'> 8.23KB </span>","children":null,"spread":false}],"spread":true},{"title":"SUMMARY.md <span style='color:#111;'> 4.33KB </span>","children":null,"spread":false},{"title":"Chapter 1 - Basics","children":[{"title":"Section 1 - Useful Material.md <span style='color:#111;'> 2.66KB </span>","children":null,"spread":false},{"title":"Section 2 - Creating your First Data Object.md <span style='color:#111;'> 2.38KB </span>","children":null,"spread":false},{"title":"Section 3 - Reading your First Dataset.md <span style='color:#111;'> 3.80KB </span>","children":null,"spread":false},{"title":"Section 4 - More Comfortable with SQL.md <span style='color:#111;'> 2.65KB </span>","children":null,"spread":false}],"spread":true},{"title":"Chapter 6 - Tuning & Spark Parameters","children":[{"title":"Section 1.1 - Understanding how Spark Works.md <span style='color:#111;'> 6.04KB </span>","children":null,"spread":false}],"spread":true},{"title":"README.md <span style='color:#111;'> 1.19KB </span>","children":null,"spread":false},{"title":"Chapter 3 - Aggregates","children":[{"title":"Section 2 - Non Deterministic Ordering for GroupBys.md <span style='color:#111;'> 4.48KB </span>","children":null,"spread":false},{"title":"Section 1 - Clean Aggregations.md <span style='color:#111;'> 3.69KB </span>","children":null,"spread":false}],"spread":false},{"title":"Chapter 2 - Exploring the Spark APIs","children":[{"title":"Section 2.6 - Filtering Data.md <span style='color:#111;'> 3.71KB </span>","children":null,"spread":false},{"title":"Section 2.8 - Case Statements.md <span style='color:#111;'> 3.89KB </span>","children":null,"spread":false},{"title":"Section 2.9 - Filling in Null Values.md <span style='color:#111;'> 7.32KB </span>","children":null,"spread":false},{"title":"Section 2.3 - Creating New Columns and Transforming Data.md <span style='color:#111;'> 4.51KB </span>","children":null,"spread":false},{"title":"Section 1.4 - Decimals and Why did my Decimals Overflow.md <span style='color:#111;'> 13.20KB </span>","children":null,"spread":false},{"title":"Section 1.1 - Struct Types.md <span style='color:#111;'> 2.63KB </span>","children":null,"spread":false},{"title":"Section 2.5 - Casting Columns to Different Type.md <span style='color:#111;'> 2.84KB </span>","children":null,"spread":false},{"title":"Section 2.7 - Equality Statements in Spark and Comparison with Nulls.md <span style='color:#111;'> 5.93KB </span>","children":null,"spread":false},{"title":"Section 2.11 - Unionizing Multiple Dataframes.md <span style='color:#111;'> 10.47KB </span>","children":null,"spread":false},{"title":"Section 3.2 - Range Join Conditions <WIP>.md <span style='color:#111;'> 5.51KB </span>","children":null,"spread":false},{"title":"Section 2.2 - Selecting a Subset of Columns.md <span style='color:#111;'> 4.24KB </span>","children":null,"spread":false},{"title":"Section 2.12 - Performing Joins <clean one>.md <span style='color:#111;'> 5.69KB </span>","children":null,"spread":false},{"title":"Section 1.2 - Arrays and Lists.md <span style='color:#111;'> 3.18KB </span>","children":null,"spread":false},{"title":"Section 2 - Performing your First Transformations.md <span style='color:#111;'> 2.78KB </span>","children":null,"spread":false},{"title":"Section 2.4 - Constant Values and Column Expressions.md <span style='color:#111;'> 5.26KB </span>","children":null,"spread":false},{"title":"Section 1.3 - Maps and Dictionaries.md <span style='color:#111;'> 4.39KB </span>","children":null,"spread":false},{"title":"Section 2.10 - Spark Functions aren't Enough, I Need my Own!.md <span style='color:#111;'> 8.29KB </span>","children":null,"spread":false},{"title":"Section 3.1 - One to Many Rows.md <span style='color:#111;'> 7.63KB </span>","children":null,"spread":false},{"title":"Section 2.1 - Looking at Your Data.md <span style='color:#111;'> 5.41KB </span>","children":null,"spread":false}],"spread":false},{"title":"cover.jpg <span style='color:#111;'> 66.60KB </span>","children":null,"spread":false},{"title":"Chapter 4 - Window Objects","children":[{"title":"Section 2 - Ordering High Frequency Data with a Window Object.md <span style='color:#111;'> 5.08KB </span>","children":null,"spread":false},{"title":"Section 1 - Default Behaviour of a Window Object.md <span style='color:#111;'> 6.14KB </span>","children":null,"spread":false}],"spread":false}],"spread":true},{"title":"Dockerfile <span style='color:#111;'> 896B </span>","children":null,"spread":false},{"title":"book.json <span style='color:#111;'> 679B </span>","children":null,"spread":false},{"title":"Makefile <span style='color:#111;'> 1.36KB </span>","children":null,"spread":false},{"title":".bookignore <span style='color:#111;'> 103B </span>","children":null,"spread":false},{"title":"convert-ipynb2markdown.py <span style='color:#111;'> 1.72KB </span>","children":null,"spread":false},{"title":"package-lock.json <span style='color:#111;'> 180.36KB </span>","children":null,"spread":false},{"title":"package.json <span style='color:#111;'> 1.01KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}],"spread":true}]