update jekyll

jiaxianhua · jiaxianhua · commit 625e1981ff10 · 2019-01-04T16:02:17.000+08:00
diff --git a/Gemfile b/Gemfile
@@ -1,5 +1,4 @@
 source "https://rubygems.org"
-ruby RUBY_VERSION
 
 # Hello! This is where you manage which Jekyll version is used to run.
 # When you want to use a different version, change it below, save the
@@ -9,7 +8,7 @@ ruby RUBY_VERSION
 #
 # This will help ensure the proper Jekyll version is running.
 # Happy Jekylling!
-gem "jekyll", "3.4.2"
+gem "jekyll", "~> 3.8.5"
 
 # This is the default theme for new Jekyll sites. You may change this to anything you like.
 gem "minima", "~> 2.0"
@@ -20,9 +19,12 @@ gem "minima", "~> 2.0"
 
 # If you have any plugins, put them here!
 group :jekyll_plugins do
-   gem "jekyll-feed", "~> 0.6"
+  gem "jekyll-feed", "~> 0.6"
 end
 
 # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
-gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
+gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
+
+# Performance-booster for watching directories on Windows
+gem "wdm", "~> 0.1.0" if Gem.win_platform?
 
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,57 +1,74 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    addressable (2.5.0)
-      public_suffix (~> 2.0, >= 2.0.2)
+    addressable (2.5.2)
+      public_suffix (>= 2.0.2, < 4.0)
     colorator (1.1.0)
-    ffi (1.9.18)
+    concurrent-ruby (1.1.4)
+    em-websocket (0.5.1)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0.6.0)
+    eventmachine (1.2.7)
+    ffi (1.9.25)
     forwardable-extended (2.6.0)
-    jekyll (3.4.2)
+    http_parser.rb (0.6.0)
+    i18n (0.9.5)
+      concurrent-ruby (~> 1.0)
+    jekyll (3.8.5)
       addressable (~> 2.4)
       colorator (~> 1.0)
+      em-websocket (~> 0.5)
+      i18n (~> 0.7)
       jekyll-sass-converter (~> 1.0)
-      jekyll-watch (~> 1.1)
-      kramdown (~> 1.3)
-      liquid (~> 3.0)
+      jekyll-watch (~> 2.0)
+      kramdown (~> 1.14)
+      liquid (~> 4.0)
       mercenary (~> 0.3.3)
       pathutil (~> 0.9)
-      rouge (~> 1.7)
+      rouge (>= 1.7, < 4)
       safe_yaml (~> 1.0)
-    jekyll-feed (0.9.1)
+    jekyll-feed (0.11.0)
       jekyll (~> 3.3)
-    jekyll-sass-converter (1.5.0)
+    jekyll-sass-converter (1.5.2)
       sass (~> 3.4)
-    jekyll-watch (1.5.0)
-      listen (~> 3.0, < 3.1)
-    kramdown (1.13.2)
-    liquid (3.0.6)
-    listen (3.0.8)
+    jekyll-seo-tag (2.5.0)
+      jekyll (~> 3.3)
+    jekyll-watch (2.1.2)
+      listen (~> 3.0)
+    kramdown (1.17.0)
+    liquid (4.0.1)
+    listen (3.1.5)
       rb-fsevent (~> 0.9, >= 0.9.4)
       rb-inotify (~> 0.9, >= 0.9.7)
+      ruby_dep (~> 1.2)
     mercenary (0.3.6)
-    minima (2.1.0)
-      jekyll (~> 3.3)
-    pathutil (0.14.0)
+    minima (2.5.0)
+      jekyll (~> 3.5)
+      jekyll-feed (~> 0.9)
+      jekyll-seo-tag (~> 2.1)
+    pathutil (0.16.2)
       forwardable-extended (~> 2.6)
-    public_suffix (2.0.5)
-    rb-fsevent (0.9.8)
-    rb-inotify (0.9.8)
-      ffi (>= 0.5.0)
-    rouge (1.11.1)
+    public_suffix (3.0.3)
+    rb-fsevent (0.10.3)
+    rb-inotify (0.10.0)
+      ffi (~> 1.0)
+    rouge (3.3.0)
+    ruby_dep (1.5.0)
     safe_yaml (1.0.4)
-    sass (3.4.23)
+    sass (3.7.2)
+      sass-listen (~> 4.0.0)
+    sass-listen (4.0.0)
+      rb-fsevent (~> 0.9, >= 0.9.4)
+      rb-inotify (~> 0.9, >= 0.9.7)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
-  jekyll (= 3.4.2)
+  jekyll (~> 3.8.5)
   jekyll-feed (~> 0.6)
   minima (~> 2.0)
   tzinfo-data
 
-RUBY VERSION
-   ruby 2.3.0p0
-
 BUNDLED WITH
-   1.14.6
+   1.16.2
diff --git a/_config.yml b/_config.yml
@@ -17,21 +17,17 @@ title: iosdevlog
 email: iosdevlog@iosdevlog.com
 description: > # this means to ignore newlines until "baseurl:"
         iosdevlog.com
-baseurl: "" # the subpath of your site, e.g. /blog
+baseurl: ""
 url: "http://iosdevlog.com" # the base hostname & protocol for your site, e.g. http://example.com
 twitter_username: iosdevlog
 github_username: iosdevlog 
 
-paginate: 50
-
 # Build settings
 markdown: kramdown
 theme: minima
-gems:
+plugins:
   - jekyll-feed
-exclude:
-  - Gemfile
-  - Gemfile.lock
 
 # Google Analytics
 google_analytics: UA-104431384-1
+
diff --git a/_posts/2019-01-03-nlp.md b/_posts/2019-01-03-nlp.md
@@ -11,13 +11,17 @@ tags: []
 # 自然语言处理简介
 
 现在，让我们先从介绍自然语言处理(NLP)开始吧。众所周知，语言是人们日常生活的核心部分，任何与语言问题相关的工作都会显得非常有意思。希望这本书能带你领略到 NLP 的风采，并引起学习 NLP 的兴趣。首先，我们需要来了解一下该领域中的一些令人惊叹的概念，并在工作中实际尝试一些具有挑战性的 NLP 应用。
+
 在英语环境中，语言处理研究这一领域通常被简称为 NLP。对语言有深入研究的人通常被叫作语言学家，而“计算机语言学家”这个专用名词则指的是将计算机科学应用于语言处理领域的人。因此从本质上来说，一个计算机语言学家应该既有足够的语言理解能力，同时还可以用其计算机技能来模拟出语言的不同方面。虽然计算机语言学家主要研究的是语言处理理论，但 NLP 无疑是对计算机语言学的具体应用。
+
 NLP 多数情况下指的是计算机上各种大同小异的语言处理应用，以及用 NLP 技术所构建的实际应用程序。在实践中，NLP 与教孩子学语言的过程非常类似。其大多数任务(如对单词、语句的理解，形成语法和结构都正确的语句等)对于人类而言都是非常自然的能力。但对于 NLP 来说，其中有一些任务就必须要转向标识化处理、语块分解、词性标注、语法解析、机器翻译及语音识别等这些领域的一部分，且这些任务有一大部分还仍是当前计算机领域中非常棘手的挑战。在本书中，我们将更侧重于讨论 NLP 的实用方面，因此我们会假设读者在 NLP 上已经有了一些背景知识。所以，读者最好在最低限度上对编程语言有一点了解，并对 NLP 和语言学有一定的兴趣。
 
 目前，NLP 已被认为是业界最为稀缺的技能之一。自大数据的概念问世之后，我们所面对的主要挑战是——业界需要越来越多不仅能处理结构化数据，同时也能处理半结构化或非结构化数据的人才。对于我们所生产出来的那些博客、微博、Facebook 订阅、聊天信息、E-mail 以及网络评论等，各公司都在致力于收集所有不同种类的数据，以便建立更好的客户针对性，形成有意义的见解。而要想处理所有的这些非结构化数据源，我们就需要掌握一些 NLP 技能的人员。
+
 身处信息时代，我们甚至不能想象生活中没有 Google 会是什么样子。我们会因一些最基本的事情而用到 Siri;我们会需要用垃圾过滤器来过滤垃圾邮件;我们会需要在自己的 Word 文档中用到拼写检查器等。在现实世界中所要用到的 NLP 应用数不胜数。
 
 在这里，我们可以再列举一些令人惊叹的 NLP 应用实例。虽然你很可能已经用过它们，
+
 但未必知道这些应用是基于 NLP 技术的。
 
 • 拼写校正(MS Word/其他编辑器)
@@ -205,7 +209,7 @@ SVM 属于非概率分类器。SVM 会在无限维空间中构造出一组超平
 
 事实上，它比较适合用于在各种数据集的子样本上构建多决策树型的分类器。另外，该森林中的每个树结构都建立在一个随机的最佳特征子集上。最后，启用这些树结构的动作也找出了所有随机特征子集中的最佳子集。总而言之，随机森林是当前众多分类算法中表现最佳的算法之一。
 
-# 示例 饭店评论
+# 示例 分析 饭店评论
 
 
 ```python
@@ -233,10 +237,6 @@ dataset.head()
     dtypes: int64(1), object(1)
     memory usage: 15.7+ KB
 
-
-
-
-
 <div>
 <style scoped>
     .dataframe tbody tr th:only-of-type {
@@ -324,15 +324,10 @@ for i in range(0, 1000):
     [nltk_data]     /Users/iosdevlog/nltk_data...
     [nltk_data]   Package stopwords is already up-to-date!
 
-
-
 ```python
 corpus[:10]
 ```
 
-
-
-
     ['wow love place',
      'crust good',
      'tasti textur nasti',
@@ -344,9 +339,6 @@ corpus[:10]
      'fri great',
      'great touch']
 
-
-
-
 ```python
 # Creating the Bag of Words model
 from sklearn.feature_extraction.text import CountVectorizer
@@ -357,52 +349,34 @@ y = dataset.iloc[:, 1].values
 X[:5]
 ```
 
-
-
-
     array([[0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0]], dtype=int64)
 
-
-
-
 ```python
 # Splitting the dataset into the Training set and Test set
 from sklearn.model_selection import train_test_split
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
 X_train[:5]
 ```
 
-
-
-
     array([[0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0],
            [0, 0, 0, ..., 0, 0, 0]], dtype=int64)
 
-
-
-
 ```python
 # Fitting Naive Bayes to the Training set
 from sklearn.naive_bayes import GaussianNB
 classifier = GaussianNB()
 classifier.fit(X_train, y_train)
 ```
 
-
-
-
     GaussianNB(priors=None, var_smoothing=1e-09)
 
-
-
-
 ```python
 # Predicting the Test set results
 # Looking at the first 5 test samples, we can see we predicted the first 3 incorrectly as positive reviews, and the last 2 correctly as negative reviews
@@ -417,26 +391,14 @@ print(cv.inverse_transform(X_test[:5]))
     [0 0 0 0 0]
     [array(['aw', 'food', 'present'], dtype='<U17'), array(['food', 'servic', 'worst'], dtype='<U17'), array(['dine', 'never', 'place'], dtype='<U17'), array(['disgrac', 'guess', 'mayb', 'night', 'went'], dtype='<U17'), array(['avoid', 'lover', 'mean', 'place', 'sushi'], dtype='<U17')]
 
-
-
 ```python
 # Making the Confusion Matrix
 from sklearn.metrics import confusion_matrix
 cm = confusion_matrix(y_test, y_pred)
 cm
 ```
 
-
-
-
     array([[55, 42],
            [12, 91]])
 
-
-
 > 参考资料：《NLTK基础教程》，《机器学习 A-Z》
-
-
-```python
-
-```