diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 8d0e0f09..f31e7e92 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/examples.doctree b/docs/build/doctrees/examples.doctree
index 459db6af..ed989d52 100644
Binary files a/docs/build/doctrees/examples.doctree and b/docs/build/doctrees/examples.doctree differ
diff --git a/docs/build/doctrees/feature_builder.doctree b/docs/build/doctrees/feature_builder.doctree
index e13cdc16..70c1ee9d 100644
Binary files a/docs/build/doctrees/feature_builder.doctree and b/docs/build/doctrees/feature_builder.doctree differ
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
index 1a4d56a7..f6754f42 100644
Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ
diff --git a/docs/build/html/_sources/examples.rst.txt b/docs/build/html/_sources/examples.rst.txt
index 637d96db..b7bc948d 100644
--- a/docs/build/html/_sources/examples.rst.txt
+++ b/docs/build/html/_sources/examples.rst.txt
@@ -85,9 +85,7 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 		timestamp_col = "timestamp",
 		grouping_keys = ["batch_num", "round_num"],
 		vector_directory = "./vector_data/",
-		output_file_path_chat_level = "./jury_output_chat_level.csv",
-		output_file_path_user_level = "./jury_output_user_level.csv",
-		output_file_path_conv_level = "./jury_output_conversation_level.csv",
+		output_file_base = "jury_output",
 		turns = True
 	)
 	jury_feature_builder.featurize()
@@ -95,6 +93,9 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 Basic Input Columns
 ^^^^^^^^^^^^^^^^^^^^
 
+Conversation Parameters
+"""""""""""""""""""""""""
+
 * The **input_df** parameter is where you pass in your dataframe. In this case, we want to run the FeatureBuilder on the juries data that we read in!
 
 * The **speaker_id_col** refers to the name of the column containing a unique identifier for each speaker / participant in the conversation. Here, in the data, the name of our columns is called "speaker_nickname."
@@ -105,6 +106,8 @@ Basic Input Columns
 
 	* If you do not pass anything in, "message" is the default value for this parameter.
 
+	* We assume that all messages are ordered chronologically.
+
 * The **timestamp_col** refers to the name of the column containing when each utterance was said. In this case, we have exactly one timestamp for each message, stored in "timestamp." 
 
 	* If you do not pass anything in, "timestamp" is the default value for this parameter.
@@ -125,21 +128,39 @@ Basic Input Columns
 
 		conversation_id_col = "batch_num"
 
+Vector Directory
+""""""""""""""""""
+
 * The **vector_directory** is the name of a directory in which we will store some pre-processed information. Some features require running inference from HuggingFace's `RoBERTa-based sentiment model <https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment>`_, and others require generating `SBERT vectors <https://sbert.net/>`_. These processes take time, and we cache the outputs so that subsequent runs of the FeatureBuilder on the same dataset will not take as much time. Therefore, we require you to pass in a location where you'd like us to save these outputs.
 
+	* By default, the directory is named "vector_data/."
+
 	* **Note that we do not require the name of the vector directory to be a folder that already exists**; if it doesn't exist, we will create it for you.
 
 	* Inside the folder, we will store the RoBERTa outputs in a subfolder called "sentiment", and the SBERT vectors in a subfolder called "sentence." We will create both of these subfolders for you.
 
 	* The **turns** parameter, which we will discuss later, controls whether or not you'd like the FeatureBuilder to treat successive utterances by the same individual as a single "turn," or whether you'd like them to be treated separately. We will cache different versions of outputs based on this parameter; we use a subfolder called "chats" (when **turns=False**) or "turns" (when **turns=True**).
 
-* There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on `Generating Features: Utterance-, Speaker-, and Conversation-Level <intro#generating_features>`_ for more details.) However, this means that we require you to provide a path for where you would like us to store each of the output files; **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features).
+.. _output_file_details:
+
+Output File Naming Details 
+""""""""""""""""""""""""""""
+
+* There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on `Generating Features: Utterance-, Speaker-, and Conversation-Level <intro#generating_features>`_ for more details.) These are generated using the **output_file_base** parameter.
+
+	* **All of the outputs will be generated in a folder called "output."**
+
+	* Within the "output" folder, **we generate sub-folders such that the three files will be located in subfolders called "chat," "user," and "conv," respectively.**
+
+	* Similar to the **vector_directory** parameter, the "chat" directory will be renamed to "turn" depending on the value of the **turns** parameter.
+
+* It is possible to generate different names for each of the three output files, rather than using the same base file path by modifying **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features). However, because outputs are organized in the specific locations described above, **we have specific requirements for inputting the output paths, and we will modify the path under the hood to match our file naming schema,** rather than saving the file directly to the specified location.
 
 	* We expect that you pass in a **path**, not just a filename. For example, the path needs to be "./my_file.csv", and not just "my_file.csv"; you will get an error if you pass in only a name without the "/".
 
-	* Regardless of your path location, we will automatically append the name "output" to the fornt of your file path, such that **all of the outputs will be generated in a folder called "output."**
+	* Regardless of your path location, we will automatically prepend the name "output" to the front of your file path.
 
-	* Within the "output" folder, **we will also generate sub-folders such that the three files will be located in subfolders called "chat," "user," and "conv," respectively.**
+	* Within the "output" folder, **we will also generate the chat/user/conv sub-folders.**
 
 	* If you pass in a path that already contains the above automatically-generated elements (for example, "./output/chat/my_chat_features.csv"), we will skip these steps and directly save it in the relevant folder.
 
@@ -153,7 +174,7 @@ Basic Input Columns
 
 		output_file_path_chat_level = "./output/chat/jury_output_chat_level.csv"
 
-	* And these two ways of specifying an output path are equivalent, assumign that turns=True:
+	* And these two ways of specifying an output path are equivalent, assuming that turns=True:
 
 	.. code-block:: python
 
@@ -161,6 +182,10 @@ Basic Input Columns
 
 		output_file_path_chat_level = "./output/turn/jury_output_turn_level.csv"
 
+
+Turns
+""""""
+
 * The **turns** parameter controls whether we want to treat successive messages from the same person as a single turn. For example, in a text conversation, sometimes individuals will send many message in rapid succession, as follows:
 
 	* **John**: Hey Michael
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
index fe4e036e..9e4be9bf 100644
--- a/docs/build/html/_sources/index.rst.txt
+++ b/docs/build/html/_sources/index.rst.txt
@@ -62,11 +62,10 @@ Once you import the tool, you will be able to declare a FeatureBuilder object, w
       timestamp_col= "timestamp",
       # this is where we'll cache things like sentence vectors; this directory doesn't have to exist; we'll create it for you!
       vector_directory = "./vector_data/",
-      # give us names for the utterance (chat), speaker (user), and conversation-level outputs
-      output_file_path_chat_level = "./my_output_chat_level.csv", 
-      output_file_path_user_level = "./my_output_user_level.csv",
-      output_file_path_conv_level = "./my_output_conversation_level.csv",
-      # if true, this will combine successive turns by the same speaker.
+      # this will be the base file path for which we generate the three outputs;
+      # you will get your outputs in output/chat/my_output_chat_level.csv; output/conv/my_output_conv_level.csv; and output/user/my_output_user_level.csv
+      output_file_base = "my_output",
+      # if True, successive messages by the same speaker are combined into a single turn, and the chat-level output is written under output/turn/ instead of output/chat/
       turns = False,
       # these features depend on sentence vectors, so they take longer to generate on larger datasets. Add them in manually if you are interested in adding them to your output!
       custom_features = [  
diff --git a/docs/build/html/examples.html b/docs/build/html/examples.html
index 5adbe896..f91818c9 100644
--- a/docs/build/html/examples.html
+++ b/docs/build/html/examples.html
@@ -160,9 +160,7 @@ <h3>Configuring the FeatureBuilder<a class="headerlink" href="#configuring-the-f
         <span class="n">timestamp_col</span> <span class="o">=</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">,</span>
         <span class="n">grouping_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;batch_num&quot;</span><span class="p">,</span> <span class="s2">&quot;round_num&quot;</span><span class="p">],</span>
         <span class="n">vector_directory</span> <span class="o">=</span> <span class="s2">&quot;./vector_data/&quot;</span><span class="p">,</span>
-        <span class="n">output_file_path_chat_level</span> <span class="o">=</span> <span class="s2">&quot;./jury_output_chat_level.csv&quot;</span><span class="p">,</span>
-        <span class="n">output_file_path_user_level</span> <span class="o">=</span> <span class="s2">&quot;./jury_output_user_level.csv&quot;</span><span class="p">,</span>
-        <span class="n">output_file_path_conv_level</span> <span class="o">=</span> <span class="s2">&quot;./jury_output_conversation_level.csv&quot;</span><span class="p">,</span>
+        <span class="n">output_file_base</span> <span class="o">=</span> <span class="s2">&quot;jury_output&quot;</span><span class="p">,</span>
         <span class="n">turns</span> <span class="o">=</span> <span class="kc">True</span>
 <span class="p">)</span>
 <span class="n">jury_feature_builder</span><span class="o">.</span><span class="n">featurize</span><span class="p">()</span>
@@ -170,6 +168,8 @@ <h3>Configuring the FeatureBuilder<a class="headerlink" href="#configuring-the-f
 </div>
 <section id="basic-input-columns">
 <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="Link to this heading"></a></h4>
+<section id="conversation-parameters">
+<h5>Conversation Parameters<a class="headerlink" href="#conversation-parameters" title="Link to this heading"></a></h5>
 <ul>
 <li><p>The <strong>input_df</strong> parameter is where you pass in your dataframe. In this case, we want to run the FeatureBuilder on the juries data that we read in!</p></li>
 <li><p>The <strong>speaker_id_col</strong> refers to the name of the column containing a unique identifier for each speaker / participant in the conversation. Here, in the data, the name of our columns is called “speaker_nickname.”</p>
@@ -183,6 +183,7 @@ <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="
 <blockquote>
 <div><ul class="simple">
 <li><p>If you do not pass anything in, “message” is the default value for this parameter.</p></li>
+<li><p>We assume that all messages are ordered chronologically.</p></li>
 </ul>
 </div></blockquote>
 </li>
@@ -208,21 +209,41 @@ <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="
 </div>
 </div></blockquote>
 </li>
+</ul>
+</section>
+<section id="vector-directory">
+<h5>Vector Directory<a class="headerlink" href="#vector-directory" title="Link to this heading"></a></h5>
+<ul>
 <li><p>The <strong>vector_directory</strong> is the name of a directory in which we will store some pre-processed information. Some features require running inference from HuggingFace’s <a class="reference external" href="https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment">RoBERTa-based sentiment model</a>, and others require generating <a class="reference external" href="https://sbert.net/">SBERT vectors</a>. These processes take time, and we cache the outputs so that subsequent runs of the FeatureBuilder on the same dataset will not take as much time. Therefore, we require you to pass in a location where you’d like us to save these outputs.</p>
 <blockquote>
 <div><ul class="simple">
+<li><p>By default, the directory is named “vector_data/.”</p></li>
 <li><p><strong>Note that we do not require the name of the vector directory to be a folder that already exists</strong>; if it doesn’t exist, we will create it for you.</p></li>
 <li><p>Inside the folder, we will store the RoBERTa outputs in a subfolder called “sentiment”, and the SBERT vectors in a subfolder called “sentence.” We will create both of these subfolders for you.</p></li>
 <li><p>The <strong>turns</strong> parameter, which we will discuss later, controls whether or not you’d like the FeatureBuilder to treat successive utterances by the same individual as a single “turn,” or whether you’d like them to be treated separately. We will cache different versions of outputs based on this parameter; we use a subfolder called “chats” (when <strong>turns=False</strong>) or “turns” (when <strong>turns=True</strong>).</p></li>
 </ul>
 </div></blockquote>
 </li>
-<li><p>There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on <a class="reference external" href="intro#generating_features">Generating Features: Utterance-, Speaker-, and Conversation-Level</a> for more details.) However, this means that we require you to provide a path for where you would like us to store each of the output files; <strong>output_file_path_chat_level</strong> (Utterance- or Chat-Level Features), <strong>output_file_path_user_level</strong> (Speaker- or User-Level Features), and <strong>output_file_path_conv_level</strong> (Conversation-Level Features).</p>
+</ul>
+</section>
+<section id="output-file-naming-details">
+<span id="output-file-details"></span><h5>Output File Naming Details<a class="headerlink" href="#output-file-naming-details" title="Link to this heading"></a></h5>
+<ul>
+<li><p>There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on <a class="reference external" href="intro#generating_features">Generating Features: Utterance-, Speaker-, and Conversation-Level</a> for more details.) These are generated using the <strong>output_file_base</strong> parameter.</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><strong>All of the outputs will be generated in a folder called “output.”</strong></p></li>
+<li><p>Within the “output” folder, <strong>we generate sub-folders such that the three files will be located in subfolders called “chat,” “user,” and “conv,” respectively.</strong></p></li>
+<li><p>Similar to the <strong>vector_directory</strong> parameter, the “chat” directory will be renamed to “turn” depending on the value of the <strong>turns</strong> parameter.</p></li>
+</ul>
+</div></blockquote>
+</li>
+<li><p>It is possible to generate different names for each of the three output files, rather than using the same base file path by modifying <strong>output_file_path_chat_level</strong> (Utterance- or Chat-Level Features), <strong>output_file_path_user_level</strong> (Speaker- or User-Level Features), and <strong>output_file_path_conv_level</strong> (Conversation-Level Features). However, because outputs are organized in the specific locations described above, <strong>we have specific requirements for inputting the output paths, and we will modify the path under the hood to match our file naming schema,</strong> rather than saving the file directly to the specified location.</p>
 <blockquote>
 <div><ul class="simple">
 <li><p>We expect that you pass in a <strong>path</strong>, not just a filename. For example, the path needs to be “./my_file.csv”, and not just “my_file.csv”; you will get an error if you pass in only a name without the “/”.</p></li>
-<li><p>Regardless of your path location, we will automatically append the name “output” to the fornt of your file path, such that <strong>all of the outputs will be generated in a folder called “output.”</strong></p></li>
-<li><p>Within the “output” folder, <strong>we will also generate sub-folders such that the three files will be located in subfolders called “chat,” “user,” and “conv,” respectively.</strong></p></li>
+<li><p>Regardless of your path location, we will automatically prepend the name “output” to the front of your file path.</p></li>
+<li><p>Within the “output” folder, <strong>we will also generate the chat/user/conv sub-folders.</strong></p></li>
 <li><p>If you pass in a path that already contains the above automatically-generated elements (for example, “./output/chat/my_chat_features.csv”), we will skip these steps and directly save it in the relevant folder.</p></li>
 <li><p>Similar to the <strong>vector_directory</strong> parameter, the “chat” directory will be renamed to “turn” depending on the value of the <strong>turns</strong> parameter.</p></li>
 <li><p>This means that the following two ways of specifying an output path are equivalent, assuming that turns=False:</p></li>
@@ -233,7 +254,7 @@ <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="
 </pre></div>
 </div>
 <ul class="simple">
-<li><p>And these two ways of specifying an output path are equivalent, assumign that turns=True:</p></li>
+<li><p>And these two ways of specifying an output path are equivalent, assuming that turns=True:</p></li>
 </ul>
 <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">output_file_path_chat_level</span> <span class="o">=</span> <span class="s2">&quot;./jury_output_turn_level.csv&quot;</span>
 
@@ -242,6 +263,11 @@ <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="
 </div>
 </div></blockquote>
 </li>
+</ul>
+</section>
+<section id="turns">
+<h5>Turns<a class="headerlink" href="#turns" title="Link to this heading"></a></h5>
+<ul>
 <li><p>The <strong>turns</strong> parameter controls whether we want to treat successive messages from the same person as a single turn. For example, in a text conversation, sometimes individuals will send many message in rapid succession, as follows:</p>
 <blockquote>
 <div><ul>
@@ -260,6 +286,7 @@ <h4>Basic Input Columns<a class="headerlink" href="#basic-input-columns" title="
 </li>
 </ul>
 </section>
+</section>
 <section id="advanced-configuration-columns">
 <h4>Advanced Configuration Columns<a class="headerlink" href="#advanced-configuration-columns" title="Link to this heading"></a></h4>
 <p>More advanced users of the FeatureBuilder should consider the following optional parameters, depending on their needs.</p>
diff --git a/docs/build/html/feature_builder.html b/docs/build/html/feature_builder.html
index a700d5f3..16fbb9a7 100644
--- a/docs/build/html/feature_builder.html
+++ b/docs/build/html/feature_builder.html
@@ -97,7 +97,7 @@
 <span id="feature-builder-module"></span><span id="feature-builder"></span><h1>feature_builder module<a class="headerlink" href="#module-feature_builder" title="Link to this heading"></a></h1>
 <dl class="py class">
 <dt class="sig sig-object py" id="feature_builder.FeatureBuilder">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">feature_builder.</span></span><span class="sig-name descname"><span class="pre">FeatureBuilder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">vector_directory</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_chat_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_user_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_file_path_conv_level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_features</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">analyze_first_pct</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> 
</span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[1.0]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">turns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">conversation_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'conversation_num'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">speaker_id_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'speaker_nickname'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'message'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timestamp_col</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span 
class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'timestamp'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">grouping_keys</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cumulative_grouping</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">within_task</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_training_df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ner_cutoff</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.9</span></span></em>, <em 
class="sig-param"><span class="n"><span class="pre">regenerate_vectors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compute_vectors_from_preprocessed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#feature_builder.FeatureBuilder" title="Link to this definition"></a></dt>
+<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">feature_builder.</span></span><span class="sig-name descname"><span class="pre">FeatureBuilder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="pre">input_df:</span> <span class="pre">~pandas.core.frame.DataFrame,</span> <span class="pre">vector_directory:</span> <span class="pre">./vector_data/,</span> <span class="pre">output_file_base='output',</span> <span class="pre">output_file_path_chat_level=None,</span> <span class="pre">output_file_path_user_level=None,</span> <span class="pre">output_file_path_conv_level=None,</span> <span class="pre">custom_features:</span> <span class="pre">list</span> <span class="pre">=</span> <span class="pre">[],</span> <span class="pre">analyze_first_pct:</span> <span class="pre">list</span> <span class="pre">=</span> <span class="pre">[1.0],</span> <span class="pre">turns:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">conversation_id_col:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'conversation_num',</span> <span class="pre">speaker_id_col:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'speaker_nickname',</span> <span class="pre">message_col:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'message',</span> <span class="pre">timestamp_col:</span> <span class="pre">str</span> <span class="pre">|</span> <span class="pre">tuple[str,</span> <span class="pre">str]</span> <span class="pre">=</span> <span class="pre">'timestamp',</span> <span class="pre">grouping_keys:</span> <span class="pre">list</span> <span class="pre">=</span> <span class="pre">[],</span> <span class="pre">cumulative_grouping=False,</span> <span class="pre">within_task=False,</span> <span class="pre">ner_training_df:</span> 
<span class="pre">~pandas.core.frame.DataFrame</span> <span class="pre">=</span> <span class="pre">None,</span> <span class="pre">ner_cutoff:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0.9,</span> <span class="pre">regenerate_vectors:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False,</span> <span class="pre">compute_vectors_from_preprocessed:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></em><span class="sig-paren">)</span><a class="headerlink" href="#feature_builder.FeatureBuilder" title="Link to this definition"></a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
 <p>The FeatureBuilder is the main engine that reads in the user’s inputs and specifications and generates
 conversational features. The FeatureBuilder separately calls the classes (the ChatLevelFeaturesCalculator,
@@ -107,10 +107,11 @@
 <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>input_df</strong> (<em>pd.DataFrame</em>) – A pandas DataFrame containing the conversation data that you wish to featurize.</p></li>
-<li><p><strong>vector_directory</strong> (<em>str</em>) – Directory path where the vectors are to be cached.</p></li>
-<li><p><strong>output_file_path_chat_level</strong> (<em>str</em>) – Path where the chat (utterance)-level output csv file is to be generated.</p></li>
-<li><p><strong>output_file_path_user_level</strong> (<em>str</em>) – Path where the user (speaker)-level output csv file is to be generated.</p></li>
-<li><p><strong>output_file_path_conv_level</strong> (<em>str</em>) – Path where the conversation-level output csv file is to be generated.</p></li>
+<li><p><strong>vector_directory</strong> (<em>str</em>) – Directory path where the vectors are to be cached. Defaults to “./vector_data/”</p></li>
+<li><p><strong>output_file_base</strong> (<em>str</em>) – Base name for the output files, which will be used to auto-generate filenames for each of the three levels. Defaults to “output.”</p></li>
+<li><p><strong>output_file_path_chat_level</strong> (<em>str</em>) – Path where the chat (utterance)-level output csv file is to be generated. (This parameter will override the base name.)</p></li>
+<li><p><strong>output_file_path_user_level</strong> (<em>str</em>) – Path where the user (speaker)-level output csv file is to be generated. (This parameter will override the base name.)</p></li>
+<li><p><strong>output_file_path_conv_level</strong> (<em>str</em>) – Path where the conversation-level output csv file is to be generated. (This parameter will override the base name.)</p></li>
 <li><p><strong>custom_features</strong> (<em>list</em><em>, </em><em>optional</em>) – A list of additional features outside of the default features that should be calculated.
 Defaults to an empty list (i.e., no additional features beyond the defaults will be computed).</p></li>
 <li><p><strong>analyze_first_pct</strong> (<em>list</em><em>(</em><em>float</em><em>)</em><em>, </em><em>optional</em>) – Analyze the first X% of the data. This parameter is useful because the earlier stages of the conversation may be more predictive than the later stages. Thus, researchers may wish to analyze only the first X% of the conversation data and compare the performance with using the full dataset. Defaults to [1.0].</p></li>
diff --git a/docs/build/html/index.html b/docs/build/html/index.html
index 4b32ac44..a522400c 100644
--- a/docs/build/html/index.html
+++ b/docs/build/html/index.html
@@ -125,11 +125,10 @@ <h3>Declaring a FeatureBuilder<a class="headerlink" href="#declaring-a-featurebu
    <span class="n">timestamp_col</span><span class="o">=</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">,</span>
    <span class="c1"># this is where we&#39;ll cache things like sentence vectors; this directory doesn&#39;t have to exist; we&#39;ll create it for you!</span>
    <span class="n">vector_directory</span> <span class="o">=</span> <span class="s2">&quot;./vector_data/&quot;</span><span class="p">,</span>
-   <span class="c1"># give us names for the utterance (chat), speaker (user), and conversation-level outputs</span>
-   <span class="n">output_file_path_chat_level</span> <span class="o">=</span> <span class="s2">&quot;./my_output_chat_level.csv&quot;</span><span class="p">,</span>
-   <span class="n">output_file_path_user_level</span> <span class="o">=</span> <span class="s2">&quot;./my_output_user_level.csv&quot;</span><span class="p">,</span>
-   <span class="n">output_file_path_conv_level</span> <span class="o">=</span> <span class="s2">&quot;./my_output_conversation_level.csv&quot;</span><span class="p">,</span>
-   <span class="c1"># if true, this will combine successive turns by the same speaker.</span>
+   <span class="c1"># this will be the base file path for which we generate the three outputs;</span>
+   <span class="c1"># you will get your outputs in output/chat/my_output_chat_level.csv; output/conv/my_output_conv_level.csv; and output/user/my_output_user_level.csv</span>
+   <span class="n">output_file_base</span> <span class="o">=</span> <span class="s2">&quot;my_output&quot;</span><span class="p">,</span>
+   <span class="c1"># it will also store the output into output/turns/my_output_chat_level.csv</span>
    <span class="n">turns</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
    <span class="c1"># these features depend on sentence vectors, so they take longer to generate on larger datasets. Add them in manually if you are interested in adding them to your output!</span>
    <span class="n">custom_features</span> <span class="o">=</span> <span class="p">[</span>
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index b4636754..7ce944fa 100644
Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 6822fd2b..2c6bb4a1 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, "certainty"]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, "content-word-accommodation"]], "Contents:": [[61, null]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, "conversational-repair"]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, "dale-chall-score"]], "Declaring a FeatureBuilder": [[61, "declaring-a-featurebuilder"]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, "discursive-diversity"]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, "feature-name"]], "Feature Column Names": [[1, "feature-column-names"], [61, "feature-column-names"]], "Feature Documentation": [[62, "feature-documentation"]], "Feature Information": [[1, "feature-information"], [61, "feature-information"]], "Features: Conceptual Documentation": [[39, "features-conceptual-documentation"]], "Features: Technical 
Documentation": [[11, "features-technical-documentation"]], "Forward Flow": [[35, "forward-flow"]], "Function Word Accommodation": [[36, "function-word-accommodation"]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, "gini-coefficient"]], "Hedge": [[38, "hedge"]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, "implementation"]], "Implementation Basics": [[29, "implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, "implementation-basics"], [43, "implementation-basics"], [44, 
"implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, "import-recommendations-virtual-environment-and-pip"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, "information-diversity"]], "Information Exchange": [[41, "information-exchange"]], "Input File": [[34, "id2"]], "Inspecting Generated Features": [[1, "inspecting-generated-features"], [61, "inspecting-generated-features"]], 
"Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, "introduction"]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, "linguistic-inquiry-and-word-count-liwc-and-other-lexicons"]], "Message Length": [[43, "message-length"]], "Message Quantity": [[44, "message-quantity"]], "Mimicry (BERT)": [[45, "mimicry-bert"]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, "moving-mimicry"]], "Named Entity Recognition": [[47, "named-entity-recognition"]], "Named Entity Training Examples": [[47, "id2"]], "Online Discussion Tags": [[48, "online-discussion-tags"]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], [46, "id2"], [47, "id3"], [51, "id1"]], "Package Assumptions": [[0, 
"package-assumptions"]], "Politeness Strategies": [[50, "politeness-strategies"]], "Politeness/Receptiveness Markers": [[49, "politeness-receptiveness-markers"]], "Positivity Z-Score": [[52, "positivity-z-score"]], "Proportion of First Person Pronouns": [[53, "proportion-of-first-person-pronouns"]], "Question (Naive)": [[54, "question-naive"]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, "sentiment-roberta"]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Table of Contents": [[61, "table-of-contents"]], "Team Burstiness": [[55, "team-burstiness"]], "Textblob Polarity": [[56, "textblob-polarity"]], "Textblob Subjectivity": [[57, "textblob-subjectivity"]], "The Basics": [[0, "the-basics"]], "The FeatureBuilder": [[62, "the-featurebuilder"]], "The Team Communication Toolkit": [[61, "the-team-communication-toolkit"]], "Time Difference": [[58, "time-difference"]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, "turn-taking-index"]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, "utilities"]], "Utterance- (Chat) Level Features": [[11, 
"utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, "word-type-token-ratio"]], "Worked Example": [[1, "worked-example"]], "assign_chunk_nums module": [[63, "module-utils.assign_chunk_nums"]], "basic_features module": [[3, "module-features.basic_features"]], "burstiness module": [[4, "module-features.burstiness"]], "calculate_chat_level_features module": [[64, "module-utils.calculate_chat_level_features"]], "calculate_conversation_level_features module": [[65, "module-utils.calculate_conversation_level_features"]], "calculate_user_level_features module": [[66, "module-utils.calculate_user_level_features"]], "certainty module": [[5, "module-features.certainty"]], "check_embeddings module": [[67, "module-utils.check_embeddings"]], "discursive_diversity module": [[6, "module-features.discursive_diversity"]], "feature_builder module": [[2, "module-feature_builder"]], "fflow module": [[7, "module-features.fflow"]], "get_all_DD_features module": [[8, "module-features.get_all_DD_features"]], "get_user_network module": [[9, "module-features.get_user_network"]], "gini_coefficient module": [[68, "module-utils.gini_coefficient"]], "hedge module": [[10, "module-features.hedge"]], "info_exchange_zscore module": [[12, "module-features.info_exchange_zscore"]], "information_diversity module": [[13, "module-features.information_diversity"]], "lexical_features_v2 module": [[14, "module-features.lexical_features_v2"]], "named_entity_recognition_features module": [[15, "module-features.named_entity_recognition_features"]], "other_lexical_features module": [[16, "module-features.other_lexical_features"]], "politeness_features module": [[17, "module-features.politeness_features"]], "politeness_v2 module": [[18, "module-features.politeness_v2"]], "politeness_v2_helper module": [[19, 
"module-features.politeness_v2_helper"]], "preload_word_lists module": [[70, "module-utils.preload_word_lists"]], "preprocess module": [[71, "module-utils.preprocess"]], "question_num module": [[20, "module-features.question_num"]], "readability module": [[21, "module-features.readability"]], "reddit_tags module": [[22, "module-features.reddit_tags"]], "summarize_features module": [[72, "module-utils.summarize_features"]], "temporal_features module": [[23, "module-features.temporal_features"]], "textblob_sentiment_analysis module": [[24, "module-features.textblob_sentiment_analysis"]], "turn_taking_features module": [[25, "module-features.turn_taking_features"]], "variance_in_DD module": [[26, "module-features.variance_in_DD"]], "within_person_discursive_range module": [[27, "module-features.within_person_discursive_range"]], "word_mimicry module": [[28, "module-features.word_mimicry"]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, "module-utils.zscore_chats_and_conversation"]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", "features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", 
"features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", "features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", "features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": 
["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", "features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", "features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", "features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", 
"features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", "utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {"adverb_limiter() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.adverb_limiter", false]], "assert_key_columns_present() (in module utils.preprocess)": [[71, "utils.preprocess.assert_key_columns_present", false]], "assign_chunk_nums() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.assign_chunk_nums", false]], "bare_command() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.bare_command", false]], "built_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.built_spacy_ner", false]], "burstiness() (in module features.burstiness)": [[4, "features.burstiness.burstiness", false]], "calculate_chat_level_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_chat_level_features", false]], "calculate_conversation_level_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_conversation_level_features", 
false]], "calculate_hedge_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_hedge_features", false]], "calculate_id_score() (in module features.information_diversity)": [[13, "features.information_diversity.calculate_ID_score", false]], "calculate_info_diversity() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_info_diversity", false]], "calculate_named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.calculate_named_entities", false]], "calculate_num_question_naive() (in module features.question_num)": [[20, "features.question_num.calculate_num_question_naive", false]], "calculate_politeness_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_sentiment", false]], "calculate_politeness_v2() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_politeness_v2", false]], "calculate_team_burstiness() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.calculate_team_burstiness", false]], "calculate_textblob_sentiment() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_textblob_sentiment", false]], "calculate_user_level_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, 
"utils.calculate_user_level_features.UserLevelFeaturesCalculator.calculate_user_level_features", false]], "calculate_vector_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_vector_word_mimicry", false]], "calculate_word_mimicry() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.calculate_word_mimicry", false]], "chat_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.chat_level_features", false]], "chatlevelfeaturescalculator (class in utils.calculate_chat_level_features)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator", false]], "check_embeddings() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.check_embeddings", false]], "classify_ntri() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.classify_NTRI", false]], "classify_text_dalechall() (in module features.readability)": [[21, "features.readability.classify_text_dalechall", false]], "clean_text() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.clean_text", false]], "coerce_to_date_or_number() (in module features.temporal_features)": [[23, "features.temporal_features.coerce_to_date_or_number", false]], "commit_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.commit_data", false]], "compress() (in module utils.preprocess)": [[71, "utils.preprocess.compress", false]], "compute_frequency() (in module features.word_mimicry)": [[28, "features.word_mimicry.compute_frequency", false]], "computetf() (in module features.word_mimicry)": [[28, "features.word_mimicry.computeTF", false]], "concat_bert_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, 
"utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.concat_bert_features", false]], "conjection_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.conjection_seperator", false]], "content_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.Content_mimicry_score", false]], "conv_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.conv_level_features", false]], "conv_to_float_arr() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.conv_to_float_arr", false]], "conversationlevelfeaturescalculator (class in utils.calculate_conversation_level_features)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator", false]], "count_all_caps() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_all_caps", false]], "count_bullet_points() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_bullet_points", false]], "count_characters() (in module features.basic_features)": [[3, "features.basic_features.count_characters", false]], "count_difficult_words() (in module features.readability)": [[21, "features.readability.count_difficult_words", false]], "count_ellipses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_ellipses", false]], "count_emojis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emojis", false]], "count_emphasis() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_emphasis", false]], "count_line_breaks() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_line_breaks", false]], "count_links() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_links", false]], "count_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_matches", false]], "count_messages() (in module features.basic_features)": [[3, 
"features.basic_features.count_messages", false]], "count_numbering() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_numbering", false]], "count_parentheses() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_parentheses", false]], "count_quotes() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_quotes", false]], "count_responding_to_someone() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_responding_to_someone", false]], "count_spacy_matches() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.count_spacy_matches", false]], "count_syllables() (in module features.readability)": [[21, "features.readability.count_syllables", false]], "count_turn_taking_index() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turn_taking_index", false]], "count_turns() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.count_turns", false]], "count_user_references() (in module features.reddit_tags)": [[22, "features.reddit_tags.count_user_references", false]], "count_words() (in module features.basic_features)": [[3, "features.basic_features.count_words", false]], "create_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks", false]], "create_chunks_messages() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.create_chunks_messages", false]], "create_cumulative_rows() (in module utils.preprocess)": [[71, "utils.preprocess.create_cumulative_rows", false]], "dale_chall_helper() (in module features.readability)": [[21, "features.readability.dale_chall_helper", false]], "feat_counts() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.feat_counts", false]], "feature_builder": [[2, "module-feature_builder", false]], "featurebuilder (class in feature_builder)": [[2, "feature_builder.FeatureBuilder", false]], 
"features.basic_features": [[3, "module-features.basic_features", false]], "features.burstiness": [[4, "module-features.burstiness", false]], "features.certainty": [[5, "module-features.certainty", false]], "features.discursive_diversity": [[6, "module-features.discursive_diversity", false]], "features.fflow": [[7, "module-features.fflow", false]], "features.get_all_dd_features": [[8, "module-features.get_all_DD_features", false]], "features.get_user_network": [[9, "module-features.get_user_network", false]], "features.hedge": [[10, "module-features.hedge", false]], "features.info_exchange_zscore": [[12, "module-features.info_exchange_zscore", false]], "features.information_diversity": [[13, "module-features.information_diversity", false]], "features.lexical_features_v2": [[14, "module-features.lexical_features_v2", false]], "features.named_entity_recognition_features": [[15, "module-features.named_entity_recognition_features", false]], "features.other_lexical_features": [[16, "module-features.other_lexical_features", false]], "features.politeness_features": [[17, "module-features.politeness_features", false]], "features.politeness_v2": [[18, "module-features.politeness_v2", false]], "features.politeness_v2_helper": [[19, "module-features.politeness_v2_helper", false]], "features.question_num": [[20, "module-features.question_num", false]], "features.readability": [[21, "module-features.readability", false]], "features.reddit_tags": [[22, "module-features.reddit_tags", false]], "features.temporal_features": [[23, "module-features.temporal_features", false]], "features.textblob_sentiment_analysis": [[24, "module-features.textblob_sentiment_analysis", false]], "features.turn_taking_features": [[25, "module-features.turn_taking_features", false]], "features.variance_in_dd": [[26, "module-features.variance_in_DD", false]], "features.within_person_discursive_range": [[27, "module-features.within_person_discursive_range", false]], "features.word_mimicry": [[28, 
"module-features.word_mimicry", false]], "featurize() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.featurize", false]], "function_mimicry_score() (in module features.word_mimicry)": [[28, "features.word_mimicry.function_mimicry_score", false]], "generate_bert() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_bert", false]], "generate_certainty_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_certainty_pkl", false]], "generate_lexicon_pkl() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_lexicon_pkl", false]], "generate_vect() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.generate_vect", false]], "get_average() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_average", false]], "get_centroids() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_centroids", false]], "get_certainty() (in module features.certainty)": [[5, "features.certainty.get_certainty", false]], "get_certainty_score() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_certainty_score", false]], "get_content_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_content_words_in_message", false]], "get_conversation_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_conversation_level_aggregates", false]], "get_cosine_similarity() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_cosine_similarity", false]], "get_dale_chall_easy_words() (in module utils.preload_word_lists)": [[70, 
"utils.preload_word_lists.get_dale_chall_easy_words", false]], "get_dale_chall_score_and_classfication() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_dale_chall_score_and_classfication", false]], "get_dd() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_DD", false]], "get_dd_features() (in module features.get_all_dd_features)": [[8, "features.get_all_DD_features.get_DD_features", false]], "get_dep_pairs() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs", false]], "get_dep_pairs_noneg() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.get_dep_pairs_noneg", false]], "get_discursive_diversity_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_discursive_diversity_features", false]], "get_first_pct_of_chat() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.get_first_pct_of_chat", false]], "get_first_person_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_first_person_words", false]], "get_forward_flow() (in module features.fflow)": [[7, "features.fflow.get_forward_flow", false]], "get_forward_flow() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_forward_flow", false]], "get_function_words() (in module utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_function_words", false]], "get_function_words_in_message() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_function_words_in_message", false]], "get_gini() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.get_gini", false]], "get_gini_features() 
(utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_gini_features", false]], "get_info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.get_info_diversity", false]], "get_info_exchange_wordcount() (in module features.info_exchange_zscore)": [[12, "features.info_exchange_zscore.get_info_exchange_wordcount", false]], "get_liwc_count() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.get_liwc_count", false]], "get_max() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_max", false]], "get_mimicry_bert() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_mimicry_bert", false]], "get_min() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_min", false]], "get_moving_mimicry() (in module features.word_mimicry)": [[28, "features.word_mimicry.get_moving_mimicry", false]], "get_named_entity() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_named_entity", false]], "get_nan_vector() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_nan_vector", false]], "get_polarity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_polarity_score", false]], "get_politeness_strategies() (in module features.politeness_features)": [[17, "features.politeness_features.get_politeness_strategies", false]], "get_politeness_v2() (in module features.politeness_v2)": [[18, "features.politeness_v2.get_politeness_v2", false]], "get_proportion_first_pronouns() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_proportion_first_pronouns", false]], "get_question_words() (in module 
utils.preload_word_lists)": [[70, "utils.preload_word_lists.get_question_words", false]], "get_reddit_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_reddit_features", false]], "get_sentiment() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.get_sentiment", false]], "get_stdev() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_stdev", false]], "get_subjectivity_score() (in module features.textblob_sentiment_analysis)": [[24, "features.textblob_sentiment_analysis.get_subjectivity_score", false]], "get_sum() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_sum", false]], "get_team_burstiness() (in module features.burstiness)": [[4, "features.burstiness.get_team_burstiness", false]], "get_temporal_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.get_temporal_features", false]], "get_time_diff() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff", false]], "get_time_diff_startend() (in module features.temporal_features)": [[23, "features.temporal_features.get_time_diff_startend", false]], "get_turn() (in module features.turn_taking_features)": [[25, "features.turn_taking_features.get_turn", false]], "get_turn_id() (in module utils.preprocess)": [[71, "utils.preprocess.get_turn_id", false]], "get_turn_taking_features() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_turn_taking_features", false]], "get_unique_pairwise_combos() (in module features.discursive_diversity)": [[6, "features.discursive_diversity.get_unique_pairwise_combos", false]], "get_user_average_dataframe() (in module utils.summarize_features)": 
[[72, "utils.summarize_features.get_user_average_dataframe", false]], "get_user_level_aggregates() (utils.calculate_conversation_level_features.conversationlevelfeaturescalculator method)": [[65, "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator.get_user_level_aggregates", false]], "get_user_level_averaged_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_averaged_features", false]], "get_user_level_summary_statistics_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summary_statistics_features", false]], "get_user_level_summed_features() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_level_summed_features", false]], "get_user_network() (in module features.get_user_network)": [[9, "features.get_user_network.get_user_network", false]], "get_user_network() (utils.calculate_user_level_features.userlevelfeaturescalculator method)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator.get_user_network", false]], "get_user_sum_dataframe() (in module utils.summarize_features)": [[72, "utils.summarize_features.get_user_sum_dataframe", false]], "get_variance_in_dd() (in module features.variance_in_dd)": [[26, "features.variance_in_DD.get_variance_in_DD", false]], "get_within_person_disc_range() (in module features.within_person_discursive_range)": [[27, "features.within_person_discursive_range.get_within_person_disc_range", false]], "get_word_ttr() (in module features.other_lexical_features)": [[16, "features.other_lexical_features.get_word_TTR", false]], "get_zscore_across_all_chats() (in module utils.zscore_chats_and_conversation)": [[73, 
"utils.zscore_chats_and_conversation.get_zscore_across_all_chats", false]], "get_zscore_across_all_conversations() (in module utils.zscore_chats_and_conversation)": [[73, "utils.zscore_chats_and_conversation.get_zscore_across_all_conversations", false]], "gini_coefficient() (in module utils.gini_coefficient)": [[68, "utils.gini_coefficient.gini_coefficient", false]], "info_diversity() (in module features.information_diversity)": [[13, "features.information_diversity.info_diversity", false]], "info_exchange() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.info_exchange", false]], "is_hedged_sentence_1() (in module features.hedge)": [[10, "features.hedge.is_hedged_sentence_1", false]], "lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.lexical_features", false]], "liwc_features() (in module features.lexical_features_v2)": [[14, "features.lexical_features_v2.liwc_features", false]], "load_saved_data() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_saved_data", false]], "load_to_dict() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_dict", false]], "load_to_lists() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.load_to_lists", false]], "merge_conv_data_with_original() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.merge_conv_data_with_original", false]], "mimic_words() (in module features.word_mimicry)": [[28, "features.word_mimicry.mimic_words", false]], "module": [[2, "module-feature_builder", false], [3, "module-features.basic_features", false], [4, "module-features.burstiness", false], [5, "module-features.certainty", false], [6, "module-features.discursive_diversity", false], [7, "module-features.fflow", 
false], [8, "module-features.get_all_DD_features", false], [9, "module-features.get_user_network", false], [10, "module-features.hedge", false], [12, "module-features.info_exchange_zscore", false], [13, "module-features.information_diversity", false], [14, "module-features.lexical_features_v2", false], [15, "module-features.named_entity_recognition_features", false], [16, "module-features.other_lexical_features", false], [17, "module-features.politeness_features", false], [18, "module-features.politeness_v2", false], [19, "module-features.politeness_v2_helper", false], [20, "module-features.question_num", false], [21, "module-features.readability", false], [22, "module-features.reddit_tags", false], [23, "module-features.temporal_features", false], [24, "module-features.textblob_sentiment_analysis", false], [25, "module-features.turn_taking_features", false], [26, "module-features.variance_in_DD", false], [27, "module-features.within_person_discursive_range", false], [28, "module-features.word_mimicry", false], [63, "module-utils.assign_chunk_nums", false], [64, "module-utils.calculate_chat_level_features", false], [65, "module-utils.calculate_conversation_level_features", false], [66, "module-utils.calculate_user_level_features", false], [67, "module-utils.check_embeddings", false], [68, "module-utils.gini_coefficient", false], [70, "module-utils.preload_word_lists", false], [71, "module-utils.preprocess", false], [72, "module-utils.summarize_features", false], [73, "module-utils.zscore_chats_and_conversation", false]], "named_entities() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.named_entities", false]], "num_named_entity() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.num_named_entity", false]], "other_lexical_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, 
"utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.other_lexical_features", false]], "phrase_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.phrase_split", false]], "positivity_zscore() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.positivity_zscore", false]], "prep_simple() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_simple", false]], "prep_whole() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.prep_whole", false]], "preprocess_chat_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.preprocess_chat_data", false]], "preprocess_conversation_columns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_conversation_columns", false]], "preprocess_naive_turns() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_naive_turns", false]], "preprocess_text() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text", false]], "preprocess_text_lowercase_but_retain_punctuation() (in module utils.preprocess)": [[71, "utils.preprocess.preprocess_text_lowercase_but_retain_punctuation", false]], "preprocessing() (in module features.information_diversity)": [[13, "features.information_diversity.preprocessing", false]], "punctuation_seperator() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.punctuation_seperator", false]], "question() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.Question", false]], "read_in_lexicons() (in module utils.check_embeddings)": [[67, "utils.check_embeddings.read_in_lexicons", false]], "reduce_chunks() (in module utils.assign_chunk_nums)": [[63, "utils.assign_chunk_nums.reduce_chunks", false]], "remove_active_user() (in module features.get_user_network)": [[9, 
"features.get_user_network.remove_active_user", false]], "save_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.save_features", false]], "sentence_pad() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_pad", false]], "sentence_split() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentence_split", false]], "sentenciser() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.sentenciser", false]], "set_self_conv_data() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.set_self_conv_data", false]], "text_based_features() (utils.calculate_chat_level_features.chatlevelfeaturescalculator method)": [[64, "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator.text_based_features", false]], "token_count() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.token_count", false]], "train_spacy_ner() (in module features.named_entity_recognition_features)": [[15, "features.named_entity_recognition_features.train_spacy_ner", false]], "user_level_features() (feature_builder.featurebuilder method)": [[2, "feature_builder.FeatureBuilder.user_level_features", false]], "userlevelfeaturescalculator (class in utils.calculate_user_level_features)": [[66, "utils.calculate_user_level_features.UserLevelFeaturesCalculator", false]], "utils.assign_chunk_nums": [[63, "module-utils.assign_chunk_nums", false]], "utils.calculate_chat_level_features": [[64, "module-utils.calculate_chat_level_features", false]], "utils.calculate_conversation_level_features": [[65, "module-utils.calculate_conversation_level_features", false]], "utils.calculate_user_level_features": [[66, "module-utils.calculate_user_level_features", false]], "utils.check_embeddings": [[67, "module-utils.check_embeddings", false]], "utils.gini_coefficient": [[68, "module-utils.gini_coefficient", false]], 
"utils.preload_word_lists": [[70, "module-utils.preload_word_lists", false]], "utils.preprocess": [[71, "module-utils.preprocess", false]], "utils.summarize_features": [[72, "module-utils.summarize_features", false]], "utils.zscore_chats_and_conversation": [[73, "module-utils.zscore_chats_and_conversation", false]], "word_start() (in module features.politeness_v2_helper)": [[19, "features.politeness_v2_helper.word_start", false]]}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 0, 0, "-", "information_diversity"], [14, 0, 0, "-", "lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", "other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], 
"features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_count"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], "features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", "get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], "features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", 
"load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "coerce_to_date_or_number"], [23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 3, 1, "", "get_subjectivity_score"]], "features.turn_taking_features": [[25, 3, 1, "", "count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", 
"mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", "get_certainty_score"], [64, 2, 1, "", "get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", "get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", 
"get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_averaged_features"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "read_in_lexicons"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", "get_dale_chall_easy_words"], [70, 3, 1, "", "get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "assert_key_columns_present"], [71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"]], "utils.summarize_features": [[72, 3, 1, "", "get_average"], [72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_average_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": 
["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [0, 1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "000": 42, "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": 50, "09": [45, 46, 50], "1": [0, 1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 61, 62], "10": [1, 5, 6, 21, 24, 33, 42, 59, 61, 64], "100": [1, 21, 33, 37, 42, 47, 62], "1000": 42, "10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [1, 2, 34, 35, 41, 47, 59, 61, 62], "20": [37, 59], "2004": 42, "2007": [5, 42], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 50, 52, 54, 70], "2015": [53, 58, 60], "2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [1, 5, 30, 59, 61, 64], "2024": 40, "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": 50, "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [0, 1, 2, 21, 34, 41, 42, 51, 59, 61, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [0, 1, 5, 13, 21, 30, 33, 41, 42, 56, 61, 62], "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 
5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "64": 67, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 45, 61], "But": [1, 50, 62], "By": [1, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 45, 47, 50, 55, 61, 62, 63, 64, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 61, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "_preprocessed_": 0, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "accept": [0, 1, 58, 61], "access": [0, 1, 15, 61], "accommod": [28, 32, 39, 45, 46, 64, 65, 66], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": 
[21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [0, 2, 32, 34, 42, 63, 69], "addition": [0, 30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [0, 1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 43, 61, 62, 64], "again": [32, 34], "against": [28, 31, 36, 52], "agarw": 62, "aggreg": [0, 1, 3, 11, 37, 44, 61, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": [31, 36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 66, 71, 73], "allow": 1, "almaatouq": 59, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": 71, "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69, 71], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": [48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 68], "analys": [1, 62], "analysi": [1, 11, 52, 62, 67, 71], "analyt": 62, "analyz": [0, 1, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 2, 23, 31, 36, 56], "anywher": [31, 36], "apartment": 
42, "api": 47, "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 37, 38, 42, 64], "append": [1, 17, 64, 65, 66, 67], "appli": [4, 13, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": 69, "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 68], "articl": [37, 50], "ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assert_key_columns_pres": 71, "assign": [1, 31, 36, 38, 45, 46, 52, 59, 61, 63, 71], "assign_chunk_num": 69, "associ": [4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 41, 60, 61, 71], "assumign": 1, "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "automat": [1, 61, 69], "auxiliari": [31, 36], "avail": [1, 61, 62, 63, 64, 67], "averag": [11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 66, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 45, 46, 55, 62], "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [1, 2, 15, 18, 19, 31, 32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 61, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch": 67, "batch_num": 1, "batch_siz": 67, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 56, 61], "becom": [44, 61, 62], "been": [1, 2, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 54, 58, 61, 62, 63], "behavior": [0, 2, 62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": [1, 42], "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 69], "ber": 54, "bert": [0, 1, 31, 35, 36, 39, 46, 61, 64, 67], 
"bert_path": 67, "bert_sentiment_data": [1, 61, 64], "best": 29, "better": 61, "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": [1, 24, 61], "block": [22, 32, 48, 59], "blog": 15, "bold": [22, 64], "bool": [2, 63, 67, 71], "bootstrap": 62, "both": [1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 42, 52, 55], "boundari": [34, 35], "break": [22, 48, 64], "brief": 44, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": 11, "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [1, 11, 39, 58, 61, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 1, 2, 51, 61], "calcul": [2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": [1, 61, 69], "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, "calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 13, 61, 62, 64, 69], "can": [0, 1, 11, 23, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 69], "can_you": 49, "cannot": [1, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": [0, 2, 48], "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "cardiffnlp": [1, 61], "carefulli": 60, "casa_token": 5, "case": [1, 13, 16, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 45, 46, 49, 52], "caus": [31, 32, 36, 59], "caveat": 1, "center": 62, "central": 34, 
"centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [1, 34, 50, 61, 71], "charact": [2, 3, 15, 19, 37, 49, 62, 64, 65, 66, 71], "characterist": 62, "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_featur": [1, 61], "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [1, 2, 17, 21, 61, 64, 69], "chats_data": 73, "check": [19, 23, 44, 64, 67, 71], "check_embed": [1, 61, 69], "chen": 62, "choos": 60, "chose": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67], "clean_text": 19, "clear": 1, "close": [31, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [4, 39, 62, 65, 68], "coerce_to_date_or_numb": 23, "cognit": 62, "col": 2, "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [1, 2, 34, 49, 50, 52, 61, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 51, 56, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 61, 64, 71], "come": [1, 12, 13, 21, 32, 33, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [2, 31, 35, 44, 45, 52, 64, 71, 73], "compat": [1, 61], "complement": [31, 36], 
"complet": [1, 2, 55], "complex": [35, 43, 50, 62], "compon": 50, "comprehens": [33, 48], "compress": 71, "comput": [0, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 52, 55, 62, 64, 65, 66, 69, 73], "compute_frequ": 28, "compute_vectors_from_preprocess": [0, 2], "computetf": 28, "conain": 61, "concat_bert_featur": [1, 61, 64], "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consequ": 0, "consid": [1, 33, 37], "consider": [61, 62], "consist": [36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [11, 55, 62], "constructor": 47, "consult": 5, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 29, 30, 35, 38, 42, 47, 49, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_word_mimicri": 28, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": 1, "conv_data": [2, 65], "conv_features_al": [1, 61], "conv_features_bas": [1, 61], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "conveni": [1, 61], "convers": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 66, 71, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 71], "convict": 5, "convokit": [17, 50, 62, 64], "coordin": 55, 
"copi": [0, 1], "copular": [31, 36], "core": [34, 69], "cornel": 17, "corpu": 50, "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": 65, "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "crowd": 13, "csv": [0, 1, 2, 61, 62, 67], "cumul": [1, 2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 62], "custom_featur": [0, 1, 2, 61], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [1, 31, 34, 36, 61], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": 50, "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 47, 49, 59, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 61, 64, 65, 66, 73], "date": [1, 61], "datetim": [23, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": 62, "decis": [1, 13, 62], "declar": [1, 62, 69], 
"deepli": 62, "default": [0, 1, 2, 5, 13, 16, 30, 34, 35, 42, 47, 62, 63, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demystifi": 62, "denomin": 59, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "describ": [11, 62], "descript": [1, 61], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 1, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dict": [17, 19, 28, 67], "dictionari": [1, 15, 17, 19, 28, 30, 42, 49, 61, 67], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [1, 2, 4, 11, 12, 23, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 1, 2, 19, 61, 65, 67], "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [1, 34, 42, 46, 61], "dispos": 1, "distanc": [34, 35, 40], "distinct": [36, 59], "distinguish": 59, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": 19, "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 2, 29, 31, 36, 45, 
61], "doi": [5, 6, 21, 24, 64], "domain": 50, "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [0, 2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 61, 63, 65, 66, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2, 42], "easi": [1, 21, 62, 70], "easier": 21, "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [20, 23, 52, 55], "elaps": [23, 58], "element": [1, 6], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 48, 64], "emot": [1, 61], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [0, 2, 13], "en": [1, 21, 24, 61, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 54, 62, 63], "engag": 43, "engin": 2, "english": [34, 42], "enjoi": 62, "ensur": [0, 1, 40, 49, 61, 63, 67, 71], "entir": [1, 12, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 1, 2, 15, 39, 64], "entityrecogn": 47, "entri": [1, 28, 61], "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 23, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [0, 1, 41, 55, 61], "eric": 62, "error": [1, 16, 61], "especi": [41, 62], "essenti": [51, 71], "estim": 31, "et": [1, 5, 16, 18, 21, 
24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 61, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62], "example_data": 1, "exce": 15, "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 47], "expected_valu": 47, "explain": 29, "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59, 61], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": [1, 51, 61], "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [0, 1, 2, 31, 54, 61, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, "featuer": 2, "featur": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67], "feature_build": [0, 1, 61], "feature_dict": [1, 61], "feature_method": [64, 65], "feature_nam": [1, 61], "featurebuild": [0, 2, 47, 69], "features_conceptu": [1, 61], "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 1, 2, 12, 14, 19, 61, 65, 67], "filenam": [0, 1, 19], "filenotfounderror": 67, "fill": 71, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 1, 2, 11, 12, 16, 19, 31, 
34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 61, 62, 64, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": 52, "flag": 71, "float": [2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [1, 2, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal": [1, 61], "formal_titl": 49, "format": [1, 8, 17, 22, 47, 48, 61, 62, 64], "former": [45, 46], "formula": [33, 42, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 2, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 23, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 28, "function_word_refer": 28, "fund": 62, "further": [1, 2, 61, 71], "futur": [23, 66], "g": [0, 1, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 61, 63, 65, 66, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [0, 16, 20, 21, 28, 30, 31, 36, 49, 66], "get_all_dd_featur": 11, "get_averag": 72, "get_centroid": 66, "get_certainti": 
5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_count": 14, "get_max": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": 27, "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_average_datafram": 72, "get_user_level_aggreg": 65, "get_user_level_averaged_featur": 66, "get_user_level_summary_statistics_featur": 66, "get_user_level_summed_featur": 66, "get_user_network": [11, 66], "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 71], "give": [1, 29, 37, 61], "give_ag": 49, "given": [5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, 
"grate": 62, "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 1, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [0, 1, 2, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 61, 62, 63, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29, 71], "happen": [1, 2, 55, 62, 63], "happi": 42, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 2, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": 18, "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [0, 43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": [23, 67], "her": [30, 31, 36], "here": [0, 1, 29, 34, 41, 42, 47, 61, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 71, "highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hole": 62, "home": 42, "homework": 34, "homonym": 31, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [1, 15, 17, 24, 61], "http": [1, 2, 4, 5, 6, 12, 13, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 61, 64, 68, 70, 71], "hu": [1, 42, 62], "hug": [1, 51, 61], "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 61], "hypothet": 42, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 
57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [0, 1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 61, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 42, 62], "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], "info_exchange_zscor": 11, "inform": [6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": [0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 21, 22, 28, 31, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_column": [65, 66], "input_data": [25, 68, 72], "input_df": [1, 2, 61, 71], "inquiri": [30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 2, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 21, 22, 28, 63, 64, 67], "intact": 71, "integ": [13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": [0, 23], "interrupt": 59, "interv": [58, 
65], "introduc": 62, "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [1, 24, 47, 61], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 2, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [23, 31, 36, 44], "john": 1, "jonson": 62, "journal": [5, 64], "json": [1, 61], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output_chat_level": [1, 61], "jury_output_conversation_level": 1, "jury_output_turn_level": 1, "jury_output_user_level": 1, "just": [0, 1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyerror": 71, "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 34, 42, 50, 62], "larg": [31, 69], "larger": [0, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 42, 61], "latest": [1, 61], "latter": [31, 36], "lda": [13, 40], "learn": [1, 61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [35, 39, 41, 42, 44], "less": [13, 32, 50, 52, 55, 62, 63], "let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [10, 12, 14, 16, 31, 32, 36, 42, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64], "linear": 64, "linguist": [18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 
15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 67, 68, 70, 71], "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [14, 30, 39, 51, 52, 56, 62], "liwc_featur": [10, 14], "lix": 34, "ll": [1, 31, 36, 61], "load": [19, 69], "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 51, 61], "locat": [1, 62], "long": [4, 42], "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [21, 31, 33, 36, 41, 42, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66], "make": [0, 1, 5, 34, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 66], "manner": [55, 62], "manual": [1, 61], "map": [13, 34], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [5, 16, 19, 30], "math": 34, "matter": 47, "max": 66, "max_num_chunk": 63, "maxim": [34, 35, 37], "maximum": [63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 4, 6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 73], "meaning": [41, 55], "meaningless": 41, "meant": 39, "measur": [0, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 
1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [5, 31, 41, 50, 62], "metric": [0, 1, 2, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [18, 64], "mileston": 34, "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61, 71], "mitig": [31, 36], "mizil": 50, "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 13, 15, 31, 34, 35, 36, 40, 45, 46, 47, 51, 62, 67], "modif": 35, "modifi": [9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": [0, 1], "my_output_chat_level": 61, "my_output_conversation_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 45, 46, 50, 51, 56, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": [0, 34], "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], 
"nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [1, 24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 61, 62, 67], "negat": [19, 49], "negative_bert": [1, 51, 61], "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47, 64], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [1, 5, 24, 30, 51, 55, 61, 67], "neutral_bert": [1, 51, 61], "never": 1, "new": [1, 4, 13, 34, 61, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [1, 50, 54, 61], "nicknam": 1, "niculescu": 50, "night": 31, "nikhil": [59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 48, 61, 62, 71], "none": [1, 2, 19, 23, 37, 55, 61, 64, 65, 66, 67], "nor": 30, "normal": [19, 31], "notabl": 62, "note": [0, 1, 2, 12, 16, 20, 42, 61, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": [0, 1, 2], "nowher": [31, 36], "np": 68, "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 3, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 25, 28, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 69, 71, 72], "numer": [0, 1, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [1, 13, 17, 23, 24, 34, 61], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [1, 31, 36], "offer": 0, "offici": 61, "often": [36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [1, 2, 11, 58, 61, 62], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 
5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 71], "onlin": [1, 32, 39, 64], "onward": 0, "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 71], "org": [6, 15, 21, 24, 41, 70], "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 59], "orthogon": 34, "other": [1, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 23, 32, 38, 63, 67], "our": [0, 1, 2, 11, 13, 29, 31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 2, 16, 19, 31, 36, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 1, 2, 10, 17, 19, 40, 61, 62, 64, 67], "output_file_path_chat_level": [0, 1, 2, 61], "output_file_path_conv_level": [0, 1, 2, 61], "output_file_path_user_level": [0, 1, 2, 61], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1], "overview": [0, 61, 62], "overwritten": 1, "own": [0, 1, 9, 35, 62], "p": 55, "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 18, 29, 40, 50, 64], "paragraph": 22, "param": 71, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 48, 64], "parenthet": [22, 48], "pars": [16, 50, 60], "part": [1, 10, 13, 29, 36, 42, 52, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [11, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [0, 1, 2, 19, 67], "path_in": 19, "pattern": [4, 11, 19, 55, 62, 
67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 2, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "phrase": [19, 30, 38, 54], "phrase_split": 19, "pickl": [19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [0, 1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 1, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 61, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": [1, 51, 61], "positive_emot": [49, 51, 52, 56], "positivity_bert": [1, 61], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, "possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "practic": [34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 1, 2, 13, 19, 40, 43, 49, 51, 61, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 14, 30, 31, 38, 42, 55, 62, 71], "prespecifi": 19, 
"prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": 2, "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 70], "proper": 1, "properti": [1, 61], "proport": [16, 39, 42, 64], "propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [0, 2, 16, 19, 20, 21, 28, 43, 54, 60, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 50, 62, 66], "py": [0, 1, 14, 49, 61], "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": [16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [42, 51], "rather": [31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [0, 12, 16, 21, 31, 33, 42, 50, 64], "re": [1, 31, 36, 42, 50, 61], "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": [1, 24, 61], "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 43, 47], "recognit": 
[0, 1, 2, 39, 64], "recommend": [42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 11, 22, 24, 28, 31, 42, 48, 52, 61, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 49], "regist": 37, "regress": 1, "regular": [5, 14, 30, 32, 42, 55, 58], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [0, 2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 66, 67, 68, 71, 72, 73], "represent": [34, 38], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67], "research": [1, 2, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restor": 0, "restrict": 71, "result": [40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [1, 39, 42, 52, 56, 61, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 
46, 47, 48, 51, 61, 69], "runtim": [1, 35], "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 61, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 67], "scale": [42, 51], "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [1, 4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 51, 53, 56, 57, 61, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 64], "section": [1, 29, 61], "see": [0, 1, 2, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 66, 67, 68, 71, 72, 73], "self": [1, 2, 61], "semant": [31, 34, 35, 41], "semantic_group": [1, 61], "send": [1, 37, 55], "sens": [5, 31, 54, 66], "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [0, 1, 24, 31, 39, 42, 52, 56, 61, 62, 64, 67], "separ": [1, 2, 19, 34, 51], "sepcifi": 1, "septemb": 40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [0, 1, 2, 13, 23, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 65, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": [1, 37, 61], "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 
42, 61, 62], "simpli": [1, 5, 11, 28, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 41, 64], "site": 16, "situat": 37, "size": [1, 13, 63, 67], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 50, 61, 62, 66], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": 59, "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": 10, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66, 71], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 67, 68, 71, 72, 73], "speciifc": 63, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [0, 15, 19, 20, 22, 23, 50], "statement": [1, 38, 42, 47, 48, 61, 62, 64], "statist": [65, 66, 68], "statologi": 41, "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "straightforward": 29, "strategi": [17, 30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58, 61], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "sum": [1, 28, 34, 61, 64, 65, 66, 72], "summar": [0, 1, 65, 66, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 61], "suppos": 1, "sure": 30, "swear": 49, "syllabl": 21, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 2, 15, 29, 31, 36, 45, 49, 54, 61, 62, 67], "tabl": 62, "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "tandem": [1, 61], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [0, 1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 29, 31, 34, 36, 37, 39, 42, 47, 58, 59, 61, 62, 
67], "them": [1, 2, 19, 28, 29, 31, 36, 50, 51, 55, 59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therefor": [0, 1, 11, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 20, 21, 23, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61], "though": [34, 42], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 22, 34, 37, 40, 51, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 2, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61, 64], "timestamp_start": [1, 23, 61, 64], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62], "top": [1, 50, 59], "topic": [1, 13, 34, 40, 42, 43, 65], "tormala": 5, "total": [1, 3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 61, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [31, 34, 35, 36, 51], "transform_utter": 50, "treat": [1, 59, 61], "tri": [50, 64], "trivial": [3, 44, 62], "troubl": [1, 61], "true": [0, 1, 2, 37, 61, 63, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 19, 64], "turn": [0, 1, 2, 25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, 
"turn_taking_featur": 11, "twice": 63, "twitter": [1, 51, 61], "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [1, 22, 31, 36, 48, 49, 58, 61], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": [1, 61], "underscor": [1, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "unpreprocess": [0, 2], "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 45, 46, 51, 59, 61], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upload": 13, "upon": 33, "upper": 42, "us": [0, 1, 2, 3, 5, 11, 12, 13, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [21, 24], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 15, 22, 37, 47, 48, 51, 61, 62, 63, 64, 65, 66, 69, 72], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [1, 12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "utteranc": 42, "v": [0, 1, 2, 13, 42, 61], "v0": 0, "valenc": 51, "valid": [23, 55], "valu": [1, 2, 5, 6, 10, 12, 13, 18, 19, 23, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 61, 64, 68, 71, 72, 73], "vari": [13, 31, 34, 35], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [8, 34], "variance_in_dd": 11, 
"variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [1, 7, 8, 28, 61, 64, 65, 66], "vect_path": 67, "vector": [0, 1, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [1, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verit": 62, "version": [1, 12, 14, 21, 40, 51, 61], "versu": [4, 29, 47, 55, 59], "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": 31, "wa": [0, 1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 67], "warn": 50, "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 18, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "websit": [1, 61], "week": 47, "weight": 66, "welcom": 61, "well": [29, 31, 36, 55, 62], "went": 41, "were": [1, 12, 31, 36, 42], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 47, 54, 55, 59, 60, 61, 62, 69, 71], "whenev": 71, "where": [0, 1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], "wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 58, 62, 63, 64, 67, 71], "which": [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 15, 16, 18, 23, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 72, 73], "while": [31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 59, 62, 71], 
"whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wiki_link": [1, 61], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18], "within": [0, 1, 2, 8, 11, 16, 30, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "write": [2, 29, 60], "www": [12, 13, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [18, 49], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "yourself": [31, 36, 50], "yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, "\u03c4": 55}, "titles": ["The Basics", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features 
module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess module", "summarize_features module", "zscore_chats_and_conversation module"], "titleterms": {"A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "assign_chunk_num": 63, "assumpt": 0, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bursti": [4, 55], "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 
44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": [1, 61], "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [11, 32, 39, 62, 69], "count": [42, 59], "customiz": 0, "dale": 33, "data": 1, "declar": 61, "demo": [0, 1], "differ": 58, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": 47, "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [1, 11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 69], "feature_build": 2, "featurebuild": [1, 61, 62], "fflow": 7, "file": [30, 34, 35, 45, 46, 47, 51], "first": 53, "flow": 35, "forward": 35, "function": [0, 36], "gener": [1, 61, 62], "get": [1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "hedg": [10, 38], "high": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [1, 40, 41, 61], "information_divers": 13, "input": [1, 34], "inquiri": 42, "inspect": [1, 61], "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [1, 29, 47, 61], "named_entity_recognition_featur": 15, "note": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [30, 35, 45, 46, 47, 51], "packag": [0, 1, 61], "paramet": 0, "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": 47, "recommend": [1, 61], "reddit_tag": 22, "relat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": 59, "turn_taking_featur": 25, "type": 60, "us": 61, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "variance_in_dd": 26, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": 1, "z": [41, 52], 
"zscore_chats_and_convers": 73}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"A Light-Touch, One-Function Package": [[0, "a-light-touch-one-function-package"]], "Additional FeatureBuilder Considerations": [[1, "additional-featurebuilder-considerations"]], "Advanced Configuration Columns": [[1, "advanced-configuration-columns"]], "Basic Input Columns": [[1, "basic-input-columns"]], "Certainty": [[30, "certainty"]], "Citation": [[29, "citation"], [30, "citation"], [31, "citation"], [32, "citation"], [33, "citation"], [34, "citation"], [35, "citation"], [36, "citation"], [37, "citation"], [38, "citation"], [40, "citation"], [41, "citation"], [42, "citation"], [43, "citation"], [44, "citation"], [45, "citation"], [46, "citation"], [47, "citation"], [48, "citation"], [49, "citation"], [50, "citation"], [51, "citation"], [52, "citation"], [53, "citation"], [54, "citation"], [55, "citation"], [56, "citation"], [57, "citation"], [58, "citation"], [59, "citation"], [60, "citation"]], "Configuring the FeatureBuilder": [[1, "configuring-the-featurebuilder"]], "Content Word Accommodation": [[31, "content-word-accommodation"]], "Contents:": [[61, null]], "Conversation Parameters": [[1, "conversation-parameters"]], "Conversation-Level Features": [[11, "conversation-level-features"], [39, "conversation-level-features"]], "Conversational Repair": [[32, "conversational-repair"]], "Customizable Parameters": [[0, "customizable-parameters"]], "Dale-Chall Score": [[33, "dale-chall-score"]], "Declaring a FeatureBuilder": [[61, "declaring-a-featurebuilder"]], "Demo / Sample Code": [[0, "demo-sample-code"], [1, "demo-sample-code"]], "Discursive Diversity": [[34, "discursive-diversity"]], "Example:": [[41, "example"]], "FEATURE NAME": [[29, "feature-name"]], "Feature Column Names": [[1, "feature-column-names"], [61, "feature-column-names"]], "Feature Documentation": [[62, "feature-documentation"]], "Feature Information": [[1, "feature-information"], [61, "feature-information"]], "Features: Conceptual Documentation": [[39, 
"features-conceptual-documentation"]], "Features: Technical Documentation": [[11, "features-technical-documentation"]], "Forward Flow": [[35, "forward-flow"]], "Function Word Accommodation": [[36, "function-word-accommodation"]], "Generating Features: Utterance-, Speaker-, and Conversation-Level": [[62, "generating-features-utterance-speaker-and-conversation-level"]], "Getting Started": [[1, "getting-started"], [61, "getting-started"], [62, "getting-started"]], "Gini Coefficient": [[37, "gini-coefficient"]], "Hedge": [[38, "hedge"]], "High*Level Intuition": [[54, "high-level-intuition"]], "High-Level Intuition": [[29, "high-level-intuition"], [30, "high-level-intuition"], [31, "high-level-intuition"], [32, "high-level-intuition"], [33, "high-level-intuition"], [34, "high-level-intuition"], [35, "high-level-intuition"], [36, "high-level-intuition"], [37, "high-level-intuition"], [38, "high-level-intuition"], [40, "high-level-intuition"], [41, "high-level-intuition"], [42, "high-level-intuition"], [43, "high-level-intuition"], [44, "high-level-intuition"], [45, "high-level-intuition"], [46, "high-level-intuition"], [47, "high-level-intuition"], [48, "high-level-intuition"], [49, "high-level-intuition"], [50, "high-level-intuition"], [51, "high-level-intuition"], [52, "high-level-intuition"], [53, "high-level-intuition"], [55, "high-level-intuition"], [56, "high-level-intuition"], [57, "high-level-intuition"], [58, "high-level-intuition"], [59, "high-level-intuition"], [60, "high-level-intuition"]], "Implementation": [[32, "implementation"], [42, "implementation"], [52, "implementation"], [54, "implementation"]], "Implementation Basics": [[29, "implementation-basics"], [30, "implementation-basics"], [31, "implementation-basics"], [33, "implementation-basics"], [34, "implementation-basics"], [35, "implementation-basics"], [36, "implementation-basics"], [37, "implementation-basics"], [38, "implementation-basics"], [40, "implementation-basics"], [41, 
"implementation-basics"], [43, "implementation-basics"], [44, "implementation-basics"], [45, "implementation-basics"], [46, "implementation-basics"], [47, "implementation-basics"], [48, "implementation-basics"], [49, "implementation-basics"], [50, "implementation-basics"], [51, "implementation-basics"], [53, "implementation-basics"], [55, "implementation-basics"], [56, "implementation-basics"], [57, "implementation-basics"], [58, "implementation-basics"], [59, "implementation-basics"], [60, "implementation-basics"]], "Implementation Notes/Caveats": [[29, "implementation-notes-caveats"], [30, "implementation-notes-caveats"], [31, "implementation-notes-caveats"], [33, "implementation-notes-caveats"], [34, "implementation-notes-caveats"], [35, "implementation-notes-caveats"], [36, "implementation-notes-caveats"], [38, "implementation-notes-caveats"], [40, "implementation-notes-caveats"], [41, "implementation-notes-caveats"], [43, "implementation-notes-caveats"], [44, "implementation-notes-caveats"], [45, "implementation-notes-caveats"], [46, "implementation-notes-caveats"], [47, "implementation-notes-caveats"], [48, "implementation-notes-caveats"], [49, "implementation-notes-caveats"], [50, "implementation-notes-caveats"], [51, "implementation-notes-caveats"], [53, "implementation-notes-caveats"], [55, "implementation-notes-caveats"], [56, "implementation-notes-caveats"], [57, "implementation-notes-caveats"], [58, "implementation-notes-caveats"], [59, "implementation-notes-caveats"]], "Import Recommendations: Virtual Environment and Pip": [[1, "import-recommendations-virtual-environment-and-pip"], [61, "import-recommendations-virtual-environment-and-pip"]], "Importing the Package": [[1, "importing-the-package"]], "Indices and Tables": [[61, "indices-and-tables"]], "Information Diversity": [[40, "information-diversity"]], "Information Exchange": [[41, "information-exchange"]], "Input File": [[34, "id2"]], "Inspecting Generated Features": [[1, 
"inspecting-generated-features"], [61, "inspecting-generated-features"]], "Interpretation:": [[41, "interpretation"]], "Interpreting the Feature": [[29, "interpreting-the-feature"], [30, "interpreting-the-feature"], [31, "interpreting-the-feature"], [32, "interpreting-the-feature"], [33, "interpreting-the-feature"], [34, "interpreting-the-feature"], [35, "interpreting-the-feature"], [36, "interpreting-the-feature"], [37, "interpreting-the-feature"], [38, "interpreting-the-feature"], [40, "interpreting-the-feature"], [41, "interpreting-the-feature"], [42, "interpreting-the-feature"], [43, "interpreting-the-feature"], [44, "interpreting-the-feature"], [45, "interpreting-the-feature"], [46, "interpreting-the-feature"], [47, "interpreting-the-feature"], [48, "interpreting-the-feature"], [49, "interpreting-the-feature"], [50, "interpreting-the-feature"], [51, "interpreting-the-feature"], [52, "interpreting-the-feature"], [53, "interpreting-the-feature"], [54, "interpreting-the-feature"], [55, "interpreting-the-feature"], [56, "interpreting-the-feature"], [57, "interpreting-the-feature"], [58, "interpreting-the-feature"], [59, "interpreting-the-feature"], [60, "interpreting-the-feature"]], "Introduction": [[62, "introduction"]], "Key Assumptions and Parameters": [[0, "key-assumptions-and-parameters"]], "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons": [[42, "linguistic-inquiry-and-word-count-liwc-and-other-lexicons"]], "Message Length": [[43, "message-length"]], "Message Quantity": [[44, "message-quantity"]], "Mimicry (BERT)": [[45, "mimicry-bert"]], "Motivation": [[62, "motivation"]], "Moving Mimicry": [[46, "moving-mimicry"]], "Named Entity Recognition": [[47, "named-entity-recognition"]], "Named Entity Training Examples": [[47, "id2"]], "Online Discussion Tags": [[48, "online-discussion-tags"]], "Other Utilities": [[69, "other-utilities"]], "Ouput File": [[34, "id3"]], "Our Team": [[62, "our-team"]], "Output File": [[30, "id2"], [35, "id2"], [45, "id2"], 
[46, "id2"], [47, "id3"], [51, "id1"]], "Output File Naming Details": [[1, "output-file-naming-details"]], "Package Assumptions": [[0, "package-assumptions"]], "Politeness Strategies": [[50, "politeness-strategies"]], "Politeness/Receptiveness Markers": [[49, "politeness-receptiveness-markers"]], "Positivity Z-Score": [[52, "positivity-z-score"]], "Proportion of First Person Pronouns": [[53, "proportion-of-first-person-pronouns"]], "Question (Naive)": [[54, "question-naive"]], "Related Features": [[29, "related-features"], [30, "related-features"], [31, "related-features"], [32, "related-features"], [33, "related-features"], [34, "related-features"], [35, "related-features"], [36, "related-features"], [37, "related-features"], [38, "related-features"], [40, "related-features"], [41, "related-features"], [42, "related-features"], [43, "related-features"], [44, "related-features"], [45, "related-features"], [46, "related-features"], [47, "related-features"], [48, "related-features"], [49, "related-features"], [50, "related-features"], [51, "related-features"], [52, "related-features"], [53, "related-features"], [54, "related-features"], [55, "related-features"], [56, "related-features"], [57, "related-features"], [58, "related-features"], [59, "related-features"], [60, "related-features"]], "Sentiment (RoBERTa)": [[51, "sentiment-roberta"]], "Speaker Turn Counts": [[59, "id2"]], "Speaker- (User) Level Features": [[11, "speaker-user-level-features"]], "Table of Contents": [[61, "table-of-contents"]], "Team Burstiness": [[55, "team-burstiness"]], "Textblob Polarity": [[56, "textblob-polarity"]], "Textblob Subjectivity": [[57, "textblob-subjectivity"]], "The Basics": [[0, "the-basics"]], "The FeatureBuilder": [[62, "the-featurebuilder"]], "The Team Communication Toolkit": [[61, "the-team-communication-toolkit"]], "Time Difference": [[58, "time-difference"]], "Troubleshooting": [[1, "troubleshooting"], [61, "troubleshooting"]], "Turn Taking Index": [[59, 
"turn-taking-index"]], "Turns": [[1, "turns"]], "Using the Package": [[61, "using-the-package"]], "Utilities": [[69, "utilities"]], "Utterance- (Chat) Level Features": [[11, "utterance-chat-level-features"], [39, "utterance-chat-level-features"]], "Vector Directory": [[1, "vector-directory"]], "Walkthrough: Running the FeatureBuilder on Your Data": [[1, "walkthrough-running-the-featurebuilder-on-your-data"]], "Word Type-Token Ratio": [[60, "word-type-token-ratio"]], "Worked Example": [[1, "worked-example"]], "assign_chunk_nums module": [[63, "module-utils.assign_chunk_nums"]], "basic_features module": [[3, "module-features.basic_features"]], "burstiness module": [[4, "module-features.burstiness"]], "calculate_chat_level_features module": [[64, "module-utils.calculate_chat_level_features"]], "calculate_conversation_level_features module": [[65, "module-utils.calculate_conversation_level_features"]], "calculate_user_level_features module": [[66, "module-utils.calculate_user_level_features"]], "certainty module": [[5, "module-features.certainty"]], "check_embeddings module": [[67, "module-utils.check_embeddings"]], "discursive_diversity module": [[6, "module-features.discursive_diversity"]], "feature_builder module": [[2, "module-feature_builder"]], "fflow module": [[7, "module-features.fflow"]], "get_all_DD_features module": [[8, "module-features.get_all_DD_features"]], "get_user_network module": [[9, "module-features.get_user_network"]], "gini_coefficient module": [[68, "module-utils.gini_coefficient"]], "hedge module": [[10, "module-features.hedge"]], "info_exchange_zscore module": [[12, "module-features.info_exchange_zscore"]], "information_diversity module": [[13, "module-features.information_diversity"]], "lexical_features_v2 module": [[14, "module-features.lexical_features_v2"]], "named_entity_recognition_features module": [[15, "module-features.named_entity_recognition_features"]], "other_lexical_features module": [[16, 
"module-features.other_lexical_features"]], "politeness_features module": [[17, "module-features.politeness_features"]], "politeness_v2 module": [[18, "module-features.politeness_v2"]], "politeness_v2_helper module": [[19, "module-features.politeness_v2_helper"]], "preload_word_lists module": [[70, "module-utils.preload_word_lists"]], "preprocess module": [[71, "module-utils.preprocess"]], "question_num module": [[20, "module-features.question_num"]], "readability module": [[21, "module-features.readability"]], "reddit_tags module": [[22, "module-features.reddit_tags"]], "summarize_features module": [[72, "module-utils.summarize_features"]], "temporal_features module": [[23, "module-features.temporal_features"]], "textblob_sentiment_analysis module": [[24, "module-features.textblob_sentiment_analysis"]], "turn_taking_features module": [[25, "module-features.turn_taking_features"]], "variance_in_DD module": [[26, "module-features.variance_in_DD"]], "within_person_discursive_range module": [[27, "module-features.within_person_discursive_range"]], "word_mimicry module": [[28, "module-features.word_mimicry"]], "z-scores:": [[41, "z-scores"]], "zscore_chats_and_conversation module": [[73, "module-utils.zscore_chats_and_conversation"]], "\u201cDriver\u201d Classes: Utterance-, Conversation-, and Speaker-Level Features": [[69, "driver-classes-utterance-conversation-and-speaker-level-features"]]}, "docnames": ["basics", "examples", "feature_builder", "features/basic_features", "features/burstiness", "features/certainty", "features/discursive_diversity", "features/fflow", "features/get_all_DD_features", "features/get_user_network", "features/hedge", "features/index", "features/info_exchange_zscore", "features/information_diversity", "features/lexical_features_v2", "features/named_entity_recognition_features", "features/other_lexical_features", "features/politeness_features", "features/politeness_v2", "features/politeness_v2_helper", "features/question_num", 
"features/readability", "features/reddit_tags", "features/temporal_features", "features/textblob_sentiment_analysis", "features/turn_taking_features", "features/variance_in_DD", "features/within_person_discursive_range", "features/word_mimicry", "features_conceptual/TEMPLATE", "features_conceptual/certainty", "features_conceptual/content_word_accommodation", "features_conceptual/conversational_repair", "features_conceptual/dale_chall_score", "features_conceptual/discursive_diversity", "features_conceptual/forward_flow", "features_conceptual/function_word_accommodation", "features_conceptual/gini_coefficient", "features_conceptual/hedge", "features_conceptual/index", "features_conceptual/information_diversity", "features_conceptual/information_exchange", "features_conceptual/liwc", "features_conceptual/message_length", "features_conceptual/message_quantity", "features_conceptual/mimicry_bert", "features_conceptual/moving_mimicry", "features_conceptual/named_entity_recognition", "features_conceptual/online_discussions_tags", "features_conceptual/politeness_receptiveness_markers", "features_conceptual/politeness_strategies", "features_conceptual/positivity_bert", "features_conceptual/positivity_z_score", "features_conceptual/proportion_of_first_person_pronouns", "features_conceptual/questions", "features_conceptual/team_burstiness", "features_conceptual/textblob_polarity", "features_conceptual/textblob_subjectivity", "features_conceptual/time_difference", "features_conceptual/turn_taking_index", "features_conceptual/word_ttr", "index", "intro", "utils/assign_chunk_nums", "utils/calculate_chat_level_features", "utils/calculate_conversation_level_features", "utils/calculate_user_level_features", "utils/check_embeddings", "utils/gini_coefficient", "utils/index", "utils/preload_word_lists", "utils/preprocess", "utils/summarize_features", "utils/zscore_chats_and_conversation"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, 
"sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["basics.rst", "examples.rst", "feature_builder.rst", "features/basic_features.rst", "features/burstiness.rst", "features/certainty.rst", "features/discursive_diversity.rst", "features/fflow.rst", "features/get_all_DD_features.rst", "features/get_user_network.rst", "features/hedge.rst", "features/index.rst", "features/info_exchange_zscore.rst", "features/information_diversity.rst", "features/lexical_features_v2.rst", "features/named_entity_recognition_features.rst", "features/other_lexical_features.rst", "features/politeness_features.rst", "features/politeness_v2.rst", "features/politeness_v2_helper.rst", "features/question_num.rst", "features/readability.rst", "features/reddit_tags.rst", "features/temporal_features.rst", "features/textblob_sentiment_analysis.rst", "features/turn_taking_features.rst", "features/variance_in_DD.rst", "features/within_person_discursive_range.rst", "features/word_mimicry.rst", "features_conceptual/TEMPLATE.rst", "features_conceptual/certainty.rst", "features_conceptual/content_word_accommodation.rst", "features_conceptual/conversational_repair.rst", "features_conceptual/dale_chall_score.rst", "features_conceptual/discursive_diversity.rst", "features_conceptual/forward_flow.rst", "features_conceptual/function_word_accommodation.rst", "features_conceptual/gini_coefficient.rst", "features_conceptual/hedge.rst", "features_conceptual/index.rst", "features_conceptual/information_diversity.rst", "features_conceptual/information_exchange.rst", "features_conceptual/liwc.rst", "features_conceptual/message_length.rst", "features_conceptual/message_quantity.rst", "features_conceptual/mimicry_bert.rst", "features_conceptual/moving_mimicry.rst", "features_conceptual/named_entity_recognition.rst", 
"features_conceptual/online_discussions_tags.rst", "features_conceptual/politeness_receptiveness_markers.rst", "features_conceptual/politeness_strategies.rst", "features_conceptual/positivity_bert.rst", "features_conceptual/positivity_z_score.rst", "features_conceptual/proportion_of_first_person_pronouns.rst", "features_conceptual/questions.rst", "features_conceptual/team_burstiness.rst", "features_conceptual/textblob_polarity.rst", "features_conceptual/textblob_subjectivity.rst", "features_conceptual/time_difference.rst", "features_conceptual/turn_taking_index.rst", "features_conceptual/word_ttr.rst", "index.rst", "intro.rst", "utils/assign_chunk_nums.rst", "utils/calculate_chat_level_features.rst", "utils/calculate_conversation_level_features.rst", "utils/calculate_user_level_features.rst", "utils/check_embeddings.rst", "utils/gini_coefficient.rst", "utils/index.rst", "utils/preload_word_lists.rst", "utils/preprocess.rst", "utils/summarize_features.rst", "utils/zscore_chats_and_conversation.rst"], "indexentries": {}, "objects": {"": [[2, 0, 0, "-", "feature_builder"]], "feature_builder": [[2, 1, 1, "", "FeatureBuilder"]], "feature_builder.FeatureBuilder": [[2, 2, 1, "", "chat_level_features"], [2, 2, 1, "", "conv_level_features"], [2, 2, 1, "", "featurize"], [2, 2, 1, "", "get_first_pct_of_chat"], [2, 2, 1, "", "merge_conv_data_with_original"], [2, 2, 1, "", "preprocess_chat_data"], [2, 2, 1, "", "save_features"], [2, 2, 1, "", "set_self_conv_data"], [2, 2, 1, "", "user_level_features"]], "features": [[3, 0, 0, "-", "basic_features"], [4, 0, 0, "-", "burstiness"], [5, 0, 0, "-", "certainty"], [6, 0, 0, "-", "discursive_diversity"], [7, 0, 0, "-", "fflow"], [8, 0, 0, "-", "get_all_DD_features"], [9, 0, 0, "-", "get_user_network"], [10, 0, 0, "-", "hedge"], [12, 0, 0, "-", "info_exchange_zscore"], [13, 0, 0, "-", "information_diversity"], [14, 0, 0, "-", "lexical_features_v2"], [15, 0, 0, "-", "named_entity_recognition_features"], [16, 0, 0, "-", 
"other_lexical_features"], [17, 0, 0, "-", "politeness_features"], [18, 0, 0, "-", "politeness_v2"], [19, 0, 0, "-", "politeness_v2_helper"], [20, 0, 0, "-", "question_num"], [21, 0, 0, "-", "readability"], [22, 0, 0, "-", "reddit_tags"], [23, 0, 0, "-", "temporal_features"], [24, 0, 0, "-", "textblob_sentiment_analysis"], [25, 0, 0, "-", "turn_taking_features"], [26, 0, 0, "-", "variance_in_DD"], [27, 0, 0, "-", "within_person_discursive_range"], [28, 0, 0, "-", "word_mimicry"]], "features.basic_features": [[3, 3, 1, "", "count_characters"], [3, 3, 1, "", "count_messages"], [3, 3, 1, "", "count_words"]], "features.burstiness": [[4, 3, 1, "", "burstiness"], [4, 3, 1, "", "get_team_burstiness"]], "features.certainty": [[5, 3, 1, "", "get_certainty"]], "features.discursive_diversity": [[6, 3, 1, "", "get_DD"], [6, 3, 1, "", "get_cosine_similarity"], [6, 3, 1, "", "get_unique_pairwise_combos"]], "features.fflow": [[7, 3, 1, "", "get_forward_flow"]], "features.get_all_DD_features": [[8, 3, 1, "", "conv_to_float_arr"], [8, 3, 1, "", "get_DD_features"]], "features.get_user_network": [[9, 3, 1, "", "get_user_network"], [9, 3, 1, "", "remove_active_user"]], "features.hedge": [[10, 3, 1, "", "is_hedged_sentence_1"]], "features.info_exchange_zscore": [[12, 3, 1, "", "get_info_exchange_wordcount"]], "features.information_diversity": [[13, 3, 1, "", "calculate_ID_score"], [13, 3, 1, "", "get_info_diversity"], [13, 3, 1, "", "info_diversity"], [13, 3, 1, "", "preprocessing"]], "features.lexical_features_v2": [[14, 3, 1, "", "get_liwc_count"], [14, 3, 1, "", "liwc_features"]], "features.named_entity_recognition_features": [[15, 3, 1, "", "built_spacy_ner"], [15, 3, 1, "", "calculate_named_entities"], [15, 3, 1, "", "named_entities"], [15, 3, 1, "", "num_named_entity"], [15, 3, 1, "", "train_spacy_ner"]], "features.other_lexical_features": [[16, 3, 1, "", "classify_NTRI"], [16, 3, 1, "", "get_proportion_first_pronouns"], [16, 3, 1, "", "get_word_TTR"]], 
"features.politeness_features": [[17, 3, 1, "", "get_politeness_strategies"]], "features.politeness_v2": [[18, 3, 1, "", "get_politeness_v2"]], "features.politeness_v2_helper": [[19, 3, 1, "", "Question"], [19, 3, 1, "", "adverb_limiter"], [19, 3, 1, "", "bare_command"], [19, 3, 1, "", "clean_text"], [19, 3, 1, "", "commit_data"], [19, 3, 1, "", "conjection_seperator"], [19, 3, 1, "", "count_matches"], [19, 3, 1, "", "count_spacy_matches"], [19, 3, 1, "", "feat_counts"], [19, 3, 1, "", "get_dep_pairs"], [19, 3, 1, "", "get_dep_pairs_noneg"], [19, 3, 1, "", "load_saved_data"], [19, 3, 1, "", "load_to_dict"], [19, 3, 1, "", "load_to_lists"], [19, 3, 1, "", "phrase_split"], [19, 3, 1, "", "prep_simple"], [19, 3, 1, "", "prep_whole"], [19, 3, 1, "", "punctuation_seperator"], [19, 3, 1, "", "sentence_pad"], [19, 3, 1, "", "sentence_split"], [19, 3, 1, "", "sentenciser"], [19, 3, 1, "", "token_count"], [19, 3, 1, "", "word_start"]], "features.question_num": [[20, 3, 1, "", "calculate_num_question_naive"]], "features.readability": [[21, 3, 1, "", "classify_text_dalechall"], [21, 3, 1, "", "count_difficult_words"], [21, 3, 1, "", "count_syllables"], [21, 3, 1, "", "dale_chall_helper"]], "features.reddit_tags": [[22, 3, 1, "", "count_all_caps"], [22, 3, 1, "", "count_bullet_points"], [22, 3, 1, "", "count_ellipses"], [22, 3, 1, "", "count_emojis"], [22, 3, 1, "", "count_emphasis"], [22, 3, 1, "", "count_line_breaks"], [22, 3, 1, "", "count_links"], [22, 3, 1, "", "count_numbering"], [22, 3, 1, "", "count_parentheses"], [22, 3, 1, "", "count_quotes"], [22, 3, 1, "", "count_responding_to_someone"], [22, 3, 1, "", "count_user_references"]], "features.temporal_features": [[23, 3, 1, "", "coerce_to_date_or_number"], [23, 3, 1, "", "get_time_diff"], [23, 3, 1, "", "get_time_diff_startend"]], "features.textblob_sentiment_analysis": [[24, 3, 1, "", "get_polarity_score"], [24, 3, 1, "", "get_subjectivity_score"]], "features.turn_taking_features": [[25, 3, 1, "", 
"count_turn_taking_index"], [25, 3, 1, "", "count_turns"], [25, 3, 1, "", "get_turn"]], "features.variance_in_DD": [[26, 3, 1, "", "get_variance_in_DD"]], "features.within_person_discursive_range": [[27, 3, 1, "", "get_nan_vector"], [27, 3, 1, "", "get_within_person_disc_range"]], "features.word_mimicry": [[28, 3, 1, "", "Content_mimicry_score"], [28, 3, 1, "", "computeTF"], [28, 3, 1, "", "compute_frequency"], [28, 3, 1, "", "function_mimicry_score"], [28, 3, 1, "", "get_content_words_in_message"], [28, 3, 1, "", "get_function_words_in_message"], [28, 3, 1, "", "get_mimicry_bert"], [28, 3, 1, "", "get_moving_mimicry"], [28, 3, 1, "", "mimic_words"]], "utils": [[63, 0, 0, "-", "assign_chunk_nums"], [64, 0, 0, "-", "calculate_chat_level_features"], [65, 0, 0, "-", "calculate_conversation_level_features"], [66, 0, 0, "-", "calculate_user_level_features"], [67, 0, 0, "-", "check_embeddings"], [68, 0, 0, "-", "gini_coefficient"], [70, 0, 0, "-", "preload_word_lists"], [71, 0, 0, "-", "preprocess"], [72, 0, 0, "-", "summarize_features"], [73, 0, 0, "-", "zscore_chats_and_conversation"]], "utils.assign_chunk_nums": [[63, 3, 1, "", "assign_chunk_nums"], [63, 3, 1, "", "create_chunks"], [63, 3, 1, "", "create_chunks_messages"], [63, 3, 1, "", "reduce_chunks"]], "utils.calculate_chat_level_features": [[64, 1, 1, "", "ChatLevelFeaturesCalculator"]], "utils.calculate_chat_level_features.ChatLevelFeaturesCalculator": [[64, 2, 1, "", "calculate_chat_level_features"], [64, 2, 1, "", "calculate_hedge_features"], [64, 2, 1, "", "calculate_politeness_sentiment"], [64, 2, 1, "", "calculate_politeness_v2"], [64, 2, 1, "", "calculate_textblob_sentiment"], [64, 2, 1, "", "calculate_vector_word_mimicry"], [64, 2, 1, "", "calculate_word_mimicry"], [64, 2, 1, "", "concat_bert_features"], [64, 2, 1, "", "get_certainty_score"], [64, 2, 1, "", "get_dale_chall_score_and_classfication"], [64, 2, 1, "", "get_forward_flow"], [64, 2, 1, "", "get_named_entity"], [64, 2, 1, "", 
"get_reddit_features"], [64, 2, 1, "", "get_temporal_features"], [64, 2, 1, "", "info_exchange"], [64, 2, 1, "", "lexical_features"], [64, 2, 1, "", "other_lexical_features"], [64, 2, 1, "", "positivity_zscore"], [64, 2, 1, "", "text_based_features"]], "utils.calculate_conversation_level_features": [[65, 1, 1, "", "ConversationLevelFeaturesCalculator"]], "utils.calculate_conversation_level_features.ConversationLevelFeaturesCalculator": [[65, 2, 1, "", "calculate_conversation_level_features"], [65, 2, 1, "", "calculate_info_diversity"], [65, 2, 1, "", "calculate_team_burstiness"], [65, 2, 1, "", "get_conversation_level_aggregates"], [65, 2, 1, "", "get_discursive_diversity_features"], [65, 2, 1, "", "get_gini_features"], [65, 2, 1, "", "get_turn_taking_features"], [65, 2, 1, "", "get_user_level_aggregates"]], "utils.calculate_user_level_features": [[66, 1, 1, "", "UserLevelFeaturesCalculator"]], "utils.calculate_user_level_features.UserLevelFeaturesCalculator": [[66, 2, 1, "", "calculate_user_level_features"], [66, 2, 1, "", "get_centroids"], [66, 2, 1, "", "get_user_level_averaged_features"], [66, 2, 1, "", "get_user_level_summary_statistics_features"], [66, 2, 1, "", "get_user_level_summed_features"], [66, 2, 1, "", "get_user_network"]], "utils.check_embeddings": [[67, 3, 1, "", "check_embeddings"], [67, 3, 1, "", "generate_bert"], [67, 3, 1, "", "generate_certainty_pkl"], [67, 3, 1, "", "generate_lexicon_pkl"], [67, 3, 1, "", "generate_vect"], [67, 3, 1, "", "get_sentiment"], [67, 3, 1, "", "read_in_lexicons"]], "utils.gini_coefficient": [[68, 3, 1, "", "get_gini"], [68, 3, 1, "", "gini_coefficient"]], "utils.preload_word_lists": [[70, 3, 1, "", "get_dale_chall_easy_words"], [70, 3, 1, "", "get_first_person_words"], [70, 3, 1, "", "get_function_words"], [70, 3, 1, "", "get_question_words"]], "utils.preprocess": [[71, 3, 1, "", "assert_key_columns_present"], [71, 3, 1, "", "compress"], [71, 3, 1, "", "create_cumulative_rows"], [71, 3, 1, "", "get_turn_id"], [71, 
3, 1, "", "preprocess_conversation_columns"], [71, 3, 1, "", "preprocess_naive_turns"], [71, 3, 1, "", "preprocess_text"], [71, 3, 1, "", "preprocess_text_lowercase_but_retain_punctuation"]], "utils.summarize_features": [[72, 3, 1, "", "get_average"], [72, 3, 1, "", "get_max"], [72, 3, 1, "", "get_min"], [72, 3, 1, "", "get_stdev"], [72, 3, 1, "", "get_sum"], [72, 3, 1, "", "get_user_average_dataframe"], [72, 3, 1, "", "get_user_sum_dataframe"]], "utils.zscore_chats_and_conversation": [[73, 3, 1, "", "get_zscore_across_all_chats"], [73, 3, 1, "", "get_zscore_across_all_conversations"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function"}, "terms": {"": [0, 1, 2, 4, 5, 9, 11, 13, 25, 28, 29, 31, 32, 34, 35, 36, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 64, 65, 66], "0": [0, 1, 2, 5, 10, 13, 16, 21, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 45, 46, 47, 50, 51, 53, 55, 59, 61], "000": 42, "00222437221134802": [5, 64], "01": 51, "02": 51, "04": 40, "0496": [21, 33], "05": [13, 40, 50, 51], "06": 51, "08": 50, "09": [45, 46, 50], "1": [0, 1, 2, 3, 10, 13, 22, 24, 32, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 51, 53, 55, 56, 57, 59, 61, 62], "10": [1, 5, 6, 21, 24, 33, 42, 59, 61, 64], "100": [1, 21, 33, 37, 42, 47, 62], "1000": 42, "10th": 33, "1145": [21, 24], "1177": [5, 64], "11th": 33, "12": [35, 45, 46, 50], "1287": 6, "12th": 33, "13": 50, "14": 50, "15": [37, 50], "1579": [21, 33], "17": 50, "1948": 33, "195": 36, "1977": 62, "1lpngokujsx": 5, "1st": 50, "1st_person": 50, "1st_person_pl": 50, "1st_person_start": 50, "2": [1, 2, 34, 35, 41, 47, 59, 61, 62], "20": [37, 59], "2004": 42, "2007": [5, 42], "2009": 60, "2012": 55, "2013": [12, 16, 31, 32, 36, 37, 38, 41, 43, 50, 52, 54, 70], "2015": [53, 58, 60], 
"2016": 4, "2017": 13, "2018": [40, 44, 55], "2019": [35, 52], "2020": [18, 21, 24, 33, 49, 50, 56, 57], "2021": [1, 6, 43, 44], "2022": [13, 34], "2023": [1, 5, 30, 59, 61, 64], "2024": 40, "21": 59, "22": [41, 50], "2384068": 4, "24": [1, 61], "25": 47, "27": 50, "28": 50, "29": 50, "2nd": 50, "2nd_person": 50, "2nd_person_start": 50, "3": [0, 1, 2, 21, 34, 41, 42, 51, 59, 61, 71], "30": 50, "3000": 33, "32": [34, 50], "3432929": [21, 24], "35": 51, "36": 50, "38": 50, "39": 49, "39512260": 68, "3n": 59, "4": [0, 1, 5, 13, 21, 30, 33, 41, 42, 56, 61, 62], "4274": 6, "43": 50, "45": 50, "47": 50, "49": 50, "4pit4bqz6": 5, "4th": [21, 33], "5": [1, 5, 21, 30, 33, 37, 41, 59], "50": [1, 47], "52": 50, "53": 50, "57": 50, "58": 50, "5th": 33, "6": [1, 33, 43], "60": 51, "63": 50, "6365": 21, "64": 67, "68": 47, "6th": 33, "7": [30, 33, 48], "70": 50, "78": [35, 50], "7th": 33, "8": [1, 30, 33], "80": [21, 70], "82": 41, "85": 34, "86": 35, "87": 50, "89": [45, 46], "8th": 33, "9": [2, 5, 21, 30, 33, 40, 47, 50], "9123": 47, "92": 51, "93chall_readability_formula": [21, 70], "94": 15, "95": 47, "97": 51, "9855072464": 47, "9992": 47, "99954": 47, "9th": 33, "A": [1, 2, 4, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 28, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 47, 49, 50, 51, 52, 57, 59, 60, 61, 62, 66, 67, 68, 70, 71, 72, 73], "And": [1, 62], "As": [1, 31, 35, 36, 40, 45, 61], "But": [1, 50, 62], "By": [1, 42, 50], "For": [0, 1, 31, 34, 37, 41, 42, 43, 47, 49, 54, 56, 59, 62, 65], "If": [0, 1, 2, 5, 21, 29, 30, 35, 45, 47, 50, 55, 61, 62, 63, 64, 67, 71], "In": [1, 21, 30, 31, 34, 35, 36, 37, 39, 41, 42, 45, 46, 47, 50, 55, 59, 61, 62], "It": [1, 2, 31, 32, 33, 36, 37, 41, 44, 45, 46, 50, 64, 65, 66, 67, 71], "NO": 37, "NOT": [1, 61], "No": [19, 53], "Not": 41, "One": [1, 37, 61], "That": [29, 55], "The": [1, 2, 3, 4, 5, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 48, 49, 50, 51, 52, 53, 
54, 55, 58, 59, 60, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "Then": [1, 55, 61], "There": [1, 11, 32, 61, 66], "These": [1, 11, 17, 32, 34, 42, 48, 52, 61, 62, 69], "To": [0, 1, 29, 31, 34, 37, 40, 55, 56, 57, 61, 62], "WITH": 21, "Will": 50, "_deviat": 55, "_preprocessed_": 0, "abil": [13, 29], "abl": [31, 36, 61], "abort": 1, "about": [1, 12, 29, 31, 36, 41, 47, 61, 62], "abov": [1, 21, 34, 61], "abstract_id": 4, "accept": [0, 1, 58, 61], "access": [0, 1, 15, 61], "accommod": [28, 32, 39, 45, 46, 64, 65, 66], "accord": [21, 37, 59, 64, 70], "accordingli": 63, "account": [1, 29, 32, 42], "accus": 50, "achiev": [50, 62], "acknowledg": 49, "acm": [21, 24], "acommod": 36, "across": [1, 13, 28, 31, 34, 40, 41, 50, 62, 64, 73], "action": 59, "activ": [1, 9, 44, 55, 71], "actual": [41, 56], "ad": [61, 62, 71], "adapt": 59, "add": [0, 1, 2, 21, 51, 61], "addit": [0, 2, 32, 34, 42, 63, 69], "addition": [0, 30, 31, 32, 54], "address": 1, "adjac": 71, "adjust": [0, 21, 37, 63], "advanc": [31, 36], "advantag": 4, "adverb": [19, 31, 36], "adverb_limit": [19, 49], "affect": [0, 1, 29, 35, 44], "affirm": 49, "after": [0, 1, 31, 34, 36, 43, 61, 62, 64], "again": [32, 34], "against": [28, 31, 36, 52], "agarw": 62, "aggreg": [0, 1, 3, 11, 37, 44, 61, 62, 65, 66, 72], "agre": 47, "agreement": 49, "ah": [31, 36], "ai": 62, "aim": [39, 62], "airtim": [37, 62], "al": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "algorithm": [56, 57], "align": [35, 51], "all": [0, 1, 2, 6, 12, 13, 15, 19, 22, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 46, 48, 49, 51, 52, 55, 58, 61, 62, 64, 66, 71, 73], "allow": 1, "almaatouq": 59, "along": 1, "alongsid": 1, "alphabet": 49, "alphanumer": 71, "alreadi": [0, 1, 2, 4, 10, 12, 16, 67], "also": [0, 1, 2, 28, 30, 31, 32, 34, 36, 37, 38, 42, 47, 51, 54, 60, 61, 62, 64, 65, 67, 69, 71], "alsobai": 59, "altern": 59, "although": [1, 23, 31, 36], "alwai": [1, 55], "am": [31, 36, 42, 54, 62], "amaz": 
[48, 56], "ambient": 32, "american": 33, "ami": [47, 59, 62], "amic": 62, "among": [36, 37, 52, 55, 62], "amongst": [6, 35, 48], "an": [1, 2, 5, 8, 11, 12, 13, 21, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 45, 47, 48, 50, 51, 52, 54, 59, 60, 61, 62, 63, 65, 66, 68], "analys": [1, 62], "analysi": [0, 1, 11, 52, 62, 67, 71], "analyt": 62, "analyz": [0, 1, 2, 13, 14, 16, 17, 19, 20, 21, 22, 24, 28, 43, 52, 62, 67, 71], "analyze_first_pct": [0, 1, 2], "angri": 47, "ani": [0, 1, 29, 31, 33, 38, 54, 62, 71], "annot": [17, 50], "anoth": [30, 34, 36, 48], "answer": 29, "anybodi": [31, 36], "anyth": [1, 2, 23, 31, 36, 56], "anywher": [31, 36], "apartment": 42, "api": 47, "api_refer": 24, "apolog": [17, 50], "apologi": 49, "appear": [0, 15, 37, 38, 42, 64], "append": [1, 17, 64, 65, 66, 67], "appli": [4, 13, 18, 62, 64, 69], "applic": [29, 71], "appreci": 50, "approach": [32, 38, 42, 45, 46, 49, 53, 64], "appropri": 69, "ar": [0, 1, 2, 3, 5, 9, 10, 11, 15, 17, 19, 21, 23, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 51, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 71], "arcross": 34, "area": 62, "aren": [31, 36], "around": 2, "arous": 48, "arrai": [6, 8, 68], "articl": [37, 50], "ask": [20, 47, 54], "ask_ag": 49, "aspect": [50, 62], "assert_key_columns_pres": 71, "assign": [1, 31, 36, 38, 45, 46, 52, 59, 61, 63, 71], "assign_chunk_num": 69, "associ": [4, 15, 21, 29, 30, 31, 32, 36, 40, 45, 46, 47, 48, 61], "assum": [0, 1, 2, 10, 12, 16, 23, 41, 60, 61, 71], "assumpt": [1, 41, 61], "asterisk": 22, "attribut": [1, 11, 34, 51, 52, 56, 62], "author": [5, 31, 36, 59], "auto": 2, "automat": [0, 1, 61, 69], "auxiliari": [31, 36], "avail": [1, 61, 62, 63, 64, 67], "averag": [11, 13, 28, 30, 33, 34, 35, 40, 41, 46, 52, 64, 65, 66, 72], "avil": 62, "avoid": 30, "awar": 29, "awesom": 62, "b": [4, 34, 35, 45, 46, 55, 62], "back": 62, "bag": [32, 38, 42, 45, 46, 49, 53, 56, 57], "bare_command": [19, 49], "base": [0, 1, 2, 15, 18, 19, 31, 
32, 34, 35, 36, 37, 40, 42, 51, 52, 53, 54, 55, 56, 57, 61, 62, 63, 64, 65, 66, 71], "basic": [10, 11, 12, 16, 61, 62], "basic_featur": 11, "batch": 67, "batch_num": 1, "batch_siz": 67, "bay": [56, 57], "bbevi": 18, "becaus": [1, 2, 12, 21, 31, 36, 40, 56, 61], "becom": [44, 61, 62], "been": [1, 2, 12, 16, 31, 36, 61], "befor": [0, 1, 2, 17, 31, 36, 45, 48], "beforehand": 64, "begin": [34, 54, 58, 61, 62, 63], "behavior": [0, 2, 62, 63], "being": [4, 13, 14, 16, 17, 20, 21, 24, 31, 32, 36, 43, 47, 51, 55, 56, 60], "belong": [1, 42], "below": [1, 11, 21, 33, 36, 45, 48, 51, 61, 62, 69], "ber": 54, "bert": [0, 1, 31, 35, 36, 39, 46, 61, 64, 67], "bert_path": 67, "bert_sentiment_data": [1, 61, 64], "best": 29, "better": 61, "between": [4, 6, 13, 21, 23, 24, 28, 30, 31, 34, 35, 36, 37, 40, 45, 46, 55, 58, 59, 62, 64, 65], "betwen": 34, "beyond": 2, "big": 59, "binari": [10, 32, 38], "blame": 47, "blob": [1, 24, 61], "block": [22, 32, 48, 59], "blog": 15, "bold": [22, 64], "bool": [2, 63, 67, 71], "bootstrap": 62, "both": [1, 2, 42, 52, 54, 55, 59, 62], "bother": 50, "bottom": 59, "bought": 41, "bound": [29, 35, 36, 37, 42, 52, 55], "boundari": [34, 35], "break": [22, 48, 64], "brief": 44, "broader": 52, "broken": 59, "btw": 50, "bug": [1, 61], "build": [1, 7, 34, 45, 46, 62], "built": 11, "built_spacy_n": 15, "bullet": [22, 48, 64], "bunch": 59, "burst": 58, "bursti": [1, 11, 39, 58, 61, 65], "by_the_wai": 49, "c": [12, 34, 35, 45, 46, 62], "cach": [0, 1, 2, 51, 61], "calcul": [2, 5, 11, 12, 16, 18, 21, 28, 33, 41, 48, 49, 50, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68, 72, 73], "calculate_chat_level_featur": [1, 61, 69], "calculate_conversation_level_featur": 69, "calculate_hedge_featur": 64, "calculate_id_scor": 13, "calculate_info_divers": 65, "calculate_named_ent": 15, "calculate_num_question_na": 20, "calculate_politeness_senti": 64, "calculate_politeness_v2": 64, "calculate_team_bursti": 65, "calculate_textblob_senti": 64, "calculate_user_level_featur": 69, 
"calculate_vector_word_mimicri": 64, "calculate_word_mimicri": 64, "call": [1, 2, 8, 13, 61, 62, 64, 69], "can": [0, 1, 11, 23, 31, 32, 33, 34, 36, 37, 42, 43, 44, 47, 48, 49, 50, 52, 54, 60, 61, 62, 69], "can_you": 49, "cannot": [1, 31, 36, 45, 46, 49, 62], "cao": [21, 24, 33, 43, 44, 56, 57, 62], "cap": [22, 48, 64], "capit": [0, 2, 48], "captur": [29, 30, 32, 34, 35, 38, 41, 42, 55], "caract": 40, "cardiffnlp": [1, 61], "carefulli": 60, "casa_token": 5, "case": [1, 13, 16, 29, 30, 31, 36, 37, 41, 45, 46, 51, 55, 56, 59, 61], "casual": 43, "categori": [21, 32, 45, 46, 49, 52], "caus": [31, 32, 36, 59], "caveat": 1, "center": 62, "central": 34, "centroid": [34, 66], "certain": [5, 19, 30, 42, 45, 46, 49], "certainli": 42, "certainti": [11, 38, 39, 42, 64, 67], "cfm": 4, "chall": [1, 21, 39, 64, 70], "chang": [1, 34, 50, 61, 71], "charact": [2, 3, 15, 19, 37, 49, 62, 64, 65, 66, 71], "characterist": 62, "chat": [0, 1, 2, 4, 5, 6, 7, 8, 12, 13, 14, 16, 23, 25, 28, 29, 32, 35, 36, 41, 44, 45, 46, 49, 59, 61, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "chat_data": [2, 6, 7, 8, 26, 27, 28, 63, 64, 65, 66, 67, 71], "chat_df": 14, "chat_featur": [1, 61], "chat_level_data": 72, "chat_level_featur": 2, "chatlevelfeaturescalcul": [1, 2, 17, 21, 61, 64, 69], "chats_data": 73, "check": [19, 23, 44, 64, 67, 71], "check_embed": [1, 61, 69], "chen": 62, "choos": 60, "chose": 1, "chronolog": 1, "chunk": [34, 59, 63], "chunk_num": 63, "circlelyt": 13, "citat": [21, 24], "cite": 50, "clarif": [16, 32, 64], "class": [1, 2, 31, 61, 62, 64, 65, 66], "classif": [21, 64], "classifi": [16, 21, 50, 56, 57], "classify_ntri": 16, "classify_text_dalechal": 21, "clean": [2, 17, 19, 67], "clean_text": 19, "clear": 1, "close": [31, 48, 62], "closer": [45, 46, 59], "clue": 62, "cmu": 12, "code": [6, 18, 29, 32, 51, 55, 61, 62, 68], "coeffici": [4, 39, 62, 65, 68], "coerce_to_date_or_numb": 23, "cognit": 62, "col": 2, "colab": [0, 1], "collabor": [59, 62], "collaps": 2, "collect": [1, 2, 34, 49, 
50, 52, 61, 62], "colleg": 33, "column": [0, 2, 4, 6, 7, 8, 9, 12, 13, 14, 16, 18, 23, 25, 28, 51, 56, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "column_count_frequ": 28, "column_count_mim": 28, "column_mimc": 28, "column_nam": 71, "column_to_summar": 72, "com": [1, 2, 4, 5, 13, 15, 18, 64, 68, 71], "comb": 62, "combin": [0, 1, 6, 28, 64, 71], "come": [1, 12, 13, 21, 32, 33, 58, 61], "comm": [1, 61], "command": [1, 61], "comment": 48, "commit": 23, "commit_data": 19, "common": [0, 32, 62, 64], "commonli": 37, "commun": [0, 1, 11, 44, 48, 55, 60, 62, 64], "companion": 1, "compar": [2, 31, 35, 44, 45, 52, 64, 71, 73], "compat": [1, 61], "complement": [31, 36], "complet": [1, 2, 55], "complex": [0, 35, 43, 50, 62], "compon": 50, "comprehens": [33, 48], "compress": 71, "comput": [0, 2, 4, 5, 6, 10, 11, 12, 13, 14, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 45, 46, 49, 52, 55, 62, 64, 65, 66, 69, 73], "compute_frequ": 28, "compute_vectors_from_preprocess": [0, 2], "computetf": 28, "conain": 61, "concat_bert_featur": [1, 61, 64], "concaten": [19, 49, 64, 71], "concentr": 55, "concept": [29, 39, 42, 62], "conceptu": [61, 62], "concis": 43, "concret": 29, "conduct": 1, "confid": [2, 5, 15, 30, 47, 64], "conflict": 62, "confound": 44, "congruent": 34, "conjection_seper": 19, "conjunct": [19, 31, 36, 49], "conjunction_start": 49, "connect": 39, "conscious": 35, "consecut": 22, "consequ": 0, "consid": [1, 33, 37], "consider": [61, 62], "consist": [36, 40, 41], "constitut": 41, "constrain": [34, 35], "construct": [11, 55, 62], "constructor": 47, "consult": 5, "contain": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 29, 30, 35, 38, 42, 47, 49, 55, 61, 62, 63, 64, 67, 71, 72, 73], "content": [0, 1, 12, 13, 28, 34, 36, 39, 41, 42, 45, 46, 62, 64, 67], "content_mimicry_scor": 28, "content_word_mimicri": 28, "context": [2, 32, 42, 48, 62, 71], "continu": [56, 57], "contract": 49, "contrast": 39, "contribut": [13, 34, 37, 62], "control": 1, "conv": [1, 
61], "conv_data": [2, 65], "conv_features_al": [1, 61], "conv_features_bas": [1, 61], "conv_level_featur": 2, "conv_to_float_arr": 8, "convei": [6, 34, 52], "conveni": [1, 61], "convers": [0, 2, 3, 4, 6, 7, 8, 9, 12, 13, 23, 25, 28, 29, 31, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 58, 59, 61, 63, 64, 65, 66, 68, 71, 72, 73], "conversation_id": [2, 28, 61, 71], "conversation_id_col": [0, 1, 2, 4, 6, 7, 8, 9, 13, 23, 25, 26, 27, 61, 63, 64, 65, 66, 68, 72, 73], "conversation_num": [0, 1, 2, 6, 7, 66, 71, 73], "conversationlevelfeaturescalcul": [2, 65, 69], "convert": [8, 41, 49, 71], "convict": 5, "convokit": [17, 50, 62, 64], "coordin": 55, "copi": [0, 1], "copular": [31, 36], "core": [2, 34, 69], "cornel": 17, "corpu": 50, "corrado": 37, "correl": [41, 55], "correspond": [30, 34, 35, 40, 49, 55, 66], "cosin": [6, 7, 13, 28, 31, 34, 35, 36, 40, 45, 46, 65], "could": [1, 31, 33, 36, 50, 54], "could_you": 49, "couldn": [31, 36], "count": [1, 3, 12, 14, 15, 16, 19, 21, 25, 28, 30, 31, 32, 36, 39, 41, 43, 44, 49, 52, 53, 54, 56, 58, 64, 65, 66], "count_all_cap": 22, "count_bullet_point": 22, "count_charact": 3, "count_difficult_word": 21, "count_ellips": 22, "count_emoji": 22, "count_emphasi": 22, "count_line_break": 22, "count_link": 22, "count_match": [19, 49], "count_messag": 3, "count_numb": 22, "count_parenthes": 22, "count_quot": 22, "count_responding_to_someon": 22, "count_spacy_match": 19, "count_syl": 21, "count_turn": 25, "count_turn_taking_index": 25, "count_user_refer": 22, "count_word": 3, "countabl": 65, "countd": 36, "counterfactu": 50, "cours": [16, 31, 34, 36, 63], "creat": [0, 1, 2, 13, 19, 31, 40, 42, 61, 62, 64, 65, 66, 71], "create_chunk": 63, "create_chunks_messag": 63, "create_cumulative_row": 71, "credit": 33, "crowd": 13, "csv": [1, 2, 61, 62, 67], "cumul": [1, 2, 71], "cumulative_group": [0, 1, 2, 71], "current": [1, 11, 23, 31, 34, 35, 36, 40, 45, 46, 58, 61, 64, 71], "curt": 43, "custom": [0, 62], "custom_featur": [0, 
1, 2, 61], "customiz": 62, "cut": 1, "cutoff": [2, 15, 47, 64], "d": [1, 31, 34, 36, 61], "dale": [1, 21, 39, 64, 70], "dale_chall_help": 21, "danescu": 50, "dash": 22, "data": [0, 2, 6, 7, 8, 9, 13, 19, 20, 32, 37, 40, 41, 47, 51, 55, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "datafram": [0, 1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 23, 25, 28, 37, 47, 49, 59, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "dataknowsal": 15, "dataset": [1, 2, 9, 12, 13, 28, 31, 41, 47, 52, 61, 64, 65, 66, 73], "date": [1, 61], "datetim": [23, 58], "dcosta": 62, "deal": [50, 59], "death": 1, "debat": 59, "decid": 62, "decis": [1, 13, 62], "declar": [1, 62, 69], "deepli": 62, "default": [0, 1, 2, 5, 13, 16, 30, 34, 35, 42, 47, 62, 63, 66, 67, 71, 73], "defer": [17, 50], "defin": [0, 11, 21, 31, 34, 36, 40, 59, 62, 64, 65, 66, 70], "definit": [1, 3, 44], "degre": [6, 30, 36, 45, 46, 55], "delet": 29, "deliber": 1, "demo": 61, "democrat": 1, "demystifi": 62, "denomin": 59, "densiti": 60, "dep_": 49, "dep_pair": 19, "depend": [0, 1, 10, 19, 32, 49, 52, 61, 63], "deriv": [2, 11, 65, 66], "describ": [1, 11, 62], "descript": [1, 61], "design": [0, 1, 2, 13, 34, 62], "desir": [2, 63, 72], "detail": [0, 33, 41, 43, 61, 62], "detect": [1, 32, 37, 38, 47, 48, 49, 54], "determin": [13, 18, 31, 35, 36, 40, 45, 46, 71], "dev": 24, "develop": [5, 37, 40, 62], "deviat": [4, 5, 29, 40, 41, 55, 58, 65, 72, 73], "df": [4, 8, 9, 12, 13, 16, 18, 23, 28, 63, 71], "dict": [17, 19, 28, 67], "dictionari": [1, 15, 17, 19, 28, 30, 42, 49, 61, 67], "did": [1, 31, 36, 37, 47, 50, 54, 62], "didn": [31, 36], "differ": [0, 1, 2, 4, 11, 12, 23, 29, 31, 34, 36, 37, 39, 40, 44, 45, 46, 47, 49, 55, 62, 63, 64, 65, 66, 71], "differenti": [49, 59], "difficult": [21, 33], "difficult_word": 21, "difficulti": 33, "dimens": [40, 62], "dimension": [34, 35], "dinner": 41, "direct": [34, 43, 45, 47, 50, 69], "direct_quest": [32, 50, 54], "direct_start": 50, "directli": [1, 62, 69], "directori": [0, 2, 19, 61, 65, 
67], "disagr": 49, "disagre": 51, "discours": [31, 36], "discret": [31, 36, 45, 46], "discurs": [0, 1, 6, 8, 39, 40, 61, 65, 66], "discursive_divers": 11, "discus": 8, "discuss": [0, 1, 31, 34, 39, 40, 42, 43, 61, 62, 71], "dispers": 68, "displai": [1, 34, 42, 46, 61], "dispos": 1, "distanc": [34, 35, 40], "distinct": [36, 59], "distinguish": 59, "div": 16, "diverg": [6, 34, 35], "divers": [0, 1, 6, 8, 13, 39, 61, 65], "divid": [16, 34, 59, 63], "dl": [21, 24], "do": [0, 1, 29, 31, 34, 36, 37, 43, 49, 50, 54, 62, 69], "doc": 19, "doc_top": 13, "document": [1, 17, 61, 69], "doe": [1, 2, 29, 40, 42, 43, 45, 47, 54, 61, 71], "doesn": [0, 1, 2, 29, 31, 36, 45, 61], "doi": [5, 6, 21, 24, 64], "domain": 50, "don": [31, 36, 49, 54, 62, 67], "done": [2, 50], "dot": 22, "doubl": 30, "down": [31, 36], "download": [1, 61], "download_resourc": [1, 61], "downstream": [17, 62], "dozen": 62, "drive": [62, 69], "driver": [2, 61, 64, 65, 66], "drop": [0, 2, 64], "due": [34, 59], "duncan": 62, "duplic": [1, 2, 71], "durat": [58, 63], "dure": [2, 55, 59, 62], "dynam": [59, 61], "e": [0, 1, 2, 4, 15, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 47, 48, 49, 52, 54, 56, 59, 61, 63, 65, 66, 71], "e2": [21, 70], "each": [0, 1, 2, 3, 4, 7, 8, 9, 11, 12, 15, 17, 19, 23, 25, 28, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 55, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73], "earlier": [0, 1, 2, 42], "easi": [1, 21, 62, 70], "easier": 21, "easili": 33, "easy_word": 21, "eat": 34, "echo": 31, "econom": 37, "edg": [29, 59], "edu": [1, 12, 16, 17, 70], "effect": [1, 41], "effici": 1, "effort": 55, "either": [20, 23, 52, 55], "elaps": [23, 58], "element": [1, 6], "ellips": [22, 48, 64], "els": [1, 22, 47, 64], "embed": [8, 31, 34, 35, 36, 45, 46, 65, 66, 67, 69], "emili": [30, 35, 45, 46, 47, 59, 62], "emoji": [22, 48, 64], "emot": [1, 61], "emoticon": 48, "emphas": [22, 48, 64], "emphasi": 48, "empirica": [1, 2, 71], "emploi": 45, "empti": [0, 2, 13], "en": [1, 21, 
24, 61, 70], "en_core_web_sm": [1, 61], "enabl": 71, "enclos": 22, "encod": [1, 8], "encompass": 62, "encount": [1, 34, 35, 61], "encourag": 64, "end": [0, 1, 15, 20, 23, 34, 54, 62, 63], "engag": 43, "engin": 2, "english": [34, 42], "enjoi": 62, "ensur": [0, 1, 40, 49, 61, 63, 67, 71], "entir": [1, 12, 36, 40, 41, 52, 59, 62, 73], "entiti": [0, 1, 2, 15, 39, 64], "entityrecogn": 47, "entri": [1, 28, 61], "ep8dauru1ogvjurwdbof5h6ayfbslvughjyiv31d_as6ppbt": 5, "equal": [1, 21, 23, 34, 37, 40, 55, 59, 61, 62, 63], "equival": [0, 1, 41, 55, 61], "eric": 62, "error": [1, 16, 61], "especi": [41, 62], "essenti": [51, 71], "estim": 31, "et": [1, 5, 16, 18, 21, 24, 30, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 64], "etc": [10, 15, 16, 17, 42], "evalu": [5, 47, 50], "evan": 62, "even": [0, 1, 2, 34, 37, 42, 62, 63, 67], "evenli": [34, 55], "event": [1, 34, 55, 61], "ever": 62, "everi": [1, 4, 13, 31, 34, 35, 36, 61, 62], "everybodi": [31, 36], "everyon": [31, 36, 47, 62], "everyth": [31, 36, 56], "everywher": [31, 36], "evolut": 35, "evolv": [35, 71], "exactli": [1, 2, 71], "examin": [40, 62, 63], "exampl": [0, 10, 11, 15, 21, 24, 29, 31, 32, 34, 37, 42, 43, 48, 50, 51, 54, 56, 59, 60, 61, 62], "example_data": 1, "exce": 15, "exchang": [12, 35, 39, 40, 45, 55, 64], "exclud": [0, 41, 42], "exclus": [41, 42], "excus": 32, "exhibit": 35, "exist": [0, 1, 2, 55, 61, 62, 63, 64, 67], "expand": 49, "expect": [1, 37, 47], "expected_valu": 47, "explain": [0, 29], "explan": [29, 43], "explor": [61, 62], "express": [5, 14, 30, 31, 32, 36, 38, 42, 64], "extend": 1, "extens": [43, 44], "extent": [1, 4, 7, 12, 31, 34, 35, 37, 51, 55, 59, 61], "extern": 48, "extra": 51, "extract": [1, 17, 19, 28, 40, 50, 64], "extrem": [55, 56, 57], "face": [1, 51, 61], "facilit": [62, 71], "fact": [4, 35, 50, 54, 59], "factual": [17, 24, 50], "fail": [1, 61], "fals": [0, 1, 2, 31, 54, 61, 71], "famili": 42, "far": [34, 35, 46, 50, 62], "faster": 14, "feat_count": 19, 
"featuer": 2, "featur": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67], "feature_build": [0, 1, 61], "feature_dict": [1, 61], "feature_method": [64, 65], "feature_nam": [1, 61], "featurebuild": [0, 2, 47, 69], "features_conceptu": [1, 61], "few": [48, 62], "fewer": [12, 60], "fflow": 11, "field": [13, 17], "file": [0, 2, 12, 14, 19, 61, 65, 67], "filenam": [1, 2, 19], "filenotfounderror": 67, "fill": 71, "filler": [37, 60], "filler_paus": 49, "filter": [19, 62], "final": [1, 2, 34, 42, 62], "find": [1, 19, 28, 50], "fingertip": 62, "finit": 55, "first": [0, 1, 2, 11, 12, 16, 19, 31, 34, 35, 36, 39, 40, 41, 42, 45, 46, 49, 52, 54, 59, 61, 62, 64, 70, 71], "first_person": 12, "first_person_plur": 49, "first_person_raw": [12, 16], "first_person_singl": 49, "five": 37, "fix": 52, "flag": 71, "float": [2, 4, 5, 6, 8, 10, 13, 14, 16, 21, 24, 25, 28, 68], "floor": 59, "flow": [0, 1, 7, 31, 36, 39, 41, 45, 46, 61, 64], "focal": [31, 36], "focu": 41, "folder": [0, 1, 19], "follow": [1, 2, 16, 17, 29, 31, 32, 33, 41, 42, 47, 49, 50, 53, 55, 59, 60, 61, 64, 65], "for_m": 49, "for_you": 49, "forc": [0, 1, 61], "form": 1, "formal": [1, 61], "formal_titl": 49, "format": [1, 8, 17, 22, 47, 48, 61, 62, 64], "former": [45, 46], "formula": [33, 42, 59, 64, 70], "fornt": 1, "forward": [0, 1, 7, 39, 41, 61, 64], "forward_flow": 35, "found": [1, 5, 28, 30, 33, 61, 69], "four": [1, 8], "fourth": 33, "frac": 55, "fraction": 59, "frame": 2, "framework": [49, 50, 62], "frequenc": [28, 31, 44, 64], "frequency_dict": 28, "fridai": 34, "from": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 19, 21, 28, 29, 31, 32, 33, 34, 35, 36, 39, 41, 42, 49, 50, 51, 53, 55, 56, 57, 58, 61, 62, 64, 65, 66, 67, 71], "full": [1, 2, 37], "full_empirical_dataset": 1, "fulli": [32, 48], "functinon": 12, "function": [1, 2, 3, 4, 10, 11, 12, 13, 14, 16, 20, 21, 23, 28, 31, 39, 44, 45, 46, 50, 56, 57, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 
71, 72, 73], "function_mimic_word": 28, "function_mimicry_scor": 28, "function_word_mimicri": 28, "function_word_refer": 28, "fund": 62, "further": [1, 2, 61, 71], "futur": [23, 66], "g": [0, 1, 4, 15, 20, 29, 31, 32, 36, 37, 38, 41, 42, 47, 48, 52, 54, 59, 61, 63, 65, 66, 71], "game": [1, 2, 59, 71], "gaug": [33, 52], "gener": [0, 2, 9, 11, 12, 16, 21, 31, 34, 35, 36, 40, 42, 45, 46, 49, 51, 59, 67, 69, 71, 72], "generaliz": 23, "generate_bert": 67, "generate_certainty_pkl": 67, "generate_lexicon_pkl": 67, "generate_vect": 67, "gensim": 40, "get": [0, 16, 20, 21, 28, 30, 31, 36, 49, 66], "get_all_dd_featur": 11, "get_averag": 72, "get_centroid": 66, "get_certainti": 5, "get_certainty_scor": 64, "get_content_words_in_messag": 28, "get_conversation_level_aggreg": 65, "get_cosine_similar": 6, "get_dale_chall_easy_word": [21, 70], "get_dale_chall_score_and_classf": 64, "get_dd": 6, "get_dd_featur": 8, "get_dep_pair": [19, 49], "get_dep_pairs_noneg": [19, 49], "get_discursive_diversity_featur": 65, "get_first_pct_of_chat": 2, "get_first_person_word": [12, 70], "get_forward_flow": [7, 64], "get_function_word": 70, "get_function_words_in_messag": 28, "get_gini": 68, "get_gini_featur": 65, "get_info_divers": 13, "get_info_exchange_wordcount": 12, "get_liwc_count": 14, "get_max": 72, "get_mimicry_bert": 28, "get_min": 72, "get_moving_mimicri": 28, "get_named_ent": 64, "get_nan_vector": 27, "get_polarity_scor": 24, "get_politeness_strategi": 17, "get_politeness_v2": 18, "get_proportion_first_pronoun": 16, "get_question_word": 70, "get_reddit_featur": 64, "get_senti": 67, "get_stdev": 72, "get_subjectivity_scor": 24, "get_sum": 72, "get_team_bursti": 4, "get_temporal_featur": [4, 64], "get_time_diff": 23, "get_time_diff_startend": 23, "get_turn": 25, "get_turn_id": 71, "get_turn_taking_featur": 65, "get_unique_pairwise_combo": 6, "get_user_average_datafram": 72, "get_user_level_aggreg": 65, "get_user_level_averaged_featur": 66, "get_user_level_summary_statistics_featur": 66, 
"get_user_level_summed_featur": 66, "get_user_network": [11, 66], "get_user_sum_datafram": 72, "get_variance_in_dd": 26, "get_within_person_disc_rang": 27, "get_word_ttr": 16, "get_zscore_across_all_chat": 73, "get_zscore_across_all_convers": 73, "gina": 62, "gini": [39, 62, 65, 68], "gini_coeffici": [11, 69], "github": [0, 1, 2, 18, 71], "give": [0, 1, 29, 37], "give_ag": 49, "given": [5, 6, 13, 14, 28, 30, 31, 33, 34, 35, 36, 40, 41, 55, 59, 66, 67, 71], "go": [1, 34, 35, 45, 46, 50, 62], "goal": 62, "good": [50, 56, 62], "goodby": 49, "googl": [0, 1], "got": [31, 36], "gotta": [31, 36], "grade": 33, "grader": 21, "grai": 35, "grammat": 36, "granularli": 35, "grate": 62, "gratitud": [17, 49, 50], "great": [47, 50, 51, 56, 59, 60, 62], "greater": 55, "greet": 50, "groceri": 41, "group": [0, 1, 2, 4, 13, 29, 33, 34, 41, 52, 59, 62, 68, 71, 72], "grouping_kei": [0, 1, 2, 71], "gt": 22, "guess": 10, "gun": 1, "gy": 15, "gym": 34, "ha": [0, 1, 2, 32, 34, 35, 37, 42, 43, 46, 52, 54, 55, 56, 59, 61, 62, 63, 71], "had": [1, 31, 36, 54, 61], "hadn": [31, 36], "handl": [19, 29, 71], "happen": [1, 2, 55, 62, 63], "happi": 42, "harder": 21, "hashedg": [17, 50], "hasn": [31, 36], "hasneg": 50, "hasposit": 50, "hate": 31, "have": [0, 1, 2, 10, 12, 16, 31, 34, 36, 37, 40, 41, 42, 45, 46, 50, 54, 59, 60, 61, 62, 71], "haven": [31, 36], "he": [1, 31, 36], "header": 18, "hear": 32, "heart": [61, 62], "heat": 1, "heavi": 62, "hedg": [11, 30, 39, 49, 50, 64], "hei": [1, 35, 45, 46, 50], "helena": [47, 62], "hello": [0, 43, 49], "help": [0, 31, 34, 36, 43, 45, 46, 52, 58, 69], "helper": [23, 67], "her": [30, 31, 36], "here": [0, 1, 29, 34, 41, 42, 47, 61, 66], "herself": [31, 36], "hesit": [60, 64], "hi": [31, 35, 36, 43, 45, 46], "hierach": 71, "hierarch": 71, "high": [0, 1, 2, 61, 62, 71], "higher": [21, 31, 34, 36, 40, 41, 42, 44, 45, 46, 55, 60], "highest": 71, "highlight": 1, "him": [31, 36], "himself": [31, 36], "hmm": [31, 36], "hoc": 62, "hole": 62, "home": 42, "homework": 
34, "homonym": 31, "hood": 1, "hope": 35, "host": [45, 46], "hour": 48, "how": [1, 5, 29, 30, 31, 34, 35, 36, 39, 43, 45, 51, 52, 54, 56, 62], "howev": [0, 1, 3, 35, 40, 42, 44, 54, 56, 61, 62], "howitwork": 1, "html": [1, 15, 17, 24, 61], "http": [1, 2, 4, 5, 6, 12, 13, 15, 16, 17, 18, 21, 24, 41, 45, 46, 47, 61, 64, 68, 70, 71], "hu": [1, 42, 62], "hug": [1, 51, 61], "huggingfac": 1, "huh": [31, 32, 36], "human": [37, 50, 62], "hyperlink": 48, "hyphen": [1, 61], "hypothet": 42, "i": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 73], "iby1": 5, "id": [2, 4, 7, 23, 28, 62, 66, 68, 71, 72, 73], "idea": [12, 35, 40, 47, 51], "ident": [34, 35], "identif": 1, "identifi": [0, 1, 2, 4, 8, 9, 15, 23, 25, 30, 41, 47, 50, 52, 61, 63, 64, 71, 72], "identiif": [13, 71], "ignor": [1, 32], "illustr": [1, 41, 48, 62], "imagin": 1, "immedi": [31, 35, 64], "impact": [1, 60], "impersonal_pronoun": 49, "implement": 64, "impli": 37, "import": [31, 32, 36, 44, 45, 62, 69], "incent": 13, "includ": [0, 1, 2, 10, 17, 22, 31, 32, 35, 36, 42, 45, 46, 51, 52, 56, 61, 62, 66, 71], "inclus": [13, 71], "incongru": [8, 34], "incorpor": [1, 42, 45, 46], "increas": [1, 42, 62], "increment": 71, "independ": 1, "index": [1, 2, 4, 13, 25, 37, 39, 55, 61, 65], "indic": [1, 2, 16, 21, 22, 30, 32, 34, 35, 36, 40, 41, 43, 44, 48, 49, 50, 52, 55, 60, 63, 71], "indirect": 50, "indirect_btw": 50, "indirect_greet": 50, "indirectli": 69, "individu": [0, 1, 5, 11, 31, 34, 37, 45, 50, 59, 60, 62, 72], "inequ": 37, "infer": [1, 51, 67], "influenc": 1, "info": [13, 18, 64], "info_divers": 13, "info_exchang": 64, "info_exchange_wordcount": [41, 64], "info_exchange_zscor": 11, "inform": [6, 11, 12, 13, 24, 32, 34, 39, 48, 62, 64, 65], "informal_titl": 49, "information_divers": 11, "initi": [2, 62, 63, 64, 65, 66], "input": 
[0, 2, 4, 6, 12, 13, 14, 15, 16, 19, 20, 21, 22, 28, 31, 50, 55, 60, 62, 63, 64, 65, 66, 67, 71, 72], "input_column": [65, 66], "input_data": [25, 68, 72], "input_df": [1, 2, 61, 71], "inquiri": [30, 39, 52], "insid": 1, "insight": 1, "inspir": 15, "instal": [1, 61, 62], "instanc": [1, 22, 50, 59, 66], "instanti": 2, "insteac": 1, "instead": [1, 2, 62], "instruct": [1, 61], "int": [2, 3, 10, 13, 15, 16, 19, 20, 21, 22, 28, 63, 64, 67], "intact": 71, "integ": [13, 40, 47], "intend": 59, "interact": [1, 11, 43, 44, 62, 69], "interconnect": 62, "interest": [1, 61, 62], "interfac": 62, "intermedi": [59, 64], "intern": 29, "interpret": [0, 23], "interrupt": 59, "interv": [58, 65], "introduc": 62, "introduct": [11, 61], "invalid": 67, "invers": 64, "involv": [41, 62, 65], "io": [1, 24, 47, 61], "ipynb": [0, 1], "is_hedged_sentence_1": 10, "isn": [1, 31, 36], "issu": [1, 31, 36, 37, 42, 61], "ital": 64, "italic": 22, "item": [0, 71], "its": [0, 2, 15, 31, 35, 36, 40, 41, 47, 54, 55, 64, 69], "itself": [23, 31, 36, 44], "john": 1, "jonson": 62, "journal": [5, 64], "json": [1, 61], "jurafski": 70, "juri": 1, "juries_df": 1, "jury_conversations_with_outcome_var": 1, "jury_feature_build": 1, "jury_output": 1, "jury_output_chat_level": [1, 61], "jury_output_turn_level": 1, "just": [1, 2, 31, 36, 46, 50, 59, 61, 62], "katharina": 34, "keep": [1, 71], "kei": [1, 2, 4, 19, 28, 30, 54, 61, 71], "keyerror": 71, "keyword": [19, 49], "kind": [10, 62], "kitchen": 42, "knob": 0, "know": [1, 30], "knowledg": 29, "known": [1, 32, 61], "kumar": 62, "kw": 19, "lab": [1, 2, 62, 71], "label": [1, 15, 21, 51], "lack": [31, 38, 45, 46], "languag": [15, 34, 42, 50, 62], "larg": [31, 69], "larger": [0, 61], "last": [1, 31], "late": 32, "later": [0, 1, 2, 42, 61], "latest": [1, 61], "latter": [31, 36], "lda": [13, 40], "learn": [1, 61, 62], "least": [10, 32, 42, 63, 67], "led": 62, "legal": 49, "lemmat": [13, 40], "len": 28, "length": [35, 39, 41, 42, 44], "less": [13, 32, 50, 52, 55, 62, 63], 
"let": [41, 49, 53], "let_me_know": 49, "letter": [49, 71], "level": [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 14, 16, 23, 61, 64, 65, 66, 71, 72], "lexic": [10, 12, 14, 16, 31, 32, 36, 42, 60, 62, 64], "lexical_featur": [14, 64], "lexical_features_v2": [10, 11], "lexicon": [5, 10, 14, 30, 39, 50, 52, 67, 69], "lexicons_dict": 67, "librari": [34, 51, 56, 57], "lift": 62, "light": 61, "like": [1, 22, 31, 34, 36, 41, 50, 61, 62], "limiat": 32, "limit": [11, 32, 37, 42, 54], "line": [0, 1, 19, 22, 48, 61, 62, 64], "linear": 64, "linguist": [18, 19, 30, 39, 50, 52], "link": [22, 29, 48, 50, 64], "list": [1, 2, 6, 7, 10, 11, 12, 13, 15, 19, 20, 21, 22, 28, 31, 33, 36, 37, 42, 48, 49, 50, 53, 54, 61, 64, 65, 66, 67, 68, 70, 71], "literatur": 62, "littl": 38, "littlehors": 1, "liu": [42, 52], "live": [1, 54], "liwc": [14, 30, 39, 51, 52, 56, 62], "liwc_featur": [10, 14], "lix": 34, "ll": [1, 31, 36, 61], "load": [19, 69], "load_saved_data": 19, "load_to_dict": 19, "load_to_list": 19, "loc": 15, "local": [1, 51, 61], "locat": [1, 62], "long": [4, 42], "longer": [30, 41, 43, 48, 61, 62], "look": [2, 34, 61, 65, 66], "loos": 36, "lot": [31, 36], "loud": 60, "love": [31, 56], "low": [1, 2, 29, 55, 60, 71], "lower": [21, 31, 33, 36, 41, 42, 44, 55, 60], "lowercas": [2, 13, 40, 48, 49, 71], "lowest": 71, "lpearl": 16, "lst": 6, "m": [30, 31, 36], "made": [1, 23, 35, 59, 61, 62], "magnitud": 55, "mai": [1, 2, 11, 31, 32, 35, 36, 37, 41, 42, 43, 44, 54, 61, 62, 71], "main": [1, 2, 5, 62, 64, 65, 66], "make": [0, 1, 5, 34, 55, 56, 62, 66, 69, 71], "man": 62, "mani": [1, 4, 11, 32, 37, 41, 60, 62, 66], "manner": [55, 62], "manual": [1, 61], "map": [13, 34], "mark": [19, 20, 22, 43, 54, 64, 71], "marker": [18, 32, 39, 42, 50, 51, 52, 54, 56], "marlow": 44, "matarazzo": 62, "match": [1, 5, 16, 19, 30], "math": 34, "matter": 47, "max": 66, "max_num_chunk": 63, "maxim": [34, 35, 37], "maximum": [63, 65, 72], "mayb": [38, 47], "mcfarland": 70, "me": [31, 32, 36, 41, 50, 53], "mean": [0, 1, 4, 
6, 11, 13, 21, 29, 31, 34, 36, 40, 41, 42, 47, 55, 56, 58, 61, 62, 65, 66, 73], "meaning": [41, 55], "meaningless": 41, "meant": 39, "measur": [0, 7, 12, 13, 20, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 64, 68], "mechan": 32, "medium": 21, "meet": 48, "member": [13, 34, 37, 55], "merg": [2, 8, 65, 66], "merge_conv_data_with_origin": 2, "messag": [0, 1, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 39, 41, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 71, 73], "messaga": 61, "message_col": [0, 1, 2, 12, 13, 14, 61, 64, 65, 67, 71], "message_embed": [6, 7, 8], "message_lower_with_punc": 71, "metadata": [0, 1], "method": [5, 31, 41, 50, 62], "metric": [0, 1, 2, 8, 30, 34, 35, 46, 47, 48, 55, 66], "michael": 1, "mid": [1, 2, 71], "middl": [21, 34, 63], "might": [0, 1, 29, 43, 48, 53], "mikeyeoman": [18, 64], "mileston": 34, "mimic": [28, 31, 36, 45], "mimic_word": 28, "mimick": [28, 31, 64], "mimicri": [0, 1, 28, 31, 35, 36, 39, 61, 64], "mimicry_bert": [45, 46], "mind": [1, 35, 50], "mine": [31, 36, 53, 59], "minim": [0, 41, 60], "minimum": [65, 72], "minu": [12, 41, 64], "minut": [55, 58], "mirror": 1, "miss": [1, 32, 61, 71], "mitig": [31, 36], "mizil": 50, "mm": [31, 36], "mnsc": 6, "modal": 50, "mode": 60, "model": [1, 13, 15, 31, 34, 35, 36, 40, 45, 46, 47, 51, 62, 67], "modif": 35, "modifi": [1, 9, 19, 32, 64], "modul": [0, 1, 11, 34, 49, 61, 69], "monologu": 59, "more": [0, 1, 2, 11, 12, 22, 23, 24, 31, 32, 34, 36, 37, 40, 41, 42, 43, 44, 45, 46, 50, 52, 55, 59, 61, 62, 71], "morn": 1, "most": [24, 31, 55, 62, 69], "motiv": 61, "move": [0, 1, 28, 31, 36, 39, 45, 59, 61], "movi": 31, "much": [1, 31, 34, 35, 36, 45, 62], "multi": [1, 2, 71], "multidimension": [45, 46], "multipl": [0, 1, 2, 19, 62, 71], "must": [1, 6, 62, 71], "my": [30, 31, 35, 36, 45, 46, 50, 53], "my_chat_featur": 1, "my_feature_build": 61, "my_fil": 1, 
"my_output": 61, "my_output_chat_level": 61, "my_output_conv_level": 61, "my_output_user_level": 61, "my_pandas_datafram": 61, "myself": [31, 36, 53], "n": [35, 45, 46, 47, 57, 59, 60], "n_chat": 59, "na": [5, 33, 43, 44, 48, 49, 50, 53, 58], "naiv": [2, 20, 32, 34, 38, 39, 53, 56, 57, 64], "name": [0, 2, 4, 7, 8, 9, 12, 13, 14, 15, 17, 19, 23, 25, 28, 30, 32, 35, 39, 45, 46, 50, 51, 56, 63, 64, 66, 67, 68, 71, 72, 73], "name_to_train": 47, "named_ent": [15, 47], "named_entity_recognition_featur": 11, "nan": [0, 34], "nate": [35, 45, 46], "nathaniel": [35, 45, 46], "nativ": 50, "natur": [43, 55], "ndarrai": 68, "nearest": [13, 40], "nearli": 62, "necessari": [63, 67], "need": [0, 1, 2, 21, 62, 66, 67], "need_sent": 67, "need_senti": 67, "neg": [1, 24, 29, 31, 34, 35, 36, 42, 50, 51, 52, 54, 56, 61, 62, 67], "negat": [19, 49], "negative_bert": [1, 51, 61], "negative_emot": [49, 51, 52, 56], "negoti": 62, "neighborhood": 54, "neither": 30, "ner": 15, "ner_cutoff": [0, 1, 2, 47, 64], "ner_train": 64, "ner_training_df": [0, 1, 2, 47, 64], "nest": [0, 1, 2, 22, 71], "net": [45, 46], "network": 11, "neutral": [1, 5, 24, 30, 51, 55, 61, 67], "neutral_bert": [1, 51, 61], "never": 1, "new": [1, 4, 13, 34, 61, 64, 65, 66, 72], "new_column_nam": 72, "next": [1, 32, 47, 58], "nice": [1, 50, 54, 61], "nicknam": 1, "niculescu": 50, "night": 31, "nikhil": [59, 62], "nltk": [1, 42, 61], "nobodi": [31, 36], "nois": 32, "non": [1, 2, 28, 31, 37, 48, 61, 62, 71], "none": [1, 2, 19, 23, 37, 55, 61, 64, 65, 66, 67], "nor": 30, "normal": [19, 31], "notabl": 62, "note": [0, 1, 2, 12, 16, 20, 42, 61, 71], "notebook": [0, 1], "noth": [31, 36, 56], "noun": 1, "novel": [45, 46], "now": [0, 1, 2], "nowher": [31, 36], "np": 68, "ntri": 32, "null": 34, "num": 48, "num_char": 65, "num_chunk": [27, 63], "num_hedge_word": 10, "num_messag": 65, "num_named_ent": [15, 47], "num_row": 63, "num_top": 13, "num_word": [12, 16, 65], "number": [0, 3, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 25, 28, 31, 
32, 34, 36, 37, 40, 41, 42, 43, 44, 47, 48, 49, 54, 56, 58, 59, 60, 62, 63, 64, 66, 69, 71, 72], "numer": [0, 1, 13, 33, 68, 72, 73], "numpi": [1, 61, 68], "o": 35, "object": [1, 2, 19, 44, 50, 57, 58, 61, 62, 64, 65, 66], "obtain": [1, 13, 17, 23, 24, 34, 61], "occur": [0, 4, 31, 42, 71], "occurr": 19, "off": [1, 31, 36], "offer": 0, "offici": 61, "often": [36, 47, 48, 62], "oh": [31, 36, 48], "okai": [31, 36], "older": [1, 61], "on_column": [18, 23, 28, 68, 72, 73], "onc": [1, 2, 11, 58, 61, 62], "one": [0, 1, 2, 4, 10, 12, 19, 23, 25, 29, 31, 32, 36, 37, 47, 51, 56, 59, 61, 62, 67, 68, 71, 73], "ones": [31, 36], "onli": [0, 1, 2, 5, 11, 23, 29, 31, 32, 34, 36, 37, 45, 53, 58, 59, 61, 62, 71], "onlin": [1, 32, 39, 64], "onward": 0, "open": [0, 62, 66], "operation": [39, 50, 59], "opinion": [24, 31], "oppos": [2, 31, 34, 35, 55], "opposit": 34, "option": [1, 2, 37, 62, 63, 67, 71], "order": [0, 1, 35, 37, 42, 71], "org": [6, 15, 21, 24, 41, 70], "organ": 1, "origin": [1, 2, 5, 12, 21, 31, 32, 35, 36, 37, 45, 46, 49, 59], "orthogon": 34, "other": [1, 9, 11, 28, 29, 30, 31, 32, 34, 35, 36, 37, 39, 40, 45, 46, 48, 51, 52, 54, 56, 58, 59, 61, 62, 64, 66, 71], "other_lexical_featur": [11, 64], "otherwis": [2, 10, 21, 23, 32, 38, 63, 67], "our": [0, 1, 2, 11, 13, 29, 31, 32, 36, 37, 39, 53, 59, 61, 71], "ourselv": 53, "out": [1, 2, 16, 19, 31, 36, 55, 60, 62], "outcom": [1, 44, 62], "output": [0, 2, 10, 17, 19, 40, 61, 62, 64, 67], "output_file_bas": [0, 1, 2, 61], "output_file_detail": 0, "output_file_path_chat_level": [1, 2], "output_file_path_conv_level": [1, 2], "output_file_path_user_level": [1, 2], "output_path": 67, "outsid": [1, 2, 12], "over": [1, 16, 29, 31, 34, 35, 36, 37, 53, 55, 60, 62, 71], "overal": [30, 31, 34, 36, 45, 46], "overrid": [0, 1, 2], "overview": [0, 61, 62], "overwritten": 1, "own": [0, 1, 9, 35, 62], "p": 55, "pacakg": 24, "pace": [43, 62], "packag": [17, 18, 40, 62], "pad": 19, "page": [1, 11, 29, 39, 61, 62, 69], "pair": [6, 19, 34, 49, 
71], "pairwis": [6, 34], "panda": [0, 1, 2, 12, 14, 16, 23, 47, 64, 65, 66, 71, 72, 73], "paper": [4, 5, 12, 18, 29, 40, 50, 64], "paragraph": 22, "param": 71, "paramet": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 47, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73], "paramt": 1, "pardon": 32, "parenthes": [22, 48, 64], "parenthet": [22, 48], "pars": [16, 50, 60], "part": [1, 10, 13, 29, 36, 42, 52, 71], "particip": [1, 9, 37, 62], "particl": [31, 36], "particular": [11, 32, 34, 41, 45, 47, 51, 59, 62], "particularli": 42, "partner": 32, "pass": [1, 13, 21, 47, 71], "path": [1, 2, 19, 61, 67], "path_in": 19, "pattern": [4, 11, 19, 55, 62, 67], "paus": 4, "pd": [1, 2, 4, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 19, 23, 25, 63, 64, 65, 66, 67, 68, 71], "pdf": [5, 12, 13, 16, 18, 21, 24, 64, 70], "penalti": 1, "pennebak": [12, 37, 41, 42, 52], "pennyslvania": 62, "peopl": [1, 32, 59, 62], "per": [1, 6, 9, 19, 42, 63, 66, 72], "percentag": [2, 21], "perfect": [37, 59], "perform": [0, 1, 2, 16, 50], "perhap": 1, "period": [4, 34, 55], "person": [1, 8, 12, 15, 16, 32, 34, 39, 41, 42, 50, 59, 62, 64, 70], "perspect": 1, "petrocelli": 5, "phrase": [19, 30, 38, 54], "phrase_split": 19, "pickl": [19, 67], "piec": [36, 42, 59, 63], "pl": 50, "place": [55, 61, 62], "plan": [34, 35, 45, 46], "player": 59, "pleas": [0, 1, 38, 49, 50, 61, 62], "please_start": 50, "point": [22, 24, 34, 35, 45, 46, 48, 52, 64, 66], "poisson": 55, "polar": [24, 39, 51, 52, 64], "polit": [1, 17, 18, 30, 32, 38, 39, 42, 51, 52, 54, 56, 64], "politeness_featur": 11, "politeness_v2": 11, "politeness_v2_help": 11, "politenessstrategi": [17, 50], "portion": 0, "posit": [0, 1, 11, 15, 24, 29, 31, 39, 42, 50, 51, 54, 56, 61, 62, 64, 67], "positive_affect_lexical_per_100": [51, 52, 56], "positive_bert": [1, 51, 61], "positive_emot": [49, 51, 52, 56], "positivity_bert": [1, 61], "positivity_zscor": 64, "positivity_zscore_chat": 52, "positivity_zscore_convers": 52, 
"possess": 31, "possibl": [1, 34, 62, 66], "possibli": [38, 62], "practic": [34, 35], "pre": [1, 4, 21, 37, 49, 64], "preced": [31, 35, 71], "precend": 35, "precis": 47, "precomput": 51, "predefin": 19, "predetermin": [31, 36], "predict": [2, 47, 51, 64], "prefer": [0, 1], "preload_word_list": 69, "prep_simpl": 19, "prep_whol": 19, "preposit": [31, 36], "preproces": 48, "preprocess": [0, 1, 2, 13, 19, 40, 43, 49, 51, 61, 69], "preprocess_chat_data": 2, "preprocess_conversation_column": 71, "preprocess_naive_turn": 71, "preprocess_text": 71, "preprocess_text_lowercase_but_retain_punctu": 71, "presenc": [2, 32, 67], "present": [1, 2, 14, 30, 31, 38, 42, 55, 62, 71], "prespecifi": 19, "prevent": 51, "previou": [1, 7, 28, 31, 36, 45, 46, 58, 64, 71], "primari": 34, "print": 2, "prior": [2, 64, 71], "priya": [47, 62], "probabl": [15, 47], "problem": 62, "procedur": 62, "proceed": 46, "process": [0, 1, 2, 4, 10, 21, 37, 55, 62, 64, 65, 67, 69, 71], "prodi": 15, "produc": [2, 34], "product": 15, "professor": 62, "progress": [1, 2], "project": [54, 62], "pronoun": [12, 16, 31, 36, 39, 41, 42, 64, 70], "proper": 1, "properti": [1, 61], "proport": [16, 39, 42, 64], "propos": 37, "provid": [0, 1, 2, 15, 29, 30, 33, 36, 39, 44, 47, 54, 62], "proxi": 42, "pseudonym": 1, "psycholog": 42, "pub": 70, "publish": [5, 30, 64], "pubsonlin": 6, "punctuat": [0, 2, 16, 19, 20, 21, 28, 43, 54, 60, 71], "punctuation_seper": 19, "puncut": 48, "pure": [24, 36], "purpos": 1, "put": [34, 50, 62, 66], "py": [0, 1, 14, 49, 61], "pypi": [1, 61], "python": [1, 32, 41, 56, 57, 61, 62, 68], "qtd": 62, "qualiti": 41, "quantifi": [31, 36, 62], "quantiti": [37, 39, 41, 47], "quartil": 50, "question": [16, 19, 20, 29, 32, 39, 49, 50, 64, 66, 68, 70], "question_num": 11, "question_word": 20, "quick": [1, 43], "quickli": 0, "quit": 40, "quot": [22, 48, 64], "quotat": [22, 48], "rabbit": 62, "rain": 41, "rais": [67, 71], "random": 55, "rang": [5, 8, 24, 30, 33, 34, 35, 40, 51, 53, 55, 56, 57], "ranganath": 
[16, 31, 32, 36, 38, 43, 54, 70], "ranganath2013": 70, "ranganathetal2013_detectingflirt": 16, "rapid": [1, 4], "rare": [34, 35], "rate": [42, 51], "rather": [1, 31, 34, 35, 36, 37, 45, 46, 63], "ratio": [16, 39, 64], "raw": [0, 12, 16, 21, 31, 33, 42, 50, 64], "re": [1, 31, 36, 42, 50, 61], "read": [0, 1, 2, 16, 21, 29, 33, 61, 62, 64, 65, 66, 67], "read_csv": 1, "read_in_lexicon": 67, "readabl": [11, 33, 64, 70], "reader": 33, "readi": 1, "readili": 62, "readthedoc": [1, 24, 61], "real": [1, 55], "realit": 13, "realli": [31, 36, 50], "reason": [31, 36, 45, 46, 49], "reassur": 49, "recal": 47, "recept": [18, 32, 39, 42, 50, 51, 52, 54, 56, 62, 64], "recogn": [1, 43, 47], "recognit": [0, 1, 2, 39, 64], "recommend": [0, 42, 62], "reddit": [48, 64], "reddit_tag": 11, "redditus": 48, "reduc": 63, "reduce_chunk": 63, "redund": [42, 62], "refer": [0, 1, 11, 22, 24, 28, 31, 42, 48, 52, 61, 62, 64, 70], "reflect": [37, 43], "regardless": 1, "regener": [0, 2, 51, 67], "regenerate_vector": [0, 1, 2, 67], "regex": [14, 16, 49], "regist": 37, "regress": 1, "regular": [5, 14, 30, 32, 42, 55, 58], "reichel": [53, 58, 60], "reidl": [4, 13], "reinvent": 62, "rel": [41, 51, 52, 55, 60, 64], "relat": [1, 61, 62, 64], "relationship": 36, "relev": [1, 29, 42, 44, 49, 51, 56, 61, 64, 65], "reli": [31, 34, 35, 36, 69], "reliabl": [33, 42], "remain": [1, 30, 71], "rememb": 1, "remov": [0, 2, 9, 13, 19, 28, 40, 43, 48, 49, 50, 71], "remove_active_us": 9, "renam": 1, "repair": [16, 39], "repeat": [60, 71], "repetit": 60, "replac": 19, "report": [1, 61], "repres": [2, 4, 6, 7, 11, 13, 23, 31, 34, 36, 42, 45, 46, 66, 67, 68, 71, 72, 73], "represent": [34, 38], "reproduc": [36, 62], "republican": 1, "request": [32, 50, 51], "requir": [0, 1, 20, 21, 31, 55, 61, 62, 64, 65, 66, 67], "research": [1, 2, 62], "reserv": 0, "resolv": 62, "resourc": [1, 39, 48, 61, 62], "respect": [1, 2, 12, 31, 36, 37, 69], "respons": [22, 48, 55, 58, 64], "restaur": [34, 56], "restor": 0, "restrict": 71, "result": 
[40, 55, 65, 72], "retain": [2, 16, 20, 21, 60, 71], "retriev": 50, "retunr": 3, "return": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 30, 32, 43, 49, 50, 51, 55, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "reveal": 62, "revert": 50, "review": 62, "rewrit": 50, "rich": 62, "riedl": [13, 40, 55], "right": [31, 36, 61, 62], "roberta": [1, 39, 42, 52, 56, 61, 64, 67], "robust": 13, "rocklag": [5, 30, 64], "room": 59, "root": [13, 40], "rough": [12, 54], "roughli": 31, "round": [13, 40, 59, 71], "round_num": 1, "row": [0, 1, 2, 9, 13, 25, 37, 40, 59, 63, 68, 71, 72, 73], "rowbotham": 62, "rucker": 5, "rule": [1, 69], "run": [0, 10, 12, 16, 35, 46, 47, 48, 51, 61, 69], "runtim": [1, 35], "sagepub": [5, 64], "sai": [1, 32, 50, 59], "said": [1, 36, 62], "same": [0, 1, 2, 31, 34, 37, 45, 48, 52, 59, 60, 62, 71], "sampl": [61, 62], "sarcast": 48, "save": [0, 1, 2, 19, 64, 67], "save_featur": 2, "sbert": [1, 28, 31, 34, 35, 36, 45, 46, 64, 65, 67], "scale": [42, 51], "schema": 1, "scheme": 0, "school": [21, 62], "scienc": [29, 39, 62], "scientist": [61, 62], "score": [1, 4, 5, 11, 12, 13, 15, 21, 24, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 45, 46, 47, 51, 53, 56, 57, 61, 64, 65, 67, 73], "script": [1, 61], "sea": 1, "seamless": 62, "search": [19, 61], "second": [0, 1, 4, 34, 42, 58, 59], "second_person": 49, "secr": [18, 49, 64], "section": [1, 29, 61], "see": [0, 1, 2, 30, 34, 38, 41, 45, 46, 47, 55, 62, 71], "seek": [5, 62], "segment": [0, 19], "select": [2, 4, 23, 28, 36, 45, 66, 67, 68, 71, 72, 73], "self": [1, 2, 61], "semant": [31, 34, 35, 41], "semantic_group": [1, 61], "send": [1, 37, 55], "sens": [5, 31, 54, 66], "sent": [1, 37, 64], "sentenc": [0, 1, 10, 15, 19, 20, 21, 33, 34, 35, 36, 42, 45, 46, 47, 48, 54, 56, 61, 67], "sentence_pad": 19, "sentence_split": 19, "sentence_to_train": 47, "sentencis": 19, "sentiment": [0, 1, 24, 31, 39, 42, 52, 56, 61, 62, 64, 67], "separ": [1, 2, 19, 34, 51], "sepcifi": 1, "septemb": 
40, "sequenc": [1, 59], "sequenti": 1, "seri": [12, 16, 23, 28, 42, 71, 73], "serv": 12, "set": [0, 1, 2, 13, 23, 34, 48, 59], "set_self_conv_data": 2, "sever": [1, 30, 41, 42, 48, 51, 56, 61], "shall": 54, "share": [31, 36, 37], "she": [30, 31, 36], "shift": 34, "shop": 62, "short": [55, 58], "shorter": [13, 40, 41, 42, 43], "should": [0, 1, 2, 4, 14, 23, 28, 29, 31, 36, 47, 48, 54, 61, 62, 65, 66, 67, 68, 69, 71, 72, 73], "shouldn": [31, 36], "show": [1, 37, 61], "showeth": 62, "shruti": [35, 45, 46, 47, 62], "side": 31, "signal": [45, 55], "signifi": 42, "signific": [1, 61], "silent": 37, "similar": [1, 6, 7, 13, 28, 29, 31, 34, 35, 36, 40, 45, 46, 49, 62, 65], "similarli": [1, 35], "simpl": [0, 1, 16, 19, 42, 61, 62], "simpli": [1, 5, 11, 28, 56, 62], "simplifi": 1, "simplist": 41, "sinc": [1, 32, 41, 71], "singh": 62, "singl": [0, 1, 2, 11, 12, 19, 23, 31, 34, 35, 36, 37, 41, 45, 46, 59, 62, 71, 72], "singular": [12, 41, 64], "site": 16, "situat": 37, "size": [1, 13, 63, 67], "skip": 1, "slightli": [32, 62, 63], "slow": 1, "small": 40, "so": [1, 2, 10, 30, 31, 36, 37, 50, 61, 62, 66], "social": [29, 39, 61, 62], "socsci": 16, "softwar": 62, "sohi": 62, "sol3": 4, "solut": 59, "solv": 62, "some": [0, 1, 11, 17, 29, 32, 34, 35, 37, 41, 61, 63], "somebodi": [31, 36], "someon": [22, 29, 31, 36, 47, 48, 61, 64], "someplac": [31, 36], "someth": 47, "sometim": 1, "somewhat": 35, "soon": 62, "sorri": [16, 32, 50], "sort": 10, "sound": [47, 51], "sourc": [4, 5, 6, 12, 13, 16, 17, 21, 34, 35, 50, 64, 68], "space": [34, 40, 71], "spaci": [1, 19, 47, 49, 50, 61], "span": 63, "spars": 32, "speak": [1, 31, 36, 37, 59, 60, 62], "speaker": [0, 1, 2, 6, 8, 9, 25, 31, 34, 35, 37, 38, 42, 45, 46, 61, 66, 71, 72], "speaker_id": [2, 61, 72], "speaker_id_col": [0, 1, 2, 6, 8, 9, 25, 26, 27, 61, 65, 66, 71, 72], "speaker_nicknam": [0, 1, 2, 6, 9, 59, 66, 71], "special": [0, 1, 2, 48, 71], "specif": [1, 2, 12, 32, 41, 48, 55, 61, 62, 69, 71], "specifi": [1, 2, 19, 47, 49, 67, 68, 71, 
72, 73], "speciifc": 63, "spend": [51, 62], "spike": 55, "split": [19, 21, 43, 63], "spoke": 59, "spoken": [11, 37], "spread": 55, "squar": [13, 40], "ssrn": 4, "stabl": 40, "stack": 14, "stackoverflow": 68, "stage": [1, 2, 34, 71], "stamp": 55, "standard": [1, 4, 37, 40, 41, 49, 55, 58, 60, 65, 72, 73], "stanford": 70, "start": [0, 15, 19, 20, 22, 23, 50], "statement": [1, 38, 42, 47, 48, 61, 62, 64], "statist": [65, 66, 68], "statologi": 41, "stem": 42, "step": [1, 4, 28, 41, 45, 46, 51], "still": [41, 45, 46], "stochast": 40, "stop": [40, 62], "stopword": [13, 19], "store": [1, 12, 16, 41, 49, 51, 61, 65, 67], "stoword": 42, "str": [2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 63, 64, 65, 66, 67, 68, 71, 72, 73], "straightforward": 29, "strategi": [17, 30, 32, 38, 39, 42, 49, 54, 64], "stream": 35, "strictli": 1, "string": [0, 1, 2, 4, 8, 12, 13, 14, 19, 23, 24, 50, 66, 67, 68, 71, 72, 73], "strongli": [1, 41, 61], "structur": [0, 36, 49], "student": [21, 33], "studi": [1, 34, 62], "style": [1, 31, 36, 59], "sub": [0, 1, 71], "subfold": 1, "subject": [5, 24, 39, 49, 64], "subjunct": 50, "sublist": 28, "submiss": 55, "subpart": [1, 71], "subsequ": [1, 30, 51, 58], "subset": 62, "substanc": 36, "substant": 31, "substanti": 1, "substr": 30, "subtask": 1, "subtract": [41, 58], "succe": 62, "success": [0, 1, 4, 31, 36, 43, 55, 58], "suggest": [1, 13, 34, 42, 44, 50], "suit": [62, 64], "sum": [1, 28, 34, 61, 64, 65, 66, 72], "summar": [0, 1, 65, 66, 69], "summari": [65, 66, 72], "summariz": [0, 65], "summarize_featur": 69, "suppl": 6, "support": [1, 15, 61], "suppos": 1, "sure": 30, "swear": 49, "syllabl": 21, "syntax": [1, 32, 61], "system": [2, 59, 64], "t": [0, 1, 2, 15, 29, 31, 36, 45, 49, 54, 61, 62, 67], "tabl": 62, "tag": 39, "take": [1, 4, 5, 9, 14, 25, 29, 31, 34, 37, 39, 42, 55, 61, 65, 71], "taken": [59, 71], "talk": [1, 37, 47, 59, 62], "tandem": [1, 61], "target": 15, "task": [1, 2, 59, 71], "tausczik": [12, 37, 41, 
52], "tausczikpennebaker2013": 12, "team": [0, 1, 4, 11, 12, 13, 34, 39, 40, 59, 65], "team_bursti": 4, "team_comm_tool": [1, 61], "teamcommtool": 1, "technic": [29, 39, 61, 62], "teghxgbqdhgaaaaa": 5, "tempor": [0, 2, 55, 58, 64, 71], "temporal_featur": 11, "tend": [1, 34, 60], "term": [1, 28, 59], "termin": [1, 2, 61], "terribl": 51, "test": [13, 33, 47], "text": [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 28, 32, 33, 36, 42, 48, 55, 62, 64, 67, 71], "text_based_featur": 64, "textblob": [24, 39, 51, 52, 64], "textblob_sentiment_analysi": 11, "than": [0, 1, 2, 11, 13, 31, 34, 35, 36, 37, 40, 41, 45, 46, 54, 60, 62, 63], "thee": 62, "thei": [0, 1, 29, 31, 34, 36, 37, 39, 42, 47, 58, 59, 61, 62, 67], "them": [0, 1, 2, 19, 28, 29, 31, 36, 50, 51, 55, 59, 61, 62, 64, 65, 66, 67], "themselv": [31, 36, 60], "theoret": 35, "theori": [34, 50], "therefor": [0, 1, 11, 37, 45, 59, 62, 69], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 20, 21, 23, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 73], "thing": [48, 61], "think": [1, 38, 47], "thorough": [43, 62], "those": [1, 21, 31, 36, 61], "though": [34, 42], "thought": [1, 35, 45], "thread": [1, 61], "three": [0, 1, 2, 22, 34, 37, 40, 51, 61, 62, 69, 71], "threshold": [15, 47], "through": [1, 45, 46, 50, 61, 62], "throughout": [31, 35, 36, 40, 45, 46, 55, 63], "tht": 35, "thu": [1, 2, 34, 35, 36, 37, 46, 55, 71], "time": [0, 1, 4, 23, 34, 35, 39, 42, 48, 51, 55, 59, 61, 62, 63, 64, 65, 66, 71], "time_diff": 55, "timediff": 4, "timestamp": [0, 1, 2, 8, 23, 58, 61, 62, 63, 64, 71], "timestamp_col": [0, 1, 2, 8, 61, 63, 64, 65, 71], "timestamp_end": [1, 23, 61, 64], "timestamp_start": [1, 23, 61, 64], "todai": [34, 35, 41, 43, 45, 46, 47], "todo": 66, "togeth": [0, 62, 66], "token": [16, 19, 39, 49, 54, 64], "token_count": [19, 49], "too": [30, 31, 36, 62], "took": [1, 59], "tool": [1, 61, 
62], "toolkit": [0, 1, 11, 42, 45, 46, 55, 62], "top": [1, 50, 59], "topic": [1, 13, 34, 40, 42, 43, 65], "tormala": 5, "total": [1, 3, 12, 16, 25, 31, 34, 36, 37, 41, 44, 53, 59, 60, 61, 62, 63, 64, 66, 72], "touch": [1, 61], "toward": [31, 36, 38, 42, 45, 46], "tradit": 49, "train": [1, 2, 15, 64], "train_spacy_n": 15, "transcript": 0, "transfom": [45, 46], "transform": [31, 34, 35, 36, 51], "transform_utter": 50, "treat": [1, 59, 61], "tri": [50, 64], "trivial": [3, 44, 62], "troubl": [1, 61], "true": [0, 1, 2, 37, 61, 63, 67, 71], "truncat": 2, "truth_intensifi": 49, "ttr": 64, "tupl": [0, 1, 2, 15, 19, 64], "turn": [0, 2, 25, 28, 31, 32, 37, 39, 61, 64, 65, 71], "turn_count": 59, "turn_df": 71, "turn_id": 71, "turn_taking_featur": 11, "twice": 63, "twitter": [1, 51, 61], "two": [0, 1, 2, 23, 31, 34, 36, 41, 45, 46, 52, 62, 63], "txt": 19, "type": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 37, 39, 52, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "typic": [1, 34, 40, 41, 42, 52, 60], "u": [1, 22, 31, 36, 48, 49, 58], "uci": 16, "uh": [31, 36], "ulrich": 55, "um": [31, 36, 60], "umbrella": [8, 29, 34], "uncertain": [5, 30], "uncertainti": 30, "under": [0, 1, 10, 11, 12, 28, 40], "underli": [1, 61], "underscor": [1, 61], "understand": [0, 33, 39, 43, 48, 58, 61, 62], "understood": 33, "uninterrupt": 59, "uniqu": [0, 1, 2, 6, 9, 13, 16, 23, 25, 41, 47, 52, 60, 61, 63, 71], "univers": 62, "unix": 58, "unless": [31, 36], "unpack": 62, "unpreprocess": [0, 2], "until": [31, 36, 45, 46], "unzip": [1, 61], "up": [1, 17, 21, 28, 31, 35, 36, 37, 45, 46, 51, 59, 61], "updat": [1, 9, 40, 54, 61], "upenn": 1, "upload": 13, "upon": 33, "upper": 42, "us": [0, 1, 2, 3, 5, 11, 12, 13, 17, 19, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 60, 62, 64, 65, 66, 67, 71], "usag": [21, 24], "use_time_if_poss": 63, "user": [0, 1, 2, 9, 15, 22, 37, 47, 48, 51, 61, 62, 63, 64, 65, 66, 
69, 72], "user_data": [2, 65, 66], "user_df": 9, "user_level_featur": 2, "user_list": 9, "userlevelfeaturescalcul": [2, 66, 69], "usernam": [22, 48], "utf": 1, "util": [1, 12, 21, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "utilti": 62, "utter": [0, 1, 2, 3, 4, 5, 13, 14, 15, 16, 17, 20, 21, 23, 24, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 58, 60, 61, 67], "utteranc": 42, "v": [0, 1, 2, 13, 42, 61], "v0": 0, "valenc": 51, "valid": [23, 55], "valu": [1, 2, 5, 6, 10, 12, 13, 18, 19, 23, 28, 30, 31, 34, 36, 37, 40, 41, 42, 45, 46, 47, 55, 59, 61, 64, 68, 71, 72, 73], "vari": [13, 31, 34, 35], "variabl": [1, 56, 57, 64, 65, 66], "varianc": [8, 34], "variance_in_dd": 11, "variat": [4, 32], "varieti": [42, 62], "variou": [19, 42, 64, 65, 66], "vast": 62, "ve": [0, 31, 36, 50, 61], "vec": 6, "vect_data": [1, 7, 8, 28, 61, 64, 65, 66], "vect_path": 67, "vector": [0, 2, 6, 7, 8, 13, 28, 34, 35, 40, 55, 61, 64, 65, 67], "vector_data": [0, 1, 2, 61], "vector_directori": [0, 1, 2, 61, 65], "vein": 45, "verb": [19, 31, 36], "verbal": 32, "veri": [5, 30, 31, 34, 35, 36, 42, 49, 54], "verifi": 2, "verit": 62, "version": [1, 12, 14, 21, 40, 51, 61], "versu": [4, 29, 47, 55, 59], "via": [3, 44], "view": 50, "visit": 41, "voila": 62, "w": 31, "wa": [0, 1, 2, 5, 12, 31, 32, 35, 36, 47, 51, 56, 59, 62, 71], "wai": [1, 2, 29, 30, 31, 32, 34, 49, 50, 54, 56, 57, 61, 62, 66], "waiai": 62, "wait": [4, 55], "walk": 1, "walkthrough": [0, 61, 62], "want": [1, 28, 34, 59, 61, 62, 67], "warn": 50, "watt": [1, 2, 62, 71], "we": [0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 18, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 57, 58, 59, 61, 62, 66, 67, 71], "web": 70, "websit": [1, 61], "week": 47, "weight": 66, "welcom": 61, "well": [29, 31, 36, 55, 62], "went": 41, "were": [1, 12, 31, 36, 42], "western": 1, "wh": [19, 31, 36], "wh_question": [32, 49, 54], "wharton": 62, "what": [1, 2, 12, 16, 20, 29, 31, 32, 
34, 35, 36, 39, 41, 45, 46, 47, 50, 54, 62, 63], "whatev": [1, 31, 36], "wheel": 62, "when": [1, 16, 20, 31, 33, 36, 47, 54, 55, 59, 60, 61, 62, 69, 71], "whenev": 71, "where": [0, 1, 2, 19, 20, 28, 31, 32, 36, 37, 40, 41, 42, 48, 50, 51, 54, 59, 61, 65, 68, 73], "wherea": [31, 34, 35, 36, 43], "wherev": [31, 36], "whether": [1, 2, 10, 16, 19, 32, 37, 38, 41, 43, 47, 57, 58, 62, 63, 64, 67, 71], "which": [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 15, 16, 18, 23, 25, 28, 31, 34, 35, 36, 37, 38, 40, 41, 42, 51, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 72, 73], "while": [31, 32, 34, 36, 37, 44, 45, 46, 55, 62, 71], "whitespac": 43, "who": [20, 31, 32, 36, 47, 51, 54, 59, 60, 62], "whole": [28, 59, 62, 71], "whom": [31, 36, 54], "whose": [31, 36, 54], "why": [20, 29, 31, 36, 54], "wide": 31, "wien": 62, "wiki": [21, 29, 70], "wiki_link": [1, 61], "wikipedia": [21, 33, 37, 70], "williamson": 60, "wish": [1, 2, 18], "within": [0, 1, 2, 8, 11, 16, 30, 34, 35, 36, 41, 45, 46, 52, 55, 59, 60, 62, 63, 64, 68, 71, 73], "within_group": 2, "within_person_discursive_rang": 11, "within_task": [0, 1, 2, 71], "without": [1, 19, 31, 36, 42, 47, 54, 62, 69], "won": [0, 31, 36, 45], "wonder": 56, "woolei": 4, "woollei": [13, 40, 55], "wooten": 55, "word": [3, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 28, 30, 32, 33, 37, 38, 39, 40, 41, 43, 45, 46, 48, 49, 52, 53, 54, 56, 57, 62, 64, 65, 66, 69, 70], "word_mimicri": 11, "word_start": [19, 49], "wordnet": [1, 61], "words_in_lin": 19, "work": [0, 47, 50, 55, 61, 62], "world": 55, "worri": 62, "would": [1, 29, 31, 34, 35, 36, 37, 42, 50, 54, 62], "wouldn": [31, 36], "wow": 50, "wp": 13, "write": [2, 29, 60], "www": [12, 13, 18, 41, 64], "x": [0, 1, 2, 4, 46, 68], "xinlan": 62, "yashveer": 62, "ye": 19, "yeah": [31, 36], "yeoman": [18, 49], "yesno_quest": [32, 49, 54], "yet": 48, "ylatau": 12, "you": [0, 1, 2, 11, 24, 29, 31, 36, 37, 43, 47, 50, 59, 61, 62, 69], "your": [0, 29, 31, 32, 36, 37, 50, 59, 61, 62], "yourself": [31, 36, 50], 
"yuluan": 62, "yup": [31, 36], "yuxuan": 62, "z": [12, 39, 49, 51, 64, 73], "zero": [13, 52], "zhang": 62, "zheng": 62, "zhong": 62, "zhou": 62, "zscore": 41, "zscore_chat": 41, "zscore_chats_and_convers": 69, "zscore_convers": 41, "\u00bc": 47, "\u03c4": 55}, "titles": ["The Basics", "Worked Example", "feature_builder module", "basic_features module", "burstiness module", "certainty module", "discursive_diversity module", "fflow module", "get_all_DD_features module", "get_user_network module", "hedge module", "Features: Technical Documentation", "info_exchange_zscore module", "information_diversity module", "lexical_features_v2 module", "named_entity_recognition_features module", "other_lexical_features module", "politeness_features module", "politeness_v2 module", "politeness_v2_helper module", "question_num module", "readability module", "reddit_tags module", "temporal_features module", "textblob_sentiment_analysis module", "turn_taking_features module", "variance_in_DD module", "within_person_discursive_range module", "word_mimicry module", "FEATURE NAME", "Certainty", "Content Word Accommodation", "Conversational Repair", "Dale-Chall Score", "Discursive Diversity", "Forward Flow", "Function Word Accommodation", "Gini Coefficient", "Hedge", "Features: Conceptual Documentation", "Information Diversity", "Information Exchange", "Linguistic Inquiry and Word Count (LIWC) and Other Lexicons", "Message Length", "Message Quantity", "Mimicry (BERT)", "Moving Mimicry", "Named Entity Recognition", "Online Discussion Tags", "Politeness/Receptiveness Markers", "Politeness Strategies", "Sentiment (RoBERTa)", "Positivity Z-Score", "Proportion of First Person Pronouns", "Question (Naive)", "Team Burstiness", "Textblob Polarity", "Textblob Subjectivity", "Time Difference", "Turn Taking Index", "Word Type-Token Ratio", "The Team Communication Toolkit", "Introduction", "assign_chunk_nums module", "calculate_chat_level_features module", "calculate_conversation_level_features 
module", "calculate_user_level_features module", "check_embeddings module", "gini_coefficient module", "Utilities", "preload_word_lists module", "preprocess module", "summarize_features module", "zscore_chats_and_conversation module"], "titleterms": {"A": 0, "One": 0, "The": [0, 61, 62], "accommod": [31, 36], "addit": 1, "advanc": 1, "assign_chunk_num": 63, "assumpt": 0, "basic": [0, 1, 29, 30, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60], "basic_featur": 3, "bert": 45, "bursti": [4, 55], "calculate_chat_level_featur": 64, "calculate_conversation_level_featur": 65, "calculate_user_level_featur": 66, "caveat": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "certainti": [5, 30], "chall": 33, "chat": [11, 39], "check_embed": 67, "citat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "class": 69, "code": [0, 1], "coeffici": 37, "column": [1, 61], "commun": 61, "conceptu": 39, "configur": 1, "consider": 1, "content": [31, 61], "convers": [1, 11, 32, 39, 62, 69], "count": [42, 59], "customiz": 0, "dale": 33, "data": 1, "declar": 61, "demo": [0, 1], "detail": 1, "differ": 58, "directori": 1, "discurs": 34, "discursive_divers": 6, "discuss": 48, "divers": [34, 40], "document": [11, 39, 62], "driver": 69, "entiti": 47, "environ": [1, 61], "exampl": [1, 41, 47], "exchang": 41, "featur": [1, 11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 69], "feature_build": 2, "featurebuild": [1, 61, 62], "fflow": 7, "file": [1, 30, 34, 35, 45, 46, 47, 51], "first": 53, "flow": 35, "forward": 35, "function": [0, 36], "gener": [1, 61, 62], "get": [1, 61, 62], "get_all_dd_featur": 8, "get_user_network": 9, "gini": 37, "gini_coeffici": 68, "hedg": [10, 38], "high": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "implement": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "import": [1, 61], "index": 59, "indic": 61, "info_exchange_zscor": 12, "inform": [1, 40, 41, 61], "information_divers": 13, "input": [1, 34], "inquiri": 42, "inspect": [1, 61], "interpret": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "introduct": 62, "intuit": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60], "kei": 0, "length": 43, "level": [11, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 69], "lexical_features_v2": 14, "lexicon": 42, "light": 0, "linguist": 42, "liwc": 42, "marker": 49, "messag": [43, 44], "mimicri": [45, 46], "modul": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 63, 64, 65, 66, 67, 68, 70, 71, 72, 73], "motiv": 62, "move": 46, "naiv": 54, "name": [1, 29, 47, 61], "named_entity_recognition_featur": 15, "note": [29, 30, 31, 33, 34, 35, 36, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59], "onlin": 48, "other": [42, 69], "other_lexical_featur": 16, "ouput": 34, "our": 62, "output": [1, 30, 35, 45, 46, 47, 51], "packag": [0, 1, 61], "paramet": [0, 1], "person": 53, "pip": [1, 61], "polar": 56, "polit": [49, 50], "politeness_featur": 17, "politeness_v2": 18, "politeness_v2_help": 19, "posit": 52, "preload_word_list": 70, "preprocess": 71, "pronoun": 53, "proport": 53, "quantiti": 44, "question": 54, "question_num": 20, "ratio": 60, "readabl": 21, "recept": 49, "recognit": 47, "recommend": [1, 61], "reddit_tag": 22, "relat": [29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 
55, 56, 57, 58, 59, 60], "repair": 32, "roberta": 51, "run": 1, "sampl": [0, 1], "score": [33, 41, 52], "sentiment": 51, "speaker": [11, 59, 62, 69], "start": [1, 61, 62], "strategi": 50, "subject": 57, "summarize_featur": 72, "tabl": 61, "tag": 48, "take": 59, "team": [55, 61, 62], "technic": 11, "temporal_featur": 23, "textblob": [56, 57], "textblob_sentiment_analysi": 24, "time": 58, "token": 60, "toolkit": 61, "touch": 0, "train": 47, "troubleshoot": [1, 61], "turn": [1, 59], "turn_taking_featur": 25, "type": 60, "us": 61, "user": 11, "util": 69, "utter": [11, 39, 62, 69], "variance_in_dd": 26, "vector": 1, "virtual": [1, 61], "walkthrough": 1, "within_person_discursive_rang": 27, "word": [31, 36, 42, 60], "word_mimicri": 28, "work": 1, "your": 1, "z": [41, 52], "zscore_chats_and_convers": 73}})
\ No newline at end of file
diff --git a/docs/source/basics.rst b/docs/source/basics.rst
index 9409d48f..87221375 100644
--- a/docs/source/basics.rst
+++ b/docs/source/basics.rst
@@ -48,14 +48,14 @@ Package Assumptions
 7. **Additional Columns**: Columns not required as inputs (conversation identifier, speaker identifier, message, and timestamp column(s)) are assumed to be metadata and won't be summarized in the featurization process.
 
 8. **Vector Data Cache**: Your data's vector data will be cached in **vector_directory**. This directory will be created if it doesn’t exist, but its contents should be reserved for cached vector files.
+   
+   * This parameter defaults to "vector_data/".
 
    * Note: v0.1.3 and earlier compute vectors using _preprocessed_ text by default, which drops capitalization and punctuation. However, this can affect the interpretation of sentiment vectors; for example, "Hello!" has more positive sentiment than "hello." Consequently, from v0.1.4 onwards, we compute vectors using the raw input text, including punctuation and capitalization. To restore this behavior, please set **compute_vectors_from_preprocessed** to True.
 
    * Additionally, we assume that empty messages are equivalent to "NaN vector," defined `here <https://raw.githubusercontent.com/Watts-Lab/team_comm_tools/refs/heads/main/src/team_comm_tools/features/assets/nan_vector.txt>`_.
 
-9. **Output Files**: We generate three outputs: **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features).
-
-   * This should be a *path*, not just a filename. For example, "./my_file.csv", not just "my_file.csv."
+9. **Output File Base**: We generate three output files at different levels of analysis (Utterance/Chat, Speaker/User, and Conversation). We recommend using the **output_file_base** parameter to give them all a common naming scheme (a string that will be used to automatically name all files). You can also name each of them individually, but there's some complexity (for now) that we explain in :ref:`output_file_details`.
 
 10. **Custom Features**: To save time, we exclude features that require computing sentence vectors by default. To access these features, use the **custom_features** parameter in your FeatureBuilder:
 
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 637d96db..b7bc948d 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -85,9 +85,7 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 		timestamp_col = "timestamp",
 		grouping_keys = ["batch_num", "round_num"],
 		vector_directory = "./vector_data/",
-		output_file_path_chat_level = "./jury_output_chat_level.csv",
-		output_file_path_user_level = "./jury_output_user_level.csv",
-		output_file_path_conv_level = "./jury_output_conversation_level.csv",
+		output_file_base = "jury_output",
 		turns = True
 	)
 	jury_feature_builder.featurize()
@@ -95,6 +93,9 @@ Now we are ready to call the FeatureBuilder on our data. All we need to do is de
 Basic Input Columns
 ^^^^^^^^^^^^^^^^^^^^
 
+Conversation Parameters
+"""""""""""""""""""""""""
+
 * The **input_df** parameter is where you pass in your dataframe. In this case, we want to run the FeatureBuilder on the juries data that we read in!
 
 * The **speaker_id_col** refers to the name of the column containing a unique identifier for each speaker / participant in the conversation. Here, in the data, the name of our columns is called "speaker_nickname."
@@ -105,6 +106,8 @@ Basic Input Columns
 
 	* If you do not pass anything in, "message" is the default value for this parameter.
 
+	* We assume that all messages are ordered chronologically.
+
 * The **timestamp_col** refers to the name of the column containing when each utterance was said. In this case, we have exactly one timestamp for each message, stored in "timestamp." 
 
 	* If you do not pass anything in, "timestamp" is the default value for this parameter.
@@ -125,21 +128,39 @@ Basic Input Columns
 
 		conversation_id_col = "batch_num"
 
+Vector Directory
+""""""""""""""""""
+
 * The **vector_directory** is the name of a directory in which we will store some pre-processed information. Some features require running inference from HuggingFace's `RoBERTa-based sentiment model <https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment>`_, and others require generating `SBERT vectors <https://sbert.net/>`_. These processes take time, and we cache the outputs so that subsequent runs of the FeatureBuilder on the same dataset will not take as much time. Therefore, we require you to pass in a location where you'd like us to save these outputs.
 
+	* By default, the directory is named "vector_data/".
+
 	* **Note that we do not require the name of the vector directory to be a folder that already exists**; if it doesn't exist, we will create it for you.
 
 	* Inside the folder, we will store the RoBERTa outputs in a subfolder called "sentiment", and the SBERT vectors in a subfolder called "sentence." We will create both of these subfolders for you.
 
 	* The **turns** parameter, which we will discuss later, controls whether or not you'd like the FeatureBuilder to treat successive utterances by the same individual as a single "turn," or whether you'd like them to be treated separately. We will cache different versions of outputs based on this parameter; we use a subfolder called "chats" (when **turns=False**) or "turns" (when **turns=True**).
 
-* There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on `Generating Features: Utterance-, Speaker-, and Conversation-Level <intro#generating_features>`_ for more details.) However, this means that we require you to provide a path for where you would like us to store each of the output files; **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features).
+.. _output_file_details:
+
+Output File Naming Details 
+""""""""""""""""""""""""""""
+
+* There are three output files for each run of the FeatureBuilder, which mirror the three levels of analysis: utterance-, speaker-, and conversation-level. (Please see the section on `Generating Features: Utterance-, Speaker-, and Conversation-Level <intro#generating_features>`_ for more details.) These are generated using the **output_file_base** parameter.
+
+	* **All of the outputs will be generated in a folder called "output."**
+
+	* Within the "output" folder, **we generate sub-folders such that the three files will be located in subfolders called "chat," "user," and "conv," respectively.**
+
+	* Similar to the **vector_directory** parameter, the "chat" directory will be renamed to "turn" when the **turns** parameter is set to True.
+
+* It is possible to give each of the three output files a different name, rather than using the same base file path, by setting **output_file_path_chat_level** (Utterance- or Chat-Level Features), **output_file_path_user_level** (Speaker- or User-Level Features), and **output_file_path_conv_level** (Conversation-Level Features). However, because outputs are organized in the specific locations described above, **we have specific requirements for inputting the output paths, and we will modify the path under the hood to match our file naming schema,** rather than saving the file directly to the specified location.
 
 	* We expect that you pass in a **path**, not just a filename. For example, the path needs to be "./my_file.csv", and not just "my_file.csv"; you will get an error if you pass in only a name without the "/".
 
-	* Regardless of your path location, we will automatically append the name "output" to the fornt of your file path, such that **all of the outputs will be generated in a folder called "output."**
+	* Regardless of your path location, we will automatically prepend the name "output" to the front of your file path.
 
-	* Within the "output" folder, **we will also generate sub-folders such that the three files will be located in subfolders called "chat," "user," and "conv," respectively.**
+	* Within the "output" folder, **we will also generate the chat/user/conv sub-folders.**
 
 	* If you pass in a path that already contains the above automatically-generated elements (for example, "./output/chat/my_chat_features.csv"), we will skip these steps and directly save it in the relevant folder.
 
@@ -153,7 +174,7 @@ Basic Input Columns
 
 		output_file_path_chat_level = "./output/chat/jury_output_chat_level.csv"
 
-	* And these two ways of specifying an output path are equivalent, assumign that turns=True:
+	* And these two ways of specifying an output path are equivalent, assuming that turns=True:
 
 	.. code-block:: python
 
@@ -161,6 +182,10 @@ Basic Input Columns
 
 		output_file_path_chat_level = "./output/turn/jury_output_turn_level.csv"
 
+
+Turns
+""""""
+
 * The **turns** parameter controls whether we want to treat successive messages from the same person as a single turn. For example, in a text conversation, sometimes individuals will send many message in rapid succession, as follows:
 
 	* **John**: Hey Michael
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fe4e036e..9e4be9bf 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -62,11 +62,10 @@ Once you import the tool, you will be able to declare a FeatureBuilder object, w
       timestamp_col= "timestamp",
       # this is where we'll cache things like sentence vectors; this directory doesn't have to exist; we'll create it for you!
       vector_directory = "./vector_data/",
-      # give us names for the utterance (chat), speaker (user), and conversation-level outputs
-      output_file_path_chat_level = "./my_output_chat_level.csv", 
-      output_file_path_user_level = "./my_output_user_level.csv",
-      output_file_path_conv_level = "./my_output_conversation_level.csv",
-      # if true, this will combine successive turns by the same speaker.
+      # this will be the base file path for which we generate the three outputs;
+      # you will get your outputs in output/chat/my_output_chat_level.csv; output/conv/my_output_conv_level.csv; and output/user/my_output_user_level.csv.
+      output_file_base = "my_output",
+      # if turns is True, the chat-level output is stored under output/turn/ instead of output/chat/
       turns = False,
       # these features depend on sentence vectors, so they take longer to generate on larger datasets. Add them in manually if you are interested in adding them to your output!
       custom_features = [  
diff --git a/examples/demo.ipynb b/examples/demo.ipynb
index 5ca9f439..07ddd8d3 100644
--- a/examples/demo.ipynb
+++ b/examples/demo.ipynb
@@ -459,7 +459,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 18/18 [00:01<00:00,  9.62it/s]\n"
+      "100%|██████████| 17/17 [00:01<00:00,  9.28it/s]\n"
      ]
     },
     {
@@ -476,14 +476,12 @@
    "source": [
     "jury_feature_builder = FeatureBuilder(\n",
     "\t\tinput_df = juries_df,\n",
+    "        output_file_base = \"jury_tiny_output\", # We use this base string to construct outputs, which will appear at output/chat/, output/conv, and output/user\n",
     "\t\tspeaker_id_col = \"speaker_nickname\", # This is the column that contains the speaker IDs\n",
     "\t\tmessage_col = \"message\", # This is the column that contains the messages\n",
     "\t\ttimestamp_col = \"timestamp\", # This is the column that contains the timestamps\n",
     "\t\tgrouping_keys = [\"batch_num\", \"round_num\"], # These are the columns that define the conversation identifier\n",
     "\t\tvector_directory = \"./vector_data/\", # This is the directory where the sentence vectors and cached BERT outputs stored\n",
-    "\t\toutput_file_path_chat_level = \"./jury_tiny_output_chat_level.csv\", # This is the path to save the utterance (chat)-level features\n",
-    "\t\toutput_file_path_user_level = \"./jury_tiny_output_user_level.csv\", # This is the path to save the speaker (user)-level features\n",
-    "\t\toutput_file_path_conv_level = \"./jury_tiny_output_conversation_level.csv\", # This is the path to save the conversation-level features\n",
     "\t\t\n",
     "\t\t# Flip this to True if you don't want to automatically combine successive \n",
     "\t\t# messages from the same speaker as a single \"turn;\"\n",
@@ -1987,7 +1985,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "conversation_features = pd.read_csv(\"./output/conv/jury_tiny_output_conversation_level.csv\")"
+    "conversation_features = pd.read_csv(\"./output/conv/jury_tiny_output_conv_level.csv\")"
    ]
   },
   {
@@ -2063,7 +2061,7 @@
        "      <td>0.052977</td>\n",
        "      <td>0.485594</td>\n",
        "      <td>0.645007</td>\n",
-       "      <td>0.318921</td>\n",
+       "      <td>0.282681</td>\n",
        "      <td>0.406655</td>\n",
        "      <td>0.004126</td>\n",
        "      <td>0.060308</td>\n",
@@ -2087,7 +2085,7 @@
        "      <td>0.070613</td>\n",
        "      <td>0.439604</td>\n",
        "      <td>0.610393</td>\n",
-       "      <td>0.282060</td>\n",
+       "      <td>0.267039</td>\n",
        "      <td>0.362725</td>\n",
        "      <td>0.002987</td>\n",
        "      <td>0.024873</td>\n",
@@ -2122,8 +2120,8 @@
        "1                     0.070613                   0.439604   \n",
        "\n",
        "   max_user_avg_forward_flow  info_diversity  discursive_diversity  \\\n",
-       "0                   0.645007        0.318921              0.406655   \n",
-       "1                   0.610393        0.282060              0.362725   \n",
+       "0                   0.645007        0.282681              0.406655   \n",
+       "1                   0.610393        0.267039              0.362725   \n",
        "\n",
        "   variance_in_DD  incongruent_modulation  within_person_disc_range  \\\n",
        "0        0.004126                0.060308                  1.303592   \n",
@@ -2195,7 +2193,7 @@
        "      <td>0.18750</td>\n",
        "      <td>0.037380</td>\n",
        "      <td>0.037380</td>\n",
-       "      <td>0.318921</td>\n",
+       "      <td>0.282681</td>\n",
        "      <td>0.406655</td>\n",
        "      <td>0.004126</td>\n",
        "      <td>0.060308</td>\n",
@@ -2209,7 +2207,7 @@
        "      <td>0.27037</td>\n",
        "      <td>-0.166857</td>\n",
        "      <td>-0.166857</td>\n",
-       "      <td>0.282060</td>\n",
+       "      <td>0.267039</td>\n",
        "      <td>0.362725</td>\n",
        "      <td>0.002987</td>\n",
        "      <td>0.024873</td>\n",
@@ -2229,8 +2227,8 @@
        "1                        0.124663                            0.27037   \n",
        "\n",
        "   team_burstiness  team_burstiness  info_diversity  discursive_diversity  \\\n",
-       "0         0.037380         0.037380        0.318921              0.406655   \n",
-       "1        -0.166857        -0.166857        0.282060              0.362725   \n",
+       "0         0.037380         0.037380        0.282681              0.406655   \n",
+       "1        -0.166857        -0.166857        0.267039              0.362725   \n",
        "\n",
        "   variance_in_DD  incongruent_modulation  within_person_disc_range  \n",
        "0        0.004126                0.060308                  1.303592  \n",
diff --git a/examples/featurize.py b/examples/featurize.py
index f4288fe8..adfa781a 100644
--- a/examples/featurize.py
+++ b/examples/featurize.py
@@ -42,9 +42,7 @@
 		input_df = tiny_juries_df,
 		grouping_keys = ["batch_num", "round_num"],
 		vector_directory = "./vector_data/",
-		output_file_path_chat_level = "./jury_TINY_output_chat_level.csv",
-		output_file_path_user_level = "./jury_TINY_output_user_level.csv",
-		output_file_path_conv_level = "./jury_TINY_output_conversation_level.csv",
+		output_file_base = "jury_TINY_output", # Naming output files using the output_file_base parameter (recommended)
 		turns = False,
 		custom_features = [
 			"(BERT) Mimicry",
@@ -59,6 +57,8 @@
 		input_df = tiny_multi_task_df,
 		conversation_id_col = "stageId",
 		vector_directory = "./vector_data/",
+		# alternatively, you can name each output file separately. NOTE, however, that we don't directly use this path;
+		# we modify the path to place outputs within the `output/chat`, `output/conv`, and `output/user` folders.
 		output_file_path_chat_level = "./multi_task_TINY_output_chat_level_stageId_cumulative.csv",
 		output_file_path_user_level = "./multi_task_TINY_output_user_level_stageId_cumulative.csv",
 		output_file_path_conv_level = "./multi_task_TINY_output_conversation_level_stageId_cumulative.csv",
diff --git a/src/team_comm_tools/feature_builder.py b/src/team_comm_tools/feature_builder.py
index 3e627107..cb3500d9 100644
--- a/src/team_comm_tools/feature_builder.py
+++ b/src/team_comm_tools/feature_builder.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 import time
 import itertools
+import warnings
 
 # Imports from feature files and classes
 from team_comm_tools.utils.download_resources import download
@@ -28,16 +29,19 @@ class FeatureBuilder:
     :param input_df: A pandas DataFrame containing the conversation data that you wish to featurize.
     :type input_df: pd.DataFrame 
     
-    :param vector_directory: Directory path where the vectors are to be cached.
+    :param vector_directory: Directory path where the vectors are to be cached. Defaults to "./vector_data/"
     :type vector_directory: str
+
+    :param output_file_base: Base name for the output files, which will be used to auto-generate filenames for each of the three levels. Defaults to "output".
+    :type output_file_base: str
     
-    :param output_file_path_chat_level: Path where the chat (utterance)-level output csv file is to be generated.
+    :param output_file_path_chat_level: Path where the chat (utterance)-level output csv file is to be generated. (This parameter will override the base name.)
     :type output_file_path_chat_level: str
 
-    :param output_file_path_user_level: Path where the user (speaker)-level output csv file is to be generated.
+    :param output_file_path_user_level: Path where the user (speaker)-level output csv file is to be generated. (This parameter will override the base name.)
     :type output_file_path_user_level: str
 
-    :param output_file_path_conv_level: Path where the conversation-level output csv file is to be generated.
+    :param output_file_path_conv_level: Path where the conversation-level output csv file is to be generated. (This parameter will override the base name.)
     :type output_file_path_conv_level: str
 
     :param custom_features: A list of additional features outside of the default features that should be calculated.
@@ -95,12 +99,13 @@ class FeatureBuilder:
     def __init__(
             self, 
             input_df: pd.DataFrame, 
-            vector_directory: str,
-            output_file_path_chat_level: str, 
-            output_file_path_user_level: str,
-            output_file_path_conv_level: str,
+            vector_directory: "./vector_data/",
+            output_file_base = "output",
+            output_file_path_chat_level = None, 
+            output_file_path_user_level = None,
+            output_file_path_conv_level = None,
             custom_features: list = [],
-            analyze_first_pct: list = [1.0], 
+            analyze_first_pct: list = [1.0],
             turns: bool=False,
             conversation_id_col: str = "conversation_num",
             speaker_id_col: str = "speaker_nickname",
@@ -115,15 +120,13 @@ def __init__(
             compute_vectors_from_preprocessed: bool = False
         ) -> None:
 
-        #  Defining input and output paths.
+        # Defining input and output paths.
         self.chat_data = input_df.copy()
         self.orig_data = input_df.copy()
         self.ner_training = ner_training_df
         self.vector_directory = vector_directory
 
         print("Initializing Featurization...")
-        self.output_file_path_conv_level = output_file_path_conv_level
-        self.output_file_path_user_level = output_file_path_user_level
 
         # Set features to generate
         # TODO --- think through more carefully which ones we want to exclude and why
@@ -194,13 +197,6 @@ def __init__(
                 if func not in self.feature_methods_conv:
                     self.feature_methods_conv.append(func)
 
-        # Basic error detetection
-        # user didn't specify a file name, or specified one with only nonalphanumeric chars
-        if not bool(self.output_file_path_conv_level) or not bool(re.sub('[^A-Za-z0-9_]', '', self.output_file_path_conv_level)):
-            raise ValueError("ERROR: Improper conversation-level output file name detected.")
-        if not bool(self.output_file_path_user_level) or not bool(re.sub('[^A-Za-z0-9_]', '', self.output_file_path_user_level)):
-            raise ValueError("ERROR: Improper user (speaker)-level output file name detected.")
-
         # drop all columns that are in our generated feature set --- we don't want to create confusion!
         chat_features = list(itertools.chain(*[self.feature_dict[feature]["columns"] for feature in self.feature_dict.keys() if self.feature_dict[feature]["level"] == "Chat"]))
         columns_to_drop = [col for col in chat_features if col in self.chat_data.columns]
@@ -240,23 +236,23 @@ def __init__(
                 raise ValueError("Conversation identifier not present in data. Did you perhaps forget to pass in a `conversation_id_col`?")
             raise ValueError("Conversation identifier not present in data.")
         if self.cumulative_grouping and len(grouping_keys) == 0:
-            print("WARNING: No grouping keys provided. Ignoring `cumulative_grouping` argument.")
+            warnings.warn("WARNING: No grouping keys provided. Ignoring `cumulative_grouping` argument.")
             self.cumulative_grouping = False
         if self.cumulative_grouping and len(grouping_keys) != 3:
-            print("WARNING: Can only perform cumulative grouping for three-layer nesting. Ignoring cumulative command and grouping by unique combinations in the grouping_keys.")
+            warnings.warn("WARNING: Can only perform cumulative grouping for three-layer nesting. Ignoring cumulative command and grouping by unique combinations in the grouping_keys.")
             self.cumulative_grouping = False
             self.conversation_id_col = "conversation_num"
         if self.cumulative_grouping and self.conversation_id_col not in self.grouping_keys:
             raise ValueError("Conversation identifier for cumulative grouping must be one of the grouping keys.")
         if self.grouping_keys and not self.cumulative_grouping and self.conversation_id_col != "conversation_num":
-            print("WARNING: When grouping by the unique combination of a list of keys (`grouping_keys`), the conversation identifier must be auto-generated (`conversation_num`) rather than a user-provided column. Resetting conversation_id.")
+            warnings.warn("WARNING: When grouping by the unique combination of a list of keys (`grouping_keys`), the conversation identifier must be auto-generated (`conversation_num`) rather than a user-provided column. Resetting conversation_id.")
             self.conversation_id_col = "conversation_num"
         
         self.preprocess_chat_data()
 
         # set new identifier column for cumulative grouping.
         if self.cumulative_grouping and len(grouping_keys) == 3:
-            print("NOTE: User has requested cumulative grouping. Auto-generating the key `conversation_num` as the conversation identifier for cumulative conversations.")
+            warnings.warn("NOTE: User has requested cumulative grouping. Auto-generating the key `conversation_num` as the conversation identifier for cumulative conversations.")
             self.conversation_id_col = "conversation_num"
 
         # Input columns are the columns that come in the raw chat data
@@ -284,8 +280,33 @@ def __init__(
         - The inputted file name must be a valid, non-empty string
         - The inputted file name must not contain only special characters with no alphanumeric component
         """
+
+        # Use the output_file_base parameter to auto-generate paths (since we have a lot of assumptions in how the output path looks)
+        self.output_file_path_chat_level = output_file_path_chat_level
+        self.output_file_path_conv_level = output_file_path_conv_level
+        self.output_file_path_user_level = output_file_path_user_level
+
+        # Ensure output_file_base contains only alphanumeric characters and underscores
+        if(re.sub('[^A-Za-z0-9_]', '', output_file_base) != output_file_base):
+            print('here1')
+            output_file_base = re.sub('[^A-Za-z0-9_]', '', output_file_base)
+            warnings.warn("WARNING: Special characters detected in output_file_base. These characters have been automatically removed.")
+
+        if self.output_file_path_chat_level is None:
+            self.output_file_path_chat_level = "./" + output_file_base + "_chat_level.csv"
+        if self.output_file_path_conv_level is None:
+            self.output_file_path_conv_level = "./" + output_file_base + "_conv_level.csv"
+        if self.output_file_path_user_level is None:
+            self.output_file_path_user_level = "./" + output_file_base + "_user_level.csv"
+
+        # Basic error detection
+        if not bool(self.output_file_path_conv_level) or not bool(re.sub('[^A-Za-z0-9_]', '', self.output_file_path_conv_level)):
+            raise ValueError("ERROR: Improper conversation-level output file name detected.")
+        if not bool(self.output_file_path_user_level) or not bool(re.sub('[^A-Za-z0-9_]', '', self.output_file_path_user_level)):
+            raise ValueError("ERROR: Improper user (speaker)-level output file name detected.")
+
         # We assume that the base file name is the last item in the output path; we will use this to name the stored vectors.
-        if ('/' not in output_file_path_chat_level or 
+        if ('/' not in self.output_file_path_chat_level or 
             '/' not in self.output_file_path_conv_level or 
             '/' not in self.output_file_path_user_level):
             raise ValueError(
@@ -298,7 +319,7 @@ def __init__(
             )
 
         try:
-            base_file_name = output_file_path_chat_level.split("/")[-1]
+            base_file_name = self.output_file_path_chat_level.split("/")[-1]
         except:
             raise ValueError("ERROR: Improper chat-level output file name detected.") 
 
@@ -306,18 +327,18 @@ def __init__(
             raise ValueError("ERROR: Improper chat-level output file name detected.")
 
         try:
-            folder_type_name = output_file_path_chat_level.split("/")[-2]
+            folder_type_name = self.output_file_path_chat_level.split("/")[-2]
         except IndexError: # user didn't specify a folder, so we will have to append it for them
             folder_type_name = "turn" if self.turns else "chat"
-            output_file_path_chat_level = '/'.join(output_file_path_chat_level.split("/")[:-1]) + '/' + folder_type_name + '/' + base_file_name
+            self.output_file_path_chat_level = '/'.join(self.output_file_path_chat_level.split("/")[:-1]) + '/' + folder_type_name + '/' + base_file_name
 
         # We check whether the second to last item is a "folder type": either chat or turn.
         if folder_type_name not in ["chat", "turn"]: # user didn't specify the folder type, so we will append it for them
             folder_type_name = "turn" if self.turns else "chat"
-            output_file_path_chat_level = '/'.join(output_file_path_chat_level.split("/")[:-1]) + '/' + folder_type_name + '/' + base_file_name
+            self.output_file_path_chat_level = '/'.join(self.output_file_path_chat_level.split("/")[:-1]) + '/' + folder_type_name + '/' + base_file_name
 
         # Set file paths, ensuring correct subfolder type is added.
-        self.output_file_path_chat_level = re.sub(r'chat', r'turn', output_file_path_chat_level) if self.turns else output_file_path_chat_level
+        self.output_file_path_chat_level = re.sub(r'chat', r'turn', self.output_file_path_chat_level) if self.turns else self.output_file_path_chat_level
         if self.output_file_path_chat_level.split(".")[-1] != "csv": 
             self.output_file_path_chat_level = self.output_file_path_chat_level + ".csv"
         if not re.match(r"(.*\/|^)conv\/", self.output_file_path_conv_level):